Re:

linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* Re:
@ 2011-10-26 20:51 bfeely
  0 siblings, 0 replies; 414+ messages in thread
From: bfeely @ 2011-10-26 20:51 UTC (permalink / raw)
  To: lighth7015, linux-kernel, listserv, literature, lpulsifer

..Fulfill your life with only positive emotions due to it!  
http://www.cavexpert.com/m.friends.page.php?ahaid_hotmail=60b6

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2024-03-07  6:07 KR Kim
  2024-03-07  8:01 ` Miquel Raynal
  0 siblings, 1 reply; 414+ messages in thread
From: KR Kim @ 2024-03-07  6:07 UTC (permalink / raw)
  To: miquel.raynal, richard, vigneshr, mmkurbanov, ddrokosov, gch981213
  Cc: kr.kim, michael, broonie, mika.westerberg, acelan.kao,
	linux-kernel, linux-mtd, moh.sardi, changsub.shim

Feat: Add SkyHigh Memory Patch code

Add SPI Nand Patch code of SkyHigh Memory
- Add company dependent code with 'skyhigh.c'
- Insert into 'core.c' so that 'always ECC on'

commit 6061b97a830af8cb5fd0917e833e779451f9046a (HEAD -> master)
Author: KR Kim <kr.kim@skyhighmemory.com>
Date:   Thu Mar 7 13:24:11 2024 +0900

    SPI Nand Patch code of SkyHigh Memory

    Signed-off-by: KR Kim <kr.kim@skyhighmemory.com>

From 6061b97a830af8cb5fd0917e833e779451f9046a Mon Sep 17 00:00:00 2001
From: KR Kim <kr.kim@skyhighmemory.com>
Date: Thu, 7 Mar 2024 13:24:11 +0900
Subject: [PATCH] SPI Nand Patch code of SkyHigh Memory

---
 drivers/mtd/nand/spi/Makefile  |   2 +-
 drivers/mtd/nand/spi/core.c    |   7 +-
 drivers/mtd/nand/spi/skyhigh.c | 155 +++++++++++++++++++++++++++++++++
 include/linux/mtd/spinand.h    |   3 +
 4 files changed, 165 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 drivers/mtd/nand/spi/Makefile
 mode change 100644 => 100755 drivers/mtd/nand/spi/core.c
 create mode 100644 drivers/mtd/nand/spi/skyhigh.c
 mode change 100644 => 100755 include/linux/mtd/spinand.h

diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile
old mode 100644
new mode 100755
index 19cc77288ebb..1e61ab21893a
--- a/drivers/mtd/nand/spi/Makefile
+++ b/drivers/mtd/nand/spi/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 spinand-objs := core.o alliancememory.o ato.o esmt.o foresee.o gigadevice.o macronix.o
-spinand-objs += micron.o paragon.o toshiba.o winbond.o xtx.o
+spinand-objs += micron.o paragon.o skyhigh.o toshiba.o winbond.o xtx.o
 obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
old mode 100644
new mode 100755
index e0b6715e5dfe..e3f0a7544ba4
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -34,7 +34,7 @@ static int spinand_read_reg_op(struct spinand_device *spinand, u8 reg, u8 *val)
 	return 0;
 }
 
-static int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val)
+int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val)
 {
 	struct spi_mem_op op = SPINAND_SET_FEATURE_OP(reg,
 						      spinand->scratchbuf);
@@ -196,6 +196,10 @@ static int spinand_init_quad_enable(struct spinand_device *spinand)
 static int spinand_ecc_enable(struct spinand_device *spinand,
 			      bool enable)
 {
+	/* SHM : always ECC enable */
+	if (spinand->flags & SPINAND_ON_DIE_ECC_MANDATORY)
+		return 0;
+
 	return spinand_upd_cfg(spinand, CFG_ECC_ENABLE,
 			       enable ? CFG_ECC_ENABLE : 0);
 }
@@ -945,6 +949,7 @@ static const struct spinand_manufacturer *spinand_manufacturers[] = {
 	&macronix_spinand_manufacturer,
 	&micron_spinand_manufacturer,
 	&paragon_spinand_manufacturer,
+	&skyhigh_spinand_manufacturer,
 	&toshiba_spinand_manufacturer,
 	&winbond_spinand_manufacturer,
 	&xtx_spinand_manufacturer,
diff --git a/drivers/mtd/nand/spi/skyhigh.c b/drivers/mtd/nand/spi/skyhigh.c
new file mode 100644
index 000000000000..92e7572094ff
--- /dev/null
+++ b/drivers/mtd/nand/spi/skyhigh.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 SkyHigh Memory Limited
+ *
+ * Author: Takahiro Kuwano <takahiro.kuwano@infineon.com>
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mtd/spinand.h>
+
+#define SPINAND_MFR_SKYHIGH		0x01
+
+#define SKYHIGH_STATUS_ECC_1TO2_BITFLIPS	(1 << 4)
+#define SKYHIGH_STATUS_ECC_3TO6_BITFLIPS	(2 << 4)
+#define SKYHIGH_STATUS_ECC_UNCOR_ERROR  	(3 << 4)
+
+#define SKYHIGH_CONFIG_PROTECT_EN	BIT(1)
+
+static SPINAND_OP_VARIANTS(read_cache_variants,
+		SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 4, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 2, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
+
+static SPINAND_OP_VARIANTS(write_cache_variants,
+		SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
+		SPINAND_PROG_LOAD(true, 0, NULL, 0));
+
+static SPINAND_OP_VARIANTS(update_cache_variants,
+		SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
+		SPINAND_PROG_LOAD(false, 0, NULL, 0));
+
+static int skyhigh_spinand_ooblayout_ecc(struct mtd_info *mtd, int section,
+					 struct mtd_oob_region *region)
+{
+	if (section)
+		return -ERANGE;
+
+	/* SkyHigh's ecc parity is stored in the internal hidden area and is not needed for them. */
+	region->length = 0;
+	region->offset = mtd->oobsize;
+
+	return 0;
+}
+
+static int skyhigh_spinand_ooblayout_free(struct mtd_info *mtd, int section,
+					  struct mtd_oob_region *region)
+{
+	if (section)
+		return -ERANGE;
+
+	region->length = mtd->oobsize - 2;
+	region->offset = 2;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops skyhigh_spinand_ooblayout = {
+	.ecc = skyhigh_spinand_ooblayout_ecc,
+	.free = skyhigh_spinand_ooblayout_free,
+};
+
+static int skyhigh_spinand_ecc_get_status(struct spinand_device *spinand,
+				  u8 status)
+{
+	/* SHM
+	 * 00 : No bit-flip
+	 * 01 : 1-2 errors corrected
+	 * 10 : 3-6 errors corrected         
+	 * 11 : uncorrectable
+	 */
+
+	switch (status & STATUS_ECC_MASK) {
+	case STATUS_ECC_NO_BITFLIPS:
+		return 0;
+
+	case SKYHIGH_STATUS_ECC_1TO2_BITFLIPS:
+		return 2;
+
+ 	case SKYHIGH_STATUS_ECC_3TO6_BITFLIPS:
+		return 6; 
+
+ 	case SKYHIGH_STATUS_ECC_UNCOR_ERROR:
+		return -EBADMSG;;
+
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static const struct spinand_info skyhigh_spinand_table[] = {
+	SPINAND_INFO("S35ML01G301",
+		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x15),
+		     NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
+		     NAND_ECCREQ(6, 32),
+		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+					      &write_cache_variants,
+					      &update_cache_variants),
+		     SPINAND_ON_DIE_ECC_MANDATORY,
+		     SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
+		     		     skyhigh_spinand_ecc_get_status)),
+	SPINAND_INFO("S35ML01G300",
+		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x14),
+		     NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
+		     NAND_ECCREQ(6, 32),
+		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+					      &write_cache_variants,
+					      &update_cache_variants),
+		     SPINAND_ON_DIE_ECC_MANDATORY,
+		     SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
+		     		     skyhigh_spinand_ecc_get_status)),
+	SPINAND_INFO("S35ML02G300",
+		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x25),
+		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 2, 1, 1),
+		     NAND_ECCREQ(6, 32),
+		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+					      &write_cache_variants,
+					      &update_cache_variants),
+		     SPINAND_ON_DIE_ECC_MANDATORY,
+		     SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
+		     		     skyhigh_spinand_ecc_get_status)),
+	SPINAND_INFO("S35ML04G300",
+		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x35),
+		     NAND_MEMORG(1, 2048, 128, 64, 4096, 80, 2, 1, 1),
+		     NAND_ECCREQ(6, 32),
+		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+					      &write_cache_variants,
+					      &update_cache_variants),
+		     SPINAND_ON_DIE_ECC_MANDATORY,
+		     SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
+		     		     skyhigh_spinand_ecc_get_status)),
+};
+
+static int skyhigh_spinand_init(struct spinand_device *spinand)
+{
+	return spinand_write_reg_op(spinand, REG_BLOCK_LOCK,
+				    SKYHIGH_CONFIG_PROTECT_EN);
+}
+
+static const struct spinand_manufacturer_ops skyhigh_spinand_manuf_ops = {
+	.init = skyhigh_spinand_init,
+ };
+
+const struct spinand_manufacturer skyhigh_spinand_manufacturer = {
+	.id = SPINAND_MFR_SKYHIGH,
+	.name = "SkyHigh",
+	.chips = skyhigh_spinand_table,
+	.nchips = ARRAY_SIZE(skyhigh_spinand_table),
+	.ops = &skyhigh_spinand_manuf_ops,
+};
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
old mode 100644
new mode 100755
index badb4c1ac079..0e135076df24
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -268,6 +268,7 @@ extern const struct spinand_manufacturer gigadevice_spinand_manufacturer;
 extern const struct spinand_manufacturer macronix_spinand_manufacturer;
 extern const struct spinand_manufacturer micron_spinand_manufacturer;
 extern const struct spinand_manufacturer paragon_spinand_manufacturer;
+extern const struct spinand_manufacturer skyhigh_spinand_manufacturer;
 extern const struct spinand_manufacturer toshiba_spinand_manufacturer;
 extern const struct spinand_manufacturer winbond_spinand_manufacturer;
 extern const struct spinand_manufacturer xtx_spinand_manufacturer;
@@ -312,6 +313,7 @@ struct spinand_ecc_info {
 
 #define SPINAND_HAS_QE_BIT		BIT(0)
 #define SPINAND_HAS_CR_FEAT_BIT		BIT(1)
+#define SPINAND_ON_DIE_ECC_MANDATORY	BIT(2)	/* SHM */
 
 /**
  * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine structure
@@ -518,5 +520,6 @@ int spinand_match_and_init(struct spinand_device *spinand,
 
 int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val);
 int spinand_select_target(struct spinand_device *spinand, unsigned int target);
+int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val);
 
 #endif /* __LINUX_MTD_SPINAND_H */
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2024-03-07  6:07 KR Kim
@ 2024-03-07  8:01 ` Miquel Raynal
  2024-03-08  1:27   ` Re: Kyeongrho.Kim
       [not found]   ` <SE2P216MB210205B301549661575720CC833A2@SE2P216MB2102.KORP216.PROD.OUTLOOK.COM>
  0 siblings, 2 replies; 414+ messages in thread
From: Miquel Raynal @ 2024-03-07  8:01 UTC (permalink / raw)
  To: KR Kim
  Cc: richard, vigneshr, mmkurbanov, ddrokosov, gch981213, michael,
	broonie, mika.westerberg, acelan.kao, linux-kernel, linux-mtd,
	moh.sardi, changsub.shim

Hi,

kr.kim@skyhighmemory.com wrote on Thu,  7 Mar 2024 15:07:29 +0900:

> Feat: Add SkyHigh Memory Patch code
> 
> Add SPI Nand Patch code of SkyHigh Memory
> - Add company dependent code with 'skyhigh.c'
> - Insert into 'core.c' so that 'always ECC on'

Patch formatting is still messed up.

> commit 6061b97a830af8cb5fd0917e833e779451f9046a (HEAD -> master)
> Author: KR Kim <kr.kim@skyhighmemory.com>
> Date:   Thu Mar 7 13:24:11 2024 +0900
> 
>     SPI Nand Patch code of SkyHigh Memory
> 
>     Signed-off-by: KR Kim <kr.kim@skyhighmemory.com>
> 
> From 6061b97a830af8cb5fd0917e833e779451f9046a Mon Sep 17 00:00:00 2001
> From: KR Kim <kr.kim@skyhighmemory.com>
> Date: Thu, 7 Mar 2024 13:24:11 +0900
> Subject: [PATCH] SPI Nand Patch code of SkyHigh Memory
> 
> ---
>  drivers/mtd/nand/spi/Makefile  |   2 +-
>  drivers/mtd/nand/spi/core.c    |   7 +-
>  drivers/mtd/nand/spi/skyhigh.c | 155 +++++++++++++++++++++++++++++++++
>  include/linux/mtd/spinand.h    |   3 +
>  4 files changed, 165 insertions(+), 2 deletions(-)
>  mode change 100644 => 100755 drivers/mtd/nand/spi/Makefile
>  mode change 100644 => 100755 drivers/mtd/nand/spi/core.c
>  create mode 100644 drivers/mtd/nand/spi/skyhigh.c
>  mode change 100644 => 100755 include/linux/mtd/spinand.h
> 
> diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile
> old mode 100644
> new mode 100755
> index 19cc77288ebb..1e61ab21893a
> --- a/drivers/mtd/nand/spi/Makefile
> +++ b/drivers/mtd/nand/spi/Makefile
> @@ -1,4 +1,4 @@
>  # SPDX-License-Identifier: GPL-2.0
>  spinand-objs := core.o alliancememory.o ato.o esmt.o foresee.o gigadevice.o macronix.o
> -spinand-objs += micron.o paragon.o toshiba.o winbond.o xtx.o
> +spinand-objs += micron.o paragon.o skyhigh.o toshiba.o winbond.o xtx.o
>  obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
> diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
> old mode 100644
> new mode 100755
> index e0b6715e5dfe..e3f0a7544ba4
> --- a/drivers/mtd/nand/spi/core.c
> +++ b/drivers/mtd/nand/spi/core.c
> @@ -34,7 +34,7 @@ static int spinand_read_reg_op(struct spinand_device *spinand, u8 reg, u8 *val)
>  	return 0;
>  }
>  
> -static int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val)
> +int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val)

Please do this in a separate commit.

>  {
>  	struct spi_mem_op op = SPINAND_SET_FEATURE_OP(reg,
>  						      spinand->scratchbuf);
> @@ -196,6 +196,10 @@ static int spinand_init_quad_enable(struct spinand_device *spinand)
>  static int spinand_ecc_enable(struct spinand_device *spinand,
>  			      bool enable)
>  {
> +	/* SHM : always ECC enable */
> +	if (spinand->flags & SPINAND_ON_DIE_ECC_MANDATORY)
> +		return 0;

Silently always enabling ECC is not possible. If you cannot disable the
on-die engine, then:
- you should prevent any other engine type from being used
- you should error out if a raw access is requested
- these chips are broken, IMO

> +
>  	return spinand_upd_cfg(spinand, CFG_ECC_ENABLE,
>  			       enable ? CFG_ECC_ENABLE : 0);
>  }
> @@ -945,6 +949,7 @@ static const struct spinand_manufacturer *spinand_manufacturers[] = {
>  	&macronix_spinand_manufacturer,
>  	&micron_spinand_manufacturer,
>  	&paragon_spinand_manufacturer,
> +	&skyhigh_spinand_manufacturer,
>  	&toshiba_spinand_manufacturer,
>  	&winbond_spinand_manufacturer,
>  	&xtx_spinand_manufacturer,
> diff --git a/drivers/mtd/nand/spi/skyhigh.c b/drivers/mtd/nand/spi/skyhigh.c
> new file mode 100644
> index 000000000000..92e7572094ff
> --- /dev/null
> +++ b/drivers/mtd/nand/spi/skyhigh.c
> @@ -0,0 +1,155 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2022 SkyHigh Memory Limited
> + *
> + * Author: Takahiro Kuwano <takahiro.kuwano@infineon.com>
> + */
> +
> +#include <linux/device.h>
> +#include <linux/kernel.h>
> +#include <linux/mtd/spinand.h>
> +
> +#define SPINAND_MFR_SKYHIGH		0x01
> +
> +#define SKYHIGH_STATUS_ECC_1TO2_BITFLIPS	(1 << 4)
> +#define SKYHIGH_STATUS_ECC_3TO6_BITFLIPS	(2 << 4)
> +#define SKYHIGH_STATUS_ECC_UNCOR_ERROR  	(3 << 4)
> +
> +#define SKYHIGH_CONFIG_PROTECT_EN	BIT(1)
> +
> +static SPINAND_OP_VARIANTS(read_cache_variants,
> +		SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 4, NULL, 0),
> +		SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
> +		SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 2, NULL, 0),
> +		SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
> +		SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
> +		SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
> +
> +static SPINAND_OP_VARIANTS(write_cache_variants,
> +		SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
> +		SPINAND_PROG_LOAD(true, 0, NULL, 0));
> +
> +static SPINAND_OP_VARIANTS(update_cache_variants,
> +		SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
> +		SPINAND_PROG_LOAD(false, 0, NULL, 0));
> +
> +static int skyhigh_spinand_ooblayout_ecc(struct mtd_info *mtd, int section,
> +					 struct mtd_oob_region *region)
> +{
> +	if (section)
> +		return -ERANGE;
> +
> +	/* SkyHigh's ecc parity is stored in the internal hidden area and is not needed for them. */

		     ECC		     an

"needed" is wrong here. Just stop after "area"


> +	region->length = 0;
> +	region->offset = mtd->oobsize;
> +
> +	return 0;
> +}
> +
> +static int skyhigh_spinand_ooblayout_free(struct mtd_info *mtd, int section,
> +					  struct mtd_oob_region *region)
> +{
> +	if (section)
> +		return -ERANGE;
> +
> +	region->length = mtd->oobsize - 2;
> +	region->offset = 2;
> +
> +	return 0;
> +}
> +
> +static const struct mtd_ooblayout_ops skyhigh_spinand_ooblayout = {
> +	.ecc = skyhigh_spinand_ooblayout_ecc,
> +	.free = skyhigh_spinand_ooblayout_free,
> +};
> +
> +static int skyhigh_spinand_ecc_get_status(struct spinand_device *spinand,
> +				  u8 status)
> +{
> +	/* SHM
> +	 * 00 : No bit-flip
> +	 * 01 : 1-2 errors corrected
> +	 * 10 : 3-6 errors corrected         
> +	 * 11 : uncorrectable
> +	 */

Thanks for the comment but the switch case looks rather
straightforward, it is self-sufficient in this case.

> +
> +	switch (status & STATUS_ECC_MASK) {
> +	case STATUS_ECC_NO_BITFLIPS:
> +		return 0;
> +
> +	case SKYHIGH_STATUS_ECC_1TO2_BITFLIPS:
> +		return 2;
> +
> + 	case SKYHIGH_STATUS_ECC_3TO6_BITFLIPS:
> +		return 6; 
> +
> + 	case SKYHIGH_STATUS_ECC_UNCOR_ERROR:
> +		return -EBADMSG;;
> +
> +	default:
> +		break;

I guess you can directly call return -EINVAL here?

> +	}
> +
> +	return -EINVAL;
> +}
> +
> +static const struct spinand_info skyhigh_spinand_table[] = {
> +	SPINAND_INFO("S35ML01G301",
> +		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x15),
> +		     NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
> +		     NAND_ECCREQ(6, 32),
> +		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
> +					      &write_cache_variants,
> +					      &update_cache_variants),
> +		     SPINAND_ON_DIE_ECC_MANDATORY,
> +		     SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
> +		     		     skyhigh_spinand_ecc_get_status)),
> +	SPINAND_INFO("S35ML01G300",
> +		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x14),
> +		     NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
> +		     NAND_ECCREQ(6, 32),
> +		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
> +					      &write_cache_variants,
> +					      &update_cache_variants),
> +		     SPINAND_ON_DIE_ECC_MANDATORY,
> +		     SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
> +		     		     skyhigh_spinand_ecc_get_status)),
> +	SPINAND_INFO("S35ML02G300",
> +		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x25),
> +		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 2, 1, 1),
> +		     NAND_ECCREQ(6, 32),
> +		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
> +					      &write_cache_variants,
> +					      &update_cache_variants),
> +		     SPINAND_ON_DIE_ECC_MANDATORY,
> +		     SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
> +		     		     skyhigh_spinand_ecc_get_status)),
> +	SPINAND_INFO("S35ML04G300",
> +		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x35),
> +		     NAND_MEMORG(1, 2048, 128, 64, 4096, 80, 2, 1, 1),
> +		     NAND_ECCREQ(6, 32),
> +		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
> +					      &write_cache_variants,
> +					      &update_cache_variants),
> +		     SPINAND_ON_DIE_ECC_MANDATORY,
> +		     SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
> +		     		     skyhigh_spinand_ecc_get_status)),
> +};
> +
> +static int skyhigh_spinand_init(struct spinand_device *spinand)
> +{
> +	return spinand_write_reg_op(spinand, REG_BLOCK_LOCK,
> +				    SKYHIGH_CONFIG_PROTECT_EN);

Is this really relevant? Isn't there an API for the block lock
mechanism?

> +}
> +
> +static const struct spinand_manufacturer_ops skyhigh_spinand_manuf_ops = {
> +	.init = skyhigh_spinand_init,
> + };
> +
> +const struct spinand_manufacturer skyhigh_spinand_manufacturer = {
> +	.id = SPINAND_MFR_SKYHIGH,
> +	.name = "SkyHigh",
> +	.chips = skyhigh_spinand_table,
> +	.nchips = ARRAY_SIZE(skyhigh_spinand_table),
> +	.ops = &skyhigh_spinand_manuf_ops,
> +};
> diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
> old mode 100644
> new mode 100755
> index badb4c1ac079..0e135076df24
> --- a/include/linux/mtd/spinand.h
> +++ b/include/linux/mtd/spinand.h
> @@ -268,6 +268,7 @@ extern const struct spinand_manufacturer gigadevice_spinand_manufacturer;
>  extern const struct spinand_manufacturer macronix_spinand_manufacturer;
>  extern const struct spinand_manufacturer micron_spinand_manufacturer;
>  extern const struct spinand_manufacturer paragon_spinand_manufacturer;
> +extern const struct spinand_manufacturer skyhigh_spinand_manufacturer;
>  extern const struct spinand_manufacturer toshiba_spinand_manufacturer;
>  extern const struct spinand_manufacturer winbond_spinand_manufacturer;
>  extern const struct spinand_manufacturer xtx_spinand_manufacturer;
> @@ -312,6 +313,7 @@ struct spinand_ecc_info {
>  
>  #define SPINAND_HAS_QE_BIT		BIT(0)
>  #define SPINAND_HAS_CR_FEAT_BIT		BIT(1)
> +#define SPINAND_ON_DIE_ECC_MANDATORY	BIT(2)	/* SHM */

If we go this route, then "mandatory" is not relevant here, we shall
convey the fact that the on-die ECC engine cannot be disabled and as
mentioned above, there are other impacts.

>  
>  /**
>   * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine structure
> @@ -518,5 +520,6 @@ int spinand_match_and_init(struct spinand_device *spinand,
>  
>  int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val);
>  int spinand_select_target(struct spinand_device *spinand, unsigned int target);
> +int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val);
>  
>  #endif /* __LINUX_MTD_SPINAND_H */


Thanks,
Miquèl

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE: Re:
  2024-03-07  8:01 ` Miquel Raynal
@ 2024-03-08  1:27   ` Kyeongrho.Kim
       [not found]   ` <SE2P216MB210205B301549661575720CC833A2@SE2P216MB2102.KORP216.PROD.OUTLOOK.COM>
  1 sibling, 0 replies; 414+ messages in thread
From: Kyeongrho.Kim @ 2024-03-08  1:27 UTC (permalink / raw)
  To: Miquel Raynal
  Cc: richard, vigneshr, mmkurbanov, ddrokosov, gch981213, michael,
	broonie, mika.westerberg, acelan.kao, linux-kernel, linux-mtd,
	Mohamed Sardi, Changsub.Shim

Hi Miquel,
Thank you for your comment.
I tried to match the patch format, but it seems it is still not correct.
Can you send me a good sample for the patch format?
Thanks,
KR
-----Original Message-----
From: Miquel Raynal <miquel.raynal@bootlin.com> 
Sent: Thursday, March 7, 2024 5:01 PM
To: Kyeongrho.Kim <kr.kim@skyhighmemory.com>
Cc: richard@nod.at; vigneshr@ti.com; mmkurbanov@salutedevices.com; ddrokosov@sberdevices.ru; gch981213@gmail.com; michael@walle.cc; broonie@kernel.org; mika.westerberg@linux.intel.com; acelan.kao@canonical.com; linux-kernel@vger.kernel.org; linux-mtd@lists.infradead.org; Mohamed Sardi <moh.sardi@skyhighmemory.com>; Changsub.Shim <changsub.shim@skyhighmemory.com>
Subject: Re:

Hi,

kr.kim@skyhighmemory.com wrote on Thu,  7 Mar 2024 15:07:29 +0900:

> Feat: Add SkyHigh Memory Patch code
> 
> Add SPI Nand Patch code of SkyHigh Memory
> - Add company dependent code with 'skyhigh.c'
> - Insert into 'core.c' so that 'always ECC on'

Patch formatting is still messed up.

> commit 6061b97a830af8cb5fd0917e833e779451f9046a (HEAD -> master)
> Author: KR Kim <kr.kim@skyhighmemory.com>
> Date:   Thu Mar 7 13:24:11 2024 +0900
> 
>     SPI Nand Patch code of SkyHigh Memory
> 
>     Signed-off-by: KR Kim <kr.kim@skyhighmemory.com>
> 
> From 6061b97a830af8cb5fd0917e833e779451f9046a Mon Sep 17 00:00:00 2001
> From: KR Kim <kr.kim@skyhighmemory.com>
> Date: Thu, 7 Mar 2024 13:24:11 +0900
> Subject: [PATCH] SPI Nand Patch code of SkyHigh Memory
> 
> ---
>  drivers/mtd/nand/spi/Makefile  |   2 +-
>  drivers/mtd/nand/spi/core.c    |   7 +-
>  drivers/mtd/nand/spi/skyhigh.c | 155 +++++++++++++++++++++++++++++++++
>  include/linux/mtd/spinand.h    |   3 +
>  4 files changed, 165 insertions(+), 2 deletions(-)  mode change 
> 100644 => 100755 drivers/mtd/nand/spi/Makefile  mode change 100644 => 
> 100755 drivers/mtd/nand/spi/core.c  create mode 100644 
> drivers/mtd/nand/spi/skyhigh.c  mode change 100644 => 100755 
> include/linux/mtd/spinand.h
> 
> diff --git a/drivers/mtd/nand/spi/Makefile 
> b/drivers/mtd/nand/spi/Makefile old mode 100644 new mode 100755 index 
> 19cc77288ebb..1e61ab21893a
> --- a/drivers/mtd/nand/spi/Makefile
> +++ b/drivers/mtd/nand/spi/Makefile
> @@ -1,4 +1,4 @@
>  # SPDX-License-Identifier: GPL-2.0
>  spinand-objs := core.o alliancememory.o ato.o esmt.o foresee.o 
> gigadevice.o macronix.o -spinand-objs += micron.o paragon.o toshiba.o 
> winbond.o xtx.o
> +spinand-objs += micron.o paragon.o skyhigh.o toshiba.o winbond.o 
> +xtx.o
>  obj-$(CONFIG_MTD_SPI_NAND) += spinand.o diff --git 
> a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c old mode 
> 100644 new mode 100755 index e0b6715e5dfe..e3f0a7544ba4
> --- a/drivers/mtd/nand/spi/core.c
> +++ b/drivers/mtd/nand/spi/core.c
> @@ -34,7 +34,7 @@ static int spinand_read_reg_op(struct spinand_device *spinand, u8 reg, u8 *val)
>  	return 0;
>  }
>  
> -static int spinand_write_reg_op(struct spinand_device *spinand, u8 
> reg, u8 val)
> +int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 
> +val)

Please do this in a separate commit.

>  {
>  	struct spi_mem_op op = SPINAND_SET_FEATURE_OP(reg,
>  						      spinand->scratchbuf);
> @@ -196,6 +196,10 @@ static int spinand_init_quad_enable(struct 
> spinand_device *spinand)  static int spinand_ecc_enable(struct spinand_device *spinand,
>  			      bool enable)
>  {
> +	/* SHM : always ECC enable */
> +	if (spinand->flags & SPINAND_ON_DIE_ECC_MANDATORY)
> +		return 0;

Silently always enabling ECC is not possible. If you cannot disable the on-die engine, then:
- you should prevent any other engine type from being used
- you should error out if a raw access is requested
- these chips are broken, IMO

> +
>  	return spinand_upd_cfg(spinand, CFG_ECC_ENABLE,
>  			       enable ? CFG_ECC_ENABLE : 0);  } @@ -945,6 +949,7 @@ static 
> const struct spinand_manufacturer *spinand_manufacturers[] = {
>  	&macronix_spinand_manufacturer,
>  	&micron_spinand_manufacturer,
>  	&paragon_spinand_manufacturer,
> +	&skyhigh_spinand_manufacturer,
>  	&toshiba_spinand_manufacturer,
>  	&winbond_spinand_manufacturer,
>  	&xtx_spinand_manufacturer,
> diff --git a/drivers/mtd/nand/spi/skyhigh.c 
> b/drivers/mtd/nand/spi/skyhigh.c new file mode 100644 index 
> 000000000000..92e7572094ff
> --- /dev/null
> +++ b/drivers/mtd/nand/spi/skyhigh.c
> @@ -0,0 +1,155 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2022 SkyHigh Memory Limited
> + *
> + * Author: Takahiro Kuwano <takahiro.kuwano@infineon.com>  */
> +
> +#include <linux/device.h>
> +#include <linux/kernel.h>
> +#include <linux/mtd/spinand.h>
> +
> +#define SPINAND_MFR_SKYHIGH		0x01
> +
> +#define SKYHIGH_STATUS_ECC_1TO2_BITFLIPS	(1 << 4)
> +#define SKYHIGH_STATUS_ECC_3TO6_BITFLIPS	(2 << 4)
> +#define SKYHIGH_STATUS_ECC_UNCOR_ERROR  	(3 << 4)
> +
> +#define SKYHIGH_CONFIG_PROTECT_EN	BIT(1)
> +
> +static SPINAND_OP_VARIANTS(read_cache_variants,
> +		SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 4, NULL, 0),
> +		SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
> +		SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 2, NULL, 0),
> +		SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
> +		SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
> +		SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
> +
> +static SPINAND_OP_VARIANTS(write_cache_variants,
> +		SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
> +		SPINAND_PROG_LOAD(true, 0, NULL, 0));
> +
> +static SPINAND_OP_VARIANTS(update_cache_variants,
> +		SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
> +		SPINAND_PROG_LOAD(false, 0, NULL, 0));
> +
> +static int skyhigh_spinand_ooblayout_ecc(struct mtd_info *mtd, int section,
> +					 struct mtd_oob_region *region)
> +{
> +	if (section)
> +		return -ERANGE;
> +
> +	/* SkyHigh's ecc parity is stored in the internal hidden area and is 
> +not needed for them. */

		     ECC		     an

"needed" is wrong here. Just stop after "area"


> +	region->length = 0;
> +	region->offset = mtd->oobsize;
> +
> +	return 0;
> +}
> +
> +static int skyhigh_spinand_ooblayout_free(struct mtd_info *mtd, int section,
> +					  struct mtd_oob_region *region) {
> +	if (section)
> +		return -ERANGE;
> +
> +	region->length = mtd->oobsize - 2;
> +	region->offset = 2;
> +
> +	return 0;
> +}
> +
> +static const struct mtd_ooblayout_ops skyhigh_spinand_ooblayout = {
> +	.ecc = skyhigh_spinand_ooblayout_ecc,
> +	.free = skyhigh_spinand_ooblayout_free, };
> +
> +static int skyhigh_spinand_ecc_get_status(struct spinand_device *spinand,
> +				  u8 status)
> +{
> +	/* SHM
> +	 * 00 : No bit-flip
> +	 * 01 : 1-2 errors corrected
> +	 * 10 : 3-6 errors corrected         
> +	 * 11 : uncorrectable
> +	 */

Thanks for the comment but the switch case looks rather straightforward, it is self-sufficient in this case.

> +
> +	switch (status & STATUS_ECC_MASK) {
> +	case STATUS_ECC_NO_BITFLIPS:
> +		return 0;
> +
> +	case SKYHIGH_STATUS_ECC_1TO2_BITFLIPS:
> +		return 2;
> +
> + 	case SKYHIGH_STATUS_ECC_3TO6_BITFLIPS:
> +		return 6;
> +
> + 	case SKYHIGH_STATUS_ECC_UNCOR_ERROR:
> +		return -EBADMSG;;
> +
> +	default:
> +		break;

I guess you can directly call return -EINVAL here?

> +	}
> +
> +	return -EINVAL;
> +}
> +
> +static const struct spinand_info skyhigh_spinand_table[] = {
> +	SPINAND_INFO("S35ML01G301",
> +		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x15),
> +		     NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
> +		     NAND_ECCREQ(6, 32),
> +		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
> +					      &write_cache_variants,
> +					      &update_cache_variants),
> +		     SPINAND_ON_DIE_ECC_MANDATORY,
> +		     SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
> +		     		     skyhigh_spinand_ecc_get_status)),
> +	SPINAND_INFO("S35ML01G300",
> +		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x14),
> +		     NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
> +		     NAND_ECCREQ(6, 32),
> +		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
> +					      &write_cache_variants,
> +					      &update_cache_variants),
> +		     SPINAND_ON_DIE_ECC_MANDATORY,
> +		     SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
> +		     		     skyhigh_spinand_ecc_get_status)),
> +	SPINAND_INFO("S35ML02G300",
> +		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x25),
> +		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 2, 1, 1),
> +		     NAND_ECCREQ(6, 32),
> +		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
> +					      &write_cache_variants,
> +					      &update_cache_variants),
> +		     SPINAND_ON_DIE_ECC_MANDATORY,
> +		     SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
> +		     		     skyhigh_spinand_ecc_get_status)),
> +	SPINAND_INFO("S35ML04G300",
> +		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x35),
> +		     NAND_MEMORG(1, 2048, 128, 64, 4096, 80, 2, 1, 1),
> +		     NAND_ECCREQ(6, 32),
> +		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
> +					      &write_cache_variants,
> +					      &update_cache_variants),
> +		     SPINAND_ON_DIE_ECC_MANDATORY,
> +		     SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
> +		     		     skyhigh_spinand_ecc_get_status)), };
> +
> +static int skyhigh_spinand_init(struct spinand_device *spinand) {
> +	return spinand_write_reg_op(spinand, REG_BLOCK_LOCK,
> +				    SKYHIGH_CONFIG_PROTECT_EN);

Is this really relevant? Isn't there an API for the block lock mechanism?

> +}
> +
> +static const struct spinand_manufacturer_ops skyhigh_spinand_manuf_ops = {
> +	.init = skyhigh_spinand_init,
> + };
> +
> +const struct spinand_manufacturer skyhigh_spinand_manufacturer = {
> +	.id = SPINAND_MFR_SKYHIGH,
> +	.name = "SkyHigh",
> +	.chips = skyhigh_spinand_table,
> +	.nchips = ARRAY_SIZE(skyhigh_spinand_table),
> +	.ops = &skyhigh_spinand_manuf_ops,
> +};
> diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h 
> old mode 100644 new mode 100755 index badb4c1ac079..0e135076df24
> --- a/include/linux/mtd/spinand.h
> +++ b/include/linux/mtd/spinand.h
> @@ -268,6 +268,7 @@ extern const struct spinand_manufacturer 
> gigadevice_spinand_manufacturer;  extern const struct 
> spinand_manufacturer macronix_spinand_manufacturer;  extern const 
> struct spinand_manufacturer micron_spinand_manufacturer;  extern const 
> struct spinand_manufacturer paragon_spinand_manufacturer;
> +extern const struct spinand_manufacturer 
> +skyhigh_spinand_manufacturer;
>  extern const struct spinand_manufacturer 
> toshiba_spinand_manufacturer;  extern const struct 
> spinand_manufacturer winbond_spinand_manufacturer;  extern const 
> struct spinand_manufacturer xtx_spinand_manufacturer; @@ -312,6 +313,7 
> @@ struct spinand_ecc_info {
>  
>  #define SPINAND_HAS_QE_BIT		BIT(0)
>  #define SPINAND_HAS_CR_FEAT_BIT		BIT(1)
> +#define SPINAND_ON_DIE_ECC_MANDATORY	BIT(2)	/* SHM */

If we go this route, then "mandatory" is not relevant here, we shall convey the fact that the on-die ECC engine cannot be disabled and as mentioned above, there are other impacts.

>  
>  /**
>   * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine 
> structure @@ -518,5 +520,6 @@ int spinand_match_and_init(struct 
> spinand_device *spinand,
>  
>  int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val);  
> int spinand_select_target(struct spinand_device *spinand, unsigned int 
> target);
> +int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 
> +val);
>  
>  #endif /* __LINUX_MTD_SPINAND_H */


Thanks,
Miquèl

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <SE2P216MB210205B301549661575720CC833A2@SE2P216MB2102.KORP216.PROD.OUTLOOK.COM>]

* RE: Re:
       [not found]   ` <SE2P216MB210205B301549661575720CC833A2@SE2P216MB2102.KORP216.PROD.OUTLOOK.COM>
@ 2024-03-29  4:41     ` Kyeongrho.Kim
  0 siblings, 0 replies; 414+ messages in thread
From: Kyeongrho.Kim @ 2024-03-29  4:41 UTC (permalink / raw)
  To: Miquel Raynal
  Cc: richard, vigneshr, mmkurbanov, ddrokosov, gch981213, michael,
	broonie, mika.westerberg, acelan.kao, linux-kernel, linux-mtd,
	Mohamed Sardi, Changsub.Shim

(I am sending this mail again as plain text, not HTML.)

Dear Miquel,
Please see my replies inline in the email below.
Please let me know if you have any other comments.
Thanks,
KR

-----Original Message-----
From: Miquel Raynal <mailto:miquel.raynal@bootlin.com> 
Sent: Thursday, March 7, 2024 5:01 PM
To: Kyeongrho.Kim <mailto:kr.kim@skyhighmemory.com>
Cc: mailto:richard@nod.at; mailto:vigneshr@ti.com; mailto:mmkurbanov@salutedevices.com; mailto:ddrokosov@sberdevices.ru; mailto:gch981213@gmail.com; mailto:michael@walle.cc; mailto:broonie@kernel.org; mailto:mika.westerberg@linux.intel.com; mailto:acelan.kao@canonical.com; mailto:linux-kernel@vger.kernel.org; mailto:linux-mtd@lists.infradead.org; Mohamed Sardi <mailto:moh.sardi@skyhighmemory.com>; Changsub.Shim <mailto:changsub.shim@skyhighmemory.com>
Subject: Re:

Hi,

mailto:kr.kim@skyhighmemory.com wrote on Thu,  7 Mar 2024 15:07:29 +0900:

> Feat: Add SkyHigh Memory Patch code
> 
> Add SPI Nand Patch code of SkyHigh Memory
> - Add company dependent code with 'skyhigh.c'
> - Insert into 'core.c' so that 'always ECC on'

Patch formatting is still messed up.

> commit 6061b97a830af8cb5fd0917e833e779451f9046a (HEAD -> master)
> Author: KR Kim <mailto:kr.kim@skyhighmemory.com>
> Date:   Thu Mar 7 13:24:11 2024 +0900
> 
>     SPI Nand Patch code of SkyHigh Momory
> 
>     Signed-off-by: KR Kim <mailto:kr.kim@skyhighmemory.com>
> 
> From 6061b97a830af8cb5fd0917e833e779451f9046a Mon Sep 17 00:00:00 2001
> From: KR Kim <mailto:kr.kim@skyhighmemory.com>
> Date: Thu, 7 Mar 2024 13:24:11 +0900
> Subject: [PATCH] SPI Nand Patch code of SkyHigh Memory
> 
> ---
>  drivers/mtd/nand/spi/Makefile  |   2 +-
>  drivers/mtd/nand/spi/core.c    |   7 +-
>  drivers/mtd/nand/spi/skyhigh.c | 155 +++++++++++++++++++++++++++++++++
>  include/linux/mtd/spinand.h    |   3 +
>  4 files changed, 165 insertions(+), 2 deletions(-)  mode change 
> 100644 => 100755 drivers/mtd/nand/spi/Makefile  mode change 100644 => 
> 100755 drivers/mtd/nand/spi/core.c  create mode 100644 
> drivers/mtd/nand/spi/skyhigh.c  mode change 100644 => 100755 
> include/linux/mtd/spinand.h
> 
> diff --git a/drivers/mtd/nand/spi/Makefile 
> b/drivers/mtd/nand/spi/Makefile old mode 100644 new mode 100755 index 
> 19cc77288ebb..1e61ab21893a
> --- a/drivers/mtd/nand/spi/Makefile
> +++ b/drivers/mtd/nand/spi/Makefile
> @@ -1,4 +1,4 @@
>  # SPDX-License-Identifier: GPL-2.0
>  spinand-objs := core.o alliancememory.o ato.o esmt.o foresee.o 
> gigadevice.o macronix.o -spinand-objs += micron.o paragon.o toshiba.o 
> winbond.o xtx.o
> +spinand-objs += micron.o paragon.o skyhigh.o toshiba.o winbond.o 
> +xtx.o
>  obj-$(CONFIG_MTD_SPI_NAND) += spinand.o diff --git 
> a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c old mode 
> 100644 new mode 100755 index e0b6715e5dfe..e3f0a7544ba4
> --- a/drivers/mtd/nand/spi/core.c
> +++ b/drivers/mtd/nand/spi/core.c
> @@ -34,7 +34,7 @@ static int spinand_read_reg_op(struct spinand_device *spinand, u8 reg, u8 *val)
>     return 0;
>  }
>  
> -static int spinand_write_reg_op(struct spinand_device *spinand, u8 
> reg, u8 val)
> +int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 
> +val)

Please do this in a separate commit.
[SHM] May I know why this needs to be done in a separate commit?
Please elaborate on the reason.
>  {
>     struct spi_mem_op op = SPINAND_SET_FEATURE_OP(reg,
>                                         spinand->scratchbuf);
> @@ -196,6 +196,10 @@ static int spinand_init_quad_enable(struct 
> spinand_device *spinand)  static int spinand_ecc_enable(struct spinand_device *spinand,
>                       bool enable)
>  {
> +   /* SHM : always ECC enable */
> +   if (spinand->flags & SPINAND_ON_DIE_ECC_MANDATORY)
> +         return 0;

Silently always enabling ECC is not possible. If you cannot disable the on-die engine, then:
- you should prevent any other engine type to be used
- you should error out if a raw access is requested
- these chips are broken, IMO
[SHM] I understand your concern.
We have already reviewed the 'always ECC on' behavior from many aspects to see whether it causes any problems, and confirmed that it does not.

> +
>     return spinand_upd_cfg(spinand, CFG_ECC_ENABLE,
>                        enable ? CFG_ECC_ENABLE : 0);  } @@ -945,6 +949,7 @@ static 
> const struct spinand_manufacturer *spinand_manufacturers[] = {
>     &macronix_spinand_manufacturer,
>     &micron_spinand_manufacturer,
>     &paragon_spinand_manufacturer,
> +   &skyhigh_spinand_manufacturer,
>     &toshiba_spinand_manufacturer,
>     &winbond_spinand_manufacturer,
>     &xtx_spinand_manufacturer,
> diff --git a/drivers/mtd/nand/spi/skyhigh.c 
> b/drivers/mtd/nand/spi/skyhigh.c new file mode 100644 index 
> 000000000000..92e7572094ff
> --- /dev/null
> +++ b/drivers/mtd/nand/spi/skyhigh.c
> @@ -0,0 +1,155 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2022 SkyHigh Memory Limited
> + *
> + * Author: Takahiro Kuwano <mailto:takahiro.kuwano@infineon.com>  */
> +
> +#include <linux/device.h>
> +#include <linux/kernel.h>
> +#include <linux/mtd/spinand.h>
> +
> +#define SPINAND_MFR_SKYHIGH      0x01
> +
> +#define SKYHIGH_STATUS_ECC_1TO2_BITFLIPS     (1 << 4)
> +#define SKYHIGH_STATUS_ECC_3TO6_BITFLIPS     (2 << 4)
> +#define SKYHIGH_STATUS_ECC_UNCOR_ERROR        (3 << 4)
> +
> +#define SKYHIGH_CONFIG_PROTECT_EN BIT(1)
> +
> +static SPINAND_OP_VARIANTS(read_cache_variants,
> +         SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 4, NULL, 0),
> +         SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
> +         SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 2, NULL, 0),
> +         SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
> +         SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
> +         SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
> +
> +static SPINAND_OP_VARIANTS(write_cache_variants,
> +         SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
> +         SPINAND_PROG_LOAD(true, 0, NULL, 0));
> +
> +static SPINAND_OP_VARIANTS(update_cache_variants,
> +         SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
> +         SPINAND_PROG_LOAD(false, 0, NULL, 0));
> +
> +static int skyhigh_spinand_ooblayout_ecc(struct mtd_info *mtd, int section,
> +                           struct mtd_oob_region *region)
> +{
> +   if (section)
> +         return -ERANGE;
> +
> +   /* SkyHigh's ecc parity is stored in the internal hidden area and is 
> +not needed for them. */

                 ECC                an

"needed" is wrong here. Just stop after "area"


> +   region->length = 0;
> +   region->offset = mtd->oobsize;
> +
> +   return 0;
> +}
> +
> +static int skyhigh_spinand_ooblayout_free(struct mtd_info *mtd, int section,
> +                             struct mtd_oob_region *region) {
> +   if (section)
> +         return -ERANGE;
> +
> +   region->length = mtd->oobsize - 2;
> +   region->offset = 2;
> +
> +   return 0;
> +}
> +
> +static const struct mtd_ooblayout_ops skyhigh_spinand_ooblayout = {
> +   .ecc = skyhigh_spinand_ooblayout_ecc,
> +   .free = skyhigh_spinand_ooblayout_free, };
> +
> +static int skyhigh_spinand_ecc_get_status(struct spinand_device *spinand,
> +                       u8 status)
> +{
> +   /* SHM
> +   * 00 : No bit-flip
> +   * 01 : 1-2 errors corrected
> +   * 10 : 3-6 errors corrected         
> +   * 11 : uncorrectable
> +   */

Thanks for the comment but the switch case looks rather straightforward, it is self-sufficient in this case.

> +
> +   switch (status & STATUS_ECC_MASK) {
> +   case STATUS_ECC_NO_BITFLIPS:
> +         return 0;
> +
> +   case SKYHIGH_STATUS_ECC_1TO2_BITFLIPS:
> +         return 2;
> +
> +   case SKYHIGH_STATUS_ECC_3TO6_BITFLIPS:
> +         return 6;
> +
> +   case SKYHIGH_STATUS_ECC_UNCOR_ERROR:
> +         return -EBADMSG;;
> +
> +   default:
> +         break;

I guess you can directly return -EINVAL here?

> +   }
> +
> +   return -EINVAL;
> +}
> +
> +static const struct spinand_info skyhigh_spinand_table[] = {
> +   SPINAND_INFO("S35ML01G301",
> +              SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x15),
> +              NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
> +              NAND_ECCREQ(6, 32),
> +              SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
> +                                 &write_cache_variants,
> +                                 &update_cache_variants),
> +              SPINAND_ON_DIE_ECC_MANDATORY,
> +              SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
> +                           skyhigh_spinand_ecc_get_status)),
> +   SPINAND_INFO("S35ML01G300",
> +              SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x14),
> +              NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
> +              NAND_ECCREQ(6, 32),
> +              SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
> +                                 &write_cache_variants,
> +                                 &update_cache_variants),
> +              SPINAND_ON_DIE_ECC_MANDATORY,
> +              SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
> +                           skyhigh_spinand_ecc_get_status)),
> +   SPINAND_INFO("S35ML02G300",
> +              SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x25),
> +              NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 2, 1, 1),
> +              NAND_ECCREQ(6, 32),
> +              SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
> +                                 &write_cache_variants,
> +                                 &update_cache_variants),
> +              SPINAND_ON_DIE_ECC_MANDATORY,
> +              SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
> +                           skyhigh_spinand_ecc_get_status)),
> +   SPINAND_INFO("S35ML04G300",
> +              SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x35),
> +              NAND_MEMORG(1, 2048, 128, 64, 4096, 80, 2, 1, 1),
> +              NAND_ECCREQ(6, 32),
> +              SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
> +                                 &write_cache_variants,
> +                                 &update_cache_variants),
> +              SPINAND_ON_DIE_ECC_MANDATORY,
> +              SPINAND_ECCINFO(&skyhigh_spinand_ooblayout,
> +                           skyhigh_spinand_ecc_get_status)), };
> +
> +static int skyhigh_spinand_init(struct spinand_device *spinand) {
> +   return spinand_write_reg_op(spinand, REG_BLOCK_LOCK,
> +                         SKYHIGH_CONFIG_PROTECT_EN);

Is this really relevant? Isn't there an API for the block lock mechanism?
[SHM] On SHM devices, 'Config Protect Enable' must be set first in order to unlock.
I changed the code to use the 'spinand_lock_block' function instead of the 'spinand_write_reg_op' function.

> +}
> +
> +static const struct spinand_manufacturer_ops skyhigh_spinand_manuf_ops = {
> +   .init = skyhigh_spinand_init,
> + };
> +
> +const struct spinand_manufacturer skyhigh_spinand_manufacturer = {
> +   .id = SPINAND_MFR_SKYHIGH,
> +   .name = "SkyHigh",
> +   .chips = skyhigh_spinand_table,
> +   .nchips = ARRAY_SIZE(skyhigh_spinand_table),
> +   .ops = &skyhigh_spinand_manuf_ops,
> +};
> diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h 
> old mode 100644 new mode 100755 index badb4c1ac079..0e135076df24
> --- a/include/linux/mtd/spinand.h
> +++ b/include/linux/mtd/spinand.h
> @@ -268,6 +268,7 @@ extern const struct spinand_manufacturer 
> gigadevice_spinand_manufacturer;  extern const struct 
> spinand_manufacturer macronix_spinand_manufacturer;  extern const 
> struct spinand_manufacturer micron_spinand_manufacturer;  extern const 
> struct spinand_manufacturer paragon_spinand_manufacturer;
> +extern const struct spinand_manufacturer 
> +skyhigh_spinand_manufacturer;
>  extern const struct spinand_manufacturer 
> toshiba_spinand_manufacturer;  extern const struct 
> spinand_manufacturer winbond_spinand_manufacturer;  extern const 
> struct spinand_manufacturer xtx_spinand_manufacturer; @@ -312,6 +313,7 
> @@ struct spinand_ecc_info {
>  
>  #define SPINAND_HAS_QE_BIT        BIT(0)
>  #define SPINAND_HAS_CR_FEAT_BIT         BIT(1)
> +#define SPINAND_ON_DIE_ECC_MANDATORY   BIT(2) /* SHM */

If we go this route, then "mandatory" is not relevant here, we shall convey the fact that the on-die ECC engine cannot be disabled and as mentioned above, there are other impacts.
[SHM] Please elaborate more specifically on what I should do.
>  
>  /**
>   * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine 
> structure @@ -518,5 +520,6 @@ int spinand_match_and_init(struct 
> spinand_device *spinand,
>  
>  int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val);  
> int spinand_select_target(struct spinand_device *spinand, unsigned int 
> target);
> +int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 
> +val);
>  
>  #endif /* __LINUX_MTD_SPINAND_H */


Thanks,
Miquèl

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 2/3] net: microchip_t1s: add support for LAN867x Rev.C1
@ 2023-11-27 13:37 Andrew Lunn
  2023-12-05 10:20 ` Félix Piédallu
  0 siblings, 1 reply; 414+ messages in thread
From: Andrew Lunn @ 2023-11-27 13:37 UTC (permalink / raw)
  To: Ramón N.Rodriguez
  Cc: Heiner Kallweit, Russell King, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, netdev, linux-kernel

>  #define PHY_ID_LAN867X_REVB1 0x0007C162
> +#define PHY_ID_LAN867X_REVC1 0x0007C164

So there is a gap in the revisions. Maybe a B2 exists?

> +static int lan867x_revc1_read_fixup_value(struct phy_device *phydev, u16 addr)
> +{
> +	int regval;
> +	/* The AN pretty much just states 'trust us' regarding these magic vals */
> +	const u16 magic_or = 0xE0;
> +	const u16 magic_reg_mask = 0x1F;
> +	const u16 magic_check_mask = 0x10;

Reverse Christmas tree please. Longest first, shortest last.

> +	regval = lan865x_revb0_indirect_read(phydev, addr);
> +	if (regval < 0)
> +		return regval;
> +
> +	regval &= magic_reg_mask;
> +
> +	return (regval & magic_check_mask) ? regval | magic_or : regval;
> +}
> +
> +static int lan867x_revc1_config_init(struct phy_device *phydev)
> +{
> +	int err;
> +	int regval;
> +	u16 override0;
> +	u16 override1;
> +	const u16 override_addr0 = 0x4;
> +	const u16 override_addr1 = 0x8;
> +	const u8 index_to_override0 = 2;
> +	const u8 index_to_override1 = 3;

Same here.

> +
> +	err = lan867x_wait_for_reset_complete(phydev);
> +	if (err)
> +		return err;
> +
> +	/* The application note specifies a super convenient process
> +	 * where 2 of the fixup regs needs a write with a value that is
> +	 * a modified result of another reg read.
> +	 * Enjoy the magic show.
> +	 */

I really do hope that by revision D1 they get the firmware sorted out
so none of this undocumented magic is needed.

	Andrew

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
  2023-11-27 13:37 [PATCH 2/3] net: microchip_t1s: add support for LAN867x Rev.C1 Andrew Lunn
@ 2023-12-05 10:20 ` Félix Piédallu
  2023-12-06 20:58   ` Ramón Nordin Rodriguez
  0 siblings, 1 reply; 414+ messages in thread
From: Félix Piédallu @ 2023-12-05 10:20 UTC (permalink / raw)
  To: andrew
  Cc: davem, edumazet, hkallweit1, kuba, linux-kernel, linux, netdev,
	pabeni, ramon.nordin.rodriguez

Subject: Re: [PATCH 2/3] net: microchip_t1s: add support for LAN867x Rev.C1

Hi, 

> So there is a gap in the revisions. Maybe a B2 exists?

Actually, probably not. Some search gives this datasheet:

https://ww1.microchip.com/downloads/aemDocuments/documents/AIS/ProductDocuments/DataSheets/LAN8670-1-2-Data-Sheet-60001573.pdf

And page 2 (table 1) shows only revisions A0 (rev0), B1 (rev2), C1 (rev4).
Not sure why only even revision numbers are released.

Page 193 (table 10-1) also shows only B1 and C1. So you can be confident that only those exist.

@Ramón, thank you for your work on this driver!

Félix Piédallu

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-12-05 10:20 ` Félix Piédallu
@ 2023-12-06 20:58   ` Ramón Nordin Rodriguez
  0 siblings, 0 replies; 414+ messages in thread
From: Ramón Nordin Rodriguez @ 2023-12-06 20:58 UTC (permalink / raw)
  To: Félix Piédallu
  Cc: andrew, davem, edumazet, hkallweit1, kuba, linux-kernel, linux,
	netdev, pabeni

> > So there is a gap in the revisions. Maybe a B2 exists?
> 
> Actually, probably not. Some search gives this datasheet:
> 
> https://ww1.microchip.com/downloads/aemDocuments/documents/AIS/ProductDocuments/DataSheets/LAN8670-1-2-Data-Sheet-60001573.pdf
> 
> And page 2 (table 1) shows only revisions A0 (rev0), B1, (rev2), C1 (rev4).
> Not sure about why only even revision numbers are released ?
> 
> Page 193 (table 10-1) also shows only B1 and C1. So you can be confident that only those exist.
> 

Thanks for clearing that up!

> @Ramón, thank you for your work on this driver!

Much appreciated
R

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2023-11-11  4:21 Andrew Worsley
  2023-11-11  8:22 ` Javier Martinez Canillas
  0 siblings, 1 reply; 414+ messages in thread
From: Andrew Worsley @ 2023-11-11  4:21 UTC (permalink / raw)
  To: Thomas Zimmermann, Javier Martinez Canillas, Maarten Lankhorst,
	Maxime Ripard, David Airlie, Daniel Vetter,
	open list:DRM DRIVER FOR FIRMWARE FRAMEBUFFERS, open list

   This patch fixes the failure of the frame buffer driver on my Asahi kernel
which prevented X11 from starting on my Apple M1 laptop. It seems like a
straightforward failure to follow the procedure described in drivers/video/aperture.c
to remove the earlier driver. This patch is very simple and minimal. Very likely
there are better ways to fix this, and very likely there are other drivers
which have the same problem, but I submit this so at least there is
an interim fix for my problem.

    Thanks

    Andrew Worsley

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-11-11  4:21 Andrew Worsley
@ 2023-11-11  8:22 ` Javier Martinez Canillas
  0 siblings, 0 replies; 414+ messages in thread
From: Javier Martinez Canillas @ 2023-11-11  8:22 UTC (permalink / raw)
  To: Andrew Worsley, Thomas Zimmermann, Maarten Lankhorst,
	Maxime Ripard, David Airlie, Daniel Vetter,
	open list:DRM DRIVER FOR FIRMWARE FRAMEBUFFERS, open list

Andrew Worsley <amworsley@gmail.com> writes:

Hello Andrew,

>    This patch fix's the failure of the frame buffer driver on my Asahi kernel
> which prevented X11 from starting on my Apple M1 laptop. It seems like a straight
> forward failure to follow the procedure described in drivers/video/aperture.c
> to remove the ealier driver. This patch is very simple and minimal. Very likely
> there may be better ways to fix this and very like there may be other drivers
> which have the same problem but I submit this so at least there is
> an interim fix for my problem.
>

Which patch? I think you forgot to include it in your email.

>     Thanks
>
>     Andrew Worsley
>

-- 
Best regards,

Javier Martinez Canillas
Core Platforms
Red Hat


^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <DB3PR10MB6835AF75D60D9A96465F35C2E8AAA@DB3PR10MB6835.EURPRD10.PROD.OUTLOOK.COM>]

* Re:
       [not found] <DB3PR10MB6835AF75D60D9A96465F35C2E8AAA@DB3PR10MB6835.EURPRD10.PROD.OUTLOOK.COM>
@ 2023-11-06 12:55 ` syzbot
  0 siblings, 0 replies; 414+ messages in thread
From: syzbot @ 2023-11-06 12:55 UTC (permalink / raw)
  To: yuran.pereira; +Cc: yuran.pereira, linux-kernel, syzkaller-bugs

> #syz test: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git�master

want either no args or 2 args (repo, branch), got 1


^ permalink raw reply	[flat|nested] 414+ messages in thread

* PIC probing code from e179f6914152 failing
@ 2023-10-18 18:50 Mario Limonciello
  2023-10-18 22:50 ` Thomas Gleixner
  0 siblings, 1 reply; 414+ messages in thread
From: Mario Limonciello @ 2023-10-18 18:50 UTC (permalink / raw)
  To: Hans de Goede, kys, tglx, hpa; +Cc: x86, LKML, Petkov, Borislav

Hi,

Recently an issue was reported to Bugzilla [1] that the Keyboard (IRQ 1) 
and GPIO controller (IRQ 7) weren't working properly on two separate 
Lenovo machines.  The issues are unique to Linux.

In digging through them, they happen because Lenovo didn't set up the 
PIC in the BIOS.
Specifically the PIC probing code introduced by e179f6914152 ("x86, irq, 
pic: Probe for legacy PIC and set legacy_pic appropriately") expects 
that the BIOS sets up the PIC and uses that assertion to let Linux set 
it up.

One of the reporters confirmed that reverting e179f6914152 fixes the 
issue.  Keyboard and GPIO controller both work properly.

I wanted to ask if we can please revert that and come up with a 
different solution for kexec with HyperV.
Can guests instead perhaps detect in early boot code they're running in 
an EFI based hypervisor and explicitly set 'legacy_pic = &null_legacy_pic;'?

[1] https://bugzilla.kernel.org/show_bug.cgi?id=218003

Thanks,

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: PIC probing code from e179f6914152 failing
  2023-10-18 18:50 PIC probing code from e179f6914152 failing Mario Limonciello
@ 2023-10-18 22:50 ` Thomas Gleixner
  2023-10-19 21:20   ` Mario Limonciello
  0 siblings, 1 reply; 414+ messages in thread
From: Thomas Gleixner @ 2023-10-18 22:50 UTC (permalink / raw)
  To: Mario Limonciello, Hans de Goede, kys, hpa; +Cc: x86, LKML, Borislav Petkov

On Wed, Oct 18 2023 at 13:50, Mario Limonciello wrote:
> Recently an issue was reported to Bugzilla [1] that the Keyboard (IRQ 1) 
> and GPIO controller (IRQ 7) weren't working properly on two separate 
> Lenovo machines.  The issues are unique to Linux.
>
> In digging through them, they happen because Lenovo didn't set up the 
> PIC in the BIOS.
> Specifically the PIC probing code introduced by e179f6914152 ("x86, irq, 
> pic: Probe for legacy PIC and set legacy_pic appropriately") expects 
> that the BIOS sets up the PIC and uses that assertion to let Linux set 
> it up.
>
> One of the reporters confirmed that reverting e179f6914152 fixes the 
> issue.  Keyboard and GPIO controller both work properly.
>
> I wanted to ask if we can please revert that and come up with a 
> different solution for kexec with HyperV.
> Can guests instead perhaps detect in early boot code they're running in 
> an EFI based hypervisor and explicitly set 'legacy_pic = &null_legacy_pic;'?

No. This detection mechanism prevents PIC usage also in other
scenarios.

It's perfectly valid code and the assumption that you can read back what
you wrote to the master IMR is entirely correct. If that's not the case
then there is no PIC, the BIOS has disabled some parts of the legacy
block or did not initialize it.

Letting the kernel blindly assume that there is always a PIC is just
wrong. Worse, it puts the burden on everyone else to sprinkle
"legacy_pic = null_pic;" all over the place with dubious
conditions. That's exactly what the commit in question avoided.

So no, we are not going back there.

We could obviously change the probe() function to issue a full PIC
initialization sequence before reading a known written value
back. That's basically what the revert causes to happen via
legacy_pic->init().

But I fundamentally hate to do that because forcing the init sequence
just to make presumably broken code which has some dubious dependencies
on the PIC or on nr_legacy_irqs > 0 happy is not really a solution when
the PIC is actually not needed at all. For anything modern where all
legacy interrupts are routed to the IO/APIC the PIC does not make any
sense whatsoever.

We'd rather go and fix the real underlying problem.

The kernel can handle the legacy interrupts perfectly fine through the
IOAPIC. There is no hard requirement for the PIC at all except for
really old systems which had the timer interrupt wired to the PIC and
therefore required to route the timer interrupt through the PIC instead
of the IO/APIC or did not provide routing entries for the IO/APIC. See
the horrible hackery in check_timer() and the grossly misnamed
init_IO_APIC_traps().

I just took a random machine, forced the NULL PIC and added
'apic=verbose' to the kernel command line and magically all the legacy
interrupts are set up via IO/APIC despite the NULL PIC and therefore 0
preallocated legacy interrupts.

  apic 0 pin 0 not connected
 IOAPIC[0]: Preconfigured routing entry (0-1 -> IRQ 1 Level:0 ActiveLow:0)
 IOAPIC[0]: Preconfigured routing entry (0-2 -> IRQ 2 Level:0 ActiveLow:0)
 IOAPIC[0]: Preconfigured routing entry (0-3 -> IRQ 3 Level:0 ActiveLow:0)
 ...

which is identical to the output with PIC enabled. That debug message is
emitted from mp_irqdomain_alloc() which is invoked via the PNP resource
management code.

Now /proc/interrupts:

           CPU0       CPU1       CPU2       CPU3        
  1:          0         56          0          0    IO-APIC   1-edge      i8042
  4:        442          0          0          0    IO-APIC   4-edge      ttyS0
  8:          0          0          0          0    IO-APIC   8-edge      rtc0
  9:          0          0          0          0    IO-APIC   9-fasteoi   acpi
 12:          0          0        122          0    IO-APIC  12-edge      i8042

Keyboard and serial are working, see?

The only interrupt which does not work is interrupt 0 because nothing
allocates interrupt 0 due to nr_legacy_irqs == 0, but that's a trivially
solvable problem.

That machine does not even need the timer interrupt because it has a
usable APIC and TSC deadline timer, so no APIC timer calibration
required. The same is true for CPUs which do not have a TSC deadline
timer, but enumerate the APIC frequency via CPUID or MSRs.

But that brings up an interesting question. How are those affected
machines even reaching a state where the user notices that just the
keyboard and the GPIO are not working? Why?

The CPUID/MSR APIC frequency enumeration is Intel specific and
everything else depends on a working timer interrupt to calibrate the
APIC timer frequency. So AMD CPUs require the timer interrupt to
work. The only explanation why this "works" in that null PIC case is
that the PIT/HPET interrupt is actually wired to pin 0, but that's
something to be determined...

Can I please get the following information from an affected machine:

  1) dmesg with 'apic=verbose' on the command line
  2) /proc/interrupts
  3) /sys/kernel/debug/irq/irqs/{0..15}

  #3 requires CONFIG_GENERIC_IRQ_DEBUGFS to be set.

Two versions of that please:

  1) Unpatched kernel
  2) Patched kernel

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: PIC probing code from e179f6914152 failing
  2023-10-18 22:50 ` Thomas Gleixner
@ 2023-10-19 21:20   ` Mario Limonciello
  2023-10-23 15:59     ` Thomas Gleixner
  0 siblings, 1 reply; 414+ messages in thread
From: Mario Limonciello @ 2023-10-19 21:20 UTC (permalink / raw)
  To: Thomas Gleixner, Hans de Goede, kys, hpa; +Cc: x86, LKML, Borislav Petkov

On 10/18/2023 17:50, Thomas Gleixner wrote:
> On Wed, Oct 18 2023 at 13:50, Mario Limonciello wrote:
>> Recently an issue was reported to Bugzilla [1] that the Keyboard (IRQ 1)
>> and GPIO controller (IRQ 7) weren't working properly on two separate
>> Lenovo machines.  The issues are unique to Linux.
>>
>> In digging through them, they happen because Lenovo didn't set up the
>> PIC in the BIOS.
>> Specifically the PIC probing code introduced by e179f6914152 ("x86, irq,
>> pic: Probe for legacy PIC and set legacy_pic appropriately") expects
>> that the BIOS sets up the PIC and uses that assertion to let Linux set
>> it up.
>>
>> One of the reporters confirmed that reverting e179f6914152 fixes the
>> issue.  Keyboard and GPIO controller both work properly.
>>
>> I wanted to ask if we can please revert that and come up with a
>> different solution for kexec with HyperV.
>> Can guests instead perhaps detect in early boot code they're running in
>> an EFI based hypervisor and explicitly set 'legacy_pic = &null_legacy_pic;'?
> 
> No. This detection mechanism prevents PIC usage also in other
> scenarios.
> 
> It's perfectly valid code and the assumption that you can read back what
> you wrote to the master IMR is entirely correct. If that's not the case
> then there is no PIC, the BIOS has disabled some parts of the legacy
> block or did not initialize it.
> 
> Letting the kernel blindly assume that there is always a PIC is just
> wrong. Worse, it puts the burden on everyone else to sprinkle
> "legacy_pic = null_pic;" all over the place with dubious
> conditions. That's exactly what the commit in question avoided.
> 
> So no, we are not going back there.
> 
> We could obviously change the probe() function to issue a full PIC
> initialization sequence before reading a known written value
> back. That's basically what the revert causes to happen via
> legacy_pic->init().
> 
> But I fundamentally hate to do that because forcing the init sequence
> just to make presumably broken code which has some dubious dependencies
> on the PIC or on nr_legacy_irqs > 0 happy is not really a solution when
> the PIC is actually not needed at all. For anything modern where all
> legacy interrupts are routed to the IO/APIC the PIC does not make any
> sense whatsoever.
> 
> We rather go and fix the real underlying problem.

Looking at the logs from David and also trying to mock up the failure on 
my side I have a few findings I'll share, please agree or disagree with 
them.

> 
> The kernel can handle the legacy interrupts perfectly fine through the
> IOAPIC. There is no hard requirement for the PIC at all except for
> really old systems which had the timer interrupt wired to the PIC and
> therefore required to route the timer interrupt through the PIC instead
> of the IO/APIC or did not provide routing entries for the IO/APIC. See
> the horrible hackery in check_timer() and the grossly misnomed
> init_IO_APIC_traps().
> 
> I just took a random machine, forced the NULL PIC and added
> 'apic=verbose' to the kernel command line and magically all the legacy
> interrupts are set up via IO/APIC despite the NULL PIC and therefore 0
> preallocated legacy interrupts.
> 
>    apic 0 pin 0 not connected
>   IOAPIC[0]: Preconfigured routing entry (0-1 -> IRQ 1 Level:0 ActiveLow:0)
>   IOAPIC[0]: Preconfigured routing entry (0-2 -> IRQ 2 Level:0 ActiveLow:0)
>   IOAPIC[0]: Preconfigured routing entry (0-3 -> IRQ 3 Level:0 ActiveLow:0)
>   ...
> 
> which is identical to the output with PIC enabled. That debug message is
> emitted from mp_irqdomain_alloc() which is invoked via the PNP resource
> management code.
> 
> Now /proc/interrupts:
> 
>             CPU0       CPU1       CPU2       CPU3
>    1:          0         56          0          0    IO-APIC   1-edge      i8042
>    4:        442          0          0          0    IO-APIC   4-edge      ttyS0
>    8:          0          0          0          0    IO-APIC   8-edge      rtc0
>    9:          0          0          0          0    IO-APIC   9-fasteoi   acpi
>   12:          0          0        122          0    IO-APIC  12-edge      i8042
> 
> Keyboard and serial are working, see?
> 
> The only interrupt which does not work is interrupt 0 because nothing
> allocates interrupt 0 due to nr_legacy_irqs == 0, but that's a trivially
> solvable problem.

 From David's logs I can see that the timer interrupt gets wired up to 
IRQ2 instead of IRQ0.

IOAPIC[0]: Preconfigured routing entry (33-2 -> IRQ 2 Level:0 ActiveLow:0)

In my hacked up forcing NULL pic case this fixes that:

diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 43c1c24e934b..885687e64e4e 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -424,7 +424,7 @@ static int legacy_pic_probe(void)
  }

  struct legacy_pic null_legacy_pic = {
-       .nr_legacy_irqs = 0,
+       .nr_legacy_irqs = 1,
         .chip = &dummy_irq_chip,
         .mask = legacy_pic_uint_noop,
         .unmask = legacy_pic_uint_noop,

I think it's cleaner than changing all the places that use 
nr_legacy_irqs().  On my side this makes:

IOAPIC[0]: Preconfigured routing entry (33-2 -> IRQ 0 Level:0 ActiveLow:0)

> 
> That machine does not even need the timer interrupt because it has a
> usable APIC and TSC deadline timer, so no APIC timer calibration
> required. The same is true for CPUs which do not have a TSC deadline
> timer, but enumerate the APIC frequency via CPUID or MSRs.

Don't you need it for things like rtcwake to be able to work?

> 
> But that brings up an interesting question. How are those affected
> machines even reaching a state where the user notices that just the
> keyboard and the GPIO are not working? Why?

So the GPIO controller driver (pinctrl-amd) uses platform_get_irq() to 
try to discover the IRQ to use.

This calls acpi_irq_get() which isn't implemented on x86 (hardcodes 
-EINVAL).

I can "work around it" by:

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 76bfcba25003..2b4b436c65d8 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -187,7 +187,8 @@ int platform_get_irq_optional(struct platform_device 
*dev, unsigned int num)
         }

         r = platform_get_resource(dev, IORESOURCE_IRQ, num);
-       if (has_acpi_companion(&dev->dev)) {
+       if (IS_ENABLED(CONFIG_ACPI_GENERIC_GSI) &&
+            has_acpi_companion(&dev->dev)) {
                 if (r && r->flags & IORESOURCE_DISABLED) {
                         ret = acpi_irq_get(ACPI_HANDLE(&dev->dev), num, r);
                         if (ret)

but the resource that is returned from the next hunk has the resource 
flags set wrong in the NULL pic case:

NULL case:
r: AMDI0030:00 flags: 0x30000418
PIC case:
r: AMDI0030:00 flags: 0x418

IOW NULL pic case has IORESOURCE_DISABLED / IORESOURCE_UNSET

As a result, the GPIO controller interrupts are later not actually working.
For example the attn pin for my I2C touchpad doesn't work.

Checking the debugfs with my "work around" in place I can see a few 
things set up differently:

NULL case:
handler:  handle_edge_irq
dstate:   0x3740c208
             IRQ_TYPE_LEVEL_LOW
             IRQD_ACTIVATED
             IRQD_IRQ_STARTED
             IRQD_SINGLE_TARGET
             IRQD_MOVE_PCNTXT
             IRQD_AFFINITY_ON_ACTIVATE
             IRQD_CAN_RESERVE
             IRQD_WAKEUP_STATE
             IRQD_DEFAULT_TRIGGER_SET
             IRQD_HANDLE_ENFORCE_IRQCTX

PIC case:
handler:  handle_fasteoi_irq
dstate:   0x3740e208
             IRQ_TYPE_LEVEL_LOW
             IRQD_LEVEL
             IRQD_ACTIVATED
             IRQD_IRQ_STARTED
             IRQD_SINGLE_TARGET
             IRQD_MOVE_PCNTXT
             IRQD_AFFINITY_ON_ACTIVATE
             IRQD_CAN_RESERVE
             IRQD_WAKEUP_STATE
             IRQD_DEFAULT_TRIGGER_SET
             IRQD_HANDLE_ENFORCE_IRQCTX

I guess something related to the callpath for mp_register_handler().

Maybe this is the same reason for the keyboard not working right.

> 
> The CPUID/MSR APIC frequency enumeration is Intel specific and
> everything else depends on a working timer interrupt to calibrate the
> APIC timer frequency. So AMD CPUs require the timer interrupt to
> work. The only explanation why this "works" in that null PIC case is
> that the PIT/HPET interrupt is actually wired to pin 0, but that's
> something to be determined...
> 
> Can I please get the following information from an affected machine:
> 
>    1) dmesg with 'apic=verbose' on the command line
>    2) /proc/interrupts
>    3) /sys/kernel/debug/irq/irqs/{0..15}
> 
>    #3 requires CONFIG_GENERIC_IRQ_DEBUGFS to be set.
> 
> Two versions of that please:
> 
>    1) Unpatched kernel
>    2) Patched kernel
> 
> Thanks,
> 
>          tglx


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re: PIC probing code from e179f6914152 failing
  2023-10-19 21:20   ` Mario Limonciello
@ 2023-10-23 15:59     ` Thomas Gleixner
  2023-10-25  9:23       ` Thomas Gleixner
  0 siblings, 1 reply; 414+ messages in thread
From: Thomas Gleixner @ 2023-10-23 15:59 UTC (permalink / raw)
  To: Mario Limonciello, Hans de Goede, kys, hpa; +Cc: x86, LKML, Borislav Petkov

On Thu, Oct 19 2023 at 16:20, Mario Limonciello wrote:
> On 10/18/2023 17:50, Thomas Gleixner wrote:
>> The only interrupt which does not work is interrupt 0 because nothing
>> allocates interrupt 0 due to nr_legacy_irqs == 0, but that's a trivially
>> solvable problem.
>
>  From David's logs I can see that the timer interrupt gets wired up to 
> IRQ2 instead of IRQ0.

Sure, but that's not really a problem. Nothing needs the timer
interrupt in principle.

> IOAPIC[0]: Preconfigured routing entry (33-2 -> IRQ 2 Level:0 ActiveLow:0)
>
> In my hacked up forcing NULL pic case this fixes that:
>
> diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
> index 43c1c24e934b..885687e64e4e 100644
> --- a/arch/x86/kernel/i8259.c
> +++ b/arch/x86/kernel/i8259.c
> @@ -424,7 +424,7 @@ static int legacy_pic_probe(void)
>   }
>
>   struct legacy_pic null_legacy_pic = {
> -       .nr_legacy_irqs = 0,
> +       .nr_legacy_irqs = 1,
>          .chip = &dummy_irq_chip,
>          .mask = legacy_pic_uint_noop,
>          .unmask = legacy_pic_uint_noop,
>
> I think it's cleaner than changing all the places that use 
> nr_legacy_irqs().

No. It's not cleaner. It's a hack and you still need to audit all places
which depend on nr_legacy_irqs(). Also why '1'? You could as well use
'16', no?

> On my side this makes:
>
> IOAPIC[0]: Preconfigured routing entry (33-2 -> IRQ 0 Level:0
> ActiveLow:0)

Sure, but that can be achieved by other means in a clean way as
well. Can we please focus on analyzing the underlying problems instead
of trying random hacks? The timer part is well understood already.

>> That machine does not even need the timer interrupt because it has a
>> usable APIC and TSC deadline timer, so no APIC timer calibration
>> required. The same is true for CPUs which do not have a TSC deadline
>> timer, but enumerate the APIC frequency via CPUID or MSRs.
>
> Don't you need it for things like rtcwake to be able to work?

Timer != RTC.

The RTC interrupt is separate (IRQ 8), but in the case of this system it
is using the HPET-RTC emulation which fails to initialize because
interrupt 0 is not available.

>> But that brings up an interesting question. How are those affected
>> machines even reaching a state where the user notices that just the
>> keyboard and the GPIO are not working? Why?
>
> So the GPIO controller driver (pinctrl-amd) uses platform_get_irq() to 
> try to discover the IRQ to use.
>
> This calls acpi_irq_get() which isn't implemented on x86 (hardcodes 
> -EINVAL).
>
> I can "work around it" by:
>
> diff --git a/drivers/base/platform.c b/drivers/base/platform.c
> index 76bfcba25003..2b4b436c65d8 100644
> --- a/drivers/base/platform.c
> +++ b/drivers/base/platform.c
> @@ -187,7 +187,8 @@ int platform_get_irq_optional(struct platform_device 
> *dev, unsigned int num)
>          }
>
>          r = platform_get_resource(dev, IORESOURCE_IRQ, num);
> -       if (has_acpi_companion(&dev->dev)) {
> +       if (IS_ENABLED(CONFIG_ACPI_GENERIC_GSI) &&
> +            has_acpi_companion(&dev->dev)) {
>                  if (r && r->flags & IORESOURCE_DISABLED) {
>                          ret = acpi_irq_get(ACPI_HANDLE(&dev->dev), num, r);
>                          if (ret)

So why is acpi_irq_get() reached when the PIC is disabled, but not when
the PIC is enabled? Because of the below:

> but the resource that is returned from the next hunk ?

next hunk? The resource is returned by platform_get_resource() above, no?

> has the resource flags set wrong in the NULL pic case:
>
> NULL case:
> r: AMDI0030:00 flags: 0x30000418
> PIC case:
> r: AMDI0030:00 flags: 0x418
>
> IOW NULL pic case has IORESOURCE_DISABLED / IORESOURCE_UNSET

So the real question is WHY are the DISABLED/UNSET flags not set in the
PIC case?

> NULL case:
> handler:  handle_edge_irq
> dstate:   0x3740c208
>              IRQ_TYPE_LEVEL_LOW
>
> PIC case:
> handler:  handle_fasteoi_irq
> dstate:   0x3740e208
>              IRQ_TYPE_LEVEL_LOW
>              IRQD_LEVEL
>
> I guess something related to the callpath for mp_register_handler().

Guessing is not helpful.

There is a difference in how the allocation info is set up when legacy
PIC is enabled, but that does not explain the above resource flag
difference.

As there is no override for IRQ7:

[    0.011415] ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
[    0.011417] Int: type 0, pol 0, trig 0, bus 00, IRQ 00, APIC ID 20, APIC INT 02
[    0.011418] ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
[    0.011419] Int: type 0, pol 3, trig 3, bus 00, IRQ 09, APIC ID 20, APIC INT 09
...
[    0.011425] Int: type 0, pol 0, trig 0, bus 00, IRQ 07, APIC ID 20, APIC INT 07

the initial setup of the IOAPIC interrupt is edge, while the initial
setup of the legacy PIC is level. But that gets changed later to edge
when the IOAPIC is initialized.

I'm not seeing the magic which makes the above different yet, though I'm
100% sure by now that this definitely does not "work" by design. It just
works by pure luck.

Now when platform_get_irq_optional() sets the trigger type via
irqd_set_trigger_type() it just sets LEVEL_LOW, but does not change the
handler and does not set IRQD_LEVEL. Nor does it change the IO/APIC
pin setup. This happens because the IOAPIC interrupt chip does not
implement an irq_set_type() callback.

IOW the whole machinery depends on magic setup ordering vs. PIC and pure
luck. Adding the callback is not rocket science, but while it should
make the interrupt work it still does not explain the magic "working"
when the legacy PIC is enabled.

Let me sit down and add a pile of debug printks to all the relevant
places as we really need to understand the underlying magic effects of
legacy PIC first.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: PIC probing code from e179f6914152 failing
  2023-10-23 15:59     ` Thomas Gleixner
@ 2023-10-25  9:23       ` Thomas Gleixner
  2023-10-25 14:41         ` Mario Limonciello
  0 siblings, 1 reply; 414+ messages in thread
From: Thomas Gleixner @ 2023-10-25  9:23 UTC (permalink / raw)
  To: Mario Limonciello, Hans de Goede, kys, hpa
  Cc: x86, LKML, Borislav Petkov, Rafael J. Wysocki

On Mon, Oct 23 2023 at 17:59, Thomas Gleixner wrote:
> On Thu, Oct 19 2023 at 16:20, Mario Limonciello wrote:
>>   struct legacy_pic null_legacy_pic = {
>> -       .nr_legacy_irqs = 0,
>> +       .nr_legacy_irqs = 1,
>>          .chip = &dummy_irq_chip,
>>          .mask = legacy_pic_uint_noop,
>>          .unmask = legacy_pic_uint_noop,
>>
>> I think it's cleaner than changing all the places that use 
>> nr_legacy_irqs().
>
> No. It's not cleaner. It's a hack and you still need to audit all places
> which depend on nr_legacy_irqs(). Also why '1'? You could as well use
> '16', no?

So I sat down and did a thorough analysis of legacy PIC dependencies.

Unfortunately this is an unholy mess and sprinkled all over the place,
so there is no trivial way to resolve this quickly. This needs a proper
overhaul to decouple the actual PIC driver selection from the fact that
the kernel runs on i8259 equipped hardware and therefore needs to
honour the legacy PNP overrides etc.

The probing itself is to stay in order to avoid sprinkling weird
conditions and NULL PIC selections all over the place.

It could be argued that the probe function should try to initialize the
PIC, but that's overkill for scenarios where the PIC does not exist.

Though it turns out that ACPI/MADT is helpful here because the MADT
header has a flags field which denotes in bit 0, whether the system has
an 8259 setup or not.

This allows overriding the probe for now until we have actually resolved
the dependency problems in a clean way.

Untested patch below.

Thanks,

        tglx
---
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -69,6 +69,8 @@ struct legacy_pic {
 	void (*make_irq)(unsigned int irq);
 };
 
+void legacy_pic_pcat_compat(void);
+
 extern struct legacy_pic *legacy_pic;
 extern struct legacy_pic null_legacy_pic;
 
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -148,6 +148,9 @@ static int __init acpi_parse_madt(struct
 		pr_debug("Local APIC address 0x%08x\n", madt->address);
 	}
 
+	if (madt->flags & ACPI_MADT_PCAT_COMPAT)
+		legacy_pic_pcat_compat();
+
 	/* ACPI 6.3 and newer support the online capable bit. */
 	if (acpi_gbl_FADT.header.revision > 6 ||
 	    (acpi_gbl_FADT.header.revision == 6 &&
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -32,6 +32,7 @@
  */
 static void init_8259A(int auto_eoi);
 
+static bool pcat_compat __ro_after_init;
 static int i8259A_auto_eoi;
 DEFINE_RAW_SPINLOCK(i8259A_lock);
 
@@ -299,15 +300,32 @@ static void unmask_8259A(void)
 
 static int probe_8259A(void)
 {
+	unsigned char new_val, probe_val = ~(1 << PIC_CASCADE_IR);
 	unsigned long flags;
-	unsigned char probe_val = ~(1 << PIC_CASCADE_IR);
-	unsigned char new_val;
+
+	/*
+	 * If MADT has the PCAT_COMPAT flag set, then do not bother probing
+	 * for the PIC. Some BIOSes leave the PIC uninitialized and probing
+	 * fails.
+	 *
+	 * Right now this causes problems as quite some code depends on
+	 * nr_legacy_irqs() > 0 or has_legacy_pic() == true. This is silly
+	 * when the system has an IO/APIC because then PIC is not required
+	 * at all, except for really old machines where the timer interrupt
+	 * must be routed through the PIC. So just pretend that the PIC is
+	 * there and let legacy_pic->init() initialize it for nothing.
+	 *
+	 * Alternatively this could just try to initialize the PIC and
+	 * repeat the probe, but for cases where there is no PIC that's
+	 * just pointless.
+	 */
+	if (pcat_compat)
+		return nr_legacy_irqs();
+
 	/*
-	 * Check to see if we have a PIC.
-	 * Mask all except the cascade and read
-	 * back the value we just wrote. If we don't
-	 * have a PIC, we will read 0xff as opposed to the
-	 * value we wrote.
+	 * Check to see if we have a PIC.  Mask all except the cascade and
+	 * read back the value we just wrote. If we don't have a PIC, we
+	 * will read 0xff as opposed to the value we wrote.
 	 */
 	raw_spin_lock_irqsave(&i8259A_lock, flags);
 
@@ -429,5 +447,9 @@ static int __init i8259A_init_ops(void)
 
 	return 0;
 }
-
 device_initcall(i8259A_init_ops);
+
+void __init legacy_pic_pcat_compat(void)
+{
+	pcat_compat = true;
+}


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: PIC probing code from e179f6914152 failing
  2023-10-25  9:23       ` Thomas Gleixner
@ 2023-10-25 14:41         ` Mario Limonciello
  2023-10-25 15:25           ` David Lazar
  0 siblings, 1 reply; 414+ messages in thread
From: Mario Limonciello @ 2023-10-25 14:41 UTC (permalink / raw)
  To: Thomas Gleixner, Hans de Goede, kys, hpa, dlazar
  Cc: x86, LKML, Borislav Petkov, Rafael J. Wysocki

On 10/25/2023 04:23, Thomas Gleixner wrote:
> On Mon, Oct 23 2023 at 17:59, Thomas Gleixner wrote:
>> On Thu, Oct 19 2023 at 16:20, Mario Limonciello wrote:
>>>    struct legacy_pic null_legacy_pic = {
>>> -       .nr_legacy_irqs = 0,
>>> +       .nr_legacy_irqs = 1,
>>>           .chip = &dummy_irq_chip,
>>>           .mask = legacy_pic_uint_noop,
>>>           .unmask = legacy_pic_uint_noop,
>>>
>>> I think it's cleaner than changing all the places that use
>>> nr_legacy_irqs().
>>
>> No. It's not cleaner. It's a hack and you still need to audit all places
>> which depend on nr_legacy_irqs(). Also why '1'? You could as well use
>> '16', no?
> 
> So I sat down and did a thorough analysis of legacy PIC dependencies.
> 
> Unfortunately this is an unholy mess and sprinkled all over the place,
> so there is no trivial way to resolve this quickly. This needs a proper
> overhaul to decouple the actual PIC driver selection from the fact that
> the kernel runs on a i8259 equipped hardware and therefore needs to
> honour the legacy PNP overrides etc.
> 
> The probing itself is to stay in order to avoid sprinkling weird
> conditions and NULL PIC selections all over the place.
> 
> It could be argued that the probe function should try to initialize the
> PIC, but that's overkill for scenarios where the PIC does not exist.
> 
> Though it turns out that ACPI/MADT is helpful here because the MADT
> header has a flags field which denotes in bit 0, whether the system has
> a 8259 setup or not.
> 
> This allows to override the probe for now until we actually resolved the
> dependency problems in a clean way.
> 
> Untested patch below.

+David from the bugzilla.

I checked his acpidump and I do think this will work for him.

[024h 0036   4]           Local Apic Address : FEE00000
[028h 0040   4]        Flags (decoded below) : 00000001
                          PC-AT Compatibility : 1


David - can you see if the below helps your hardware?

> 
> Thanks,
> 
>          tglx
> ---
> --- a/arch/x86/include/asm/i8259.h
> +++ b/arch/x86/include/asm/i8259.h
> @@ -69,6 +69,8 @@ struct legacy_pic {
>   	void (*make_irq)(unsigned int irq);
>   };
>   
> +void legacy_pic_pcat_compat(void);
> +
>   extern struct legacy_pic *legacy_pic;
>   extern struct legacy_pic null_legacy_pic;
>   
> --- a/arch/x86/kernel/acpi/boot.c
> +++ b/arch/x86/kernel/acpi/boot.c
> @@ -148,6 +148,9 @@ static int __init acpi_parse_madt(struct
>   		pr_debug("Local APIC address 0x%08x\n", madt->address);
>   	}
>   
> +	if (madt->flags & ACPI_MADT_PCAT_COMPAT)
> +		legacy_pic_pcat_compat();
> +
>   	/* ACPI 6.3 and newer support the online capable bit. */
>   	if (acpi_gbl_FADT.header.revision > 6 ||
>   	    (acpi_gbl_FADT.header.revision == 6 &&
> --- a/arch/x86/kernel/i8259.c
> +++ b/arch/x86/kernel/i8259.c
> @@ -32,6 +32,7 @@
>    */
>   static void init_8259A(int auto_eoi);
>   
> +static bool pcat_compat __ro_after_init;
>   static int i8259A_auto_eoi;
>   DEFINE_RAW_SPINLOCK(i8259A_lock);
>   
> @@ -299,15 +300,32 @@ static void unmask_8259A(void)
>   
>   static int probe_8259A(void)
>   {
> +	unsigned char new_val, probe_val = ~(1 << PIC_CASCADE_IR);
>   	unsigned long flags;
> -	unsigned char probe_val = ~(1 << PIC_CASCADE_IR);
> -	unsigned char new_val;
> +
> +	/*
> +	 * If MADT has the PCAT_COMPAT flag set, then do not bother probing
> +	 * for the PIC. Some BIOSes leave the PIC uninitialized and probing
> +	 * fails.
> +	 *
> +	 * Right now this causes problems as quite some code depends on
> +	 * nr_legacy_irqs() > 0 or has_legacy_pic() == true. This is silly
> +	 * when the system has an IO/APIC because then PIC is not required
> +	 * at all, except for really old machines where the timer interrupt
> +	 * must be routed through the PIC. So just pretend that the PIC is
> +	 * there and let legacy_pic->init() initialize it for nothing.
> +	 *
> +	 * Alternatively this could just try to initialize the PIC and
> +	 * repeat the probe, but for cases where there is no PIC that's
> +	 * just pointless.
> +	 */
> +	if (pcat_compat)
> +		return nr_legacy_irqs();
> +
>   	/*
> -	 * Check to see if we have a PIC.
> -	 * Mask all except the cascade and read
> -	 * back the value we just wrote. If we don't
> -	 * have a PIC, we will read 0xff as opposed to the
> -	 * value we wrote.
> +	 * Check to see if we have a PIC.  Mask all except the cascade and
> +	 * read back the value we just wrote. If we don't have a PIC, we
> +	 * will read 0xff as opposed to the value we wrote.
>   	 */
>   	raw_spin_lock_irqsave(&i8259A_lock, flags);
>   
> @@ -429,5 +447,9 @@ static int __init i8259A_init_ops(void)
>   
>   	return 0;
>   }
> -
>   device_initcall(i8259A_init_ops);
> +
> +void __init legacy_pic_pcat_compat(void)
> +{
> +	pcat_compat = true;
> +}
> 


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: PIC probing code from e179f6914152 failing
  2023-10-25 14:41         ` Mario Limonciello
@ 2023-10-25 15:25           ` David Lazar
  2023-10-25 17:31             ` Thomas Gleixner
  0 siblings, 1 reply; 414+ messages in thread
From: David Lazar @ 2023-10-25 15:25 UTC (permalink / raw)
  To: Mario Limonciello, Thomas Gleixner
  Cc: Hans de Goede, kys, hpa, x86, LKML, Borislav Petkov, Rafael J. Wysocki

--- On Wed, 25 Oct 2023, Mario Limonciello wrote:
> David - can you see if the below helps your hardware?

The keyboard and mouse work fine with Thomas' patch.

I've uploaded the debug info to the bug:

https://bugzilla.kernel.org/attachment.cgi?id=305291&action=edit

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: PIC probing code from e179f6914152 failing
  2023-10-25 15:25           ` David Lazar
@ 2023-10-25 17:31             ` Thomas Gleixner
  2023-10-25 21:04               ` [PATCH] x86/i8259: Skip probing when ACPI/MADT advertises PCAT compatibility, Thomas Gleixner
  0 siblings, 1 reply; 414+ messages in thread
From: Thomas Gleixner @ 2023-10-25 17:31 UTC (permalink / raw)
  To: David Lazar, Mario Limonciello
  Cc: Hans de Goede, kys, hpa, x86, LKML, Borislav Petkov, Rafael J. Wysocki

On Wed, Oct 25 2023 at 17:25, David Lazar wrote:
> --- On Wed, 25 Oct 2023, Mario Limonciello wrote:
>> David - can you see if the below helps your hardware?
>
> The keyboard and mouse work fine with Thomas' patch.
>
> I've uploaded the debug info to the bug:
>
> https://bugzilla.kernel.org/attachment.cgi?id=305291&action=edit

Let me write a changelog then. Unless Rafael has any objections to that
approach.

Thanks,

        tglx


^ permalink raw reply	[flat|nested] 414+ messages in thread

* [PATCH] x86/i8259: Skip probing when ACPI/MADT advertises PCAT compatibility, 
  2023-10-25 17:31             ` Thomas Gleixner
@ 2023-10-25 21:04               ` Thomas Gleixner
  2023-10-25 22:11                 ` Mario Limonciello
  0 siblings, 1 reply; 414+ messages in thread
From: Thomas Gleixner @ 2023-10-25 21:04 UTC (permalink / raw)
  To: David Lazar, Mario Limonciello
  Cc: Hans de Goede, kys, hpa, x86, LKML, Borislav Petkov, Rafael J. Wysocki

David and a few others reported that on certain newer systems some legacy
interrupts fail to work correctly.

Debugging revealed that the BIOS of these systems leaves the legacy PIC in
uninitialized state which makes the PIC detection fail and the kernel
switches to a dummy implementation.

Unfortunately this fallback causes quite some code to fail as it depends on
checks for the number of legacy PIC interrupts or the availability of the
real PIC.

In theory there is no reason to use the PIC on any modern system when
IO/APIC is available, but the dependencies on the related checks cannot be
resolved trivially and on short notice. This needs lots of analysis and
rework.

The PIC detection has been added to avoid quirky checks and force selection
of the dummy implementation all over the place, especially in VM guest
scenarios. So it's not an option to revert the relevant commit as that
would break a lot of other scenarios.

One solution would be to try to initialize the PIC on detection failure and
retry the detection, but that puts the burden on everything which does not
have a PIC.

Fortunately the ACPI/MADT table header has a flag field, which advertises
in bit 0 that the system is PCAT compatible, which means it has a legacy
8259 PIC.

Evaluate that bit and if set avoid the detection routine and keep the real
PIC installed, which then gets initialized (for nothing) and makes the rest
of the code with all the dependencies work again.

Fixes: e179f6914152 ("x86, irq, pic: Probe for legacy PIC and set legacy_pic appropriately")
Reported-by: David Lazar <dlazar@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: David Lazar <dlazar@gmail.com>
Cc: stable@vger.kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218003
---
---
 arch/x86/include/asm/i8259.h |    2 ++
 arch/x86/kernel/acpi/boot.c  |    3 +++
 arch/x86/kernel/i8259.c      |   38 ++++++++++++++++++++++++++++++--------
 3 files changed, 35 insertions(+), 8 deletions(-)

--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -69,6 +69,8 @@ struct legacy_pic {
 	void (*make_irq)(unsigned int irq);
 };
 
+void legacy_pic_pcat_compat(void);
+
 extern struct legacy_pic *legacy_pic;
 extern struct legacy_pic null_legacy_pic;
 
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -148,6 +148,9 @@ static int __init acpi_parse_madt(struct
 		pr_debug("Local APIC address 0x%08x\n", madt->address);
 	}
 
+	if (madt->flags & ACPI_MADT_PCAT_COMPAT)
+		legacy_pic_pcat_compat();
+
 	/* ACPI 6.3 and newer support the online capable bit. */
 	if (acpi_gbl_FADT.header.revision > 6 ||
 	    (acpi_gbl_FADT.header.revision == 6 &&
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -32,6 +32,7 @@
  */
 static void init_8259A(int auto_eoi);
 
+static bool pcat_compat __ro_after_init;
 static int i8259A_auto_eoi;
 DEFINE_RAW_SPINLOCK(i8259A_lock);
 
@@ -299,15 +300,32 @@ static void unmask_8259A(void)
 
 static int probe_8259A(void)
 {
+	unsigned char new_val, probe_val = ~(1 << PIC_CASCADE_IR);
 	unsigned long flags;
-	unsigned char probe_val = ~(1 << PIC_CASCADE_IR);
-	unsigned char new_val;
+
+	/*
+	 * If MADT has the PCAT_COMPAT flag set, then do not bother probing
+	 * for the PIC. Some BIOSes leave the PIC uninitialized and probing
+	 * fails.
+	 *
+	 * Right now this causes problems as quite some code depends on
+	 * nr_legacy_irqs() > 0 or has_legacy_pic() == true. This is silly
+	 * when the system has an IO/APIC because then PIC is not required
+	 * at all, except for really old machines where the timer interrupt
+	 * must be routed through the PIC. So just pretend that the PIC is
+	 * there and let legacy_pic->init() initialize it for nothing.
+	 *
+	 * Alternatively this could just try to initialize the PIC and
+	 * repeat the probe, but for cases where there is no PIC that's
+	 * just pointless.
+	 */
+	if (pcat_compat)
+		return nr_legacy_irqs();
+
 	/*
-	 * Check to see if we have a PIC.
-	 * Mask all except the cascade and read
-	 * back the value we just wrote. If we don't
-	 * have a PIC, we will read 0xff as opposed to the
-	 * value we wrote.
+	 * Check to see if we have a PIC.  Mask all except the cascade and
+	 * read back the value we just wrote. If we don't have a PIC, we
+	 * will read 0xff as opposed to the value we wrote.
 	 */
 	raw_spin_lock_irqsave(&i8259A_lock, flags);
 
@@ -429,5 +447,9 @@ static int __init i8259A_init_ops(void)
 
 	return 0;
 }
-
 device_initcall(i8259A_init_ops);
+
+void __init legacy_pic_pcat_compat(void)
+{
+	pcat_compat = true;
+}

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-10-25 21:04               ` [PATCH] x86/i8259: Skip probing when ACPI/MADT advertises PCAT compatibility, Thomas Gleixner
@ 2023-10-25 22:11                 ` Mario Limonciello
  2023-10-26  9:27                   ` Re: Thomas Gleixner
  0 siblings, 1 reply; 414+ messages in thread
From: Mario Limonciello @ 2023-10-25 22:11 UTC (permalink / raw)
  To: Thomas Gleixner, David Lazar
  Cc: Hans de Goede, kys, hpa, x86, LKML, Borislav Petkov,
	Rafael J. Wysocki, Linux kernel regressions list

On 10/25/2023 16:04, Thomas Gleixner wrote:
> David and a few others reported that on certain newer systems some legacy
> interrupts fail to work correctly.
> 
> Debugging revealed that the BIOS of these systems leaves the legacy PIC in
> uninitialized state which makes the PIC detection fail and the kernel
> switches to a dummy implementation.
> 
> Unfortunately this fallback causes quite some code to fail as it depends on
> checks for the number of legacy PIC interrupts or the availability of the
> real PIC.
> 
> In theory there is no reason to use the PIC on any modern system when
> IO/APIC is available, but the dependencies on the related checks cannot be
> resolved trivially and on short notice. This needs lots of analysis and
> rework.
> 
> The PIC detection has been added to avoid quirky checks and force selection
> of the dummy implementation all over the place, especially in VM guest
> scenarios. So it's not an option to revert the relevant commit as that
> would break a lot of other scenarios.
> 
> One solution would be to try to initialize the PIC on detection fail and
> retry the detection, but that puts the burden on everything which does not
> have a PIC.
> 
> Fortunately the ACPI/MADT table header has a flag field, which advertises
> in bit 0 that the system is PCAT compatible, which means it has a legacy
> 8259 PIC.
> 
> Evaluate that bit and if set avoid the detection routine and keep the real
> PIC installed, which then gets initialized (for nothing) and makes the rest
> of the code with all the dependencies work again.
> 
> Fixes: e179f6914152 ("x86, irq, pic: Probe for legacy PIC and set legacy_pic appropriately")
> Reported-by: David Lazar <dlazar@gmail.com>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Tested-by: David Lazar <dlazar@gmail.com>
> Cc: stable@vger.kernel.org
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=218003

s/Link/Closes/

Presumably you will add a proper subject when this is committed?

With adding title and fixing that tag:

Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>

> ---
> ---
>   arch/x86/include/asm/i8259.h |    2 ++
>   arch/x86/kernel/acpi/boot.c  |    3 +++
>   arch/x86/kernel/i8259.c      |   38 ++++++++++++++++++++++++++++++--------
>   3 files changed, 35 insertions(+), 8 deletions(-)
> 
> --- a/arch/x86/include/asm/i8259.h
> +++ b/arch/x86/include/asm/i8259.h
> @@ -69,6 +69,8 @@ struct legacy_pic {
>   	void (*make_irq)(unsigned int irq);
>   };
>   
> +void legacy_pic_pcat_compat(void);
> +
>   extern struct legacy_pic *legacy_pic;
>   extern struct legacy_pic null_legacy_pic;
>   
> --- a/arch/x86/kernel/acpi/boot.c
> +++ b/arch/x86/kernel/acpi/boot.c
> @@ -148,6 +148,9 @@ static int __init acpi_parse_madt(struct
>   		pr_debug("Local APIC address 0x%08x\n", madt->address);
>   	}
>   
> +	if (madt->flags & ACPI_MADT_PCAT_COMPAT)
> +		legacy_pic_pcat_compat();
> +
>   	/* ACPI 6.3 and newer support the online capable bit. */
>   	if (acpi_gbl_FADT.header.revision > 6 ||
>   	    (acpi_gbl_FADT.header.revision == 6 &&
> --- a/arch/x86/kernel/i8259.c
> +++ b/arch/x86/kernel/i8259.c
> @@ -32,6 +32,7 @@
>    */
>   static void init_8259A(int auto_eoi);
>   
> +static bool pcat_compat __ro_after_init;
>   static int i8259A_auto_eoi;
>   DEFINE_RAW_SPINLOCK(i8259A_lock);
>   
> @@ -299,15 +300,32 @@ static void unmask_8259A(void)
>   
>   static int probe_8259A(void)
>   {
> +	unsigned char new_val, probe_val = ~(1 << PIC_CASCADE_IR);
>   	unsigned long flags;
> -	unsigned char probe_val = ~(1 << PIC_CASCADE_IR);
> -	unsigned char new_val;
> +
> +	/*
> +	 * If MADT has the PCAT_COMPAT flag set, then do not bother probing
> +	 * for the PIC. Some BIOSes leave the PIC uninitialized and probing
> +	 * fails.
> +	 *
> +	 * Right now this causes problems as quite some code depends on
> +	 * nr_legacy_irqs() > 0 or has_legacy_pic() == true. This is silly
> +	 * when the system has an IO/APIC because then PIC is not required
> +	 * at all, except for really old machines where the timer interrupt
> +	 * must be routed through the PIC. So just pretend that the PIC is
> +	 * there and let legacy_pic->init() initialize it for nothing.
> +	 *
> +	 * Alternatively this could just try to initialize the PIC and
> +	 * repeat the probe, but for cases where there is no PIC that's
> +	 * just pointless.
> +	 */
> +	if (pcat_compat)
> +		return nr_legacy_irqs();
> +
>   	/*
> -	 * Check to see if we have a PIC.
> -	 * Mask all except the cascade and read
> -	 * back the value we just wrote. If we don't
> -	 * have a PIC, we will read 0xff as opposed to the
> -	 * value we wrote.
> +	 * Check to see if we have a PIC.  Mask all except the cascade and
> +	 * read back the value we just wrote. If we don't have a PIC, we
> +	 * will read 0xff as opposed to the value we wrote.
>   	 */
>   	raw_spin_lock_irqsave(&i8259A_lock, flags);
>   
> @@ -429,5 +447,9 @@ static int __init i8259A_init_ops(void)
>   
>   	return 0;
>   }
> -
>   device_initcall(i8259A_init_ops);
> +
> +void __init legacy_pic_pcat_compat(void)
> +{
> +	pcat_compat = true;
> +}


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-10-25 22:11                 ` Mario Limonciello
@ 2023-10-26  9:27                   ` Thomas Gleixner
  0 siblings, 0 replies; 414+ messages in thread
From: Thomas Gleixner @ 2023-10-26  9:27 UTC (permalink / raw)
  To: Mario Limonciello, David Lazar
  Cc: Hans de Goede, kys, hpa, x86, LKML, Borislav Petkov,
	Rafael J. Wysocki, Linux kernel regressions list

On Wed, Oct 25 2023 at 17:11, Mario Limonciello wrote:
> On 10/25/2023 16:04, Thomas Gleixner wrote:
>> Cc: stable@vger.kernel.org
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=218003
>
> s/Link/Closes/

Sure.

> Presumably you will add a proper subject when this is committed?

Bah, yes. I stopped replacing the subject line right after clearing it :(

> With adding title and fixing that tag:
>
> Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <64b09dbb.630a0220.e80b9.e2ed@mx.google.com>]

* Re:
       [not found] <64b09dbb.630a0220.e80b9.e2ed@mx.google.com>
@ 2023-07-14  8:05 ` Andy Shevchenko
  0 siblings, 0 replies; 414+ messages in thread
From: Andy Shevchenko @ 2023-07-14  8:05 UTC (permalink / raw)
  To: luoruihong
  Cc: ilpo.jarvinen, gregkh, jirislaby, linux-kernel, linux-serial,
	luoruihong, weipengliang, wengjinfei

On Fri, Jul 14, 2023 at 08:58:29AM +0800, luoruihong wrote:
> On Thu, Jul 13, 2023 at 07:51:14PM +0300, Andy Shevchenko wrote:
> > On Thu, Jul 13, 2023 at 08:42:36AM +0800, Ruihong Luo wrote:
> > > Preserve the original value of the Divisor Latch Fraction (DLF) register.
> > > When the DLF register is modified without preservation, it can disrupt
> > > the baudrate settings established by firmware or bootloader, leading to
> > > data corruption and the generation of unreadable or distorted characters.
> >
> > You forgot to add my tag. Why? Do you think the name of variable warrants this?
> > Whatever,
> > Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> >
> > Next time if you don't pick up somebody's tag, care to explain in the changelog
> > why.
> >
> > > Fixes: 701c5e73b296 ("serial: 8250_dw: add fractional divisor support")
> > > Signed-off-by: Ruihong Luo <colorsu1922@gmail.com>
> 
> I'm sorry, I didn't know about this rule. Thank you for helping me add
> the missing tags back and for all your previous kind assistance.

For now no need to do anything, just wait for Ilpo's and/or Greg's answer(s).

-- 
With Best Regards,
Andy Shevchenko



^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2023-05-11 12:58 Ryan Roberts
  2023-05-11 13:13 ` Ryan Roberts
  0 siblings, 1 reply; 414+ messages in thread
From: Ryan Roberts @ 2023-05-11 12:58 UTC (permalink / raw)
  To: Andrew Morton, Matthew Wilcox (Oracle),
	Kirill A. Shutemov, SeongJae Park
  Cc: Ryan Roberts, linux-kernel, linux-mm, damon

Date: Thu, 11 May 2023 11:38:28 +0100
Subject: [PATCH v1 0/5] Encapsulate PTE contents from non-arch code

Hi All,

This series improves the encapsulation of pte entries by disallowing non-arch
code from directly dereferencing pte_t pointers. Instead code must use a new
helper, `pte_t ptep_deref(pte_t *ptep)`. By default, this helper does a direct
dereference of the pointer, so generated code should be exactly the same. But
its presence sets us up for arch code being able to override the default to
"virtualize" the ptes without needing to maintain a shadow table.

I intend to take advantage of this for arm64 to enable use of its "contiguous
bit" to coalesce multiple ptes into a single tlb entry, reducing pressure and
improving performance. I have an RFC for the first part of this work at [1]. The
cover letter there also explains the second part, which this series is enabling.

I intend to post an RFC for the contpte changes in due course, but it would be
good to get the ball rolling on this enabler.

There are 2 reasons that I need the encapsulation:

  - Prevent leaking the arch-private PTE_CONT bit to the core code. If the core
    code reads a pte that contains this bit, it could end up calling
    set_pte_at() with the bit set which would confuse the implementation. So we
    can always clear PTE_CONT in ptep_deref() (and ptep_get()) to avoid a leaky
    abstraction.
  - Contiguous ptes have a single access and dirty bit for the contiguous range.
    So we need to "mix-in" those bits when the core is dereferencing a pte that
    lies in the contig range. There is code that dereferences the pte then takes
    different actions based on access/dirty (see e.g. write_protect_page()).

While ptep_get() and ptep_get_lockless() already exist, both of them are
implemented using READ_ONCE() by default. While we could use ptep_get() instead
of the new ptep_deref(), I didn't want to risk performance regression.
Alternatively, all call sites that currently use ptep_get() that need the
lockless behaviour could be upgraded to ptep_get_lockless() and ptep_get() could
be downgraded to a simple dereference. That would be cleanest, but is a much
bigger (and likely error prone) change because all the arch code would need to
be updated for the new definitions of ptep_get().

The series is split up as follows:

patches 1-2: Fix bugs where code was _setting_ ptes directly, rather than using
             set_pte_at() and friends.
patch 3:     Fix highmem unmapping issue I spotted while doing the work.
patch 4:     Introduce the new ptep_deref() helper with default implementation.
patch 5:     Convert all direct dereferences to use ptep_deref().

[1] https://lore.kernel.org/linux-mm/20230414130303.2345383-1-ryan.roberts@arm.com/

Thanks,
Ryan


Ryan Roberts (5):
  mm: vmalloc must set pte via arch code
  mm: damon must atomically clear young on ptes and pmds
  mm: Fix failure to unmap pte on highmem systems
  mm: Add new ptep_deref() helper to fully encapsulate pte_t
  mm: ptep_deref() conversion

 .../drm/i915/gem/selftests/i915_gem_mman.c    |   8 +-
 drivers/misc/sgi-gru/grufault.c               |   2 +-
 drivers/vfio/vfio_iommu_type1.c               |   7 +-
 drivers/xen/privcmd.c                         |   2 +-
 fs/proc/task_mmu.c                            |  33 +++---
 fs/userfaultfd.c                              |   6 +-
 include/linux/hugetlb.h                       |   2 +-
 include/linux/mm_inline.h                     |   2 +-
 include/linux/pgtable.h                       |  13 ++-
 kernel/events/uprobes.c                       |   2 +-
 mm/damon/ops-common.c                         |  18 ++-
 mm/damon/ops-common.h                         |   4 +-
 mm/damon/paddr.c                              |   6 +-
 mm/damon/vaddr.c                              |  14 ++-
 mm/filemap.c                                  |   2 +-
 mm/gup.c                                      |  21 ++--
 mm/highmem.c                                  |  12 +-
 mm/hmm.c                                      |   2 +-
 mm/huge_memory.c                              |   4 +-
 mm/hugetlb.c                                  |   2 +-
 mm/hugetlb_vmemmap.c                          |   6 +-
 mm/kasan/init.c                               |   9 +-
 mm/kasan/shadow.c                             |  10 +-
 mm/khugepaged.c                               |  24 ++--
 mm/ksm.c                                      |  22 ++--
 mm/madvise.c                                  |   6 +-
 mm/mapping_dirty_helpers.c                    |   4 +-
 mm/memcontrol.c                               |   4 +-
 mm/memory-failure.c                           |   6 +-
 mm/memory.c                                   | 103 +++++++++---------
 mm/mempolicy.c                                |   6 +-
 mm/migrate.c                                  |  14 ++-
 mm/migrate_device.c                           |  14 ++-
 mm/mincore.c                                  |   2 +-
 mm/mlock.c                                    |   6 +-
 mm/mprotect.c                                 |   8 +-
 mm/mremap.c                                   |   2 +-
 mm/page_table_check.c                         |   4 +-
 mm/page_vma_mapped.c                          |  26 +++--
 mm/pgtable-generic.c                          |   2 +-
 mm/rmap.c                                     |  32 +++---
 mm/sparse-vmemmap.c                           |   8 +-
 mm/swap_state.c                               |   4 +-
 mm/swapfile.c                                 |  16 +--
 mm/userfaultfd.c                              |   4 +-
 mm/vmalloc.c                                  |  11 +-
 mm/vmscan.c                                   |  14 ++-
 virt/kvm/kvm_main.c                           |   9 +-
 48 files changed, 302 insertions(+), 236 deletions(-)

--
2.25.1


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-05-11 12:58 Ryan Roberts
@ 2023-05-11 13:13 ` Ryan Roberts
  0 siblings, 0 replies; 414+ messages in thread
From: Ryan Roberts @ 2023-05-11 13:13 UTC (permalink / raw)
  To: Andrew Morton, Matthew Wilcox (Oracle),
	Kirill A. Shutemov, SeongJae Park
  Cc: linux-kernel, linux-mm, damon

My apologies for the noise: A blank line between Cc and Subject has broken the
subject and grouping in lore.

Please ignore this; I will resend.


On 11/05/2023 13:58, Ryan Roberts wrote:
> Date: Thu, 11 May 2023 11:38:28 +0100
> Subject: [PATCH v1 0/5] Encapsulate PTE contents from non-arch code
> 
> Hi All,
> 
> This series improves the encapsulation of pte entries by disallowing non-arch
> code from directly dereferencing pte_t pointers. Instead code must use a new
> helper, `pte_t ptep_deref(pte_t *ptep)`. By default, this helper does a direct
> dereference of the pointer, so generated code should be exactly the same. But
> its presence sets us up for arch code being able to override the default to
> "virtualize" the ptes without needing to maintain a shadow table.
> 
> I intend to take advantage of this for arm64 to enable use of its "contiguous
> bit" to coalesce multiple ptes into a single tlb entry, reducing pressure and
> improving performance. I have an RFC for the first part of this work at [1]. The
> cover letter there also explains the second part, which this series is enabling.
> 
> I intend to post an RFC for the contpte changes in due course, but it would be
> good to get the ball rolling on this enabler.
> 
> There are 2 reasons that I need the encapsulation:
> 
>   - Prevent leaking the arch-private PTE_CONT bit to the core code. If the core
>     code reads a pte that contains this bit, it could end up calling
>     set_pte_at() with the bit set which would confuse the implementation. So we
>     can always clear PTE_CONT in ptep_deref() (and ptep_get()) to avoid a leaky
>     abstraction.
>   - Contiguous ptes have a single access and dirty bit for the contiguous range.
>     So we need to "mix-in" those bits when the core is dereferencing a pte that
>     lies in the contig range. There is code that dereferences the pte then takes
>     different actions based on access/dirty (see e.g. write_protect_page()).
> 
> While ptep_get() and ptep_get_lockless() already exist, both of them are
> implemented using READ_ONCE() by default. While we could use ptep_get() instead
> of the new ptep_deref(), I didn't want to risk performance regression.
> Alternatively, all call sites that currently use ptep_get() that need the
> lockless behaviour could be upgraded to ptep_get_lockless() and ptep_get() could
> be downgraded to a simple dereference. That would be cleanest, but is a much
> bigger (and likely error prone) change because all the arch code would need to
> be updated for the new definitions of ptep_get().
> 
> The series is split up as follows:
> 
> patches 1-2: Fix bugs where code was _setting_ ptes directly, rather than using
>              set_pte_at() and friends.
> patch 3:     Fix highmem unmapping issue I spotted while doing the work.
> patch 4:     Introduce the new ptep_deref() helper with default implementation.
> patch 5:     Convert all direct dereferences to use ptep_deref().
> 
> [1] https://lore.kernel.org/linux-mm/20230414130303.2345383-1-ryan.roberts@arm.com/
> 
> Thanks,
> Ryan
> 
> 
> Ryan Roberts (5):
>   mm: vmalloc must set pte via arch code
>   mm: damon must atomically clear young on ptes and pmds
>   mm: Fix failure to unmap pte on highmem systems
>   mm: Add new ptep_deref() helper to fully encapsulate pte_t
>   mm: ptep_deref() conversion
> 
>  .../drm/i915/gem/selftests/i915_gem_mman.c    |   8 +-
>  drivers/misc/sgi-gru/grufault.c               |   2 +-
>  drivers/vfio/vfio_iommu_type1.c               |   7 +-
>  drivers/xen/privcmd.c                         |   2 +-
>  fs/proc/task_mmu.c                            |  33 +++---
>  fs/userfaultfd.c                              |   6 +-
>  include/linux/hugetlb.h                       |   2 +-
>  include/linux/mm_inline.h                     |   2 +-
>  include/linux/pgtable.h                       |  13 ++-
>  kernel/events/uprobes.c                       |   2 +-
>  mm/damon/ops-common.c                         |  18 ++-
>  mm/damon/ops-common.h                         |   4 +-
>  mm/damon/paddr.c                              |   6 +-
>  mm/damon/vaddr.c                              |  14 ++-
>  mm/filemap.c                                  |   2 +-
>  mm/gup.c                                      |  21 ++--
>  mm/highmem.c                                  |  12 +-
>  mm/hmm.c                                      |   2 +-
>  mm/huge_memory.c                              |   4 +-
>  mm/hugetlb.c                                  |   2 +-
>  mm/hugetlb_vmemmap.c                          |   6 +-
>  mm/kasan/init.c                               |   9 +-
>  mm/kasan/shadow.c                             |  10 +-
>  mm/khugepaged.c                               |  24 ++--
>  mm/ksm.c                                      |  22 ++--
>  mm/madvise.c                                  |   6 +-
>  mm/mapping_dirty_helpers.c                    |   4 +-
>  mm/memcontrol.c                               |   4 +-
>  mm/memory-failure.c                           |   6 +-
>  mm/memory.c                                   | 103 +++++++++---------
>  mm/mempolicy.c                                |   6 +-
>  mm/migrate.c                                  |  14 ++-
>  mm/migrate_device.c                           |  14 ++-
>  mm/mincore.c                                  |   2 +-
>  mm/mlock.c                                    |   6 +-
>  mm/mprotect.c                                 |   8 +-
>  mm/mremap.c                                   |   2 +-
>  mm/page_table_check.c                         |   4 +-
>  mm/page_vma_mapped.c                          |  26 +++--
>  mm/pgtable-generic.c                          |   2 +-
>  mm/rmap.c                                     |  32 +++---
>  mm/sparse-vmemmap.c                           |   8 +-
>  mm/swap_state.c                               |   4 +-
>  mm/swapfile.c                                 |  16 +--
>  mm/userfaultfd.c                              |   4 +-
>  mm/vmalloc.c                                  |  11 +-
>  mm/vmscan.c                                   |  14 ++-
>  virt/kvm/kvm_main.c                           |   9 +-
>  48 files changed, 302 insertions(+), 236 deletions(-)
> 
> --
> 2.25.1
> 


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH v2] uas: Add US_FL_NO_REPORT_OPCODES for JMicron JMS583Gen 2
@ 2023-03-12  6:52 Greg Kroah-Hartman
  2023-03-27 13:54 ` Yaroslav Furman
  0 siblings, 1 reply; 414+ messages in thread
From: Greg Kroah-Hartman @ 2023-03-12  6:52 UTC (permalink / raw)
  To: Yaroslav Furman; +Cc: Alan Stern, linux-usb, usb-storage, linux-kernel

On Sat, Mar 11, 2023 at 07:12:26PM +0200, Yaroslav Furman wrote:
> Just like other JMicron JMS5xx enclosures, it chokes on report-opcodes,
> let's avoid them.
> 
> Signed-off-by: Yaroslav Furman <yaro330@gmail.com>
> ---
>  drivers/usb/storage/unusual_uas.h | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
> index c7b763d6d102..1f8c9b16a0fb 100644
> --- a/drivers/usb/storage/unusual_uas.h
> +++ b/drivers/usb/storage/unusual_uas.h
> @@ -111,6 +111,13 @@ UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
>  		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
>  		US_FL_BROKEN_FUA),
>  
> +/* Reported by: Yaroslav Furman <yaro330@gmail.com> */
> +UNUSUAL_DEV(0x152d, 0x0583, 0x0000, 0x9999,
> +		"JMicron",
> +		"JMS583Gen 2",
> +		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
> +		US_FL_NO_REPORT_OPCODES),
> +
>  /* Reported-by: Thinh Nguyen <thinhn@synopsys.com> */
>  UNUSUAL_DEV(0x154b, 0xf00b, 0x0000, 0x9999,
>  		"PNY",
> -- 
> 2.39.2
> 

Hi,

This is the friendly patch-bot of Greg Kroah-Hartman.  You have sent him
a patch that has triggered this response.  He used to manually respond
to these common problems, but in order to save his sanity (he kept
writing the same thing over and over, yet to different people), I was
created.  Hopefully you will not take offence and will fix the problem
in your patch and resubmit it so that it can be accepted into the Linux
kernel tree.

You are receiving this message because of the following common error(s)
as indicated below:

- This looks like a new version of a previously submitted patch, but you
  did not list below the --- line any changes from the previous version.
  Please read the section entitled "The canonical patch format" in the
  kernel file, Documentation/process/submitting-patches.rst for what
  needs to be done here to properly describe this.

If you wish to discuss this problem further, or you have questions about
how to resolve this issue, please feel free to respond to this email and
Greg will reply once he has dug out from the pending patches received
from other developers.

thanks,

greg k-h's patch email bot

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
  2023-03-12  6:52 [PATCH v2] uas: Add US_FL_NO_REPORT_OPCODES for JMicron JMS583Gen 2 Greg Kroah-Hartman
@ 2023-03-27 13:54 ` Yaroslav Furman
  2023-03-27 14:19   ` Greg Kroah-Hartman
  0 siblings, 1 reply; 414+ messages in thread
From: Yaroslav Furman @ 2023-03-27 13:54 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: yaro330, Alan Stern, linux-usb, usb-storage, linux-kernel


Will this patch get ported to LTS trees? It applies cleanly.
Would love to see it in 6.1 and 5.15 trees.

6.1 is what my steam deck is going to start using soon-ish.


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-03-27 13:54 ` Yaroslav Furman
@ 2023-03-27 14:19   ` Greg Kroah-Hartman
  0 siblings, 0 replies; 414+ messages in thread
From: Greg Kroah-Hartman @ 2023-03-27 14:19 UTC (permalink / raw)
  To: Yaroslav Furman; +Cc: Alan Stern, linux-usb, usb-storage, linux-kernel

On Mon, Mar 27, 2023 at 04:54:22PM +0300, Yaroslav Furman wrote:
> 
> Will this patch get ported to LTS trees? It applies cleanly.
> Would love to see it in 6.1 and 5.15 trees.

What patch?

confused,

greg k-h

^ permalink raw reply	[flat|nested] 414+ messages in thread

* [PATCH v5 0/5] CXL Poison List Retrieval & Tracing
@ 2023-01-18 20:59 alison.schofield
  2023-01-27  1:59 ` Dan Williams
  0 siblings, 1 reply; 414+ messages in thread
From: alison.schofield @ 2023-01-18 20:59 UTC (permalink / raw)
  To: Dan Williams, Ira Weiny, Vishal Verma, Dave Jiang, Ben Widawsky,
	Steven Rostedt
  Cc: Alison Schofield, linux-cxl, linux-kernel

From: Alison Schofield <alison.schofield@intel.com>

**RESENDING this cover letter previously mis-threaded.

Changes in v5:
- Rebase on cxl/next 
- Use struct_size() to calc mbox cmd payload .min_out
- s/INTERNAL/INJECTED mocked poison record source
- Added Jonathan Reviewed-by tag on Patch 3

Link to v4:
https://lore.kernel.org/linux-cxl/cover.1671135967.git.alison.schofield@intel.com/

Add support for retrieving device poison lists and store the returned
error records as kernel trace events.

The handling of the poison list is guided by the CXL 3.0 Specification
Section 8.2.9.8.4.1. [1] 

Example, triggered by memdev:
$ echo 1 > /sys/bus/cxl/devices/mem3/trigger_poison_list
cxl_poison: memdev=mem3 pcidev=cxl_mem.3 region= region_uuid=00000000-0000-0000-0000-000000000000 dpa=0x0 length=0x40 source=Internal flags= overflow_time=0

Example, triggered by region:
$ echo 1 > /sys/bus/cxl/devices/region5/trigger_poison_list
cxl_poison: memdev=mem0 pcidev=cxl_mem.0 region=region5 region_uuid=bfcb7a29-890e-4a41-8236-fe22221fc75c dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
cxl_poison: memdev=mem1 pcidev=cxl_mem.1 region=region5 region_uuid=bfcb7a29-890e-4a41-8236-fe22221fc75c dpa=0x0 length=0x40 source=Internal flags= overflow_time=0

[1]: https://www.computeexpresslink.org/download-the-specification

Alison Schofield (5):
  cxl/mbox: Add GET_POISON_LIST mailbox command
  cxl/trace: Add TRACE support for CXL media-error records
  cxl/memdev: Add trigger_poison_list sysfs attribute
  cxl/region: Add trigger_poison_list sysfs attribute
  tools/testing/cxl: Mock support for Get Poison List

 Documentation/ABI/testing/sysfs-bus-cxl | 28 +++++++++
 drivers/cxl/core/mbox.c                 | 78 +++++++++++++++++++++++
 drivers/cxl/core/memdev.c               | 45 ++++++++++++++
 drivers/cxl/core/region.c               | 33 ++++++++++
 drivers/cxl/core/trace.h                | 83 +++++++++++++++++++++++++
 drivers/cxl/cxlmem.h                    | 69 +++++++++++++++++++-
 drivers/cxl/pci.c                       |  4 ++
 tools/testing/cxl/test/mem.c            | 42 +++++++++++++
 8 files changed, 381 insertions(+), 1 deletion(-)


base-commit: 589c3357370a596ef7c99c00baca8ac799fce531
-- 
2.37.3


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
  2023-01-18 20:59 [PATCH v5 0/5] CXL Poison List Retrieval & Tracing alison.schofield
@ 2023-01-27  1:59 ` Dan Williams
  2023-01-27 16:10   ` Alison Schofield
  0 siblings, 1 reply; 414+ messages in thread
From: Dan Williams @ 2023-01-27  1:59 UTC (permalink / raw)
  To: alison.schofield, Dan Williams, Ira Weiny, Vishal Verma,
	Dave Jiang, Ben Widawsky, Steven Rostedt
  Cc: Alison Schofield, linux-cxl, linux-kernel

alison.schofield@ wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> Subject: [PATCH v5 0/5] CXL Poison List Retrieval & Tracing
> 
> Changes in v5:
> - Rebase on cxl/next 
> - Use struct_size() to calc mbox cmd payload .min_out
> - s/INTERNAL/INJECTED mocked poison record source
> - Added Jonathan Reviewed-by tag on Patch 3
> 
> Link to v4:
> https://lore.kernel.org/linux-cxl/cover.1671135967.git.alison.schofield@intel.com/
> 
> Add support for retrieving device poison lists and store the returned
> error records as kernel trace events.
> 
> The handling of the poison list is guided by the CXL 3.0 Specification
> Section 8.2.9.8.4.1. [1] 
> 
> Example, triggered by memdev:
> $ echo 1 > /sys/bus/cxl/devices/mem3/trigger_poison_list
> cxl_poison: memdev=mem3 pcidev=cxl_mem.3 region= region_uuid=00000000-0000-0000-0000-000000000000 dpa=0x0 length=0x40 source=Internal flags= overflow_time=0

I think the pcidev= field wants to be called something like "host" or
"parent", because there is no strict requirement that a 'struct
cxl_memdev' is related to a 'struct pci_dev'. In fact in that example
"cxl_mem.3" is a 'struct platform_device'. Now that I think about it, I
think all CXL device events should be emitting the PCIe serial number
for the memdev.

I will look in the implementation, but do region= and region_uuid= get
populated when mem3 is a member of the region?

> 
> Example, triggered by region:
> $ echo 1 > /sys/bus/cxl/devices/region5/trigger_poison_list
> cxl_poison: memdev=mem0 pcidev=cxl_mem.0 region=region5 region_uuid=bfcb7a29-890e-4a41-8236-fe22221fc75c dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
> cxl_poison: memdev=mem1 pcidev=cxl_mem.1 region=region5 region_uuid=bfcb7a29-890e-4a41-8236-fe22221fc75c dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
> 
> [1]: https://www.computeexpresslink.org/download-the-specification
> 
> Alison Schofield (5):
>   cxl/mbox: Add GET_POISON_LIST mailbox command
>   cxl/trace: Add TRACE support for CXL media-error records
>   cxl/memdev: Add trigger_poison_list sysfs attribute
>   cxl/region: Add trigger_poison_list sysfs attribute
>   tools/testing/cxl: Mock support for Get Poison List
> 
>  Documentation/ABI/testing/sysfs-bus-cxl | 28 +++++++++
>  drivers/cxl/core/mbox.c                 | 78 +++++++++++++++++++++++
>  drivers/cxl/core/memdev.c               | 45 ++++++++++++++
>  drivers/cxl/core/region.c               | 33 ++++++++++
>  drivers/cxl/core/trace.h                | 83 +++++++++++++++++++++++++
>  drivers/cxl/cxlmem.h                    | 69 +++++++++++++++++++-
>  drivers/cxl/pci.c                       |  4 ++
>  tools/testing/cxl/test/mem.c            | 42 +++++++++++++
>  8 files changed, 381 insertions(+), 1 deletion(-)
> 
> 
> base-commit: 589c3357370a596ef7c99c00baca8ac799fce531
> -- 
> 2.37.3
> 



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-01-27  1:59 ` Dan Williams
@ 2023-01-27 16:10   ` Alison Schofield
  2023-01-27 19:16     ` Re: Dan Williams
  0 siblings, 1 reply; 414+ messages in thread
From: Alison Schofield @ 2023-01-27 16:10 UTC (permalink / raw)
  To: Dan Williams
  Cc: Ira Weiny, Vishal Verma, Dave Jiang, Ben Widawsky,
	Steven Rostedt, linux-cxl, linux-kernel

On Thu, Jan 26, 2023 at 05:59:03PM -0800, Dan Williams wrote:
> alison.schofield@ wrote:
> > From: Alison Schofield <alison.schofield@intel.com>
> > 
> > Subject: [PATCH v5 0/5] CXL Poison List Retrieval & Tracing
> > 
> > Changes in v5:
> > - Rebase on cxl/next 
> > - Use struct_size() to calc mbox cmd payload .min_out
> > - s/INTERNAL/INJECTED mocked poison record source
> > - Added Jonathan Reviewed-by tag on Patch 3
> > 
> > Link to v4:
> > https://lore.kernel.org/linux-cxl/cover.1671135967.git.alison.schofield@intel.com/
> > 
> > Add support for retrieving device poison lists and store the returned
> > error records as kernel trace events.
> > 
> > The handling of the poison list is guided by the CXL 3.0 Specification
> > Section 8.2.9.8.4.1. [1] 
> > 
> > Example, triggered by memdev:
> > $ echo 1 > /sys/bus/cxl/devices/mem3/trigger_poison_list
> > cxl_poison: memdev=mem3 pcidev=cxl_mem.3 region= region_uuid=00000000-0000-0000-0000-000000000000 dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
> 
> I think the pcidev= field wants to be called something like "host" or
> "parent", because there is no strict requirement that a 'struct
> cxl_memdev' is related to a 'struct pci_dev'. In fact in that example
> "cxl_mem.3" is a 'struct platform_device'. Now that I think about it, I
> think all CXL device events should be emitting the PCIe serial number
> for the memdev.
]

Will do, 'host' and add PCIe serial no.

> 
> I will look in the implementation, but do region= and region_uuid= get
> populated when mem3 is a member of the region?

Not always.
In the case above, where the trigger was by memdev, no.
Region= and region_uuid= (and in the follow-on patch, hpa=) only get
populated if the poison was triggered by region, like the case below.

It could be looked up for the by-memdev cases. Is that wanted?

Thanks for the reviews Dan!
> 
> > 
> > Example, triggered by region:
> > $ echo 1 > /sys/bus/cxl/devices/region5/trigger_poison_list
> > cxl_poison: memdev=mem0 pcidev=cxl_mem.0 region=region5 region_uuid=bfcb7a29-890e-4a41-8236-fe22221fc75c dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
> > cxl_poison: memdev=mem1 pcidev=cxl_mem.1 region=region5 region_uuid=bfcb7a29-890e-4a41-8236-fe22221fc75c dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
> > 
> > [1]: https://www.computeexpresslink.org/download-the-specification
> > 
> > Alison Schofield (5):
> >   cxl/mbox: Add GET_POISON_LIST mailbox command
> >   cxl/trace: Add TRACE support for CXL media-error records
> >   cxl/memdev: Add trigger_poison_list sysfs attribute
> >   cxl/region: Add trigger_poison_list sysfs attribute
> >   tools/testing/cxl: Mock support for Get Poison List
> > 
> >  Documentation/ABI/testing/sysfs-bus-cxl | 28 +++++++++
> >  drivers/cxl/core/mbox.c                 | 78 +++++++++++++++++++++++
> >  drivers/cxl/core/memdev.c               | 45 ++++++++++++++
> >  drivers/cxl/core/region.c               | 33 ++++++++++
> >  drivers/cxl/core/trace.h                | 83 +++++++++++++++++++++++++
> >  drivers/cxl/cxlmem.h                    | 69 +++++++++++++++++++-
> >  drivers/cxl/pci.c                       |  4 ++
> >  tools/testing/cxl/test/mem.c            | 42 +++++++++++++
> >  8 files changed, 381 insertions(+), 1 deletion(-)
> > 
> > 
> > base-commit: 589c3357370a596ef7c99c00baca8ac799fce531
> > -- 
> > 2.37.3
> > 
> 
> 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-01-27 16:10   ` Alison Schofield
@ 2023-01-27 19:16     ` Dan Williams
  2023-01-27 21:36       ` Re: Alison Schofield
  0 siblings, 1 reply; 414+ messages in thread
From: Dan Williams @ 2023-01-27 19:16 UTC (permalink / raw)
  To: Alison Schofield, Dan Williams
  Cc: Ira Weiny, Vishal Verma, Dave Jiang, Ben Widawsky,
	Steven Rostedt, linux-cxl, linux-kernel

Alison Schofield wrote:
> On Thu, Jan 26, 2023 at 05:59:03PM -0800, Dan Williams wrote:
> > alison.schofield@ wrote:
> > > From: Alison Schofield <alison.schofield@intel.com>
> > > 
> > > Subject: [PATCH v5 0/5] CXL Poison List Retrieval & Tracing
> > > 
> > > Changes in v5:
> > > - Rebase on cxl/next 
> > > - Use struct_size() to calc mbox cmd payload .min_out
> > > - s/INTERNAL/INJECTED mocked poison record source
> > > - Added Jonathan Reviewed-by tag on Patch 3
> > > 
> > > Link to v4:
> > > https://lore.kernel.org/linux-cxl/cover.1671135967.git.alison.schofield@intel.com/
> > > 
> > > Add support for retrieving device poison lists and store the returned
> > > error records as kernel trace events.
> > > 
> > > The handling of the poison list is guided by the CXL 3.0 Specification
> > > Section 8.2.9.8.4.1. [1] 
> > > 
> > > Example, triggered by memdev:
> > > $ echo 1 > /sys/bus/cxl/devices/mem3/trigger_poison_list
> > > cxl_poison: memdev=mem3 pcidev=cxl_mem.3 region= region_uuid=00000000-0000-0000-0000-000000000000 dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
> > 
> > I think the pcidev= field wants to be called something like "host" or
> > "parent", because there is no strict requirement that a 'struct
> > cxl_memdev' is related to a 'struct pci_dev'. In fact in that example
> > "cxl_mem.3" is a 'struct platform_device'. Now that I think about it, I
> > think all CXL device events should be emitting the PCIe serial number
> > for the memdev.
> ]
> 
> Will do, 'host' and add PCIe serial no.
> 
> > 
> > I will look in the implementation, but do region= and region_uuid= get
> > populated when mem3 is a member of the region?
> 
> Not always.
> In the case above, where the trigger was by memdev, no.
> Region= and region_uuid= (and in the follow-on patch, hpa=) only get
> populated if the poison was triggered by region, like the case below.
> 
> It could be looked up for the by memdev cases. Is that wanted?

Just trying to understand the semantics. However, I do think it makes sense
for a memdev trigger to look up information on all impacted regions
across all of the device's DPA, and for a region trigger to look up
all memdevs, but bounded by the DPA that contributes to that
region. I just want to avoid someone having to trigger the region to get
extra information that was readily available from a memdev listing.

> 
> Thanks for the reviews Dan!
> > 
> > > 
> > > Example, triggered by region:
> > > $ echo 1 > /sys/bus/cxl/devices/region5/trigger_poison_list
> > > cxl_poison: memdev=mem0 pcidev=cxl_mem.0 region=region5 region_uuid=bfcb7a29-890e-4a41-8236-fe22221fc75c dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
> > > cxl_poison: memdev=mem1 pcidev=cxl_mem.1 region=region5 region_uuid=bfcb7a29-890e-4a41-8236-fe22221fc75c dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
> > > 
> > > [1]: https://www.computeexpresslink.org/download-the-specification
> > > 
> > > Alison Schofield (5):
> > >   cxl/mbox: Add GET_POISON_LIST mailbox command
> > >   cxl/trace: Add TRACE support for CXL media-error records
> > >   cxl/memdev: Add trigger_poison_list sysfs attribute
> > >   cxl/region: Add trigger_poison_list sysfs attribute
> > >   tools/testing/cxl: Mock support for Get Poison List
> > > 
> > >  Documentation/ABI/testing/sysfs-bus-cxl | 28 +++++++++
> > >  drivers/cxl/core/mbox.c                 | 78 +++++++++++++++++++++++
> > >  drivers/cxl/core/memdev.c               | 45 ++++++++++++++
> > >  drivers/cxl/core/region.c               | 33 ++++++++++
> > >  drivers/cxl/core/trace.h                | 83 +++++++++++++++++++++++++
> > >  drivers/cxl/cxlmem.h                    | 69 +++++++++++++++++++-
> > >  drivers/cxl/pci.c                       |  4 ++
> > >  tools/testing/cxl/test/mem.c            | 42 +++++++++++++
> > >  8 files changed, 381 insertions(+), 1 deletion(-)
> > > 
> > > 
> > > base-commit: 589c3357370a596ef7c99c00baca8ac799fce531
> > > -- 
> > > 2.37.3
> > > 
> > 
> > 



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-01-27 19:16     ` Re: Dan Williams
@ 2023-01-27 21:36       ` Alison Schofield
  2023-01-27 22:04         ` Re: Dan Williams
  0 siblings, 1 reply; 414+ messages in thread
From: Alison Schofield @ 2023-01-27 21:36 UTC (permalink / raw)
  To: Dan Williams
  Cc: Ira Weiny, Vishal Verma, Dave Jiang, Ben Widawsky,
	Steven Rostedt, linux-cxl, linux-kernel

On Fri, Jan 27, 2023 at 11:16:49AM -0800, Dan Williams wrote:
> Alison Schofield wrote:
> > On Thu, Jan 26, 2023 at 05:59:03PM -0800, Dan Williams wrote:
> > > alison.schofield@ wrote:
> > > > From: Alison Schofield <alison.schofield@intel.com>
> > > > 
> > > > Subject: [PATCH v5 0/5] CXL Poison List Retrieval & Tracing
> > > > 
> > > > Changes in v5:
> > > > - Rebase on cxl/next 
> > > > - Use struct_size() to calc mbox cmd payload .min_out
> > > > - s/INTERNAL/INJECTED mocked poison record source
> > > > - Added Jonathan Reviewed-by tag on Patch 3
> > > > 
> > > > Link to v4:
> > > > https://lore.kernel.org/linux-cxl/cover.1671135967.git.alison.schofield@intel.com/
> > > > 
> > > > Add support for retrieving device poison lists and store the returned
> > > > error records as kernel trace events.
> > > > 
> > > > The handling of the poison list is guided by the CXL 3.0 Specification
> > > > Section 8.2.9.8.4.1. [1] 
> > > > 
> > > > Example, triggered by memdev:
> > > > $ echo 1 > /sys/bus/cxl/devices/mem3/trigger_poison_list
> > > > cxl_poison: memdev=mem3 pcidev=cxl_mem.3 region= region_uuid=00000000-0000-0000-0000-000000000000 dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
> > > 
> > > I think the pcidev= field wants to be called something like "host" or
> > > "parent", because there is no strict requirement that a 'struct
> > > cxl_memdev' is related to a 'struct pci_dev'. In fact in that example
> > > "cxl_mem.3" is a 'struct platform_device'. Now that I think about it, I
> > > think all CXL device events should be emitting the PCIe serial number
> > > for the memdev.
> > ]
> > 
> > Will do, 'host' and add PCIe serial no.
> > 
> > > 
> > > I will look in the implementation, but do region= and region_uuid= get
> > > populated when mem3 is a member of the region?
> > 
> > Not always.
> > In the case above, where the trigger was by memdev, no.
> > Region= and region_uuid= (and in the follow-on patch, hpa=) only get
> > populated if the poison was triggered by region, like the case below.
> > 
> > It could be looked up for the by memdev cases. Is that wanted?
> 
> Just trying to understand the semantics. However, I do think it makes sense
> for a memdev trigger to lookup information on all impacted regions
> across all of the device's DPA and the region trigger makes sense to
> lookup all memdevs, but bounded by the DPA that contributes to that
> region. I just want to avoid someone having to trigger the region to get
> extra information that was readily available from a memdev listing.
> 

Dan - 

Confirming my take-away from this email, and our chat:

Remove the by-region trigger_poison_list option entirely. User space
needs to trigger by-memdev the memdevs participating in the region and
filter those events by region.

Add the region info (region name, uuid) to the TRACE_EVENTs when the
poisoned DPA is part of any region.

Alison

> > 
> > Thanks for the reviews Dan!
> > > 
> > > > 
> > > > Example, triggered by region:
> > > > $ echo 1 > /sys/bus/cxl/devices/region5/trigger_poison_list
> > > > cxl_poison: memdev=mem0 pcidev=cxl_mem.0 region=region5 region_uuid=bfcb7a29-890e-4a41-8236-fe22221fc75c dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
> > > > cxl_poison: memdev=mem1 pcidev=cxl_mem.1 region=region5 region_uuid=bfcb7a29-890e-4a41-8236-fe22221fc75c dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
> > > > 
> > > > [1]: https://www.computeexpresslink.org/download-the-specification
> > > > 
> > > > Alison Schofield (5):
> > > >   cxl/mbox: Add GET_POISON_LIST mailbox command
> > > >   cxl/trace: Add TRACE support for CXL media-error records
> > > >   cxl/memdev: Add trigger_poison_list sysfs attribute
> > > >   cxl/region: Add trigger_poison_list sysfs attribute
> > > >   tools/testing/cxl: Mock support for Get Poison List
> > > > 
> > > >  Documentation/ABI/testing/sysfs-bus-cxl | 28 +++++++++
> > > >  drivers/cxl/core/mbox.c                 | 78 +++++++++++++++++++++++
> > > >  drivers/cxl/core/memdev.c               | 45 ++++++++++++++
> > > >  drivers/cxl/core/region.c               | 33 ++++++++++
> > > >  drivers/cxl/core/trace.h                | 83 +++++++++++++++++++++++++
> > > >  drivers/cxl/cxlmem.h                    | 69 +++++++++++++++++++-
> > > >  drivers/cxl/pci.c                       |  4 ++
> > > >  tools/testing/cxl/test/mem.c            | 42 +++++++++++++
> > > >  8 files changed, 381 insertions(+), 1 deletion(-)
> > > > 
> > > > 
> > > > base-commit: 589c3357370a596ef7c99c00baca8ac799fce531
> > > > -- 
> > > > 2.37.3
> > > > 
> > > 
> > > 
> 
> 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-01-27 21:36       ` Re: Alison Schofield
@ 2023-01-27 22:04         ` Dan Williams
  0 siblings, 0 replies; 414+ messages in thread
From: Dan Williams @ 2023-01-27 22:04 UTC (permalink / raw)
  To: Alison Schofield, Dan Williams
  Cc: Ira Weiny, Vishal Verma, Dave Jiang, Ben Widawsky,
	Steven Rostedt, linux-cxl, linux-kernel

Alison Schofield wrote:
> On Fri, Jan 27, 2023 at 11:16:49AM -0800, Dan Williams wrote:
> > Alison Schofield wrote:
> > > On Thu, Jan 26, 2023 at 05:59:03PM -0800, Dan Williams wrote:
> > > > alison.schofield@ wrote:
> > > > > From: Alison Schofield <alison.schofield@intel.com>
> > > > > 
> > > > > Subject: [PATCH v5 0/5] CXL Poison List Retrieval & Tracing
> > > > > 
> > > > > Changes in v5:
> > > > > - Rebase on cxl/next 
> > > > > - Use struct_size() to calc mbox cmd payload .min_out
> > > > > - s/INTERNAL/INJECTED mocked poison record source
> > > > > - Added Jonathan Reviewed-by tag on Patch 3
> > > > > 
> > > > > Link to v4:
> > > > > https://lore.kernel.org/linux-cxl/cover.1671135967.git.alison.schofield@intel.com/
> > > > > 
> > > > > Add support for retrieving device poison lists and store the returned
> > > > > error records as kernel trace events.
> > > > > 
> > > > > The handling of the poison list is guided by the CXL 3.0 Specification
> > > > > Section 8.2.9.8.4.1. [1] 
> > > > > 
> > > > > Example, triggered by memdev:
> > > > > $ echo 1 > /sys/bus/cxl/devices/mem3/trigger_poison_list
> > > > > cxl_poison: memdev=mem3 pcidev=cxl_mem.3 region= region_uuid=00000000-0000-0000-0000-000000000000 dpa=0x0 length=0x40 source=Internal flags= overflow_time=0
> > > > 
> > > > I think the pcidev= field wants to be called something like "host" or
> > > > "parent", because there is no strict requirement that a 'struct
> > > > cxl_memdev' is related to a 'struct pci_dev'. In fact in that example
> > > > "cxl_mem.3" is a 'struct platform_device'. Now that I think about it, I
> > > > think all CXL device events should be emitting the PCIe serial number
> > > > for the memdev.
> > > ]
> > > 
> > > Will do, 'host' and add PCIe serial no.
> > > 
> > > > 
> > > > I will look in the implementation, but do region= and region_uuid= get
> > > > populated when mem3 is a member of the region?
> > > 
> > > Not always.
> > > In the case above, where the trigger was by memdev, no.
> > > Region= and region_uuid= (and in the follow-on patch, hpa=) only get
> > > populated if the poison was triggered by region, like the case below.
> > > 
> > > It could be looked up for the by memdev cases. Is that wanted?
> > 
> > Just trying to understand the semantics. However, I do think it makes sense
> > for a memdev trigger to lookup information on all impacted regions
> > across all of the device's DPA and the region trigger makes sense to
> > lookup all memdevs, but bounded by the DPA that contributes to that
> > region. I just want to avoid someone having to trigger the region to get
> > extra information that was readily available from a memdev listing.
> > 
> 
> Dan - 
> 
> Confirming my take-away from this email, and our chat:
> 
> Remove the by-region trigger_poison_list option entirely. User space
> needs to trigger by-memdev the memdevs participating in the region and
> filter those events by region.
> 
> Add the region info (region name, uuid) to the TRACE_EVENTs when the
> poisoned DPA is part of any region.

That's what I was thinking, yes. So the internals of
cxl_mem_get_poison() will take the cxl_region_rwsem for read and compare
the device's endpoint decoder settings against the media error records
to do the region (and later HPA) lookup.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2022-11-21 11:11 Denis Arefev
  2022-11-21 14:28 ` Jason Yan
  0 siblings, 1 reply; 414+ messages in thread
From: Denis Arefev @ 2022-11-21 11:11 UTC (permalink / raw)
  To: Anil Gurumurthy
  Cc: Sudarsana Kalluru, James E.J. Bottomley, Martin K. Petersen,
	linux-scsi, linux-kernel, trufanov, vfh

Date: Mon, 21 Nov 2022 13:29:03 +0300
Subject: [PATCH] scsi:bfa: Eliminated buffer overflow

Buffer 'cmd->adapter_hwpath' of size 32 accessed at
bfad_bsg.c:101:103 can overflow, since its index 'i'
can have value 32 that is out of range.

Signed-off-by: Denis Arefev <arefev@swemel.ru>
---
 drivers/scsi/bfa/bfad_bsg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c
index be8dfbe13e90..78615ffc62ef 100644
--- a/drivers/scsi/bfa/bfad_bsg.c
+++ b/drivers/scsi/bfa/bfad_bsg.c
@@ -98,9 +98,9 @@ bfad_iocmd_ioc_get_info(struct bfad_s *bfad, void *cmd)
 
 	/* set adapter hw path */
 	strcpy(iocmd->adapter_hwpath, bfad->pci_name);
-	for (i = 0; iocmd->adapter_hwpath[i] != ':' && i < BFA_STRING_32; i++)
+	for (i = 0; iocmd->adapter_hwpath[i] != ':' && i < BFA_STRING_32-2; i++)
 		;
-	for (; iocmd->adapter_hwpath[++i] != ':' && i < BFA_STRING_32; )
+	for (; iocmd->adapter_hwpath[++i] != ':' && i < BFA_STRING_32-1; )
 		;
 	iocmd->adapter_hwpath[i] = '\0';
 	iocmd->status = BFA_STATUS_OK;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2022-11-21 11:11 Denis Arefev
@ 2022-11-21 14:28 ` Jason Yan
  0 siblings, 0 replies; 414+ messages in thread
From: Jason Yan @ 2022-11-21 14:28 UTC (permalink / raw)
  To: Denis Arefev, Anil Gurumurthy
  Cc: Sudarsana Kalluru, James E.J. Bottomley, Martin K. Petersen,
	linux-scsi, linux-kernel, trufanov, vfh

You may need a real subject line, not subject text in the body of the email.

type "git help send-email" if you don't know how to use it.

On 2022/11/21 19:11, Denis Arefev wrote:
> Date: Mon, 21 Nov 2022 13:29:03 +0300
> Subject: [PATCH] scsi:bfa: Eliminated buffer overflow
> 
> Buffer 'cmd->adapter_hwpath' of size 32 accessed at
> bfad_bsg.c:101:103 can overflow, since its index 'i'
> can have value 32 that is out of range.
> 
> Signed-off-by: Denis Arefev <arefev@swemel.ru>
> ---
>   drivers/scsi/bfa/bfad_bsg.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c
> index be8dfbe13e90..78615ffc62ef 100644
> --- a/drivers/scsi/bfa/bfad_bsg.c
> +++ b/drivers/scsi/bfa/bfad_bsg.c
> @@ -98,9 +98,9 @@ bfad_iocmd_ioc_get_info(struct bfad_s *bfad, void *cmd)
>   
>   	/* set adapter hw path */
>   	strcpy(iocmd->adapter_hwpath, bfad->pci_name);
> -	for (i = 0; iocmd->adapter_hwpath[i] != ':' && i < BFA_STRING_32; i++)
> +	for (i = 0; iocmd->adapter_hwpath[i] != ':' && i < BFA_STRING_32-2; i++)
>   		;
> -	for (; iocmd->adapter_hwpath[++i] != ':' && i < BFA_STRING_32; )
> +	for (; iocmd->adapter_hwpath[++i] != ':' && i < BFA_STRING_32-1; )
>   		;
>   	iocmd->adapter_hwpath[i] = '\0';
>   	iocmd->status = BFA_STATUS_OK;
> 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2022-09-14 13:12 Amjad Ouled-Ameur
  2022-09-14 13:18 ` Amjad Ouled-Ameur
  0 siblings, 1 reply; 414+ messages in thread
From: Amjad Ouled-Ameur @ 2022-09-14 13:12 UTC (permalink / raw)
  To: Rob Herring
  Cc: Amjad Ouled-Ameur, Krzysztof Kozlowski, Matthias Brugger,
	devicetree, linux-arm-kernel, linux-mediatek, linux-kernel

Subject: [PATCH] arm64: dts: mediatek: mt8183: remove thermal zones without
 trips.

Thermal zones without trip point are not registered by thermal core.

tzts1 ~ tzts6 zones of mt8183 were initially introduced for test purposes
only and are not supposed to remain in the DT.

Remove the zones above and keep only cpu_thermal.

Signed-off-by: Amjad Ouled-Ameur <aouledameur@baylibre.com>
---
 arch/arm64/boot/dts/mediatek/mt8183.dtsi | 57 ------------------------
 1 file changed, 57 deletions(-)

diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
index 9d32871973a2..f65fae8939de 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
@@ -1182,63 +1182,6 @@ THERMAL_NO_LIMIT
 					};
 				};
 			};
-
-			/* The tzts1 ~ tzts6 don't need to polling */
-			/* The tzts1 ~ tzts6 don't need to thermal throttle */
-
-			tzts1: tzts1 {
-				polling-delay-passive = <0>;
-				polling-delay = <0>;
-				thermal-sensors = <&thermal 1>;
-				sustainable-power = <5000>;
-				trips {};
-				cooling-maps {};
-			};
-
-			tzts2: tzts2 {
-				polling-delay-passive = <0>;
-				polling-delay = <0>;
-				thermal-sensors = <&thermal 2>;
-				sustainable-power = <5000>;
-				trips {};
-				cooling-maps {};
-			};
-
-			tzts3: tzts3 {
-				polling-delay-passive = <0>;
-				polling-delay = <0>;
-				thermal-sensors = <&thermal 3>;
-				sustainable-power = <5000>;
-				trips {};
-				cooling-maps {};
-			};
-
-			tzts4: tzts4 {
-				polling-delay-passive = <0>;
-				polling-delay = <0>;
-				thermal-sensors = <&thermal 4>;
-				sustainable-power = <5000>;
-				trips {};
-				cooling-maps {};
-			};
-
-			tzts5: tzts5 {
-				polling-delay-passive = <0>;
-				polling-delay = <0>;
-				thermal-sensors = <&thermal 5>;
-				sustainable-power = <5000>;
-				trips {};
-				cooling-maps {};
-			};
-
-			tztsABB: tztsABB {
-				polling-delay-passive = <0>;
-				polling-delay = <0>;
-				thermal-sensors = <&thermal 6>;
-				sustainable-power = <5000>;
-				trips {};
-				cooling-maps {};
-			};
 		};
 
 		pwm0: pwm@1100e000 {
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2022-09-14 13:12 Amjad Ouled-Ameur
@ 2022-09-14 13:18 ` Amjad Ouled-Ameur
  0 siblings, 0 replies; 414+ messages in thread
From: Amjad Ouled-Ameur @ 2022-09-14 13:18 UTC (permalink / raw)
  To: Rob Herring
  Cc: Krzysztof Kozlowski, Matthias Brugger, devicetree,
	linux-arm-kernel, linux-mediatek, linux-kernel

Hi,

The subject has not been parsed correctly, I resent a proper patch here:

https://patchwork.kernel.org/project/linux-mediatek/patch/20220914131339.18348-1-aouledameur@baylibre.com/


Sorry for the noise.

Regards,

Amjad

On 9/14/22 15:12, Amjad Ouled-Ameur wrote:
> Subject: [PATCH] arm64: dts: mediatek: mt8183: remove thermal zones without
>   trips.
>
> Thermal zones without trip point are not registered by thermal core.
>
> tzts1 ~ tzts6 zones of mt8183 were intially introduced for test-purpose
> only but are not supposed to remain on DT.
>
> Remove the zones above and keep only cpu_thermal.
>
> Signed-off-by: Amjad Ouled-Ameur <aouledameur@baylibre.com>
> ---
>   arch/arm64/boot/dts/mediatek/mt8183.dtsi | 57 ------------------------
>   1 file changed, 57 deletions(-)
>
> diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
> index 9d32871973a2..f65fae8939de 100644
> --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
> +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
> @@ -1182,63 +1182,6 @@ THERMAL_NO_LIMIT
>   					};
>   				};
>   			};
> -
> -			/* The tzts1 ~ tzts6 don't need to polling */
> -			/* The tzts1 ~ tzts6 don't need to thermal throttle */
> -
> -			tzts1: tzts1 {
> -				polling-delay-passive = <0>;
> -				polling-delay = <0>;
> -				thermal-sensors = <&thermal 1>;
> -				sustainable-power = <5000>;
> -				trips {};
> -				cooling-maps {};
> -			};
> -
> -			tzts2: tzts2 {
> -				polling-delay-passive = <0>;
> -				polling-delay = <0>;
> -				thermal-sensors = <&thermal 2>;
> -				sustainable-power = <5000>;
> -				trips {};
> -				cooling-maps {};
> -			};
> -
> -			tzts3: tzts3 {
> -				polling-delay-passive = <0>;
> -				polling-delay = <0>;
> -				thermal-sensors = <&thermal 3>;
> -				sustainable-power = <5000>;
> -				trips {};
> -				cooling-maps {};
> -			};
> -
> -			tzts4: tzts4 {
> -				polling-delay-passive = <0>;
> -				polling-delay = <0>;
> -				thermal-sensors = <&thermal 4>;
> -				sustainable-power = <5000>;
> -				trips {};
> -				cooling-maps {};
> -			};
> -
> -			tzts5: tzts5 {
> -				polling-delay-passive = <0>;
> -				polling-delay = <0>;
> -				thermal-sensors = <&thermal 5>;
> -				sustainable-power = <5000>;
> -				trips {};
> -				cooling-maps {};
> -			};
> -
> -			tztsABB: tztsABB {
> -				polling-delay-passive = <0>;
> -				polling-delay = <0>;
> -				thermal-sensors = <&thermal 6>;
> -				sustainable-power = <5000>;
> -				trips {};
> -				cooling-maps {};
> -			};
>   		};
>   
>   		pwm0: pwm@1100e000 {

^ permalink raw reply	[flat|nested] 414+ messages in thread

* [PATCH bpf-next 1/2] cpuidle/rcu: Making arch_cpu_idle and rcu_idle_exit noinstr
@ 2022-05-15 20:36 Jiri Olsa
  2023-05-20  9:47 ` Ze Gao
  0 siblings, 1 reply; 414+ messages in thread
From: Jiri Olsa @ 2022-05-15 20:36 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Masami Hiramatsu, Paul E. McKenney
  Cc: netdev, bpf, lkml, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Steven Rostedt

Making arch_cpu_idle and rcu_idle_exit noinstr. Both functions run
in rcu 'not watching' context and if there's tracer attached to
them, which uses rcu (e.g. kprobe multi interface) it will hit RCU
warning like:

  [    3.017540] WARNING: suspicious RCU usage
  ...
  [    3.018363]  kprobe_multi_link_handler+0x68/0x1c0
  [    3.018364]  ? kprobe_multi_link_handler+0x3e/0x1c0
  [    3.018366]  ? arch_cpu_idle_dead+0x10/0x10
  [    3.018367]  ? arch_cpu_idle_dead+0x10/0x10
  [    3.018371]  fprobe_handler.part.0+0xab/0x150
  [    3.018374]  0xffffffffa00080c8
  [    3.018393]  ? arch_cpu_idle+0x5/0x10
  [    3.018398]  arch_cpu_idle+0x5/0x10
  [    3.018399]  default_idle_call+0x59/0x90
  [    3.018401]  do_idle+0x1c3/0x1d0

The call path is following:

default_idle_call
  rcu_idle_enter
  arch_cpu_idle
  rcu_idle_exit

The arch_cpu_idle and rcu_idle_exit are the only ones from the above
path that are traceable and cause this problem on my setup.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/kernel/process.c | 2 +-
 kernel/rcu/tree.c         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b370767f5b19..1345cb0124a6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -720,7 +720,7 @@ void arch_cpu_idle_dead(void)
 /*
  * Called from the generic idle code.
  */
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
 {
 	x86_idle();
 }
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a4b8189455d5..20d529722f51 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -896,7 +896,7 @@ static void noinstr rcu_eqs_exit(bool user)
  * If you add or remove a call to rcu_idle_exit(), be sure to test with
  * CONFIG_RCU_EQS_DEBUG=y.
  */
-void rcu_idle_exit(void)
+void noinstr rcu_idle_exit(void)
 {
 	unsigned long flags;
 
-- 
2.35.3


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* (no subject)
  2022-05-15 20:36 [PATCH bpf-next 1/2] cpuidle/rcu: Making arch_cpu_idle and rcu_idle_exit noinstr Jiri Olsa
@ 2023-05-20  9:47 ` Ze Gao
  2023-05-21  3:58   ` Yonghong Song
  2023-05-21  8:08   ` Re: Jiri Olsa
  0 siblings, 2 replies; 414+ messages in thread
From: Ze Gao @ 2023-05-20  9:47 UTC (permalink / raw)
  To: jolsa
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Hao Luo,
	John Fastabend, KP Singh, Martin KaFai Lau, Masami Hiramatsu,
	Song Liu, Stanislav Fomichev, Steven Rostedt, Yonghong Song, bpf,
	linux-kernel, linux-trace-kernel, kafai, kpsingh, netdev,
	paulmck, songliubraving, Ze Gao


Hi Jiri,

Would you consider adding an rcu_is_watching check to solve this
from the viewpoint of kprobe_multi_link_prog_run itself? Accounting
of missed runs can be added as well to improve observability.

Regards,
Ze


-----------------
From 29fd3cd713e65461325c2703cf5246a6fae5d4fe Mon Sep 17 00:00:00 2001
From: Ze Gao <zegao@tencent.com>
Date: Sat, 20 May 2023 17:32:05 +0800
Subject: [PATCH] bpf: kprobe_multi runs bpf progs only when rcu_is_watching

From the perspective of kprobe_multi_link_prog_run, any traceable
functions can be attached while bpf progs need special care and
ought to be under rcu protection. To solve the likely rcu lockdep
warnings once and for all, when (future) functions in the idle path
are attached accidentally, we had better pay some cost to check at
least on the kernel side, and return when rcu is not watching, which
helps to avoid any unpredictable results.

Signed-off-by: Ze Gao <zegao@tencent.com>
---
 kernel/trace/bpf_trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 9a050e36dc6c..3e6ea7274765 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2622,7 +2622,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
 	struct bpf_run_ctx *old_run_ctx;
 	int err;
 
-	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
+	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1 || !rcu_is_watching())) {
 		err = 0;
 		goto out;
 	}
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2023-05-20  9:47 ` Ze Gao
@ 2023-05-21  3:58   ` Yonghong Song
  2023-05-21 15:10     ` Re: Ze Gao
  2023-05-21  8:08   ` Re: Jiri Olsa
  1 sibling, 1 reply; 414+ messages in thread
From: Yonghong Song @ 2023-05-21  3:58 UTC (permalink / raw)
  To: Ze Gao, jolsa
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Hao Luo,
	John Fastabend, KP Singh, Martin KaFai Lau, Masami Hiramatsu,
	Song Liu, Stanislav Fomichev, Steven Rostedt, Yonghong Song, bpf,
	linux-kernel, linux-trace-kernel, kafai, kpsingh, netdev,
	paulmck, songliubraving, Ze Gao



On 5/20/23 2:47 AM, Ze Gao wrote:
> 
> Hi Jiri,
> 
> Would you like to consider to add rcu_is_watching check in
> to solve this from the viewpoint of kprobe_multi_link_prog_run
> itself? And accounting of missed runs can be added as well
> to imporve observability.
> 
> Regards,
> Ze
> 
> 
> -----------------
>  From 29fd3cd713e65461325c2703cf5246a6fae5d4fe Mon Sep 17 00:00:00 2001
> From: Ze Gao <zegao@tencent.com>
> Date: Sat, 20 May 2023 17:32:05 +0800
> Subject: [PATCH] bpf: kprobe_multi runs bpf progs only when rcu_is_watching
> 
>  From the perspective of kprobe_multi_link_prog_run, any traceable
> functions can be attached while bpf progs need specical care and
> ought to be under rcu protection. To solve the likely rcu lockdep
> warns once for good, when (future) functions in idle path were
> attached accidentally, we better paying some cost to check at least
> in kernel-side, and return when rcu is not watching, which helps
> to avoid any unpredictable results.

kprobe_multi/fprobe share the same set of attachments with fentry.
Currently, fentry does not filter with !rcu_is_watching, maybe
because this is an extreme corner case. Not sure whether it is
worthwhile or not.

Maybe if you can give a concrete example (e.g., attachment point)
with current code base to show what the issue you encountered and
it will make it easier to judge whether adding !rcu_is_watching()
is necessary or not.

> 
> Signed-off-by: Ze Gao <zegao@tencent.com>
> ---
>   kernel/trace/bpf_trace.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 9a050e36dc6c..3e6ea7274765 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -2622,7 +2622,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
>   	struct bpf_run_ctx *old_run_ctx;
>   	int err;
>   
> -	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
> +	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1 || !rcu_is_watching())) {
>   		err = 0;
>   		goto out;
>   	}

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-05-21  3:58   ` Yonghong Song
@ 2023-05-21 15:10     ` Ze Gao
  2023-05-21 20:26       ` Re: Jiri Olsa
  0 siblings, 1 reply; 414+ messages in thread
From: Ze Gao @ 2023-05-21 15:10 UTC (permalink / raw)
  To: Yonghong Song
  Cc: jolsa, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Hao Luo, John Fastabend, KP Singh, Martin KaFai Lau,
	Masami Hiramatsu, Song Liu, Stanislav Fomichev, Steven Rostedt,
	Yonghong Song, bpf, linux-kernel, linux-trace-kernel, kafai,
	kpsingh, netdev, paulmck, songliubraving, Ze Gao

> kprobe_multi/fprobe share the same set of attachments with fentry.
> Currently, fentry does not filter with !rcu_is_watching, maybe
> because this is an extreme corner case. Not sure whether it is
> worthwhile or not.

Agreed, it's rare, especially after Peter's patches, which narrow down
the rcu eqs regions in the idle path and reduce the chance of any
traceable functions being called in between.

However, from RCU's perspective, we ought in theory to check
rcu_is_watching whenever there's a chance our code will run in the idle
path and we also need rcu to be alive. We also cannot simply make
assumptions about any future changes in the idle path.
You know, just like what was hit in this thread.

> Maybe if you can give a concrete example (e.g., attachment point)
> with current code base to show what the issue you encountered and
> it will make it easier to judge whether adding !rcu_is_watching()
> is necessary or not.

I can reproduce the likely warnings on v6.1.18, where arch_cpu_idle is
traceable, but not on the latest version so far. But as I stated above,
in theory we need it. So here is a gentle ping :) .

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-05-21 15:10     ` Re: Ze Gao
@ 2023-05-21 20:26       ` Jiri Olsa
  2023-05-22  1:36         ` Re: Masami Hiramatsu
  2023-05-22  2:07         ` Re: Ze Gao
  0 siblings, 2 replies; 414+ messages in thread
From: Jiri Olsa @ 2023-05-21 20:26 UTC (permalink / raw)
  To: Ze Gao
  Cc: Yonghong Song, Alexei Starovoitov, Andrii Nakryiko,
	Daniel Borkmann, Hao Luo, John Fastabend, KP Singh,
	Martin KaFai Lau, Masami Hiramatsu, Song Liu, Stanislav Fomichev,
	Steven Rostedt, Yonghong Song, bpf, linux-kernel,
	linux-trace-kernel, kafai, kpsingh, netdev, paulmck,
	songliubraving, Ze Gao

On Sun, May 21, 2023 at 11:10:16PM +0800, Ze Gao wrote:
> > kprobe_multi/fprobe share the same set of attachments with fentry.
> > Currently, fentry does not filter with !rcu_is_watching, maybe
> > because this is an extreme corner case. Not sure whether it is
> > worthwhile or not.
> 
> Agreed, it's rare, especially after Peter's patches which push narrow
> down rcu eqs regions
> in the idle path and reduce the chance of any traceable functions
> happening in between.
> 
> However, from RCU's perspective, we ought to check if rcu_is_watching
> theoretically
> when there's a chance our code will run in the idle path and also we
> need rcu to be alive,
> And also we cannot simply make assumptions for any future changes in
> the idle path.
> You know, just like what was hit in the thread.
> 
> > Maybe if you can give a concrete example (e.g., attachment point)
> > with current code base to show what the issue you encountered and
> > it will make it easier to judge whether adding !rcu_is_watching()
> > is necessary or not.
> 
> I can reproduce likely warnings on v6.1.18 where arch_cpu_idle is
> traceable but not on the latest version
> so far. But as I state above, in theory we need it. So here is a
> gentle ping :) .

hum, this change [1] added rcu_is_watching check to ftrace_test_recursion_trylock,
which we use in fprobe_handler and is coming to fprobe_exit_handler in [2]

I might be missing something, but it seems like we don't need another
rcu_is_watching call on kprobe_multi level

jirka


[1] d099dbfd3306 cpuidle: tracing: Warn about !rcu_is_watching()
[2] https://lore.kernel.org/bpf/20230517034510.15639-4-zegao@tencent.com/

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-05-21 20:26       ` Re: Jiri Olsa
@ 2023-05-22  1:36         ` Masami Hiramatsu
  2023-05-22  2:07         ` Re: Ze Gao
  1 sibling, 0 replies; 414+ messages in thread
From: Masami Hiramatsu @ 2023-05-22  1:36 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Ze Gao, Yonghong Song, Alexei Starovoitov, Andrii Nakryiko,
	Daniel Borkmann, Hao Luo, John Fastabend, KP Singh,
	Martin KaFai Lau, Masami Hiramatsu, Song Liu, Stanislav Fomichev,
	Steven Rostedt, Yonghong Song, bpf, linux-kernel,
	linux-trace-kernel, kafai, kpsingh, netdev, paulmck,
	songliubraving, Ze Gao

On Sun, 21 May 2023 22:26:37 +0200
Jiri Olsa <olsajiri@gmail.com> wrote:

> On Sun, May 21, 2023 at 11:10:16PM +0800, Ze Gao wrote:
> > > kprobe_multi/fprobe share the same set of attachments with fentry.
> > > Currently, fentry does not filter with !rcu_is_watching, maybe
> > > because this is an extreme corner case. Not sure whether it is
> > > worthwhile or not.
> > 
> > Agreed, it's rare, especially after Peter's patches which push narrow
> > down rcu eqs regions
> > in the idle path and reduce the chance of any traceable functions
> > happening in between.
> > 
> > However, from RCU's perspective, we ought to check if rcu_is_watching
> > theoretically
> > when there's a chance our code will run in the idle path and also we
> > need rcu to be alive,
> > And also we cannot simply make assumptions for any future changes in
> > the idle path.
> > You know, just like what was hit in the thread.
> > 
> > > Maybe if you can give a concrete example (e.g., attachment point)
> > > with current code base to show what the issue you encountered and
> > > it will make it easier to judge whether adding !rcu_is_watching()
> > > is necessary or not.
> > 
> > I can reproduce likely warnings on v6.1.18 where arch_cpu_idle is
> > traceable but not on the latest version
> > so far. But as I state above, in theory we need it. So here is a
> > gentle ping :) .
> 
> hum, this change [1] added rcu_is_watching check to ftrace_test_recursion_trylock,
> which we use in fprobe_handler and is coming to fprobe_exit_handler in [2]
> 
> I might be missing something, but it seems like we don't need another
> rcu_is_watching call on kprobe_multi level

Good point! OK, then it seems we don't need it. The rethook continues to
use the rcu_is_watching() because it is also used from kprobes, but the
kprobe_multi doesn't need it.

Thank you,

> 
> jirka
> 
> 
> [1] d099dbfd3306 cpuidle: tracing: Warn about !rcu_is_watching()
> [2] https://lore.kernel.org/bpf/20230517034510.15639-4-zegao@tencent.com/


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-05-21 20:26       ` Re: Jiri Olsa
  2023-05-22  1:36         ` Re: Masami Hiramatsu
@ 2023-05-22  2:07         ` Ze Gao
  2023-05-23  4:38           ` Re: Yonghong Song
  2023-05-23  5:30           ` Re: Masami Hiramatsu
  1 sibling, 2 replies; 414+ messages in thread
From: Ze Gao @ 2023-05-22  2:07 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Yonghong Song, Alexei Starovoitov, Andrii Nakryiko,
	Daniel Borkmann, Hao Luo, John Fastabend, KP Singh,
	Martin KaFai Lau, Masami Hiramatsu, Song Liu, Stanislav Fomichev,
	Steven Rostedt, Yonghong Song, bpf, linux-kernel,
	linux-trace-kernel, kafai, kpsingh, netdev, paulmck,
	songliubraving, Ze Gao

Oops, I missed that. Thanks for pointing it out; I had thought it was a
conditional use of rcu_is_watching before.

One last point, I think we should double check on this
     "fentry does not filter with !rcu_is_watching"
as quoted from Yonghong and argue whether it needs
the same check for fentry as well.

Regards,
Ze

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-05-22  2:07         ` Re: Ze Gao
@ 2023-05-23  4:38           ` Yonghong Song
  2023-05-23  5:30           ` Re: Masami Hiramatsu
  1 sibling, 0 replies; 414+ messages in thread
From: Yonghong Song @ 2023-05-23  4:38 UTC (permalink / raw)
  To: Ze Gao, Jiri Olsa
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Hao Luo,
	John Fastabend, KP Singh, Martin KaFai Lau, Masami Hiramatsu,
	Song Liu, Stanislav Fomichev, Steven Rostedt, Yonghong Song, bpf,
	linux-kernel, linux-trace-kernel, kafai, kpsingh, netdev,
	paulmck, songliubraving, Ze Gao



On 5/21/23 7:07 PM, Ze Gao wrote:
> Oops, I missed that. Thanks for pointing that out, which I thought is
> conditional use of rcu_is_watching before.
> 
> One last point, I think we should double check on this
>       "fentry does not filter with !rcu_is_watching"
> as quoted from Yonghong and argue whether it needs
> the same check for fentry as well.

I would suggest that we address rcu_is_watching issue for fentry
only if we do have a reproducible case to show something goes wrong...

> 
> Regards,
> Ze

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-05-22  2:07         ` Re: Ze Gao
  2023-05-23  4:38           ` Re: Yonghong Song
@ 2023-05-23  5:30           ` Masami Hiramatsu
  2023-05-23  6:59             ` Re: Paul E. McKenney
  1 sibling, 1 reply; 414+ messages in thread
From: Masami Hiramatsu @ 2023-05-23  5:30 UTC (permalink / raw)
  To: Ze Gao
  Cc: Jiri Olsa, Yonghong Song, Alexei Starovoitov, Andrii Nakryiko,
	Daniel Borkmann, Hao Luo, John Fastabend, KP Singh,
	Martin KaFai Lau, Masami Hiramatsu, Song Liu, Stanislav Fomichev,
	Steven Rostedt, Yonghong Song, bpf, linux-kernel,
	linux-trace-kernel, kafai, kpsingh, netdev, paulmck,
	songliubraving, Ze Gao

On Mon, 22 May 2023 10:07:42 +0800
Ze Gao <zegao2021@gmail.com> wrote:

> Oops, I missed that. Thanks for pointing that out, which I thought is
> conditional use of rcu_is_watching before.
> 
> One last point, I think we should double check on this
>      "fentry does not filter with !rcu_is_watching"
> as quoted from Yonghong and argue whether it needs
> the same check for fentry as well.

rcu_is_watching() comment says;

 * if the current CPU is not in its idle loop or is in an interrupt or
 * NMI handler, return true.

Thus it returns *fault* if the current CPU is in the idle loop and not
any interrupt (including NMI) context. This means if any traceable function
is called from idle loop, it can be !rcu_is_watching(). I meant, this is
'context' based check, thus fentry can not filter out that some commonly
used functions is called from that context but it can be detected.

Thank you,

> 
> Regards,
> Ze


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-05-23  5:30           ` Re: Masami Hiramatsu
@ 2023-05-23  6:59             ` Paul E. McKenney
  2023-05-25  0:13               ` Re: Masami Hiramatsu
  0 siblings, 1 reply; 414+ messages in thread
From: Paul E. McKenney @ 2023-05-23  6:59 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Ze Gao, Jiri Olsa, Yonghong Song, Alexei Starovoitov,
	Andrii Nakryiko, Daniel Borkmann, Hao Luo, John Fastabend,
	KP Singh, Martin KaFai Lau, Song Liu, Stanislav Fomichev,
	Steven Rostedt, Yonghong Song, bpf, linux-kernel,
	linux-trace-kernel, kafai, kpsingh, netdev, songliubraving,
	Ze Gao

On Tue, May 23, 2023 at 01:30:19PM +0800, Masami Hiramatsu wrote:
> On Mon, 22 May 2023 10:07:42 +0800
> Ze Gao <zegao2021@gmail.com> wrote:
> 
> > Oops, I missed that. Thanks for pointing that out, which I thought is
> > conditional use of rcu_is_watching before.
> > 
> > One last point, I think we should double check on this
> >      "fentry does not filter with !rcu_is_watching"
> > as quoted from Yonghong and argue whether it needs
> > the same check for fentry as well.
> 
> rcu_is_watching() comment says;
> 
>  * if the current CPU is not in its idle loop or is in an interrupt or
>  * NMI handler, return true.
> 
> Thus it returns *fault* if the current CPU is in the idle loop and not
> any interrupt(including NMI) context. This means if any tracable function
> is called from idle loop, it can be !rcu_is_watching(). I meant, this is
> 'context' based check, thus fentry can not filter out that some commonly
> used functions is called from that context but it can be detected.

It really does return false (rather than faulting?) if the current CPU
is deep within the idle loop.

In addition, the recent x86/entry rework (thank you Peter and
Thomas!) mean that the "idle loop" is quite restricted, as can be
seen by the invocations of ct_cpuidle_enter() and ct_cpuidle_exit().
For example, in default_idle_call(), these are immediately before and
after the call to arch_cpu_idle().

Would the following help?  Or am I missing your point?

							Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1449cb69a0e0..fae9b4e29c93 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -679,10 +679,14 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
 /**
  * rcu_is_watching - see if RCU thinks that the current CPU is not idle
  *
- * Return true if RCU is watching the running CPU, which means that this
- * CPU can safely enter RCU read-side critical sections.  In other words,
- * if the current CPU is not in its idle loop or is in an interrupt or
- * NMI handler, return true.
+ * Return @true if RCU is watching the running CPU and @false otherwise.
+ * An @true return means that this CPU can safely enter RCU read-side
+ * critical sections.
+ *
+ * More specifically, if the current CPU is not deep within its idle
+ * loop, return @true.  Note that rcu_is_watching() will return @true if
+ * invoked from an interrupt or NMI handler, even if that interrupt or
+ * NMI interrupted the CPU while it was deep within its idle loop.
  *
  * Make notrace because it can be called by the internal functions of
  * ftrace, and making this notrace removes unnecessary recursion calls.

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2023-05-23  6:59             ` Re: Paul E. McKenney
@ 2023-05-25  0:13               ` Masami Hiramatsu
  0 siblings, 0 replies; 414+ messages in thread
From: Masami Hiramatsu @ 2023-05-25  0:13 UTC (permalink / raw)
  To: paulmck
  Cc: Ze Gao, Jiri Olsa, Yonghong Song, Alexei Starovoitov,
	Andrii Nakryiko, Daniel Borkmann, Hao Luo, John Fastabend,
	KP Singh, Martin KaFai Lau, Song Liu, Stanislav Fomichev,
	Steven Rostedt, Yonghong Song, bpf, linux-kernel,
	linux-trace-kernel, kafai, kpsingh, netdev, songliubraving,
	Ze Gao

On Mon, 22 May 2023 23:59:28 -0700
"Paul E. McKenney" <paulmck@kernel.org> wrote:

> On Tue, May 23, 2023 at 01:30:19PM +0800, Masami Hiramatsu wrote:
> > On Mon, 22 May 2023 10:07:42 +0800
> > Ze Gao <zegao2021@gmail.com> wrote:
> > 
> > > Oops, I missed that. Thanks for pointing that out, which I thought is
> > > conditional use of rcu_is_watching before.
> > > 
> > > One last point, I think we should double check on this
> > >      "fentry does not filter with !rcu_is_watching"
> > > as quoted from Yonghong and argue whether it needs
> > > the same check for fentry as well.
> > 
> > rcu_is_watching() comment says;
> > 
> >  * if the current CPU is not in its idle loop or is in an interrupt or
> >  * NMI handler, return true.
> > 
> > Thus it returns *fault* if the current CPU is in the idle loop and not
> > any interrupt(including NMI) context. This means if any tracable function
> > is called from idle loop, it can be !rcu_is_watching(). I meant, this is
> > 'context' based check, thus fentry can not filter out that some commonly
> > used functions is called from that context but it can be detected.
> 
> It really does return false (rather than faulting?) if the current CPU
> is deep within the idle loop.
> 
> In addition, the recent x86/entry rework (thank you Peter and
> Thomas!) mean that the "idle loop" is quite restricted, as can be
> seen by the invocations of ct_cpuidle_enter() and ct_cpuidle_exit().
> For example, in default_idle_call(), these are immediately before and
> after the call to arch_cpu_idle().

Thanks! I also found that default_idle_call() is small enough, and
this does not seem to happen with fentry because there are no commonly
used functions on that path.

> 
> Would the following help?  Or am I missing your point?

Yes, thank you for the update!

> 
> 							Thanx, Paul
> 
> ------------------------------------------------------------------------
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 1449cb69a0e0..fae9b4e29c93 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -679,10 +679,14 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
>  /**
>   * rcu_is_watching - see if RCU thinks that the current CPU is not idle
>   *
> - * Return true if RCU is watching the running CPU, which means that this
> - * CPU can safely enter RCU read-side critical sections.  In other words,
> - * if the current CPU is not in its idle loop or is in an interrupt or
> - * NMI handler, return true.
> + * Return @true if RCU is watching the running CPU and @false otherwise.
> + * An @true return means that this CPU can safely enter RCU read-side
> + * critical sections.
> + *
> + * More specifically, if the current CPU is not deep within its idle
> + * loop, return @true.  Note that rcu_is_watching() will return @true if
> + * invoked from an interrupt or NMI handler, even if that interrupt or
> + * NMI interrupted the CPU while it was deep within its idle loop.
>   *
>   * Make notrace because it can be called by the internal functions of
>   * ftrace, and making this notrace removes unnecessary recursion calls.


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-05-20  9:47 ` Ze Gao
  2023-05-21  3:58   ` Yonghong Song
@ 2023-05-21  8:08   ` Jiri Olsa
  2023-05-21 10:09     ` Re: Masami Hiramatsu
  1 sibling, 1 reply; 414+ messages in thread
From: Jiri Olsa @ 2023-05-21  8:08 UTC (permalink / raw)
  To: Ze Gao
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Hao Luo,
	John Fastabend, KP Singh, Martin KaFai Lau, Masami Hiramatsu,
	Song Liu, Stanislav Fomichev, Steven Rostedt, Yonghong Song, bpf,
	linux-kernel, linux-trace-kernel, kafai, kpsingh, netdev,
	paulmck, songliubraving, Ze Gao

On Sat, May 20, 2023 at 05:47:24PM +0800, Ze Gao wrote:
> 
> Hi Jiri,
> 
> Would you like to consider to add rcu_is_watching check in
> to solve this from the viewpoint of kprobe_multi_link_prog_run

I think this was discussed in here:
  https://lore.kernel.org/bpf/20230321020103.13494-1-laoar.shao@gmail.com/

and was considered a bug, there's a fix mentioned later in the thread

there's also this recent patchset:
  https://lore.kernel.org/bpf/20230517034510.15639-3-zegao@tencent.com/

that solves related problems

> itself? And accounting of missed runs can be added as well
> to imporve observability.

right, we count fprobe->nmissed but it's not exposed, we should allow
to get 'missed' stats from both fprobe and kprobe_multi later, which
is missing now, will check

thanks,
jirka

> 
> Regards,
> Ze
> 
> 
> -----------------
> From 29fd3cd713e65461325c2703cf5246a6fae5d4fe Mon Sep 17 00:00:00 2001
> From: Ze Gao <zegao@tencent.com>
> Date: Sat, 20 May 2023 17:32:05 +0800
> Subject: [PATCH] bpf: kprobe_multi runs bpf progs only when rcu_is_watching
> 
> From the perspective of kprobe_multi_link_prog_run, any traceable
> functions can be attached while bpf progs need specical care and
> ought to be under rcu protection. To solve the likely rcu lockdep
> warns once for good, when (future) functions in idle path were
> attached accidentally, we better paying some cost to check at least
> in kernel-side, and return when rcu is not watching, which helps
> to avoid any unpredictable results.
> 
> Signed-off-by: Ze Gao <zegao@tencent.com>
> ---
>  kernel/trace/bpf_trace.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 9a050e36dc6c..3e6ea7274765 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -2622,7 +2622,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
>  	struct bpf_run_ctx *old_run_ctx;
>  	int err;
>  
> -	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
> +	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1 || !rcu_is_watching())) {
>  		err = 0;
>  		goto out;
>  	}
> -- 
> 2.40.1
> 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-05-21  8:08   ` Re: Jiri Olsa
@ 2023-05-21 10:09     ` Masami Hiramatsu
  2023-05-21 14:19       ` Re: Ze Gao
  0 siblings, 1 reply; 414+ messages in thread
From: Masami Hiramatsu @ 2023-05-21 10:09 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Ze Gao, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Hao Luo, John Fastabend, KP Singh, Martin KaFai Lau,
	Masami Hiramatsu, Song Liu, Stanislav Fomichev, Steven Rostedt,
	Yonghong Song, bpf, linux-kernel, linux-trace-kernel, kafai,
	kpsingh, netdev, paulmck, songliubraving, Ze Gao

On Sun, 21 May 2023 10:08:46 +0200
Jiri Olsa <olsajiri@gmail.com> wrote:

> On Sat, May 20, 2023 at 05:47:24PM +0800, Ze Gao wrote:
> > 
> > Hi Jiri,
> > 
> > Would you like to consider to add rcu_is_watching check in
> > to solve this from the viewpoint of kprobe_multi_link_prog_run
> 
> I think this was discussed in here:
>   https://lore.kernel.org/bpf/20230321020103.13494-1-laoar.shao@gmail.com/
> 
> and was considered a bug, there's fix mentioned later in the thread
> 
> there's also this recent patchset:
>   https://lore.kernel.org/bpf/20230517034510.15639-3-zegao@tencent.com/
> 
> that solves related problems

I think this rcu_is_watching() is a bit different issue. This rcu_is_watching()
check is required if the kprobe_multi_link_prog_run() uses any RCU API.
E.g. rethook_try_get() also checks rcu_is_watching() because it uses
call_rcu().

Thank you,

> 
> > itself? And accounting of missed runs can be added as well
> > to imporve observability.
> 
> right, we count fprobe->nmissed but it's not exposed, we should allow
> to get 'missed' stats from both fprobe and kprobe_multi later, which
> is missing now, will check
> 
> thanks,
> jirka
> 
> > 
> > Regards,
> > Ze
> > 
> > 
> > -----------------
> > From 29fd3cd713e65461325c2703cf5246a6fae5d4fe Mon Sep 17 00:00:00 2001
> > From: Ze Gao <zegao@tencent.com>
> > Date: Sat, 20 May 2023 17:32:05 +0800
> > Subject: [PATCH] bpf: kprobe_multi runs bpf progs only when rcu_is_watching
> > 
> > From the perspective of kprobe_multi_link_prog_run, any traceable
> > functions can be attached while bpf progs need specical care and
> > ought to be under rcu protection. To solve the likely rcu lockdep
> > warns once for good, when (future) functions in idle path were
> > attached accidentally, we better paying some cost to check at least
> > in kernel-side, and return when rcu is not watching, which helps
> > to avoid any unpredictable results.
> > 
> > Signed-off-by: Ze Gao <zegao@tencent.com>
> > ---
> >  kernel/trace/bpf_trace.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > index 9a050e36dc6c..3e6ea7274765 100644
> > --- a/kernel/trace/bpf_trace.c
> > +++ b/kernel/trace/bpf_trace.c
> > @@ -2622,7 +2622,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
> >  	struct bpf_run_ctx *old_run_ctx;
> >  	int err;
> >  
> > -	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
> > +	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1 || !rcu_is_watching())) {
> >  		err = 0;
> >  		goto out;
> >  	}
> > -- 
> > 2.40.1
> > 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2023-05-21 10:09     ` Re: Masami Hiramatsu
@ 2023-05-21 14:19       ` Ze Gao
  0 siblings, 0 replies; 414+ messages in thread
From: Ze Gao @ 2023-05-21 14:19 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Jiri Olsa, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Hao Luo, John Fastabend, KP Singh, Martin KaFai Lau, Song Liu,
	Stanislav Fomichev, Steven Rostedt, Yonghong Song, bpf,
	linux-kernel, linux-trace-kernel, kafai, kpsingh, netdev,
	paulmck, songliubraving, Ze Gao

On Sun, May 21, 2023 at 6:09 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Sun, 21 May 2023 10:08:46 +0200
> Jiri Olsa <olsajiri@gmail.com> wrote:
>
> > On Sat, May 20, 2023 at 05:47:24PM +0800, Ze Gao wrote:
> > >
> > > Hi Jiri,
> > >
> > > Would you like to consider to add rcu_is_watching check in
> > > to solve this from the viewpoint of kprobe_multi_link_prog_run
> >
> > I think this was discussed in here:
> >   https://lore.kernel.org/bpf/20230321020103.13494-1-laoar.shao@gmail.com/
> >
> > and was considered a bug, there's fix mentioned later in the thread
> >
> > there's also this recent patchset:
> >   https://lore.kernel.org/bpf/20230517034510.15639-3-zegao@tencent.com/
> >
> > that solves related problems
>
> I think this rcu_is_watching() is a bit different issue. This rcu_is_watching()
> check is required if the kprobe_multi_link_prog_run() uses any RCU API.
> E.g. rethook_try_get() is also checks rcu_is_watching() because it uses
> call_rcu().

Yes, that's my point!

Regards,
Ze

>
> >
> > > itself? And accounting of missed runs can be added as well
> > > to imporve observability.
> >
> > right, we count fprobe->nmissed but it's not exposed, we should allow
> > to get 'missed' stats from both fprobe and kprobe_multi later, which
> > is missing now, will check
> >
> > thanks,
> > jirka
> >
> > >
> > > Regards,
> > > Ze
> > >
> > >
> > > -----------------
> > > From 29fd3cd713e65461325c2703cf5246a6fae5d4fe Mon Sep 17 00:00:00 2001
> > > From: Ze Gao <zegao@tencent.com>
> > > Date: Sat, 20 May 2023 17:32:05 +0800
> > > Subject: [PATCH] bpf: kprobe_multi runs bpf progs only when rcu_is_watching
> > >
> > > From the perspective of kprobe_multi_link_prog_run, any traceable
> > > functions can be attached while bpf progs need specical care and
> > > ought to be under rcu protection. To solve the likely rcu lockdep
> > > warns once for good, when (future) functions in idle path were
> > > attached accidentally, we better paying some cost to check at least
> > > in kernel-side, and return when rcu is not watching, which helps
> > > to avoid any unpredictable results.
> > >
> > > Signed-off-by: Ze Gao <zegao@tencent.com>
> > > ---
> > >  kernel/trace/bpf_trace.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > > index 9a050e36dc6c..3e6ea7274765 100644
> > > --- a/kernel/trace/bpf_trace.c
> > > +++ b/kernel/trace/bpf_trace.c
> > > @@ -2622,7 +2622,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
> > >     struct bpf_run_ctx *old_run_ctx;
> > >     int err;
> > >
> > > -   if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
> > > +   if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1 || !rcu_is_watching())) {
> > >             err = 0;
> > >             goto out;
> > >     }
> > > --
> > > 2.40.1
> > >
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <CANiq72k+5Rdj7i3Df2dcE6_OPYPXK3z5EWLKnY56sSMz4G3OvA@mail.gmail.com>]

[parent not found: <CAABZP2z64aYWfVSdXHaQopWc+BAbJJUGqtrju2iWER3DDTDFWg@mail.gmail.com>]

[parent not found: <20220406170012.GO4285@paulmck-ThinkPad-P17-Gen-1>]

[parent not found: <87pmls6nt7.fsf@mpe.ellerman.id.au>]

[parent not found: <87k0bz7i1s.fsf@mpe.ellerman.id.au>]

* (no subject)
       [not found]       ` <87k0bz7i1s.fsf@mpe.ellerman.id.au>
@ 2022-04-13  5:11         ` Nicholas Piggin
  2022-04-22 15:53           ` Thomas Gleixner
  0 siblings, 1 reply; 414+ messages in thread
From: Nicholas Piggin @ 2022-04-13  5:11 UTC (permalink / raw)
  To: Michael Ellerman, paulmck, Zhouyi Zhou
  Cc: linuxppc-dev, Miguel Ojeda, rcu, Daniel Lezcano, Thomas Gleixner,
	linux-kernel, Viresh Kumar

+Daniel, Thomas, Viresh

Subject: Re: rcu_sched self-detected stall on CPU

Excerpts from Michael Ellerman's message of April 9, 2022 12:42 am:
> Michael Ellerman <mpe@ellerman.id.au> writes:
>> "Paul E. McKenney" <paulmck@kernel.org> writes:
>>> On Wed, Apr 06, 2022 at 05:31:10PM +0800, Zhouyi Zhou wrote:
>>>> Hi
>>>> 
>>>> I can reproduce it in a ppc virtual cloud server provided by Oregon
>>>> State University.  Following is what I do:
>>>> 1) curl -l https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/snapshot/linux-5.18-rc1.tar.gz
>>>> -o linux-5.18-rc1.tar.gz
>>>> 2) tar zxf linux-5.18-rc1.tar.gz
>>>> 3) cp config linux-5.18-rc1/.config
>>>> 4) cd linux-5.18-rc1
>>>> 5) make vmlinux -j 8
>>>> 6) qemu-system-ppc64 -kernel vmlinux -nographic -vga none -no-reboot
>>>> -smp 2 (QEMU 4.2.1)
>>>> 7) after 12 rounds, the bug got reproduced:
>>>> (http://154.223.142.244/logs/20220406/qemu.log.txt)
>>>
>>> Just to make sure, are you both seeing the same thing?  Last I knew,
>>> Zhouyi was chasing an RCU-tasks issue that appears only in kernels
>>> built with CONFIG_PROVE_RCU=y, which Miguel does not have set.  Or did
>>> I miss something?
>>>
>>> Miguel is instead seeing an RCU CPU stall warning where RCU's grace-period
>>> kthread slept for three milliseconds, but did not wake up for more than
>>> 20 seconds.  This kthread would normally have awakened on CPU 1, but
>>> CPU 1 looks to me to be very unhealthy, as can be seen in your console
>>> output below (but maybe my idea of what is healthy for powerpc systems
>>> is outdated).  Please see also the inline annotations.
>>>
>>> Thoughts from the PPC guys?
>>
>> I haven't seen it in my testing. But using Miguel's config I can
>> reproduce it seemingly on every boot.
>>
>> For me it bisects to:
>>
>>   35de589cb879 ("powerpc/time: improve decrementer clockevent processing")
>>
>> Which seems plausible.
>>
>> Reverting that on mainline makes the bug go away.
>>
>> I don't see an obvious bug in the diff, but I could be wrong, or the old
>> code was papering over an existing bug?
>>
>> I'll try and work out what it is about Miguel's config that exposes
>> this vs our defconfig, that might give us a clue.
> 
> It's CONFIG_HIGH_RES_TIMERS=n which triggers the stall.
> 
> I can reproduce just with:
> 
>   $ make ppc64le_guest_defconfig
>   $ ./scripts/config -d HIGH_RES_TIMERS
> 
> We have no defconfigs that disable HIGH_RES_TIMERS, I didn't even
> realise you could disable it TBH :)
> 
> The Rust CI has it disabled because I copied that from the x86 defconfig
> they were using back when I added the Rust support. I think that was
> meant to be a stripped down fast config for CI, but the result is it's
> just using a badly tested combination which is not helpful.
> 
> So I'll send a patch to turn HIGH_RES_TIMERS on for the Rust CI, and we
> can debug this further without blocking them.

So we traced the problem down to possibly a misunderstanding between 
decrementer clock event device and core code.

The decrementer is only oneshot*ish*. It actually needs to either be 
reprogrammed or shut down otherwise it just continues to cause 
interrupts.

Before commit 35de589cb879, it was sort of two-shot. The initial 
interrupt at the programmed time would set its internal next_tb variable 
to ~0 and call the ->event_handler(). If that did not set_next_event or 
stop the timer, the interrupt will fire again immediately, notice 
next_tb is ~0, and only then stop the decrementer interrupt.

So that was already kind of ugly, this patch just turned it into a hang.

The problem happens when the tick is stopped with an event still 
pending, then tick_nohz_handler() is called, but it bails out because 
tick_stopped == 1 so the device never gets programmed again, and so it 
keeps firing.

How to fix it? Before commit a7cba02deced, powerpc's decrementer was 
really oneshot, but we would like to avoid doing that because it requires 
additional programming of the hardware on each timer interrupt. We have 
the ONESHOT_STOPPED state which seems to be just about what we want.

Did the ONESHOT_STOPPED patch just miss this case, or is there a reason 
we don't stop it here? This patch seems to fix the hang (not heavily
tested though).
 
Thanks,
Nick

---
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 2d76c91b85de..7e13a55b6b71 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1364,9 +1364,11 @@ static void tick_nohz_handler(struct clock_event_device *dev)
 	tick_sched_do_timer(ts, now);
 	tick_sched_handle(ts, regs);
 
-	/* No need to reprogram if we are running tickless  */
-	if (unlikely(ts->tick_stopped))
+	if (unlikely(ts->tick_stopped)) {
+		/* If we are tickless, change the clock event to stopped */
+		tick_program_event(KTIME_MAX, 1);
 		return;
+	}
 
 	hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
 	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2022-04-13  5:11         ` Nicholas Piggin
@ 2022-04-22 15:53           ` Thomas Gleixner
  2022-04-23  2:29             ` Re: Nicholas Piggin
  0 siblings, 1 reply; 414+ messages in thread
From: Thomas Gleixner @ 2022-04-22 15:53 UTC (permalink / raw)
  To: Nicholas Piggin, Michael Ellerman, paulmck, Zhouyi Zhou
  Cc: linuxppc-dev, Miguel Ojeda, rcu, Daniel Lezcano, linux-kernel,
	Viresh Kumar

On Wed, Apr 13 2022 at 15:11, Nicholas Piggin wrote:
> So we traced the problem down to possibly a misunderstanding between 
> decrementer clock event device and core code.
>
> The decrementer is only oneshot*ish*. It actually needs to either be 
> reprogrammed or shut down otherwise it just continues to cause 
> interrupts.

I always thought that PPC had sane timers. That's really disillusioning.

> Before commit 35de589cb879, it was sort of two-shot. The initial 
> interrupt at the programmed time would set its internal next_tb variable 
> to ~0 and call the ->event_handler(). If that did not set_next_event or 
> stop the timer, the interrupt will fire again immediately, notice 
> next_tb is ~0, and only then stop the decrementer interrupt.
>
> So that was already kind of ugly, this patch just turned it into a hang.
>
> The problem happens when the tick is stopped with an event still 
> pending, then tick_nohz_handler() is called, but it bails out because 
> tick_stopped == 1 so the device never gets programmed again, and so it 
> keeps firing.
>
> How to fix it? Before commit a7cba02deced, powerpc's decrementer was 
> really oneshot, but we would like to avoid doing that because it requires 
> additional programming of the hardware on each timer interrupt. We have 
> the ONESHOT_STOPPED state which seems to be just about what we want.
>
> Did the ONESHOT_STOPPED patch just miss this case, or is there a reason 
> we don't stop it here? This patch seems to fix the hang (not heavily
> tested though).

This was definitely overlooked, but it's arguable it is not required
for real oneshot clockevent devices. This should only handle the case
where the interrupt was already pending.

The ONESHOT_STOPPED state was introduced to handle the case where the
last timer gets canceled, so the already programmed event does not fire.

It was not necessarily meant to "fix" clockevent devices which are
pretending to be ONESHOT, but keep firing over and over.

That said, I'm fine with the change along with a big fat comment why
this is required.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-04-22 15:53           ` Thomas Gleixner
@ 2022-04-23  2:29             ` Nicholas Piggin
  0 siblings, 0 replies; 414+ messages in thread
From: Nicholas Piggin @ 2022-04-23  2:29 UTC (permalink / raw)
  To: Michael Ellerman, paulmck, Thomas Gleixner, Zhouyi Zhou
  Cc: Daniel Lezcano, linux-kernel, linuxppc-dev, Miguel Ojeda, rcu,
	Viresh Kumar

Excerpts from Thomas Gleixner's message of April 23, 2022 1:53 am:
> On Wed, Apr 13 2022 at 15:11, Nicholas Piggin wrote:
>> So we traced the problem down to possibly a misunderstanding between 
>> decrementer clock event device and core code.
>>
>> The decrementer is only oneshot*ish*. It actually needs to either be 
>> reprogrammed or shut down otherwise it just continues to cause 
>> interrupts.
> 
> I always thought that PPC had sane timers. That's really disillusioning.

My comment was probably a somewhat misleading explanation of the whole
situation. This weirdness is actually in software in the powerpc
clock event driver due to a recent change I made assuming the clock 
event goes to oneshot-stopped.

The hardware is relatively sane I think, global synchronized constant
rate high frequency clock distributed to the CPUs so reads don't
go off-core. And per-CPU "decrementer" event interrupt at the same
frequency as the clock -- program it to a +ve value and it decrements
until zero then creates basically a level triggered interrupt.

Before my change, the decrementer interrupt would always clear the
interrupt at entry. The event_handler usually programs another
timer in so I tried to avoid that first clear counting on the
oneshot_stopped callback to clear the interrupt if there was no
other timer.

>> Before commit 35de589cb879, it was sort of two-shot. The initial 
>> interrupt at the programmed time would set its internal next_tb variable 
>> to ~0 and call the ->event_handler(). If that did not set_next_event or 
>> stop the timer, the interrupt will fire again immediately, notice 
>> next_tb is ~0, and only then stop the decrementer interrupt.
>>
>> So that was already kind of ugly, this patch just turned it into a hang.
>>
>> The problem happens when the tick is stopped with an event still 
>> pending, then tick_nohz_handler() is called, but it bails out because 
>> tick_stopped == 1 so the device never gets programmed again, and so it 
>> keeps firing.
>>
>> How to fix it? Before commit a7cba02deced, powerpc's decrementer was 
>> really oneshot, but we would like to avoid doing that because it requires 
>> additional programming of the hardware on each timer interrupt. We have 
>> the ONESHOT_STOPPED state which seems to be just about what we want.
>>
>> Did the ONESHOT_STOPPED patch just miss this case, or is there a reason 
>> we don't stop it here? This patch seems to fix the hang (not heavily
>> tested though).
> 
> This was definitely overlooked, but it's arguable it is is not required
> for real oneshot clockevent devices. This should only handle the case
> where the interrupt was already pending.
> 
> The ONESHOT_STOPPED state was introduced to handle the case where the
> last timer gets canceled, so the already programmed event does not fire.
> 
> It was not necessarily meant to "fix" clockevent devices which are
> pretending to be ONESHOT, but keep firing over and over.
> 
> That, said. I'm fine with the change along with a big fat comment why
> this is required.

Thanks for taking a look and confirming. I just sent a patch with a
comment and what looks like another missed case. Hopefully it's okay.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2022-04-21 16:41 Yury Norov
  2022-04-21 23:04 ` John Hubbard
  0 siblings, 1 reply; 414+ messages in thread
From: Yury Norov @ 2022-04-21 16:41 UTC (permalink / raw)
  To: Andrew Morton, Minchan Kim, John Hubbard, linux-mm, linux-kernel
  Cc: Yury Norov

Subject: [PATCH] mm/gup: fix comments to pin_user_pages_*()

pin_user_pages API forces FOLL_PIN in gup_flags, which means that the
API requires struct page **pages to be provided (not NULL). However,
the comment to pin_user_pages() says:

    * @pages:      array that receives pointers to the pages pinned.
    *              Should be at least nr_pages long. Or NULL, if caller
    *              only intends to ensure the pages are faulted in.

This patch fixes comments along the pin_user_pages code, and also adds
WARN_ON(!pages), so that API users will have better understanding
on how to use it.

It has been independently spotted by Minchan Kim and confirmed with
John Hubbard:

https://lore.kernel.org/all/YgWA0ghrrzHONehH@google.com/

Signed-off-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
---
 mm/gup.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index f598a037eb04..559626457585 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2871,6 +2871,10 @@ int pin_user_pages_fast(unsigned long start, int nr_pages,
 	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
 		return -EINVAL;
 
+	/* FOLL_PIN requires pages != NULL */
+	if (WARN_ON_ONCE(!pages))
+		return -EINVAL;
+
 	gup_flags |= FOLL_PIN;
 	return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
 }
@@ -2893,6 +2897,10 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages,
 	 */
 	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
 		return 0;
+
+	/* FOLL_PIN requires pages != NULL */
+	if (WARN_ON_ONCE(!pages))
+		return 0;
 	/*
 	 * FOLL_FAST_ONLY is required in order to match the API description of
 	 * this routine: no fall back to regular ("slow") GUP.
@@ -2920,8 +2928,7 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
  * @nr_pages:	number of pages from start to pin
  * @gup_flags:	flags modifying lookup behaviour
  * @pages:	array that receives pointers to the pages pinned.
- *		Should be at least nr_pages long. Or NULL, if caller
- *		only intends to ensure the pages are faulted in.
+ *		Should be at least nr_pages long.
  * @vmas:	array of pointers to vmas corresponding to each page.
  *		Or NULL if the caller does not require them.
  * @locked:	pointer to lock flag indicating whether lock is held and
@@ -2944,6 +2951,10 @@ long pin_user_pages_remote(struct mm_struct *mm,
 	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
 		return -EINVAL;
 
+	/* FOLL_PIN requires pages != NULL */
+	if (WARN_ON_ONCE(!pages))
+		return -EINVAL;
+
 	gup_flags |= FOLL_PIN;
 	return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
 				       pages, vmas, locked);
@@ -2957,8 +2968,7 @@ EXPORT_SYMBOL(pin_user_pages_remote);
  * @nr_pages:	number of pages from start to pin
  * @gup_flags:	flags modifying lookup behaviour
  * @pages:	array that receives pointers to the pages pinned.
- *		Should be at least nr_pages long. Or NULL, if caller
- *		only intends to ensure the pages are faulted in.
+ *		Should be at least nr_pages long.
  * @vmas:	array of pointers to vmas corresponding to each page.
  *		Or NULL if the caller does not require them.
  *
@@ -2976,6 +2986,10 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
 	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
 		return -EINVAL;
 
+	/* FOLL_PIN requires pages != NULL */
+	if (WARN_ON_ONCE(!pages))
+		return -EINVAL;
+
 	gup_flags |= FOLL_PIN;
 	return __gup_longterm_locked(current->mm, start, nr_pages,
 				     pages, vmas, gup_flags);
@@ -2994,6 +3008,10 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
 		return -EINVAL;
 
+	/* FOLL_PIN requires pages != NULL */
+	if (WARN_ON_ONCE(!pages))
+		return -EINVAL;
+
 	gup_flags |= FOLL_PIN;
 	return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
 }
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2022-04-21 16:41 Yury Norov
@ 2022-04-21 23:04 ` John Hubbard
  2022-04-21 23:09   ` Re: John Hubbard
  2022-04-21 23:17   ` Re: Yury Norov
  0 siblings, 2 replies; 414+ messages in thread
From: John Hubbard @ 2022-04-21 23:04 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Minchan Kim, linux-mm, linux-kernel

On 4/21/22 09:41, Yury Norov wrote:
> Subject: [PATCH] mm/gup: fix comments to pin_user_pages_*()
> 

Hi Yuri,

Thanks for picking this up. I have been distracted and didn't trust
myself to focus on this properly, so it's good to have help!

IT/admin point: somehow the first line of the commit description didn't
make it into an actual email subject. The subject line was blank when it
arrived in my inbox, and the subject is in the body here instead. Not
sure how that happened.

Maybe check your git-sendemail setup?


> pin_user_pages API forces FOLL_PIN in gup_flags, which means that the
> API requires struct page **pages to be provided (not NULL). However,
> the comment to pin_user_pages() says:
> 
>      * @pages:      array that receives pointers to the pages pinned.
>      *              Should be at least nr_pages long. Or NULL, if caller
>      *              only intends to ensure the pages are faulted in.
> 
> This patch fixes comments along the pin_user_pages code, and also adds
> WARN_ON(!pages), so that API users will have better understanding
> on how to use it.

No need to quote the code in the commit log. Instead, just summarize.
For example:

pin_user_pages API forces FOLL_PIN in gup_flags, which means that the
API requires struct page **pages to be provided (not NULL). However, the
comment to pin_user_pages() clearly allows for passing in a NULL @pages
argument.

Remove the incorrect comments, and add WARN_ON_ONCE(!pages) calls to
enforce the API.

> 
> It has been independently spotted by Minchan Kim and confirmed with
> John Hubbard:
> 
> https://lore.kernel.org/all/YgWA0ghrrzHONehH@google.com/

Let's add a Cc: line for Michan as well:

Cc: Minchan Kim <minchan@kernel.org>

> 
> Signed-off-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
> ---
>   mm/gup.c | 26 ++++++++++++++++++++++----
>   1 file changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/gup.c b/mm/gup.c
> index f598a037eb04..559626457585 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -2871,6 +2871,10 @@ int pin_user_pages_fast(unsigned long start, int nr_pages,
>   	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
>   		return -EINVAL;
>   
> +	/* FOLL_PIN requires pages != NULL */

Please delete each and every one of these one-line comments, because
they merely echo what the code says.

> +	if (WARN_ON_ONCE(!pages))
> +		return -EINVAL;
> +
>   	gup_flags |= FOLL_PIN;
>   	return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
>   }
> @@ -2893,6 +2897,10 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages,
>   	 */
>   	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
>   		return 0;
> +
> +	/* FOLL_PIN requires pages != NULL */
> +	if (WARN_ON_ONCE(!pages))
> +		return 0;
>   	/*
>   	 * FOLL_FAST_ONLY is required in order to match the API description of
>   	 * this routine: no fall back to regular ("slow") GUP.
> @@ -2920,8 +2928,7 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
>    * @nr_pages:	number of pages from start to pin
>    * @gup_flags:	flags modifying lookup behaviour
>    * @pages:	array that receives pointers to the pages pinned.
> - *		Should be at least nr_pages long. Or NULL, if caller
> - *		only intends to ensure the pages are faulted in.
> + *		Should be at least nr_pages long.
>    * @vmas:	array of pointers to vmas corresponding to each page.
>    *		Or NULL if the caller does not require them.
>    * @locked:	pointer to lock flag indicating whether lock is held and
> @@ -2944,6 +2951,10 @@ long pin_user_pages_remote(struct mm_struct *mm,
>   	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
>   		return -EINVAL;
>   
> +	/* FOLL_PIN requires pages != NULL */
> +	if (WARN_ON_ONCE(!pages))
> +		return -EINVAL;
> +
>   	gup_flags |= FOLL_PIN;
>   	return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
>   				       pages, vmas, locked);
> @@ -2957,8 +2968,7 @@ EXPORT_SYMBOL(pin_user_pages_remote);
>    * @nr_pages:	number of pages from start to pin
>    * @gup_flags:	flags modifying lookup behaviour
>    * @pages:	array that receives pointers to the pages pinned.
> - *		Should be at least nr_pages long. Or NULL, if caller
> - *		only intends to ensure the pages are faulted in.
> + *		Should be at least nr_pages long.
>    * @vmas:	array of pointers to vmas corresponding to each page.
>    *		Or NULL if the caller does not require them.
>    *
> @@ -2976,6 +2986,10 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
>   	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
>   		return -EINVAL;
>   
> +	/* FOLL_PIN requires pages != NULL */
> +	if (WARN_ON_ONCE(!pages))
> +		return -EINVAL;
> +
>   	gup_flags |= FOLL_PIN;
>   	return __gup_longterm_locked(current->mm, start, nr_pages,
>   				     pages, vmas, gup_flags);
> @@ -2994,6 +3008,10 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
>   	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
>   		return -EINVAL;
>   
> +	/* FOLL_PIN requires pages != NULL */
> +	if (WARN_ON_ONCE(!pages))
> +		return -EINVAL;
> +
>   	gup_flags |= FOLL_PIN;
>   	return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
>   }

I hope we don't break any callers with the newly enforced !pages, but it's
the right thing to do, in order to avoid misunderstandings.

thanks,
-- 
John Hubbard
NVIDIA

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-04-21 23:04 ` John Hubbard
@ 2022-04-21 23:09   ` John Hubbard
  2022-04-21 23:17   ` Re: Yury Norov
  1 sibling, 0 replies; 414+ messages in thread
From: John Hubbard @ 2022-04-21 23:09 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Minchan Kim, linux-mm, linux-kernel

On 4/21/22 16:04, John Hubbard wrote:
> On 4/21/22 09:41, Yury Norov wrote:
>> Subject: [PATCH] mm/gup: fix comments to pin_user_pages_*()
>>
> 
> Hi Yuri,

...and I see that I have typo'd both Yury's and Minchan's name (further
down), in the same email!

Really apologize for screwing that up. It's Yury-with-a-"y", I know. :)


thanks,
-- 
John Hubbard
NVIDIA

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-04-21 23:04 ` John Hubbard
  2022-04-21 23:09   ` Re: John Hubbard
@ 2022-04-21 23:17   ` Yury Norov
  2022-04-21 23:21     ` Re: John Hubbard
  1 sibling, 1 reply; 414+ messages in thread
From: Yury Norov @ 2022-04-21 23:17 UTC (permalink / raw)
  To: John Hubbard; +Cc: Andrew Morton, Minchan Kim, linux-mm, linux-kernel

On Thu, Apr 21, 2022 at 04:04:44PM -0700, John Hubbard wrote:
> On 4/21/22 09:41, Yury Norov wrote:
> > Subject: [PATCH] mm/gup: fix comments to pin_user_pages_*()
> > 
> 
> Hi Yuri,
> 
> Thanks for picking this up. I have been distracted and didn't trust
> myself to focus on this properly, so it's good to have help!
> 
> IT/admin point: somehow the first line of the commit description didn't
> make it into an actual email subject. The subject line was blank when it
> arrived in my inbox, and the subject is in the body here instead. Not
> sure how that happened.
> 
> Maybe check your git-sendemail setup?
 
git-send-email is OK. I just accidentally added an empty line above Subject,
which broke the format. My bad, sorry for this.
 
> > pin_user_pages API forces FOLL_PIN in gup_flags, which means that the
> > API requires struct page **pages to be provided (not NULL). However,
> > the comment to pin_user_pages() says:
> > 
> >      * @pages:      array that receives pointers to the pages pinned.
> >      *              Should be at least nr_pages long. Or NULL, if caller
> >      *              only intends to ensure the pages are faulted in.
> > 
> > This patch fixes comments along the pin_user_pages code, and also adds
> > WARN_ON(!pages), so that API users will have better understanding
> > on how to use it.
> 
> No need to quote the code in the commit log. Instead, just summarize.
> For example:
> 
> pin_user_pages API forces FOLL_PIN in gup_flags, which means that the
> API requires struct page **pages to be provided (not NULL). However, the
> comment to pin_user_pages() clearly allows for passing in a NULL @pages
> argument.
> 
> Remove the incorrect comments, and add WARN_ON_ONCE(!pages) calls to
> enforce the API.
> 
> > 
> > It has been independently spotted by Minchan Kim and confirmed with
> > John Hubbard:
> > 
> > https://lore.kernel.org/all/YgWA0ghrrzHONehH@google.com/
> 
> Let's add a Cc: line for Michan as well:
> 
> Cc: Minchan Kim <minchan@kernel.org>
 
He's in CC already, I think...
 
> > Signed-off-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
> > ---
> >   mm/gup.c | 26 ++++++++++++++++++++++----
> >   1 file changed, 22 insertions(+), 4 deletions(-)
> > 
> > diff --git a/mm/gup.c b/mm/gup.c
> > index f598a037eb04..559626457585 100644
> > --- a/mm/gup.c
> > +++ b/mm/gup.c
> > @@ -2871,6 +2871,10 @@ int pin_user_pages_fast(unsigned long start, int nr_pages,
> >   	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
> >   		return -EINVAL;
> > +	/* FOLL_PIN requires pages != NULL */
> 
> Please delete each and every one of these one-line comments, because
> they merely echo what the code says.

Sure.
 
> > +	if (WARN_ON_ONCE(!pages))
> > +		return -EINVAL;
> > +
> >   	gup_flags |= FOLL_PIN;
> >   	return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
> >   }
> > @@ -2893,6 +2897,10 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages,
> >   	 */
> >   	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
> >   		return 0;
> > +
> > +	/* FOLL_PIN requires pages != NULL */
> > +	if (WARN_ON_ONCE(!pages))
> > +		return 0;
> >   	/*
> >   	 * FOLL_FAST_ONLY is required in order to match the API description of
> >   	 * this routine: no fall back to regular ("slow") GUP.
> > @@ -2920,8 +2928,7 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
> >    * @nr_pages:	number of pages from start to pin
> >    * @gup_flags:	flags modifying lookup behaviour
> >    * @pages:	array that receives pointers to the pages pinned.
> > - *		Should be at least nr_pages long. Or NULL, if caller
> > - *		only intends to ensure the pages are faulted in.
> > + *		Should be at least nr_pages long.
> >    * @vmas:	array of pointers to vmas corresponding to each page.
> >    *		Or NULL if the caller does not require them.
> >    * @locked:	pointer to lock flag indicating whether lock is held and
> > @@ -2944,6 +2951,10 @@ long pin_user_pages_remote(struct mm_struct *mm,
> >   	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
> >   		return -EINVAL;
> > +	/* FOLL_PIN requires pages != NULL */
> > +	if (WARN_ON_ONCE(!pages))
> > +		return -EINVAL;
> > +
> >   	gup_flags |= FOLL_PIN;
> >   	return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
> >   				       pages, vmas, locked);
> > @@ -2957,8 +2968,7 @@ EXPORT_SYMBOL(pin_user_pages_remote);
> >    * @nr_pages:	number of pages from start to pin
> >    * @gup_flags:	flags modifying lookup behaviour
> >    * @pages:	array that receives pointers to the pages pinned.
> > - *		Should be at least nr_pages long. Or NULL, if caller
> > - *		only intends to ensure the pages are faulted in.
> > + *		Should be at least nr_pages long.
> >    * @vmas:	array of pointers to vmas corresponding to each page.
> >    *		Or NULL if the caller does not require them.
> >    *
> > @@ -2976,6 +2986,10 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
> >   	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
> >   		return -EINVAL;
> > +	/* FOLL_PIN requires pages != NULL */
> > +	if (WARN_ON_ONCE(!pages))
> > +		return -EINVAL;
> > +
> >   	gup_flags |= FOLL_PIN;
> >   	return __gup_longterm_locked(current->mm, start, nr_pages,
> >   				     pages, vmas, gup_flags);
> > @@ -2994,6 +3008,10 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
> >   	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
> >   		return -EINVAL;
> > +	/* FOLL_PIN requires pages != NULL */
> > +	if (WARN_ON_ONCE(!pages))
> > +		return -EINVAL;
> > +
> >   	gup_flags |= FOLL_PIN;
> >   	return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
> >   }
> 
> I hope we don't break any callers with the newly enforced !pages, but it's
> the right thing to do, in order to avoid misunderstandings.
> 
> thanks,
> -- 
> John Hubbard
> NVIDIA

Let me test v2 and resend shortly.

Thanks,
Yury

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-04-21 23:17   ` Re: Yury Norov
@ 2022-04-21 23:21     ` John Hubbard
  0 siblings, 0 replies; 414+ messages in thread
From: John Hubbard @ 2022-04-21 23:21 UTC (permalink / raw)
  To: Yury Norov; +Cc: Andrew Morton, Minchan Kim, linux-mm, linux-kernel

On 4/21/22 16:17, Yury Norov wrote:
>> Let's add a Cc: line for Michan as well:
>>
>> Cc: Minchan Kim <minchan@kernel.org>
>   
> He's in CC already, I think...
>   

Here, I am talking about attribution in the commit log, as opposed
to the email Cc. In other words, I'm suggesting that you literally
add this line to the commit description:

Cc: Minchan Kim <minchan@kernel.org>


thanks,
-- 
John Hubbard
NVIDIA

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2022-03-25  6:30 Michael S. Tsirkin
  2022-03-25  7:52 ` Jason Wang
  0 siblings, 1 reply; 414+ messages in thread
From: Michael S. Tsirkin @ 2022-03-25  6:30 UTC (permalink / raw)
  To: Jason Wang
  Cc: virtualization, linux-kernel, maz, tglx, peterz, sgarzare, keirf,
	Paul E. McKenney

Bcc: 
Subject: Re: [PATCH 3/3] virtio: harden vring IRQ
Message-ID: <20220325021422-mutt-send-email-mst@kernel.org>
Reply-To: 
In-Reply-To: <f7046303-7d7d-e39f-3c71-3688126cc812@redhat.com>

On Fri, Mar 25, 2022 at 11:04:08AM +0800, Jason Wang wrote:
> 
> 在 2022/3/24 下午7:03, Michael S. Tsirkin 写道:
> > On Thu, Mar 24, 2022 at 04:40:04PM +0800, Jason Wang wrote:
> > > This is a rework on the previous IRQ hardening that is done for
> > > virtio-pci where several drawbacks were found and were reverted:
> > > 
> > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed IRQ
> > >     that is used by some device such as virtio-blk
> > > 2) done only for PCI transport
> > > 
> > > > In this patch, we try to borrow the idea from the INTX IRQ hardening
> > > > in the reverted commit 080cd7c3ac87 ("virtio-pci: harden INTX interrupts")
> > > > by introducing a global irq_soft_enabled variable for each
> > > > virtio_device. Then we can toggle it during
> > > > virtio_reset_device()/virtio_device_ready(). A synchronize_rcu() is
> > > > used in virtio_reset_device() to synchronize with the IRQ handlers. In
> > > > the future, we may provide config_ops for the transport that doesn't
> > > > use IRQ. With this, vring_interrupt() can check and return early if
> > > > irq_soft_enabled is false. This leads to smp_load_acquire() being used
> > > > but the cost should be acceptable.
> > Maybe it should be but is it? Can't we use synchronize_irq instead?
> 
> 
> Even if we allow the transport driver to synchronize through
> synchronize_irq() we still need a check in the vring_interrupt().
> 
> We do something like the following previously:
> 
>         if (!READ_ONCE(vp_dev->intx_soft_enabled))
>                 return IRQ_NONE;
> 
> But it looks like a bug since speculative read can be done before the check
> where the interrupt handler can't see the uncommitted setup which is done by
> the driver.

I don't think so - if you sync after setting the value then
you are guaranteed that any handler running afterwards
will see the new value.

Although I couldn't find anything about this in memory-barriers.txt
which surprises me.

CC Paul to help make sure I'm right.


> 
> > 
> > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > module parameter is introduced to enable the hardening so function
> > > hardening is disabled by default.
> > Which devices are these? How come they send an interrupt before there
> > are any buffers in any queues?
> 
> 
> I copied this from the commit log for 22b7050a024d7
> 
> "
> 
>     This change will also benefit old hypervisors (before 2009)
>     that send interrupts without checking DRIVER_OK: previously,
>     the callback could race with driver-specific initialization.
> "
> 
> If this is only for config interrupt, I can remove the above log.


This is only for config interrupt.

> 
> > 
> > > Note that the hardening is only done for vring interrupt since the
> > > config interrupt hardening is already done in commit 22b7050a024d7
> > > ("virtio: defer config changed notifications"). But the method that is
> > > used by config interrupt can't be reused by the vring interrupt
> > > handler because it uses spinlock to do the synchronization which is
> > > expensive.
> > > 
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > 
> > > ---
> > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > >   include/linux/virtio.h        |  4 ++++
> > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > index 8dde44ea044a..85e331efa9cc 100644
> > > --- a/drivers/virtio/virtio.c
> > > +++ b/drivers/virtio/virtio.c
> > > @@ -7,6 +7,12 @@
> > >   #include <linux/of.h>
> > >   #include <uapi/linux/virtio_ids.h>
> > > +static bool irq_hardening = false;
> > > +
> > > +module_param(irq_hardening, bool, 0444);
> > > +MODULE_PARM_DESC(irq_hardening,
> > > +		 "Disalbe IRQ software processing when it is not expected");
> > > +
> > >   /* Unique numbering for virtio devices. */
> > >   static DEFINE_IDA(virtio_index_ida);
> > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > >    * */
> > >   void virtio_reset_device(struct virtio_device *dev)
> > >   {
> > > +	/*
> > > +	 * The below synchronize_rcu() guarantees that any
> > > +	 * interrupt for this line arriving after
> > > +	 * synchronize_rcu() has completed is guaranteed to see
> > > +	 * irq_soft_enabled == false.
> > News to me I did not know synchronize_rcu has anything to do
> > with interrupts. Did not you intend to use synchronize_irq?
> > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > though it most likely is ...
> 
> 
> According to the comment above tree RCU version of synchronize_rcu():
> 
> """
> 
>  * RCU read-side critical sections are delimited by rcu_read_lock()
>  * and rcu_read_unlock(), and may be nested.  In addition, but only in
>  * v5.0 and later, regions of code across which interrupts, preemption,
>  * or softirqs have been disabled also serve as RCU read-side critical
>  * sections.  This includes hardware interrupt handlers, softirq handlers,
>  * and NMI handlers.
> """
> 
> So interrupt handlers are treated as read-side critical sections.
> 
> And it has a comment explaining the barrier:
> 
> """
> 
>  * Note that this guarantee implies further memory-ordering guarantees.
>  * On systems with more than one CPU, when synchronize_rcu() returns,
>  * each CPU is guaranteed to have executed a full memory barrier since
>  * the end of its last RCU read-side critical section whose beginning
>  * preceded the call to synchronize_rcu().  In addition, each CPU having
> """
> 
> So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> barrier: if the interrupt comes after WRITE_ONCE(), it will see
> irq_soft_enabled as false.
> 

You are right. So then
1. I do not think we need load_acquire - why is it needed? Just
   READ_ONCE should do.
2. isn't synchronize_irq also doing the same thing?


> > 
> > > +	 */
> > > +	WRITE_ONCE(dev->irq_soft_enabled, false);
> > > +	synchronize_rcu();
> > > +
> > >   	dev->config->reset(dev);
> > >   }
> > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > Please add comment explaining where it will be enabled.
> > Also, we *really* don't need to synch if it was already disabled,
> > let's not add useless overhead to the boot sequence.
> 
> 
> Ok.
> 
> 
> > 
> > 
> > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > >   	spin_lock_init(&dev->config_lock);
> > >   	dev->config_enabled = false;
> > >   	dev->config_change_pending = false;
> > > +	dev->irq_soft_check = irq_hardening;
> > > +
> > > +	if (dev->irq_soft_check)
> > > +		dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > >   	/* We always start by resetting the device, in case a previous
> > >   	 * driver messed it up.  This also tests that code path a little. */
> > one of the points of hardening is it's also helpful for buggy
> > devices. this flag defeats the purpose.
> 
> 
> Do you mean:
> 
> 1) we need something like config_enable? This does not seem easy to
> implement without obvious overhead, mainly the synchronization with the
> interrupt handlers

But synchronize is only on tear-down path. That is not critical for any
users at the moment, even less than probe.

> 2) enable this by default, so I don't object, but this may have some risk
> for old hypervisors


The risk is if there's a driver adding buffers without setting DRIVER_OK.
So with this approach, how about we rename the flag "driver_ok"?
And then add_buf can actually test it and BUG_ON if not there  (at least
in the debug build).

And going down from there, how about we cache status in the
device? Then we don't need to keep re-reading it every time,
speeding boot up a tiny bit.

> 
> > 
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 962f1477b1fa..0170f8c784d8 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > >   	return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > >   }
> > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > +irqreturn_t vring_interrupt(int irq, void *v)
> > >   {
> > > +	struct virtqueue *_vq = v;
> > > +	struct virtio_device *vdev = _vq->vdev;
> > >   	struct vring_virtqueue *vq = to_vvq(_vq);
> > > +	if (!virtio_irq_soft_enabled(vdev)) {
> > > +		dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > +		return IRQ_NONE;
> > > +	}
> > > +
> > >   	if (!more_used(vq)) {
> > >   		pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > >   		return IRQ_NONE;
> > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > index 5464f398912a..957d6ad604ac 100644
> > > --- a/include/linux/virtio.h
> > > +++ b/include/linux/virtio.h
> > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > >    * @config_enabled: configuration change reporting enabled
> > >    * @config_change_pending: configuration change reported while disabled
> > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > + * @irq_soft_enabled: callbacks enabled
> > >    * @config_lock: protects configuration change reporting
> > >    * @dev: underlying device.
> > >    * @id: the device type identification (used to match it with a driver).
> > > @@ -109,6 +111,8 @@ struct virtio_device {
> > >   	bool failed;
> > >   	bool config_enabled;
> > >   	bool config_change_pending;
> > > +	bool irq_soft_check;
> > > +	bool irq_soft_enabled;
> > >   	spinlock_t config_lock;
> > >   	spinlock_t vqs_list_lock; /* Protects VQs list access */
> > >   	struct device dev;
> > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > index dafdc7f48c01..9c1b61f2e525 100644
> > > --- a/include/linux/virtio_config.h
> > > +++ b/include/linux/virtio_config.h
> > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > >   	return __virtio_test_bit(vdev, fbit);
> > >   }
> > > +/*
> > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > + * @vdev: the device
> > > + */
> > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > +{
> > > +	if (!vdev->irq_soft_check)
> > > +		return true;
> > > +
> > > +	/*
> > > +	 * Read irq_soft_enabled before reading other device specific
> > > +	 * data. Paried with smp_store_relase() in
> > paired
> 
> 
> Will fix.
> 
> Thanks
> 
> 
> > 
> > > +	 * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > +	 * virtio_reset_device().
> > > +	 */
> > > +	return smp_load_acquire(&vdev->irq_soft_enabled);
> > > +}
> > > +
> > >   /**
> > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > >    * @vdev: the device
> > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > >   	if (dev->config->enable_cbs)
> > >                     dev->config->enable_cbs(dev);
> > > +	/*
> > > +	 * Commit the driver setup before enabling the virtqueue
> > > +	 * callbacks. Paried with smp_load_acuqire() in
> > > +	 * virtio_irq_soft_enabled()
> > > +	 */
> > > +	smp_store_release(&dev->irq_soft_enabled, true);
> > > +
> > >   	BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > >   	dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > >   }
> > > -- 
> > > 2.25.1


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-25  6:30 Michael S. Tsirkin
@ 2022-03-25  7:52 ` Jason Wang
  2022-03-25  9:10   ` Re: Michael S. Tsirkin
  0 siblings, 1 reply; 414+ messages in thread
From: Jason Wang @ 2022-03-25  7:52 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Fri, Mar 25, 2022 at 2:31 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> Bcc:
> Subject: Re: [PATCH 3/3] virtio: harden vring IRQ
> Message-ID: <20220325021422-mutt-send-email-mst@kernel.org>
> Reply-To:
> In-Reply-To: <f7046303-7d7d-e39f-3c71-3688126cc812@redhat.com>
>
> On Fri, Mar 25, 2022 at 11:04:08AM +0800, Jason Wang wrote:
> >
> > 在 2022/3/24 下午7:03, Michael S. Tsirkin 写道:
> > > On Thu, Mar 24, 2022 at 04:40:04PM +0800, Jason Wang wrote:
> > > > This is a rework on the previous IRQ hardening that is done for
> > > > virtio-pci where several drawbacks were found and were reverted:
> > > >
> > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed IRQ
> > > >     that is used by some device such as virtio-blk
> > > > 2) done only for PCI transport
> > > >
> > > > In this patch, we tries to borrow the idea from the INTX IRQ hardening
> > > > in the reverted commit 080cd7c3ac87 ("virtio-pci: harden INTX interrupts")
> > > > by introducing a global irq_soft_enabled variable for each
> > > > virtio_device. Then we can to toggle it during
> > > > virtio_reset_device()/virtio_device_ready(). A synchornize_rcu() is
> > > > used in virtio_reset_device() to synchronize with the IRQ handlers. In
> > > > the future, we may provide config_ops for the transport that doesn't
> > > > use IRQ. With this, vring_interrupt() can return check and early if
> > > > irq_soft_enabled is false. This lead to smp_load_acquire() to be used
> > > > but the cost should be acceptable.
> > > Maybe it should be but is it? Can't we use synchronize_irq instead?
> >
> >
> > Even if we allow the transport driver to synchornize through
> > synchronize_irq() we still need a check in the vring_interrupt().
> >
> > We do something like the following previously:
> >
> >         if (!READ_ONCE(vp_dev->intx_soft_enabled))
> >                 return IRQ_NONE;
> >
> > But it looks like a bug since speculative read can be done before the check
> > where the interrupt handler can't see the uncommitted setup which is done by
> > the driver.
>
> I don't think so - if you sync after setting the value then
> you are guaranteed that any handler running afterwards
> will see the new value.

The problem is not the disable path but the enable path. We use
smp_store_release() to make sure the driver commits the setup before
enabling the IRQ. This means the read in vring_interrupt() needs to be
ordered as well.

>
> Although I couldn't find anything about this in memory-barriers.txt
> which surprises me.
>
> CC Paul to help make sure I'm right.
>
>
> >
> > >
> > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > module parameter is introduced to enable the hardening so function
> > > > hardening is disabled by default.
> > > Which devices are these? How come they send an interrupt before there
> > > are any buffers in any queues?
> >
> >
> > I copied this from the commit log for 22b7050a024d7
> >
> > "
> >
> >     This change will also benefit old hypervisors (before 2009)
> >     that send interrupts without checking DRIVER_OK: previously,
> >     the callback could race with driver-specific initialization.
> > "
> >
> > If this is only for config interrupt, I can remove the above log.
>
>
> This is only for config interrupt.

Ok.

>
> >
> > >
> > > > Note that the hardening is only done for vring interrupt since the
> > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > ("virtio: defer config changed notifications"). But the method that is
> > > > used by config interrupt can't be reused by the vring interrupt
> > > > handler because it uses spinlock to do the synchronization which is
> > > > expensive.
> > > >
> > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > >
> > > > ---
> > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > >   include/linux/virtio.h        |  4 ++++
> > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > --- a/drivers/virtio/virtio.c
> > > > +++ b/drivers/virtio/virtio.c
> > > > @@ -7,6 +7,12 @@
> > > >   #include <linux/of.h>
> > > >   #include <uapi/linux/virtio_ids.h>
> > > > +static bool irq_hardening = false;
> > > > +
> > > > +module_param(irq_hardening, bool, 0444);
> > > > +MODULE_PARM_DESC(irq_hardening,
> > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > +
> > > >   /* Unique numbering for virtio devices. */
> > > >   static DEFINE_IDA(virtio_index_ida);
> > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > >    * */
> > > >   void virtio_reset_device(struct virtio_device *dev)
> > > >   {
> > > > + /*
> > > > +  * The below synchronize_rcu() guarantees that any
> > > > +  * interrupt for this line arriving after
> > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > +  * irq_soft_enabled == false.
> > > News to me I did not know synchronize_rcu has anything to do
> > > with interrupts. Did not you intend to use synchronize_irq?
> > > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > though it's most likely is ...
> >
> >
> > According to the comment above tree RCU version of synchronize_rcu():
> >
> > """
> >
> >  * RCU read-side critical sections are delimited by rcu_read_lock()
> >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> >  * v5.0 and later, regions of code across which interrupts, preemption,
> >  * or softirqs have been disabled also serve as RCU read-side critical
> >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> >  * and NMI handlers.
> > """
> >
> > So interrupt handlers are treated as read-side critical sections.
> >
> > And it has the comment for explain the barrier:
> >
> > """
> >
> >  * Note that this guarantee implies further memory-ordering guarantees.
> >  * On systems with more than one CPU, when synchronize_rcu() returns,
> >  * each CPU is guaranteed to have executed a full memory barrier since
> >  * the end of its last RCU read-side critical section whose beginning
> >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > """
> >
> > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > barrier, if the interrupt come after WRITE_ONCE() it will see the
> > irq_soft_enabled as false.
> >
>
> You are right. So then
> 1. I do not think we need load_acquire - why is it needed? Just
>    READ_ONCE should do.

See above.

> 2. isn't synchronize_irq also doing the same thing?


Yes, but it requires a config op, since the IRQ knowledge is transport-specific.

>
>
> > >
> > > > +  */
> > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > + synchronize_rcu();
> > > > +
> > > >           dev->config->reset(dev);
> > > >   }
> > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > Please add comment explaining where it will be enabled.
> > > Also, we *really* don't need to synch if it was already disabled,
> > > let's not add useless overhead to the boot sequence.
> >
> >
> > Ok.
> >
> >
> > >
> > >
> > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > >           spin_lock_init(&dev->config_lock);
> > > >           dev->config_enabled = false;
> > > >           dev->config_change_pending = false;
> > > > + dev->irq_soft_check = irq_hardening;
> > > > +
> > > > + if (dev->irq_soft_check)
> > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > >           /* We always start by resetting the device, in case a previous
> > > >            * driver messed it up.  This also tests that code path a little. */
> > > one of the points of hardening is it's also helpful for buggy
> > > devices. this flag defeats the purpose.
> >
> >
> > Do you mean:
> >
> > 1) we need something like config_enable? This seems not easy to be
> > implemented without obvious overhead, mainly the synchronize with the
> > interrupt handlers
>
> But synchronize is only on tear-down path. That is not critical for any
> users at the moment, even less than probe.

I meant that if we have vq->irq_pending, we need to call vring_interrupt()
in virtio_device_ready() and synchronize with the IRQ handlers using a
spinlock or something similar.

>
> > 2) enable this by default, so I don't object, but this may have some risk
> > for old hypervisors
>
>
> The risk if there's a driver adding buffers without setting DRIVER_OK.

Probably not; we have devices that accept random inputs from outside:
net, console, input, etc. I've done a round of audits of the QEMU
code. It has all looked fine since day 0.

> So with this approach, how about we rename the flag "driver_ok"?
> And then add_buf can actually test it and BUG_ON if not there  (at least
> in the debug build).

This looks like a hardening of the driver in the core instead of the
device. I think it can be done but in a separate series.

>
> And going down from there, how about we cache status in the
> device? Then we don't need to keep re-reading it every time,
> speeding boot up a tiny bit.

I don't fully understand this; the spec actually requires the status to
be read back for validation in many cases.

Thanks

>
> >
> > >
> > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > --- a/drivers/virtio/virtio_ring.c
> > > > +++ b/drivers/virtio/virtio_ring.c
> > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > >   }
> > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > >   {
> > > > + struct virtqueue *_vq = v;
> > > > + struct virtio_device *vdev = _vq->vdev;
> > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > +         return IRQ_NONE;
> > > > + }
> > > > +
> > > >           if (!more_used(vq)) {
> > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > >                   return IRQ_NONE;
> > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > index 5464f398912a..957d6ad604ac 100644
> > > > --- a/include/linux/virtio.h
> > > > +++ b/include/linux/virtio.h
> > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > >    * @config_enabled: configuration change reporting enabled
> > > >    * @config_change_pending: configuration change reported while disabled
> > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > + * @irq_soft_enabled: callbacks enabled
> > > >    * @config_lock: protects configuration change reporting
> > > >    * @dev: underlying device.
> > > >    * @id: the device type identification (used to match it with a driver).
> > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > >           bool failed;
> > > >           bool config_enabled;
> > > >           bool config_change_pending;
> > > > + bool irq_soft_check;
> > > > + bool irq_soft_enabled;
> > > >           spinlock_t config_lock;
> > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > >           struct device dev;
> > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > --- a/include/linux/virtio_config.h
> > > > +++ b/include/linux/virtio_config.h
> > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > >           return __virtio_test_bit(vdev, fbit);
> > > >   }
> > > > +/*
> > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > + * @vdev: the device
> > > > + */
> > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > +{
> > > > + if (!vdev->irq_soft_check)
> > > > +         return true;
> > > > +
> > > > + /*
> > > > +  * Read irq_soft_enabled before reading other device specific
> > > > +  * data. Paried with smp_store_relase() in
> > > paired
> >
> >
> > Will fix.
> >
> > Thanks
> >
> >
> > >
> > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > +  * virtio_reset_device().
> > > > +  */
> > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > +}
> > > > +
> > > >   /**
> > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > >    * @vdev: the device
> > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > >           if (dev->config->enable_cbs)
> > > >                     dev->config->enable_cbs(dev);
> > > > + /*
> > > > +  * Commit the driver setup before enabling the virtqueue
> > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > +  * virtio_irq_soft_enabled()
> > > > +  */
> > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > +
> > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > >   }
> > > > --
> > > > 2.25.1
>


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-25  7:52 ` Jason Wang
@ 2022-03-25  9:10   ` Michael S. Tsirkin
  2022-03-25  9:20     ` Re: Jason Wang
  0 siblings, 1 reply; 414+ messages in thread
From: Michael S. Tsirkin @ 2022-03-25  9:10 UTC (permalink / raw)
  To: Jason Wang
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Fri, Mar 25, 2022 at 03:52:00PM +0800, Jason Wang wrote:
> On Fri, Mar 25, 2022 at 2:31 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > Bcc:
> > Subject: Re: [PATCH 3/3] virtio: harden vring IRQ
> > Message-ID: <20220325021422-mutt-send-email-mst@kernel.org>
> > Reply-To:
> > In-Reply-To: <f7046303-7d7d-e39f-3c71-3688126cc812@redhat.com>
> >
> > On Fri, Mar 25, 2022 at 11:04:08AM +0800, Jason Wang wrote:
> > >
> > > 在 2022/3/24 下午7:03, Michael S. Tsirkin 写道:
> > > > On Thu, Mar 24, 2022 at 04:40:04PM +0800, Jason Wang wrote:
> > > > > This is a rework on the previous IRQ hardening that is done for
> > > > > virtio-pci where several drawbacks were found and were reverted:
> > > > >
> > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed IRQ
> > > > >     that is used by some device such as virtio-blk
> > > > > 2) done only for PCI transport
> > > > >
> > > > > In this patch, we tries to borrow the idea from the INTX IRQ hardening
> > > > > in the reverted commit 080cd7c3ac87 ("virtio-pci: harden INTX interrupts")
> > > > > by introducing a global irq_soft_enabled variable for each
> > > > > virtio_device. Then we can to toggle it during
> > > > > virtio_reset_device()/virtio_device_ready(). A synchornize_rcu() is
> > > > > used in virtio_reset_device() to synchronize with the IRQ handlers. In
> > > > > the future, we may provide config_ops for the transport that doesn't
> > > > > use IRQ. With this, vring_interrupt() can return check and early if
> > > > > irq_soft_enabled is false. This lead to smp_load_acquire() to be used
> > > > > but the cost should be acceptable.
> > > > Maybe it should be but is it? Can't we use synchronize_irq instead?
> > >
> > >
> > > Even if we allow the transport driver to synchornize through
> > > synchronize_irq() we still need a check in the vring_interrupt().
> > >
> > > We do something like the following previously:
> > >
> > >         if (!READ_ONCE(vp_dev->intx_soft_enabled))
> > >                 return IRQ_NONE;
> > >
> > > But it looks like a bug since speculative read can be done before the check
> > > where the interrupt handler can't see the uncommitted setup which is done by
> > > the driver.
> >
> > I don't think so - if you sync after setting the value then
> > you are guaranteed that any handler running afterwards
> > will see the new value.
> 
> The problem is not disabled but the enable.

So a misbehaving device can lose interrupts? That's not a problem at all
imo.

> We use smp_store_relase()
> to make sure the driver commits the setup before enabling the irq. It
> means the read needs to be ordered as well in vring_interrupt().
> 
> >
> > Although I couldn't find anything about this in memory-barriers.txt
> > which surprises me.
> >
> > CC Paul to help make sure I'm right.
> >
> >
> > >
> > > >
> > > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > > module parameter is introduced to enable the hardening so function
> > > > > hardening is disabled by default.
> > > > Which devices are these? How come they send an interrupt before there
> > > > are any buffers in any queues?
> > >
> > >
> > > I copied this from the commit log for 22b7050a024d7
> > >
> > > "
> > >
> > >     This change will also benefit old hypervisors (before 2009)
> > >     that send interrupts without checking DRIVER_OK: previously,
> > >     the callback could race with driver-specific initialization.
> > > "
> > >
> > > If this is only for config interrupt, I can remove the above log.
> >
> >
> > This is only for config interrupt.
> 
> Ok.
> 
> >
> > >
> > > >
> > > > > Note that the hardening is only done for vring interrupt since the
> > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > handler because it uses spinlock to do the synchronization which is
> > > > > expensive.
> > > > >
> > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > >
> > > > > ---
> > > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > > >   include/linux/virtio.h        |  4 ++++
> > > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > > --- a/drivers/virtio/virtio.c
> > > > > +++ b/drivers/virtio/virtio.c
> > > > > @@ -7,6 +7,12 @@
> > > > >   #include <linux/of.h>
> > > > >   #include <uapi/linux/virtio_ids.h>
> > > > > +static bool irq_hardening = false;
> > > > > +
> > > > > +module_param(irq_hardening, bool, 0444);
> > > > > +MODULE_PARM_DESC(irq_hardening,
> > > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > > +
> > > > >   /* Unique numbering for virtio devices. */
> > > > >   static DEFINE_IDA(virtio_index_ida);
> > > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > > >    * */
> > > > >   void virtio_reset_device(struct virtio_device *dev)
> > > > >   {
> > > > > + /*
> > > > > +  * The below synchronize_rcu() guarantees that any
> > > > > +  * interrupt for this line arriving after
> > > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > > +  * irq_soft_enabled == false.
> > > > News to me I did not know synchronize_rcu has anything to do
> > > > with interrupts. Did not you intend to use synchronize_irq?
> > > > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > > though it's most likely is ...
> > >
> > >
> > > According to the comment above tree RCU version of synchronize_rcu():
> > >
> > > """
> > >
> > >  * RCU read-side critical sections are delimited by rcu_read_lock()
> > >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> > >  * v5.0 and later, regions of code across which interrupts, preemption,
> > >  * or softirqs have been disabled also serve as RCU read-side critical
> > >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> > >  * and NMI handlers.
> > > """
> > >
> > > So interrupt handlers are treated as read-side critical sections.
> > >
> > > And it has the comment for explain the barrier:
> > >
> > > """
> > >
> > >  * Note that this guarantee implies further memory-ordering guarantees.
> > >  * On systems with more than one CPU, when synchronize_rcu() returns,
> > >  * each CPU is guaranteed to have executed a full memory barrier since
> > >  * the end of its last RCU read-side critical section whose beginning
> > >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > > """
> > >
> > > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > > barrier, if the interrupt come after WRITE_ONCE() it will see the
> > > irq_soft_enabled as false.
> > >
> >
> > You are right. So then
> > 1. I do not think we need load_acquire - why is it needed? Just
> >    READ_ONCE should do.
> 
> See above.
> 
> > 2. isn't synchronize_irq also doing the same thing?
> 
> 
> Yes, but it requires a config ops since the IRQ knowledge is transport specific.
> 
> >
> >
> > > >
> > > > > +  */
> > > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > > + synchronize_rcu();
> > > > > +
> > > > >           dev->config->reset(dev);
> > > > >   }
> > > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > > Please add comment explaining where it will be enabled.
> > > > Also, we *really* don't need to synch if it was already disabled,
> > > > let's not add useless overhead to the boot sequence.
> > >
> > >
> > > Ok.
> > >
> > >
> > > >
> > > >
> > > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > > >           spin_lock_init(&dev->config_lock);
> > > > >           dev->config_enabled = false;
> > > > >           dev->config_change_pending = false;
> > > > > + dev->irq_soft_check = irq_hardening;
> > > > > +
> > > > > + if (dev->irq_soft_check)
> > > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > > >           /* We always start by resetting the device, in case a previous
> > > > >            * driver messed it up.  This also tests that code path a little. */
> > > > one of the points of hardening is it's also helpful for buggy
> > > > devices. this flag defeats the purpose.
> > >
> > >
> > > Do you mean:
> > >
> > > 1) we need something like config_enable? This seems not easy to be
> > > implemented without obvious overhead, mainly the synchronize with the
> > > interrupt handlers
> >
> > But synchronize is only on tear-down path. That is not critical for any
> > users at the moment, even less than probe.
> 
> I meant if we have vq->irq_pending, we need to call vring_interrupt()
> in the virtio_device_ready() and synchronize the IRQ handlers with
> spinlock or others.
> 
> >
> > > 2) enable this by default, so I don't object, but this may have some risk
> > > for old hypervisors
> >
> >
> > The risk if there's a driver adding buffers without setting DRIVER_OK.
> 
> Probably not, we have devices that accept random inputs from outside,
> net, console, input etc. I've done a round of audits of the Qemu
> codes. They look all fine since day0.
> 
> > So with this approach, how about we rename the flag "driver_ok"?
> > And then add_buf can actually test it and BUG_ON if not there  (at least
> > in the debug build).
> 
> This looks like a hardening of the driver in the core instead of the
> device. I think it can be done but in a separate series.
> 
> >
> > And going down from there, how about we cache status in the
> > device? Then we don't need to keep re-reading it every time,
> > speeding boot up a tiny bit.
> 
> I don't fully understand here, actually spec requires status to be
> read back for validation in many cases.
> 
> Thanks
> 
> >
> > >
> > > >
> > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > >   }
> > > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > > >   {
> > > > > + struct virtqueue *_vq = v;
> > > > > + struct virtio_device *vdev = _vq->vdev;
> > > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > > +         return IRQ_NONE;
> > > > > + }
> > > > > +
> > > > >           if (!more_used(vq)) {
> > > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > > >                   return IRQ_NONE;
> > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > index 5464f398912a..957d6ad604ac 100644
> > > > > --- a/include/linux/virtio.h
> > > > > +++ b/include/linux/virtio.h
> > > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > > >    * @config_enabled: configuration change reporting enabled
> > > > >    * @config_change_pending: configuration change reported while disabled
> > > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > > + * @irq_soft_enabled: callbacks enabled
> > > > >    * @config_lock: protects configuration change reporting
> > > > >    * @dev: underlying device.
> > > > >    * @id: the device type identification (used to match it with a driver).
> > > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > > >           bool failed;
> > > > >           bool config_enabled;
> > > > >           bool config_change_pending;
> > > > > + bool irq_soft_check;
> > > > > + bool irq_soft_enabled;
> > > > >           spinlock_t config_lock;
> > > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > > >           struct device dev;
> > > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > > --- a/include/linux/virtio_config.h
> > > > > +++ b/include/linux/virtio_config.h
> > > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > > >           return __virtio_test_bit(vdev, fbit);
> > > > >   }
> > > > > +/*
> > > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > > + * @vdev: the device
> > > > > + */
> > > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > > +{
> > > > > + if (!vdev->irq_soft_check)
> > > > > +         return true;
> > > > > +
> > > > > + /*
> > > > > +  * Read irq_soft_enabled before reading other device specific
> > > > > +  * data. Paried with smp_store_relase() in
> > > > paired
> > >
> > >
> > > Will fix.
> > >
> > > Thanks
> > >
> > >
> > > >
> > > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > > +  * virtio_reset_device().
> > > > > +  */
> > > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > > +}
> > > > > +
> > > > >   /**
> > > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > > >    * @vdev: the device
> > > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > > >           if (dev->config->enable_cbs)
> > > > >                     dev->config->enable_cbs(dev);
> > > > > + /*
> > > > > +  * Commit the driver setup before enabling the virtqueue
> > > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > > +  * virtio_irq_soft_enabled()
> > > > > +  */
> > > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > > +
> > > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > > >   }
> > > > > --
> > > > > 2.25.1
> >


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-25  9:10   ` Re: Michael S. Tsirkin
@ 2022-03-25  9:20     ` Jason Wang
  2022-03-25 10:09       ` Re: Michael S. Tsirkin
  0 siblings, 1 reply; 414+ messages in thread
From: Jason Wang @ 2022-03-25  9:20 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Fri, Mar 25, 2022 at 5:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Fri, Mar 25, 2022 at 03:52:00PM +0800, Jason Wang wrote:
> > On Fri, Mar 25, 2022 at 2:31 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > Bcc:
> > > Subject: Re: [PATCH 3/3] virtio: harden vring IRQ
> > > Message-ID: <20220325021422-mutt-send-email-mst@kernel.org>
> > > Reply-To:
> > > In-Reply-To: <f7046303-7d7d-e39f-3c71-3688126cc812@redhat.com>
> > >
> > > On Fri, Mar 25, 2022 at 11:04:08AM +0800, Jason Wang wrote:
> > > >
> > > > 在 2022/3/24 下午7:03, Michael S. Tsirkin 写道:
> > > > > On Thu, Mar 24, 2022 at 04:40:04PM +0800, Jason Wang wrote:
> > > > > > This is a rework on the previous IRQ hardening that is done for
> > > > > > virtio-pci where several drawbacks were found and were reverted:
> > > > > >
> > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed IRQ
> > > > > >     that is used by some device such as virtio-blk
> > > > > > 2) done only for PCI transport
> > > > > >
> > > > > > In this patch, we tries to borrow the idea from the INTX IRQ hardening
> > > > > > in the reverted commit 080cd7c3ac87 ("virtio-pci: harden INTX interrupts")
> > > > > > by introducing a global irq_soft_enabled variable for each
> > > > > > virtio_device. Then we can to toggle it during
> > > > > > virtio_reset_device()/virtio_device_ready(). A synchornize_rcu() is
> > > > > > used in virtio_reset_device() to synchronize with the IRQ handlers. In
> > > > > > the future, we may provide config_ops for the transport that doesn't
> > > > > > use IRQ. With this, vring_interrupt() can return check and early if
> > > > > > irq_soft_enabled is false. This lead to smp_load_acquire() to be used
> > > > > > but the cost should be acceptable.
> > > > > Maybe it should be but is it? Can't we use synchronize_irq instead?
> > > >
> > > >
> > > > Even if we allow the transport driver to synchornize through
> > > > synchronize_irq() we still need a check in the vring_interrupt().
> > > >
> > > > We do something like the following previously:
> > > >
> > > >         if (!READ_ONCE(vp_dev->intx_soft_enabled))
> > > >                 return IRQ_NONE;
> > > >
> > > > But it looks like a bug since speculative read can be done before the check
> > > > where the interrupt handler can't see the uncommitted setup which is done by
> > > > the driver.
> > >
> > > I don't think so - if you sync after setting the value then
> > > you are guaranteed that any handler running afterwards
> > > will see the new value.
> >
> > The problem is not disabled but the enable.
>
> So a misbehaving device can lose interrupts? That's not a problem at all
> imo.

It's the interrupt raised before setting irq_soft_enabled to true:

CPU 0 probe) driver specific setup (not committed)
CPU 1 IRQ handler) read the uninitialized variable
CPU 0 probe) set irq_soft_enabled to true
CPU 1 IRQ handler) read irq_soft_enabled as true
CPU 1 IRQ handler) use the uninitialized variable

Thanks

>
> > We use smp_store_relase()
> > to make sure the driver commits the setup before enabling the irq. It
> > means the read needs to be ordered as well in vring_interrupt().
> >
> > >
> > > Although I couldn't find anything about this in memory-barriers.txt
> > > which surprises me.
> > >
> > > CC Paul to help make sure I'm right.
> > >
> > >
> > > >
> > > > >
> > > > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > > > module parameter is introduced to enable the hardening so function
> > > > > > hardening is disabled by default.
> > > > > Which devices are these? How come they send an interrupt before there
> > > > > are any buffers in any queues?
> > > >
> > > >
> > > > I copied this from the commit log for 22b7050a024d7
> > > >
> > > > "
> > > >
> > > >     This change will also benefit old hypervisors (before 2009)
> > > >     that send interrupts without checking DRIVER_OK: previously,
> > > >     the callback could race with driver-specific initialization.
> > > > "
> > > >
> > > > If this is only for config interrupt, I can remove the above log.
> > >
> > >
> > > This is only for config interrupt.
> >
> > Ok.
> >
> > >
> > > >
> > > > >
> > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > handler because it uses spinlock to do the synchronization which is
> > > > > > expensive.
> > > > > >
> > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > >
> > > > > > ---
> > > > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > > > >   include/linux/virtio.h        |  4 ++++
> > > > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > > > >
> > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > > > --- a/drivers/virtio/virtio.c
> > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > @@ -7,6 +7,12 @@
> > > > > >   #include <linux/of.h>
> > > > > >   #include <uapi/linux/virtio_ids.h>
> > > > > > +static bool irq_hardening = false;
> > > > > > +
> > > > > > +module_param(irq_hardening, bool, 0444);
> > > > > > +MODULE_PARM_DESC(irq_hardening,
> > > > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > > > +
> > > > > >   /* Unique numbering for virtio devices. */
> > > > > >   static DEFINE_IDA(virtio_index_ida);
> > > > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > > > >    * */
> > > > > >   void virtio_reset_device(struct virtio_device *dev)
> > > > > >   {
> > > > > > + /*
> > > > > > +  * The below synchronize_rcu() guarantees that any
> > > > > > +  * interrupt for this line arriving after
> > > > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > > > +  * irq_soft_enabled == false.
> > > > > News to me I did not know synchronize_rcu has anything to do
> > > > > with interrupts. Did not you intend to use synchronize_irq?
> > > > > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > > > though it's most likely is ...
> > > >
> > > >
> > > > According to the comment above tree RCU version of synchronize_rcu():
> > > >
> > > > """
> > > >
> > > >  * RCU read-side critical sections are delimited by rcu_read_lock()
> > > >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> > > >  * v5.0 and later, regions of code across which interrupts, preemption,
> > > >  * or softirqs have been disabled also serve as RCU read-side critical
> > > >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> > > >  * and NMI handlers.
> > > > """
> > > >
> > > > So interrupt handlers are treated as read-side critical sections.
> > > >
> > > > And it has the comment for explain the barrier:
> > > >
> > > > """
> > > >
> > > >  * Note that this guarantee implies further memory-ordering guarantees.
> > > >  * On systems with more than one CPU, when synchronize_rcu() returns,
> > > >  * each CPU is guaranteed to have executed a full memory barrier since
> > > >  * the end of its last RCU read-side critical section whose beginning
> > > >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > > > """
> > > >
> > > > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > > > barrier, if the interrupt come after WRITE_ONCE() it will see the
> > > > irq_soft_enabled as false.
> > > >
> > >
> > > You are right. So then
> > > 1. I do not think we need load_acquire - why is it needed? Just
> > >    READ_ONCE should do.
> >
> > See above.
> >
> > > 2. isn't synchronize_irq also doing the same thing?
> >
> >
> > Yes, but it requires a config ops since the IRQ knowledge is transport specific.
> >
> > >
> > >
> > > > >
> > > > > > +  */
> > > > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > > > + synchronize_rcu();
> > > > > > +
> > > > > >           dev->config->reset(dev);
> > > > > >   }
> > > > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > > > Please add comment explaining where it will be enabled.
> > > > > Also, we *really* don't need to synch if it was already disabled,
> > > > > let's not add useless overhead to the boot sequence.
> > > >
> > > >
> > > > Ok.
> > > >
> > > >
> > > > >
> > > > >
> > > > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > > > >           spin_lock_init(&dev->config_lock);
> > > > > >           dev->config_enabled = false;
> > > > > >           dev->config_change_pending = false;
> > > > > > + dev->irq_soft_check = irq_hardening;
> > > > > > +
> > > > > > + if (dev->irq_soft_check)
> > > > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > > > >           /* We always start by resetting the device, in case a previous
> > > > > >            * driver messed it up.  This also tests that code path a little. */
> > > > > one of the points of hardening is it's also helpful for buggy
> > > > > devices. this flag defeats the purpose.
> > > >
> > > >
> > > > Do you mean:
> > > >
> > > > 1) we need something like config_enable? This seems not easy to be
> > > > implemented without obvious overhead, mainly the synchronize with the
> > > > interrupt handlers
> > >
> > > But synchronize is only on tear-down path. That is not critical for any
> > > users at the moment, even less than probe.
> >
> > I meant if we have vq->irq_pending, we need to call vring_interrupt()
> > in the virtio_device_ready() and synchronize the IRQ handlers with
> > spinlock or others.
> >
> > >
> > > > 2) enable this by default, so I don't object, but this may have some risk
> > > > for old hypervisors
> > >
> > >
> > > The risk if there's a driver adding buffers without setting DRIVER_OK.
> >
> > Probably not, we have devices that accept random inputs from outside,
> > net, console, input etc. I've done a round of audits of the Qemu
> > codes. They look all fine since day0.
> >
> > > So with this approach, how about we rename the flag "driver_ok"?
> > > And then add_buf can actually test it and BUG_ON if not there  (at least
> > > in the debug build).
> >
> > This looks like a hardening of the driver in the core instead of the
> > device. I think it can be done but in a separate series.
> >
> > >
> > > And going down from there, how about we cache status in the
> > > device? Then we don't need to keep re-reading it every time,
> > > speeding boot up a tiny bit.
> >
> > I don't fully understand here, actually spec requires status to be
> > read back for validation in many cases.
> >
> > Thanks
> >
> > >
> > > >
> > > > >
> > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > > >   }
> > > > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > > > >   {
> > > > > > + struct virtqueue *_vq = v;
> > > > > > + struct virtio_device *vdev = _vq->vdev;
> > > > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > > > +         return IRQ_NONE;
> > > > > > + }
> > > > > > +
> > > > > >           if (!more_used(vq)) {
> > > > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > > > >                   return IRQ_NONE;
> > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > index 5464f398912a..957d6ad604ac 100644
> > > > > > --- a/include/linux/virtio.h
> > > > > > +++ b/include/linux/virtio.h
> > > > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > > > >    * @config_enabled: configuration change reporting enabled
> > > > > >    * @config_change_pending: configuration change reported while disabled
> > > > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > > > + * @irq_soft_enabled: callbacks enabled
> > > > > >    * @config_lock: protects configuration change reporting
> > > > > >    * @dev: underlying device.
> > > > > >    * @id: the device type identification (used to match it with a driver).
> > > > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > > > >           bool failed;
> > > > > >           bool config_enabled;
> > > > > >           bool config_change_pending;
> > > > > > + bool irq_soft_check;
> > > > > > + bool irq_soft_enabled;
> > > > > >           spinlock_t config_lock;
> > > > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > > > >           struct device dev;
> > > > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > > > --- a/include/linux/virtio_config.h
> > > > > > +++ b/include/linux/virtio_config.h
> > > > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > > > >           return __virtio_test_bit(vdev, fbit);
> > > > > >   }
> > > > > > +/*
> > > > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > > > + * @vdev: the device
> > > > > > + */
> > > > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > > > +{
> > > > > > + if (!vdev->irq_soft_check)
> > > > > > +         return true;
> > > > > > +
> > > > > > + /*
> > > > > > +  * Read irq_soft_enabled before reading other device specific
> > > > > > +  * data. Paried with smp_store_relase() in
> > > > > paired
> > > >
> > > >
> > > > Will fix.
> > > >
> > > > Thanks
> > > >
> > > >
> > > > >
> > > > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > > > +  * virtio_reset_device().
> > > > > > +  */
> > > > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > > > +}
> > > > > > +
> > > > > >   /**
> > > > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > > > >    * @vdev: the device
> > > > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > > > >           if (dev->config->enable_cbs)
> > > > > >                     dev->config->enable_cbs(dev);
> > > > > > + /*
> > > > > > +  * Commit the driver setup before enabling the virtqueue
> > > > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > > > +  * virtio_irq_soft_enabled()
> > > > > > +  */
> > > > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > > > +
> > > > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > >   }
> > > > > > --
> > > > > > 2.25.1
> > >
>


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-25  9:20     ` Re: Jason Wang
@ 2022-03-25 10:09       ` Michael S. Tsirkin
  2022-03-28  4:56         ` Re: Jason Wang
  0 siblings, 1 reply; 414+ messages in thread
From: Michael S. Tsirkin @ 2022-03-25 10:09 UTC (permalink / raw)
  To: Jason Wang
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Fri, Mar 25, 2022 at 05:20:19PM +0800, Jason Wang wrote:
> On Fri, Mar 25, 2022 at 5:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Fri, Mar 25, 2022 at 03:52:00PM +0800, Jason Wang wrote:
> > > On Fri, Mar 25, 2022 at 2:31 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > Bcc:
> > > > Subject: Re: [PATCH 3/3] virtio: harden vring IRQ
> > > > Message-ID: <20220325021422-mutt-send-email-mst@kernel.org>
> > > > Reply-To:
> > > > In-Reply-To: <f7046303-7d7d-e39f-3c71-3688126cc812@redhat.com>
> > > >
> > > > On Fri, Mar 25, 2022 at 11:04:08AM +0800, Jason Wang wrote:
> > > > >
> > > > > 在 2022/3/24 下午7:03, Michael S. Tsirkin 写道:
> > > > > > On Thu, Mar 24, 2022 at 04:40:04PM +0800, Jason Wang wrote:
> > > > > > > This is a rework on the previous IRQ hardening that is done for
> > > > > > > virtio-pci where several drawbacks were found and were reverted:
> > > > > > >
> > > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed IRQ
> > > > > > >     that is used by some device such as virtio-blk
> > > > > > > 2) done only for PCI transport
> > > > > > >
> > > > > > > In this patch, we tries to borrow the idea from the INTX IRQ hardening
> > > > > > > in the reverted commit 080cd7c3ac87 ("virtio-pci: harden INTX interrupts")
> > > > > > > by introducing a global irq_soft_enabled variable for each
> > > > > > > virtio_device. Then we can to toggle it during
> > > > > > > virtio_reset_device()/virtio_device_ready(). A synchornize_rcu() is
> > > > > > > used in virtio_reset_device() to synchronize with the IRQ handlers. In
> > > > > > > the future, we may provide config_ops for the transport that doesn't
> > > > > > > use IRQ. With this, vring_interrupt() can return check and early if
> > > > > > > irq_soft_enabled is false. This lead to smp_load_acquire() to be used
> > > > > > > but the cost should be acceptable.
> > > > > > Maybe it should be but is it? Can't we use synchronize_irq instead?
> > > > >
> > > > >
> > > > > Even if we allow the transport driver to synchornize through
> > > > > synchronize_irq() we still need a check in the vring_interrupt().
> > > > >
> > > > > We do something like the following previously:
> > > > >
> > > > >         if (!READ_ONCE(vp_dev->intx_soft_enabled))
> > > > >                 return IRQ_NONE;
> > > > >
> > > > > But it looks like a bug since speculative read can be done before the check
> > > > > where the interrupt handler can't see the uncommitted setup which is done by
> > > > > the driver.
> > > >
> > > > I don't think so - if you sync after setting the value then
> > > > you are guaranteed that any handler running afterwards
> > > > will see the new value.
> > >
> > > The problem is not disabled but the enable.
> >
> > So a misbehaving device can lose interrupts? That's not a problem at all
> > imo.
> 
> It's the interrupt raised before setting irq_soft_enabled to true:
> 
> CPU 0 probe) driver specific setup (not committed)
> CPU 1 IRQ handler) read the uninitialized variable
> CPU 0 probe) set irq_soft_enabled to true
> CPU 1 IRQ handler) read irq_soft_enabled as true
> CPU 1 IRQ handler) use the uninitialized variable
> 
> Thanks

Yea, it hurts if you do it.  So do not do it then ;).

irq_soft_enabled (I think driver_ok or status is a better name)
should be initialized to false *before* irq is requested.

And requesting an irq commits all memory; otherwise all drivers would be
broken. If it doesn't, it just needs to be fixed, not worked around in
virtio.


> >
> > > We use smp_store_relase()
> > > to make sure the driver commits the setup before enabling the irq. It
> > > means the read needs to be ordered as well in vring_interrupt().
> > >
> > > >
> > > > Although I couldn't find anything about this in memory-barriers.txt
> > > > which surprises me.
> > > >
> > > > CC Paul to help make sure I'm right.
> > > >
> > > >
> > > > >
> > > > > >
> > > > > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > > > > module parameter is introduced to enable the hardening so function
> > > > > > > hardening is disabled by default.
> > > > > > Which devices are these? How come they send an interrupt before there
> > > > > > are any buffers in any queues?
> > > > >
> > > > >
> > > > > I copied this from the commit log for 22b7050a024d7
> > > > >
> > > > > "
> > > > >
> > > > >     This change will also benefit old hypervisors (before 2009)
> > > > >     that send interrupts without checking DRIVER_OK: previously,
> > > > >     the callback could race with driver-specific initialization.
> > > > > "
> > > > >
> > > > > If this is only for config interrupt, I can remove the above log.
> > > >
> > > >
> > > > This is only for config interrupt.
> > >
> > > Ok.
> > >
> > > >
> > > > >
> > > > > >
> > > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > > handler because it uses spinlock to do the synchronization which is
> > > > > > > expensive.
> > > > > > >
> > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > >
> > > > > > > ---
> > > > > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > > > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > > > > >   include/linux/virtio.h        |  4 ++++
> > > > > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > > > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > > > > >
> > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > @@ -7,6 +7,12 @@
> > > > > > >   #include <linux/of.h>
> > > > > > >   #include <uapi/linux/virtio_ids.h>
> > > > > > > +static bool irq_hardening = false;
> > > > > > > +
> > > > > > > +module_param(irq_hardening, bool, 0444);
> > > > > > > +MODULE_PARM_DESC(irq_hardening,
> > > > > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > > > > +
> > > > > > >   /* Unique numbering for virtio devices. */
> > > > > > >   static DEFINE_IDA(virtio_index_ida);
> > > > > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > > > > >    * */
> > > > > > >   void virtio_reset_device(struct virtio_device *dev)
> > > > > > >   {
> > > > > > > + /*
> > > > > > > +  * The below synchronize_rcu() guarantees that any
> > > > > > > +  * interrupt for this line arriving after
> > > > > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > > > > +  * irq_soft_enabled == false.
> > > > > > News to me I did not know synchronize_rcu has anything to do
> > > > > > with interrupts. Did not you intend to use synchronize_irq?
> > > > > > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > > > > though it's most likely is ...
> > > > >
> > > > >
> > > > > According to the comment above tree RCU version of synchronize_rcu():
> > > > >
> > > > > """
> > > > >
> > > > >  * RCU read-side critical sections are delimited by rcu_read_lock()
> > > > >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> > > > >  * v5.0 and later, regions of code across which interrupts, preemption,
> > > > >  * or softirqs have been disabled also serve as RCU read-side critical
> > > > >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> > > > >  * and NMI handlers.
> > > > > """
> > > > >
> > > > > So interrupt handlers are treated as read-side critical sections.
> > > > >
> > > > > And it has the comment for explain the barrier:
> > > > >
> > > > > """
> > > > >
> > > > >  * Note that this guarantee implies further memory-ordering guarantees.
> > > > >  * On systems with more than one CPU, when synchronize_rcu() returns,
> > > > >  * each CPU is guaranteed to have executed a full memory barrier since
> > > > >  * the end of its last RCU read-side critical section whose beginning
> > > > >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > > > > """
> > > > >
> > > > > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > > > > barrier, if the interrupt come after WRITE_ONCE() it will see the
> > > > > irq_soft_enabled as false.
> > > > >
> > > >
> > > > You are right. So then
> > > > 1. I do not think we need load_acquire - why is it needed? Just
> > > >    READ_ONCE should do.
> > >
> > > See above.
> > >
> > > > 2. isn't synchronize_irq also doing the same thing?
> > >
> > >
> > > Yes, but it requires a config ops since the IRQ knowledge is transport specific.
> > >
> > > >
> > > >
> > > > > >
> > > > > > > +  */
> > > > > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > > > > + synchronize_rcu();
> > > > > > > +
> > > > > > >           dev->config->reset(dev);
> > > > > > >   }
> > > > > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > > > > Please add comment explaining where it will be enabled.
> > > > > > Also, we *really* don't need to synch if it was already disabled,
> > > > > > let's not add useless overhead to the boot sequence.
> > > > >
> > > > >
> > > > > Ok.
> > > > >
> > > > >
> > > > > >
> > > > > >
> > > > > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > > > > >           spin_lock_init(&dev->config_lock);
> > > > > > >           dev->config_enabled = false;
> > > > > > >           dev->config_change_pending = false;
> > > > > > > + dev->irq_soft_check = irq_hardening;
> > > > > > > +
> > > > > > > + if (dev->irq_soft_check)
> > > > > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > > > > >           /* We always start by resetting the device, in case a previous
> > > > > > >            * driver messed it up.  This also tests that code path a little. */
> > > > > > one of the points of hardening is it's also helpful for buggy
> > > > > > devices. this flag defeats the purpose.
> > > > >
> > > > >
> > > > > Do you mean:
> > > > >
> > > > > 1) we need something like config_enable? This seems not easy to be
> > > > > implemented without obvious overhead, mainly the synchronize with the
> > > > > interrupt handlers
> > > >
> > > > But synchronize is only on tear-down path. That is not critical for any
> > > > users at the moment, even less than probe.
> > >
> > > I meant if we have vq->irq_pending, we need to call vring_interrupt()
> > > in the virtio_device_ready() and synchronize the IRQ handlers with
> > > spinlock or others.
> > >
> > > >
> > > > > 2) enable this by default, so I don't object, but this may have some risk
> > > > > for old hypervisors
> > > >
> > > >
> > > > The risk if there's a driver adding buffers without setting DRIVER_OK.
> > >
> > > Probably not, we have devices that accept random inputs from outside,
> > > net, console, input etc. I've done a round of audits of the Qemu
> > > codes. They look all fine since day0.
> > >
> > > > So with this approach, how about we rename the flag "driver_ok"?
> > > > And then add_buf can actually test it and BUG_ON if not there  (at least
> > > > in the debug build).
> > >
> > > This looks like a hardening of the driver in the core instead of the
> > > device. I think it can be done but in a separate series.
> > >
> > > >
> > > > And going down from there, how about we cache status in the
> > > > device? Then we don't need to keep re-reading it every time,
> > > > speeding boot up a tiny bit.
> > >
> > > I don't fully understand here, actually spec requires status to be
> > > read back for validation in many cases.
> > >
> > > Thanks
> > >
> > > >
> > > > >
> > > > > >
> > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > > > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > > > >   }
> > > > > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > > > > >   {
> > > > > > > + struct virtqueue *_vq = v;
> > > > > > > + struct virtio_device *vdev = _vq->vdev;
> > > > > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > > > > +         return IRQ_NONE;
> > > > > > > + }
> > > > > > > +
> > > > > > >           if (!more_used(vq)) {
> > > > > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > > > > >                   return IRQ_NONE;
> > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > index 5464f398912a..957d6ad604ac 100644
> > > > > > > --- a/include/linux/virtio.h
> > > > > > > +++ b/include/linux/virtio.h
> > > > > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > > > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > > > > >    * @config_enabled: configuration change reporting enabled
> > > > > > >    * @config_change_pending: configuration change reported while disabled
> > > > > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > > > > + * @irq_soft_enabled: callbacks enabled
> > > > > > >    * @config_lock: protects configuration change reporting
> > > > > > >    * @dev: underlying device.
> > > > > > >    * @id: the device type identification (used to match it with a driver).
> > > > > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > > > > >           bool failed;
> > > > > > >           bool config_enabled;
> > > > > > >           bool config_change_pending;
> > > > > > > + bool irq_soft_check;
> > > > > > > + bool irq_soft_enabled;
> > > > > > >           spinlock_t config_lock;
> > > > > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > > > > >           struct device dev;
> > > > > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > > > > --- a/include/linux/virtio_config.h
> > > > > > > +++ b/include/linux/virtio_config.h
> > > > > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > > > > >           return __virtio_test_bit(vdev, fbit);
> > > > > > >   }
> > > > > > > +/*
> > > > > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > > > > + * @vdev: the device
> > > > > > > + */
> > > > > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > > > > +{
> > > > > > > + if (!vdev->irq_soft_check)
> > > > > > > +         return true;
> > > > > > > +
> > > > > > > + /*
> > > > > > > +  * Read irq_soft_enabled before reading other device specific
> > > > > > > +  * data. Paried with smp_store_relase() in
> > > > > > paired
> > > > >
> > > > >
> > > > > Will fix.
> > > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > > >
> > > > > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > > > > +  * virtio_reset_device().
> > > > > > > +  */
> > > > > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > > > > +}
> > > > > > > +
> > > > > > >   /**
> > > > > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > > > > >    * @vdev: the device
> > > > > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > > > > >           if (dev->config->enable_cbs)
> > > > > > >                     dev->config->enable_cbs(dev);
> > > > > > > + /*
> > > > > > > +  * Commit the driver setup before enabling the virtqueue
> > > > > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > > > > +  * virtio_irq_soft_enabled()
> > > > > > > +  */
> > > > > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > > > > +
> > > > > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > >   }
> > > > > > > --
> > > > > > > 2.25.1
> > > >
> >


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-25 10:09       ` Re: Michael S. Tsirkin
@ 2022-03-28  4:56         ` Jason Wang
  2022-03-28  5:59           ` Re: Michael S. Tsirkin
  0 siblings, 1 reply; 414+ messages in thread
From: Jason Wang @ 2022-03-28  4:56 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Fri, Mar 25, 2022 at 6:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Fri, Mar 25, 2022 at 05:20:19PM +0800, Jason Wang wrote:
> > On Fri, Mar 25, 2022 at 5:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Fri, Mar 25, 2022 at 03:52:00PM +0800, Jason Wang wrote:
> > > > On Fri, Mar 25, 2022 at 2:31 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > Bcc:
> > > > > Subject: Re: [PATCH 3/3] virtio: harden vring IRQ
> > > > > Message-ID: <20220325021422-mutt-send-email-mst@kernel.org>
> > > > > Reply-To:
> > > > > In-Reply-To: <f7046303-7d7d-e39f-3c71-3688126cc812@redhat.com>
> > > > >
> > > > > On Fri, Mar 25, 2022 at 11:04:08AM +0800, Jason Wang wrote:
> > > > > >
> > > > > > 在 2022/3/24 下午7:03, Michael S. Tsirkin 写道:
> > > > > > > On Thu, Mar 24, 2022 at 04:40:04PM +0800, Jason Wang wrote:
> > > > > > > > This is a rework on the previous IRQ hardening that is done for
> > > > > > > > virtio-pci where several drawbacks were found and were reverted:
> > > > > > > >
> > > > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed IRQ
> > > > > > > >     that is used by some device such as virtio-blk
> > > > > > > > 2) done only for PCI transport
> > > > > > > >
> > > > > > > > In this patch, we tries to borrow the idea from the INTX IRQ hardening
> > > > > > > > in the reverted commit 080cd7c3ac87 ("virtio-pci: harden INTX interrupts")
> > > > > > > > by introducing a global irq_soft_enabled variable for each
> > > > > > > > virtio_device. Then we can to toggle it during
> > > > > > > > virtio_reset_device()/virtio_device_ready(). A synchornize_rcu() is
> > > > > > > > used in virtio_reset_device() to synchronize with the IRQ handlers. In
> > > > > > > > the future, we may provide config_ops for the transport that doesn't
> > > > > > > > use IRQ. With this, vring_interrupt() can return check and early if
> > > > > > > > irq_soft_enabled is false. This lead to smp_load_acquire() to be used
> > > > > > > > but the cost should be acceptable.
> > > > > > > Maybe it should be but is it? Can't we use synchronize_irq instead?
> > > > > >
> > > > > >
> > > > > > Even if we allow the transport driver to synchornize through
> > > > > > synchronize_irq() we still need a check in the vring_interrupt().
> > > > > >
> > > > > > We do something like the following previously:
> > > > > >
> > > > > >         if (!READ_ONCE(vp_dev->intx_soft_enabled))
> > > > > >                 return IRQ_NONE;
> > > > > >
> > > > > > But it looks like a bug since speculative read can be done before the check
> > > > > > where the interrupt handler can't see the uncommitted setup which is done by
> > > > > > the driver.
> > > > >
> > > > > I don't think so - if you sync after setting the value then
> > > > > you are guaranteed that any handler running afterwards
> > > > > will see the new value.
> > > >
> > > > The problem is not disabled but the enable.
> > >
> > > So a misbehaving device can lose interrupts? That's not a problem at all
> > > imo.
> >
> > It's the interrupt raised before setting irq_soft_enabled to true:
> >
> > CPU 0 probe) driver specific setup (not commited)
> > CPU 1 IRQ handler) read the uninitialized variable
> > CPU 0 probe) set irq_soft_enabled to true
> > CPU 1 IRQ handler) read irq_soft_enable as true
> > CPU 1 IRQ handler) use the uninitialized variable
> >
> > Thanks
>
> Yea, it hurts if you do it.  So do not do it then ;).
>
> irq_soft_enabled (I think driver_ok or status is a better name)

I can change it to driver_ok.

> should be initialized to false *before* irq is requested.
>
> And requesting irq commits all memory otherwise all drivers would be
> broken,

So I think we might be talking about different issues:

1) Whether request_irq() commits the previous setups: I think the
answer is yes, since the spin_unlock of desc->lock (release) can
guarantee this, though there seems to be no documentation around
request_irq() that says so.

And I can see at least drivers/video/fbdev/omap2/omapfb/dss/dispc.c is
using smp_wmb() before the request_irq().

And even if the write is ordered, we still need the read to be ordered
so that it pairs with that write.

> if it doesn't it just needs to be fixed, not worked around in
> virtio.

2) virtio drivers might do a lot of setups between request_irq() and
virtio_device_ready():

request_irq()
driver specific setups
virtio_device_ready()

CPU 0 probe) request_irq()
CPU 1 IRQ handler) read the uninitialized variable
CPU 0 probe) driver specific setups
CPU 0 probe) smp_store_release(irq_soft_enabled, true), commit the setups
CPU 1 IRQ handler) read irq_soft_enabled as true
CPU 1 IRQ handler) use the uninitialized variable

Thanks

>
>
> > >
> > > > We use smp_store_relase()
> > > > to make sure the driver commits the setup before enabling the irq. It
> > > > means the read needs to be ordered as well in vring_interrupt().
> > > >
> > > > >
> > > > > Although I couldn't find anything about this in memory-barriers.txt
> > > > > which surprises me.
> > > > >
> > > > > CC Paul to help make sure I'm right.
> > > > >
> > > > >
> > > > > >
> > > > > > >
> > > > > > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > > > > > module parameter is introduced to enable the hardening so function
> > > > > > > > hardening is disabled by default.
> > > > > > > Which devices are these? How come they send an interrupt before there
> > > > > > > are any buffers in any queues?
> > > > > >
> > > > > >
> > > > > > I copied this from the commit log for 22b7050a024d7
> > > > > >
> > > > > > "
> > > > > >
> > > > > >     This change will also benefit old hypervisors (before 2009)
> > > > > >     that send interrupts without checking DRIVER_OK: previously,
> > > > > >     the callback could race with driver-specific initialization.
> > > > > > "
> > > > > >
> > > > > > If this is only for config interrupt, I can remove the above log.
> > > > >
> > > > >
> > > > > This is only for config interrupt.
> > > >
> > > > Ok.
> > > >
> > > > >
> > > > > >
> > > > > > >
> > > > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > > > handler because it uses spinlock to do the synchronization which is
> > > > > > > > expensive.
> > > > > > > >
> > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > >
> > > > > > > > ---
> > > > > > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > > > > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > > > > > >   include/linux/virtio.h        |  4 ++++
> > > > > > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > > > > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > > @@ -7,6 +7,12 @@
> > > > > > > >   #include <linux/of.h>
> > > > > > > >   #include <uapi/linux/virtio_ids.h>
> > > > > > > > +static bool irq_hardening = false;
> > > > > > > > +
> > > > > > > > +module_param(irq_hardening, bool, 0444);
> > > > > > > > +MODULE_PARM_DESC(irq_hardening,
> > > > > > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > > > > > +
> > > > > > > >   /* Unique numbering for virtio devices. */
> > > > > > > >   static DEFINE_IDA(virtio_index_ida);
> > > > > > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > > > > > >    * */
> > > > > > > >   void virtio_reset_device(struct virtio_device *dev)
> > > > > > > >   {
> > > > > > > > + /*
> > > > > > > > +  * The below synchronize_rcu() guarantees that any
> > > > > > > > +  * interrupt for this line arriving after
> > > > > > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > > > > > +  * irq_soft_enabled == false.
> > > > > > > News to me I did not know synchronize_rcu has anything to do
> > > > > > > with interrupts. Did not you intend to use synchronize_irq?
> > > > > > > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > > > > > though it's most likely is ...
> > > > > >
> > > > > >
> > > > > > According to the comment above tree RCU version of synchronize_rcu():
> > > > > >
> > > > > > """
> > > > > >
> > > > > >  * RCU read-side critical sections are delimited by rcu_read_lock()
> > > > > >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> > > > > >  * v5.0 and later, regions of code across which interrupts, preemption,
> > > > > >  * or softirqs have been disabled also serve as RCU read-side critical
> > > > > >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> > > > > >  * and NMI handlers.
> > > > > > """
> > > > > >
> > > > > > So interrupt handlers are treated as read-side critical sections.
> > > > > >
> > > > > > And it has the comment for explain the barrier:
> > > > > >
> > > > > > """
> > > > > >
> > > > > >  * Note that this guarantee implies further memory-ordering guarantees.
> > > > > >  * On systems with more than one CPU, when synchronize_rcu() returns,
> > > > > >  * each CPU is guaranteed to have executed a full memory barrier since
> > > > > >  * the end of its last RCU read-side critical section whose beginning
> > > > > >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > > > > > """
> > > > > >
> > > > > > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > > > > > barrier, if the interrupt come after WRITE_ONCE() it will see the
> > > > > > irq_soft_enabled as false.
> > > > > >
> > > > >
> > > > > You are right. So then
> > > > > 1. I do not think we need load_acquire - why is it needed? Just
> > > > >    READ_ONCE should do.
> > > >
> > > > See above.
> > > >
> > > > > 2. isn't synchronize_irq also doing the same thing?
> > > >
> > > >
> > > > Yes, but it requires a config ops since the IRQ knowledge is transport specific.
> > > >
> > > > >
> > > > >
> > > > > > >
> > > > > > > > +  */
> > > > > > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > > > > > + synchronize_rcu();
> > > > > > > > +
> > > > > > > >           dev->config->reset(dev);
> > > > > > > >   }
> > > > > > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > > > > > Please add comment explaining where it will be enabled.
> > > > > > > Also, we *really* don't need to synch if it was already disabled,
> > > > > > > let's not add useless overhead to the boot sequence.
> > > > > >
> > > > > >
> > > > > > Ok.
> > > > > >
> > > > > >
> > > > > > >
> > > > > > >
> > > > > > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > > > > > >           spin_lock_init(&dev->config_lock);
> > > > > > > >           dev->config_enabled = false;
> > > > > > > >           dev->config_change_pending = false;
> > > > > > > > + dev->irq_soft_check = irq_hardening;
> > > > > > > > +
> > > > > > > > + if (dev->irq_soft_check)
> > > > > > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > > > > > >           /* We always start by resetting the device, in case a previous
> > > > > > > >            * driver messed it up.  This also tests that code path a little. */
> > > > > > > one of the points of hardening is it's also helpful for buggy
> > > > > > > devices. this flag defeats the purpose.
> > > > > >
> > > > > >
> > > > > > Do you mean:
> > > > > >
> > > > > > 1) we need something like config_enable? This seems not easy to be
> > > > > > implemented without obvious overhead, mainly the synchronize with the
> > > > > > interrupt handlers
> > > > >
> > > > > But synchronize is only on tear-down path. That is not critical for any
> > > > > users at the moment, even less than probe.
> > > >
> > > > I meant if we have vq->irq_pending, we need to call vring_interrupt()
> > > > in the virtio_device_ready() and synchronize the IRQ handlers with
> > > > spinlock or others.
> > > >
> > > > >
> > > > > > 2) enable this by default, so I don't object, but this may have some risk
> > > > > > for old hypervisors
> > > > >
> > > > >
> > > > > The risk if there's a driver adding buffers without setting DRIVER_OK.
> > > >
> > > > Probably not, we have devices that accept random inputs from outside,
> > > > net, console, input etc. I've done a round of audits of the Qemu
> > > > codes. They look all fine since day0.
> > > >
> > > > > So with this approach, how about we rename the flag "driver_ok"?
> > > > > And then add_buf can actually test it and BUG_ON if not there  (at least
> > > > > in the debug build).
> > > >
> > > > This looks like a hardening of the driver in the core instead of the
> > > > device. I think it can be done but in a separate series.
> > > >
> > > > >
> > > > > And going down from there, how about we cache status in the
> > > > > device? Then we don't need to keep re-reading it every time,
> > > > > speeding boot up a tiny bit.
> > > >
> > > > I don't fully understand here, actually spec requires status to be
> > > > read back for validation in many cases.
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > >
> > > > > > >
> > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > > > > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > > > > >   }
> > > > > > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > > > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > > > > > >   {
> > > > > > > > + struct virtqueue *_vq = v;
> > > > > > > > + struct virtio_device *vdev = _vq->vdev;
> > > > > > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > > > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > > > > > +         return IRQ_NONE;
> > > > > > > > + }
> > > > > > > > +
> > > > > > > >           if (!more_used(vq)) {
> > > > > > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > > > > > >                   return IRQ_NONE;
> > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > index 5464f398912a..957d6ad604ac 100644
> > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > > > > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > > > > > >    * @config_enabled: configuration change reporting enabled
> > > > > > > >    * @config_change_pending: configuration change reported while disabled
> > > > > > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > > > > > + * @irq_soft_enabled: callbacks enabled
> > > > > > > >    * @config_lock: protects configuration change reporting
> > > > > > > >    * @dev: underlying device.
> > > > > > > >    * @id: the device type identification (used to match it with a driver).
> > > > > > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > > > > > >           bool failed;
> > > > > > > >           bool config_enabled;
> > > > > > > >           bool config_change_pending;
> > > > > > > > + bool irq_soft_check;
> > > > > > > > + bool irq_soft_enabled;
> > > > > > > >           spinlock_t config_lock;
> > > > > > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > > > > > >           struct device dev;
> > > > > > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > > > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > > > > > --- a/include/linux/virtio_config.h
> > > > > > > > +++ b/include/linux/virtio_config.h
> > > > > > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > > > > > >           return __virtio_test_bit(vdev, fbit);
> > > > > > > >   }
> > > > > > > > +/*
> > > > > > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > > > > > + * @vdev: the device
> > > > > > > > + */
> > > > > > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > > > > > +{
> > > > > > > > + if (!vdev->irq_soft_check)
> > > > > > > > +         return true;
> > > > > > > > +
> > > > > > > > + /*
> > > > > > > > +  * Read irq_soft_enabled before reading other device specific
> > > > > > > > +  * data. Paried with smp_store_relase() in
> > > > > > > paired
> > > > > >
> > > > > >
> > > > > > Will fix.
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > > > > > +  * virtio_reset_device().
> > > > > > > > +  */
> > > > > > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >   /**
> > > > > > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > > > > > >    * @vdev: the device
> > > > > > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > > > > > >           if (dev->config->enable_cbs)
> > > > > > > >                     dev->config->enable_cbs(dev);
> > > > > > > > + /*
> > > > > > > > +  * Commit the driver setup before enabling the virtqueue
> > > > > > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > > > > > +  * virtio_irq_soft_enabled()
> > > > > > > > +  */
> > > > > > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > > > > > +
> > > > > > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > >   }
> > > > > > > > --
> > > > > > > > 2.25.1
> > > > >
> > >
>


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-28  4:56         ` Re: Jason Wang
@ 2022-03-28  5:59           ` Michael S. Tsirkin
  2022-03-28  6:18             ` Re: Jason Wang
  0 siblings, 1 reply; 414+ messages in thread
From: Michael S. Tsirkin @ 2022-03-28  5:59 UTC (permalink / raw)
  To: Jason Wang
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Mon, Mar 28, 2022 at 12:56:41PM +0800, Jason Wang wrote:
> On Fri, Mar 25, 2022 at 6:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Fri, Mar 25, 2022 at 05:20:19PM +0800, Jason Wang wrote:
> > > On Fri, Mar 25, 2022 at 5:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Fri, Mar 25, 2022 at 03:52:00PM +0800, Jason Wang wrote:
> > > > > On Fri, Mar 25, 2022 at 2:31 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > Bcc:
> > > > > > Subject: Re: [PATCH 3/3] virtio: harden vring IRQ
> > > > > > Message-ID: <20220325021422-mutt-send-email-mst@kernel.org>
> > > > > > Reply-To:
> > > > > > In-Reply-To: <f7046303-7d7d-e39f-3c71-3688126cc812@redhat.com>
> > > > > >
> > > > > > On Fri, Mar 25, 2022 at 11:04:08AM +0800, Jason Wang wrote:
> > > > > > >
> > > > > > > 在 2022/3/24 下午7:03, Michael S. Tsirkin 写道:
> > > > > > > > On Thu, Mar 24, 2022 at 04:40:04PM +0800, Jason Wang wrote:
> > > > > > > > > This is a rework on the previous IRQ hardening that is done for
> > > > > > > > > virtio-pci where several drawbacks were found and were reverted:
> > > > > > > > >
> > > > > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed IRQ
> > > > > > > > >     that is used by some device such as virtio-blk
> > > > > > > > > 2) done only for PCI transport
> > > > > > > > >
> > > > > > > > > In this patch, we tries to borrow the idea from the INTX IRQ hardening
> > > > > > > > > in the reverted commit 080cd7c3ac87 ("virtio-pci: harden INTX interrupts")
> > > > > > > > > by introducing a global irq_soft_enabled variable for each
> > > > > > > > > virtio_device. Then we can to toggle it during
> > > > > > > > > virtio_reset_device()/virtio_device_ready(). A synchornize_rcu() is
> > > > > > > > > used in virtio_reset_device() to synchronize with the IRQ handlers. In
> > > > > > > > > the future, we may provide config_ops for the transport that doesn't
> > > > > > > > > use IRQ. With this, vring_interrupt() can return check and early if
> > > > > > > > > irq_soft_enabled is false. This lead to smp_load_acquire() to be used
> > > > > > > > > but the cost should be acceptable.
> > > > > > > > Maybe it should be but is it? Can't we use synchronize_irq instead?
> > > > > > >
> > > > > > >
> > > > > > > Even if we allow the transport driver to synchornize through
> > > > > > > synchronize_irq() we still need a check in the vring_interrupt().
> > > > > > >
> > > > > > > We do something like the following previously:
> > > > > > >
> > > > > > >         if (!READ_ONCE(vp_dev->intx_soft_enabled))
> > > > > > >                 return IRQ_NONE;
> > > > > > >
> > > > > > > But it looks like a bug since speculative read can be done before the check
> > > > > > > where the interrupt handler can't see the uncommitted setup which is done by
> > > > > > > the driver.
> > > > > >
> > > > > > I don't think so - if you sync after setting the value then
> > > > > > you are guaranteed that any handler running afterwards
> > > > > > will see the new value.
> > > > >
> > > > > The problem is not disabled but the enable.
> > > >
> > > > So a misbehaving device can lose interrupts? That's not a problem at all
> > > > imo.
> > >
> > > It's the interrupt raised before setting irq_soft_enabled to true:
> > >
> > > CPU 0 probe) driver specific setup (not commited)
> > > CPU 1 IRQ handler) read the uninitialized variable
> > > CPU 0 probe) set irq_soft_enabled to true
> > > CPU 1 IRQ handler) read irq_soft_enable as true
> > > CPU 1 IRQ handler) use the uninitialized variable
> > >
> > > Thanks
> >
> > Yea, it hurts if you do it.  So do not do it then ;).
> >
> > irq_soft_enabled (I think driver_ok or status is a better name)
> 
> I can change it to driver_ok.
> 
> > should be initialized to false *before* irq is requested.
> >
> > And requesting irq commits all memory otherwise all drivers would be
> > broken,
> 
> So I think we might talk different issues:
> 
> 1) Whether request_irq() commits the previous setups, I think the
> answer is yes, since the spin_unlock of desc->lock (release) can
> guarantee this though there seems no documentation around
> request_irq() to say this.
> 
> And I can see at least drivers/video/fbdev/omap2/omapfb/dss/dispc.c is
> using smp_wmb() before the request_irq().
> 
> And even if write is ordered we still need read to be ordered to be
> paired with that.
> 
> > if it doesn't it just needs to be fixed, not worked around in
> > virtio.
> 
> 2) virtio drivers might do a lot of setups between request_irq() and
> virtio_device_ready():
> 
> request_irq()
> driver specific setups
> virtio_device_ready()
> 
> CPU 0 probe) request_irq()
> CPU 1 IRQ handler) read the uninitialized variable
> CPU 0 probe) driver specific setups
> CPU 0 probe) smp_store_release(intr_soft_enabled, true), commit the setups
> CPU 1 IRQ handler) read irq_soft_enable as true
> CPU 1 IRQ handler) use the uninitialized variable
> 
> Thanks


As I said, virtio_device_ready needs to do synchronize_irq.
That will guarantee all setup is visible to the specific IRQ; this
is what its point is.


> >
> >
> > > >
> > > > > We use smp_store_relase()
> > > > > to make sure the driver commits the setup before enabling the irq. It
> > > > > means the read needs to be ordered as well in vring_interrupt().
> > > > >
> > > > > >
> > > > > > Although I couldn't find anything about this in memory-barriers.txt
> > > > > > which surprises me.
> > > > > >
> > > > > > CC Paul to help make sure I'm right.
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > > > > > > module parameter is introduced to enable the hardening so function
> > > > > > > > > hardening is disabled by default.
> > > > > > > > Which devices are these? How come they send an interrupt before there
> > > > > > > > are any buffers in any queues?
> > > > > > >
> > > > > > >
> > > > > > > I copied this from the commit log for 22b7050a024d7
> > > > > > >
> > > > > > > "
> > > > > > >
> > > > > > >     This change will also benefit old hypervisors (before 2009)
> > > > > > >     that send interrupts without checking DRIVER_OK: previously,
> > > > > > >     the callback could race with driver-specific initialization.
> > > > > > > "
> > > > > > >
> > > > > > > If this is only for config interrupt, I can remove the above log.
> > > > > >
> > > > > >
> > > > > > This is only for config interrupt.
> > > > >
> > > > > Ok.
> > > > >
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > > > > handler because it uses spinlock to do the synchronization which is
> > > > > > > > > expensive.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > >
> > > > > > > > > ---
> > > > > > > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > > > > > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > > > > > > >   include/linux/virtio.h        |  4 ++++
> > > > > > > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > > > > > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > > > @@ -7,6 +7,12 @@
> > > > > > > > >   #include <linux/of.h>
> > > > > > > > >   #include <uapi/linux/virtio_ids.h>
> > > > > > > > > +static bool irq_hardening = false;
> > > > > > > > > +
> > > > > > > > > +module_param(irq_hardening, bool, 0444);
> > > > > > > > > +MODULE_PARM_DESC(irq_hardening,
> > > > > > > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > > > > > > +
> > > > > > > > >   /* Unique numbering for virtio devices. */
> > > > > > > > >   static DEFINE_IDA(virtio_index_ida);
> > > > > > > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > > > > > > >    * */
> > > > > > > > >   void virtio_reset_device(struct virtio_device *dev)
> > > > > > > > >   {
> > > > > > > > > + /*
> > > > > > > > > +  * The below synchronize_rcu() guarantees that any
> > > > > > > > > +  * interrupt for this line arriving after
> > > > > > > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > > > > > > +  * irq_soft_enabled == false.
> > > > > > > > News to me I did not know synchronize_rcu has anything to do
> > > > > > > > with interrupts. Did not you intend to use synchronize_irq?
> > > > > > > > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > > > > > > though it's most likely is ...
> > > > > > >
> > > > > > >
> > > > > > > According to the comment above tree RCU version of synchronize_rcu():
> > > > > > >
> > > > > > > """
> > > > > > >
> > > > > > >  * RCU read-side critical sections are delimited by rcu_read_lock()
> > > > > > >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> > > > > > >  * v5.0 and later, regions of code across which interrupts, preemption,
> > > > > > >  * or softirqs have been disabled also serve as RCU read-side critical
> > > > > > >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> > > > > > >  * and NMI handlers.
> > > > > > > """
> > > > > > >
> > > > > > > So interrupt handlers are treated as read-side critical sections.
> > > > > > >
> > > > > > > And it has the comment for explain the barrier:
> > > > > > >
> > > > > > > """
> > > > > > >
> > > > > > >  * Note that this guarantee implies further memory-ordering guarantees.
> > > > > > >  * On systems with more than one CPU, when synchronize_rcu() returns,
> > > > > > >  * each CPU is guaranteed to have executed a full memory barrier since
> > > > > > >  * the end of its last RCU read-side critical section whose beginning
> > > > > > >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > > > > > > """
> > > > > > >
> > > > > > > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > > > > > > barrier, if the interrupt come after WRITE_ONCE() it will see the
> > > > > > > irq_soft_enabled as false.
> > > > > > >
> > > > > >
> > > > > > You are right. So then
> > > > > > 1. I do not think we need load_acquire - why is it needed? Just
> > > > > >    READ_ONCE should do.
> > > > >
> > > > > See above.
> > > > >
> > > > > > 2. isn't synchronize_irq also doing the same thing?
> > > > >
> > > > >
> > > > > Yes, but it requires a config ops since the IRQ knowledge is transport specific.
> > > > >
> > > > > >
> > > > > >
> > > > > > > >
> > > > > > > > > +  */
> > > > > > > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > > > > > > + synchronize_rcu();
> > > > > > > > > +
> > > > > > > > >           dev->config->reset(dev);
> > > > > > > > >   }
> > > > > > > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > > > > > > Please add comment explaining where it will be enabled.
> > > > > > > > Also, we *really* don't need to synch if it was already disabled,
> > > > > > > > let's not add useless overhead to the boot sequence.
> > > > > > >
> > > > > > >
> > > > > > > Ok.
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > > > > > > >           spin_lock_init(&dev->config_lock);
> > > > > > > > >           dev->config_enabled = false;
> > > > > > > > >           dev->config_change_pending = false;
> > > > > > > > > + dev->irq_soft_check = irq_hardening;
> > > > > > > > > +
> > > > > > > > > + if (dev->irq_soft_check)
> > > > > > > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > > > > > > >           /* We always start by resetting the device, in case a previous
> > > > > > > > >            * driver messed it up.  This also tests that code path a little. */
> > > > > > > > one of the points of hardening is it's also helpful for buggy
> > > > > > > > devices. this flag defeats the purpose.
> > > > > > >
> > > > > > >
> > > > > > > Do you mean:
> > > > > > >
> > > > > > > 1) we need something like config_enable? This seems not easy to be
> > > > > > > implemented without obvious overhead, mainly the synchronize with the
> > > > > > > interrupt handlers
> > > > > >
> > > > > > But synchronize is only on tear-down path. That is not critical for any
> > > > > > users at the moment, even less than probe.
> > > > >
> > > > > I meant if we have vq->irq_pending, we need to call vring_interrupt()
> > > > > in the virtio_device_ready() and synchronize the IRQ handlers with
> > > > > spinlock or others.
> > > > >
> > > > > >
> > > > > > > 2) enable this by default, so I don't object, but this may have some risk
> > > > > > > for old hypervisors
> > > > > >
> > > > > >
> > > > > > > The risk is if there's a driver adding buffers without setting DRIVER_OK.
> > > > >
> > > > > Probably not, we have devices that accept random inputs from outside,
> > > > > net, console, input etc. I've done a round of audits of the Qemu
> > > > > codes. They look all fine since day0.
> > > > >
> > > > > > So with this approach, how about we rename the flag "driver_ok"?
> > > > > > And then add_buf can actually test it and BUG_ON if not there  (at least
> > > > > > in the debug build).
> > > > >
> > > > > This looks like a hardening of the driver in the core instead of the
> > > > > device. I think it can be done but in a separate series.
> > > > >
> > > > > >
> > > > > > And going down from there, how about we cache status in the
> > > > > > device? Then we don't need to keep re-reading it every time,
> > > > > > speeding boot up a tiny bit.
> > > > >
> > > > > I don't fully understand here, actually spec requires status to be
> > > > > read back for validation in many cases.
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > > > > > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > > > > > >   }
> > > > > > > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > > > > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > > > > > > >   {
> > > > > > > > > + struct virtqueue *_vq = v;
> > > > > > > > > + struct virtio_device *vdev = _vq->vdev;
> > > > > > > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > > > > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > > > > > > +         return IRQ_NONE;
> > > > > > > > > + }
> > > > > > > > > +
> > > > > > > > >           if (!more_used(vq)) {
> > > > > > > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > > > > > > >                   return IRQ_NONE;
> > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > index 5464f398912a..957d6ad604ac 100644
> > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > > > > > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > > > > > > >    * @config_enabled: configuration change reporting enabled
> > > > > > > > >    * @config_change_pending: configuration change reported while disabled
> > > > > > > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > > > > > > + * @irq_soft_enabled: callbacks enabled
> > > > > > > > >    * @config_lock: protects configuration change reporting
> > > > > > > > >    * @dev: underlying device.
> > > > > > > > >    * @id: the device type identification (used to match it with a driver).
> > > > > > > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > > > > > > >           bool failed;
> > > > > > > > >           bool config_enabled;
> > > > > > > > >           bool config_change_pending;
> > > > > > > > > + bool irq_soft_check;
> > > > > > > > > + bool irq_soft_enabled;
> > > > > > > > >           spinlock_t config_lock;
> > > > > > > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > > > > > > >           struct device dev;
> > > > > > > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > > > > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > > > > > > --- a/include/linux/virtio_config.h
> > > > > > > > > +++ b/include/linux/virtio_config.h
> > > > > > > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > > > > > > >           return __virtio_test_bit(vdev, fbit);
> > > > > > > > >   }
> > > > > > > > > +/*
> > > > > > > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > > > > > > + * @vdev: the device
> > > > > > > > > + */
> > > > > > > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > > > > > > +{
> > > > > > > > > + if (!vdev->irq_soft_check)
> > > > > > > > > +         return true;
> > > > > > > > > +
> > > > > > > > > + /*
> > > > > > > > > +  * Read irq_soft_enabled before reading other device specific
> > > > > > > > > +  * data. Paried with smp_store_relase() in
> > > > > > > > paired
> > > > > > >
> > > > > > >
> > > > > > > Will fix.
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > > > > > > +  * virtio_reset_device().
> > > > > > > > > +  */
> > > > > > > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >   /**
> > > > > > > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > > > > > > >    * @vdev: the device
> > > > > > > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > > > > > > >           if (dev->config->enable_cbs)
> > > > > > > > >                     dev->config->enable_cbs(dev);
> > > > > > > > > + /*
> > > > > > > > > +  * Commit the driver setup before enabling the virtqueue
> > > > > > > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > > > > > > +  * virtio_irq_soft_enabled()
> > > > > > > > > +  */
> > > > > > > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > > > > > > +
> > > > > > > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > >   }
> > > > > > > > > --
> > > > > > > > > 2.25.1
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-28  5:59           ` Re: Michael S. Tsirkin
@ 2022-03-28  6:18             ` Jason Wang
  2022-03-28 10:40               ` Re: Michael S. Tsirkin
  0 siblings, 1 reply; 414+ messages in thread
From: Jason Wang @ 2022-03-28  6:18 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Mon, Mar 28, 2022 at 1:59 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Mar 28, 2022 at 12:56:41PM +0800, Jason Wang wrote:
> > On Fri, Mar 25, 2022 at 6:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Fri, Mar 25, 2022 at 05:20:19PM +0800, Jason Wang wrote:
> > > > On Fri, Mar 25, 2022 at 5:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Fri, Mar 25, 2022 at 03:52:00PM +0800, Jason Wang wrote:
> > > > > > On Fri, Mar 25, 2022 at 2:31 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > >
> > > > > > > Bcc:
> > > > > > > Subject: Re: [PATCH 3/3] virtio: harden vring IRQ
> > > > > > > Message-ID: <20220325021422-mutt-send-email-mst@kernel.org>
> > > > > > > Reply-To:
> > > > > > > In-Reply-To: <f7046303-7d7d-e39f-3c71-3688126cc812@redhat.com>
> > > > > > >
> > > > > > > On Fri, Mar 25, 2022 at 11:04:08AM +0800, Jason Wang wrote:
> > > > > > > >
> > > > > > > > 在 2022/3/24 下午7:03, Michael S. Tsirkin 写道:
> > > > > > > > > On Thu, Mar 24, 2022 at 04:40:04PM +0800, Jason Wang wrote:
> > > > > > > > > > This is a rework on the previous IRQ hardening that is done for
> > > > > > > > > > virtio-pci where several drawbacks were found and were reverted:
> > > > > > > > > >
> > > > > > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed IRQ
> > > > > > > > > >     that is used by some device such as virtio-blk
> > > > > > > > > > 2) done only for PCI transport
> > > > > > > > > >
> > > > > > > > > > > In this patch, we try to borrow the idea from the INTX IRQ hardening
> > > > > > > > > > in the reverted commit 080cd7c3ac87 ("virtio-pci: harden INTX interrupts")
> > > > > > > > > > by introducing a global irq_soft_enabled variable for each
> > > > > > > > > > > virtio_device. Then we can toggle it during
> > > > > > > > > > > virtio_reset_device()/virtio_device_ready(). A synchronize_rcu() is
> > > > > > > > > > used in virtio_reset_device() to synchronize with the IRQ handlers. In
> > > > > > > > > > the future, we may provide config_ops for the transport that doesn't
> > > > > > > > > > > use IRQ. With this, vring_interrupt() can check and return early if
> > > > > > > > > > > irq_soft_enabled is false. This leads to smp_load_acquire() being used
> > > > > > > > > > but the cost should be acceptable.
> > > > > > > > > Maybe it should be but is it? Can't we use synchronize_irq instead?
> > > > > > > >
> > > > > > > >
> > > > > > > > > Even if we allow the transport driver to synchronize through
> > > > > > > > synchronize_irq() we still need a check in the vring_interrupt().
> > > > > > > >
> > > > > > > > We do something like the following previously:
> > > > > > > >
> > > > > > > >         if (!READ_ONCE(vp_dev->intx_soft_enabled))
> > > > > > > >                 return IRQ_NONE;
> > > > > > > >
> > > > > > > > But it looks like a bug since speculative read can be done before the check
> > > > > > > > where the interrupt handler can't see the uncommitted setup which is done by
> > > > > > > > the driver.
> > > > > > >
> > > > > > > I don't think so - if you sync after setting the value then
> > > > > > > you are guaranteed that any handler running afterwards
> > > > > > > will see the new value.
> > > > > >
> > > > > > > The problem is not the disable but the enable.
> > > > >
> > > > > So a misbehaving device can lose interrupts? That's not a problem at all
> > > > > imo.
> > > >
> > > > It's the interrupt raised before setting irq_soft_enabled to true:
> > > >
> > > > > CPU 0 probe) driver specific setup (not committed)
> > > > CPU 1 IRQ handler) read the uninitialized variable
> > > > CPU 0 probe) set irq_soft_enabled to true
> > > > CPU 1 IRQ handler) read irq_soft_enable as true
> > > > CPU 1 IRQ handler) use the uninitialized variable
> > > >
> > > > Thanks
> > >
> > > Yea, it hurts if you do it.  So do not do it then ;).
> > >
> > > irq_soft_enabled (I think driver_ok or status is a better name)
> >
> > I can change it to driver_ok.
> >
> > > should be initialized to false *before* irq is requested.
> > >
> > > And requesting irq commits all memory otherwise all drivers would be
> > > broken,
> >
> > So I think we might talk different issues:
> >
> > 1) Whether request_irq() commits the previous setups, I think the
> > answer is yes, since the spin_unlock of desc->lock (release) can
> > guarantee this though there seems no documentation around
> > request_irq() to say this.
> >
> > And I can see at least drivers/video/fbdev/omap2/omapfb/dss/dispc.c is
> > using smp_wmb() before the request_irq().
> >
> > And even if write is ordered we still need read to be ordered to be
> > paired with that.
> >
> > > if it doesn't it just needs to be fixed, not worked around in
> > > virtio.
> >
> > 2) virtio drivers might do a lot of setups between request_irq() and
> > virtio_device_ready():
> >
> > request_irq()
> > driver specific setups
> > virtio_device_ready()
> >
> > CPU 0 probe) request_irq()
> > CPU 1 IRQ handler) read the uninitialized variable
> > CPU 0 probe) driver specific setups
> > CPU 0 probe) smp_store_release(intr_soft_enabled, true), commit the setups
> > CPU 1 IRQ handler) read irq_soft_enable as true
> > CPU 1 IRQ handler) use the uninitialized variable
> >
> > Thanks
>
>
> As I said, virtio_device_ready needs to do synchronize_irq.
> That will guarantee all setup is visible to the specific IRQ,

Only the interrupt after synchronize_irq() returns.

>this
> is what its point is.

What happens if an interrupt is raised in the middle like:

smp_store_release(dev->irq_soft_enabled, true)
IRQ handler
synchronize_irq()

If we don't enforce a reading order, the IRQ handler may still see the
uninitialized variable.

Thanks

>
>
> > >
> > >
> > > > >
> > > > > > > We use smp_store_release()
> > > > > > to make sure the driver commits the setup before enabling the irq. It
> > > > > > means the read needs to be ordered as well in vring_interrupt().
> > > > > >
> > > > > > >
> > > > > > > Although I couldn't find anything about this in memory-barriers.txt
> > > > > > > which surprises me.
> > > > > > >
> > > > > > > CC Paul to help make sure I'm right.
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > > > > > > > module parameter is introduced to enable the hardening so function
> > > > > > > > > > hardening is disabled by default.
> > > > > > > > > Which devices are these? How come they send an interrupt before there
> > > > > > > > > are any buffers in any queues?
> > > > > > > >
> > > > > > > >
> > > > > > > > I copied this from the commit log for 22b7050a024d7
> > > > > > > >
> > > > > > > > "
> > > > > > > >
> > > > > > > >     This change will also benefit old hypervisors (before 2009)
> > > > > > > >     that send interrupts without checking DRIVER_OK: previously,
> > > > > > > >     the callback could race with driver-specific initialization.
> > > > > > > > "
> > > > > > > >
> > > > > > > > If this is only for config interrupt, I can remove the above log.
> > > > > > >
> > > > > > >
> > > > > > > This is only for config interrupt.
> > > > > >
> > > > > > Ok.
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > > > > > handler because it uses spinlock to do the synchronization which is
> > > > > > > > > > expensive.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > >
> > > > > > > > > > ---
> > > > > > > > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > > > > > > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > > > > > > > >   include/linux/virtio.h        |  4 ++++
> > > > > > > > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > > > > > > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > > > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > > > > @@ -7,6 +7,12 @@
> > > > > > > > > >   #include <linux/of.h>
> > > > > > > > > >   #include <uapi/linux/virtio_ids.h>
> > > > > > > > > > +static bool irq_hardening = false;
> > > > > > > > > > +
> > > > > > > > > > +module_param(irq_hardening, bool, 0444);
> > > > > > > > > > +MODULE_PARM_DESC(irq_hardening,
> > > > > > > > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > > > > > > > +
> > > > > > > > > >   /* Unique numbering for virtio devices. */
> > > > > > > > > >   static DEFINE_IDA(virtio_index_ida);
> > > > > > > > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > > > > > > > >    * */
> > > > > > > > > >   void virtio_reset_device(struct virtio_device *dev)
> > > > > > > > > >   {
> > > > > > > > > > + /*
> > > > > > > > > > +  * The below synchronize_rcu() guarantees that any
> > > > > > > > > > +  * interrupt for this line arriving after
> > > > > > > > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > > > > > > > +  * irq_soft_enabled == false.
> > > > > > > > > News to me I did not know synchronize_rcu has anything to do
> > > > > > > > > with interrupts. Did not you intend to use synchronize_irq?
> > > > > > > > > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > > > > > > > though it's most likely is ...
> > > > > > > >
> > > > > > > >
> > > > > > > > According to the comment above tree RCU version of synchronize_rcu():
> > > > > > > >
> > > > > > > > """
> > > > > > > >
> > > > > > > >  * RCU read-side critical sections are delimited by rcu_read_lock()
> > > > > > > >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> > > > > > > >  * v5.0 and later, regions of code across which interrupts, preemption,
> > > > > > > >  * or softirqs have been disabled also serve as RCU read-side critical
> > > > > > > >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> > > > > > > >  * and NMI handlers.
> > > > > > > > """
> > > > > > > >
> > > > > > > > So interrupt handlers are treated as read-side critical sections.
> > > > > > > >
> > > > > > > > And it has the comment for explain the barrier:
> > > > > > > >
> > > > > > > > """
> > > > > > > >
> > > > > > > >  * Note that this guarantee implies further memory-ordering guarantees.
> > > > > > > >  * On systems with more than one CPU, when synchronize_rcu() returns,
> > > > > > > >  * each CPU is guaranteed to have executed a full memory barrier since
> > > > > > > >  * the end of its last RCU read-side critical section whose beginning
> > > > > > > >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > > > > > > > """
> > > > > > > >
> > > > > > > > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > > > > > > > barrier, if the interrupt come after WRITE_ONCE() it will see the
> > > > > > > > irq_soft_enabled as false.
> > > > > > > >
> > > > > > >
> > > > > > > You are right. So then
> > > > > > > 1. I do not think we need load_acquire - why is it needed? Just
> > > > > > >    READ_ONCE should do.
> > > > > >
> > > > > > See above.
> > > > > >
> > > > > > > 2. isn't synchronize_irq also doing the same thing?
> > > > > >
> > > > > >
> > > > > > Yes, but it requires a config ops since the IRQ knowledge is transport specific.
> > > > > >
> > > > > > >
> > > > > > >
> > > > > > > > >
> > > > > > > > > > +  */
> > > > > > > > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > > > > > > > + synchronize_rcu();
> > > > > > > > > > +
> > > > > > > > > >           dev->config->reset(dev);
> > > > > > > > > >   }
> > > > > > > > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > > > > > > > Please add comment explaining where it will be enabled.
> > > > > > > > > Also, we *really* don't need to synch if it was already disabled,
> > > > > > > > > let's not add useless overhead to the boot sequence.
> > > > > > > >
> > > > > > > >
> > > > > > > > Ok.
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > > > > > > > >           spin_lock_init(&dev->config_lock);
> > > > > > > > > >           dev->config_enabled = false;
> > > > > > > > > >           dev->config_change_pending = false;
> > > > > > > > > > + dev->irq_soft_check = irq_hardening;
> > > > > > > > > > +
> > > > > > > > > > + if (dev->irq_soft_check)
> > > > > > > > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > > > > > > > >           /* We always start by resetting the device, in case a previous
> > > > > > > > > >            * driver messed it up.  This also tests that code path a little. */
> > > > > > > > > one of the points of hardening is it's also helpful for buggy
> > > > > > > > > devices. this flag defeats the purpose.
> > > > > > > >
> > > > > > > >
> > > > > > > > Do you mean:
> > > > > > > >
> > > > > > > > 1) we need something like config_enable? This seems not easy to be
> > > > > > > > implemented without obvious overhead, mainly the synchronize with the
> > > > > > > > interrupt handlers
> > > > > > >
> > > > > > > But synchronize is only on tear-down path. That is not critical for any
> > > > > > > users at the moment, even less than probe.
> > > > > >
> > > > > > I meant if we have vq->irq_pending, we need to call vring_interrupt()
> > > > > > in the virtio_device_ready() and synchronize the IRQ handlers with
> > > > > > spinlock or others.
> > > > > >
> > > > > > >
> > > > > > > > 2) enable this by default, so I don't object, but this may have some risk
> > > > > > > > for old hypervisors
> > > > > > >
> > > > > > >
> > > > > > > > The risk is if there's a driver adding buffers without setting DRIVER_OK.
> > > > > >
> > > > > > Probably not, we have devices that accept random inputs from outside,
> > > > > > net, console, input etc. I've done a round of audits of the Qemu
> > > > > > codes. They look all fine since day0.
> > > > > >
> > > > > > > So with this approach, how about we rename the flag "driver_ok"?
> > > > > > > And then add_buf can actually test it and BUG_ON if not there  (at least
> > > > > > > in the debug build).
> > > > > >
> > > > > > This looks like a hardening of the driver in the core instead of the
> > > > > > device. I think it can be done but in a separate series.
> > > > > >
> > > > > > >
> > > > > > > And going down from there, how about we cache status in the
> > > > > > > device? Then we don't need to keep re-reading it every time,
> > > > > > > speeding boot up a tiny bit.
> > > > > >
> > > > > > I don't fully understand here, actually spec requires status to be
> > > > > > read back for validation in many cases.
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > > > > > > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > > > > > > >   }
> > > > > > > > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > > > > > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > > > > > > > >   {
> > > > > > > > > > + struct virtqueue *_vq = v;
> > > > > > > > > > + struct virtio_device *vdev = _vq->vdev;
> > > > > > > > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > > > > > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > > > > > > > +         return IRQ_NONE;
> > > > > > > > > > + }
> > > > > > > > > > +
> > > > > > > > > >           if (!more_used(vq)) {
> > > > > > > > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > > > > > > > >                   return IRQ_NONE;
> > > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > > index 5464f398912a..957d6ad604ac 100644
> > > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > > > > > > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > > > > > > > >    * @config_enabled: configuration change reporting enabled
> > > > > > > > > >    * @config_change_pending: configuration change reported while disabled
> > > > > > > > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > > > > > > > + * @irq_soft_enabled: callbacks enabled
> > > > > > > > > >    * @config_lock: protects configuration change reporting
> > > > > > > > > >    * @dev: underlying device.
> > > > > > > > > >    * @id: the device type identification (used to match it with a driver).
> > > > > > > > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > > > > > > > >           bool failed;
> > > > > > > > > >           bool config_enabled;
> > > > > > > > > >           bool config_change_pending;
> > > > > > > > > > + bool irq_soft_check;
> > > > > > > > > > + bool irq_soft_enabled;
> > > > > > > > > >           spinlock_t config_lock;
> > > > > > > > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > > > > > > > >           struct device dev;
> > > > > > > > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > > > > > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > > > > > > > --- a/include/linux/virtio_config.h
> > > > > > > > > > +++ b/include/linux/virtio_config.h
> > > > > > > > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > > > > > > > >           return __virtio_test_bit(vdev, fbit);
> > > > > > > > > >   }
> > > > > > > > > > +/*
> > > > > > > > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > > > > > > > + * @vdev: the device
> > > > > > > > > > + */
> > > > > > > > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > > > > > > > +{
> > > > > > > > > > + if (!vdev->irq_soft_check)
> > > > > > > > > > +         return true;
> > > > > > > > > > +
> > > > > > > > > > + /*
> > > > > > > > > > +  * Read irq_soft_enabled before reading other device specific
> > > > > > > > > > +  * data. Paried with smp_store_relase() in
> > > > > > > > > paired
> > > > > > > >
> > > > > > > >
> > > > > > > > Will fix.
> > > > > > > >
> > > > > > > > Thanks
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > > > > > > > +  * virtio_reset_device().
> > > > > > > > > > +  */
> > > > > > > > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > >   /**
> > > > > > > > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > > > > > > > >    * @vdev: the device
> > > > > > > > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > > > > > > > >           if (dev->config->enable_cbs)
> > > > > > > > > >                     dev->config->enable_cbs(dev);
> > > > > > > > > > + /*
> > > > > > > > > > +  * Commit the driver setup before enabling the virtqueue
> > > > > > > > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > > > > > > > +  * virtio_irq_soft_enabled()
> > > > > > > > > > +  */
> > > > > > > > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > > > > > > > +
> > > > > > > > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > >   }
> > > > > > > > > > --
> > > > > > > > > > 2.25.1
> > > > > > >
> > > > >
> > >
>


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-28  6:18             ` Re: Jason Wang
@ 2022-03-28 10:40               ` Michael S. Tsirkin
  2022-03-29  7:12                 ` Re: Jason Wang
  2022-03-29  8:35                 ` Re: Thomas Gleixner
  0 siblings, 2 replies; 414+ messages in thread
From: Michael S. Tsirkin @ 2022-03-28 10:40 UTC (permalink / raw)
  To: Jason Wang
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Mon, Mar 28, 2022 at 02:18:22PM +0800, Jason Wang wrote:
> On Mon, Mar 28, 2022 at 1:59 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, Mar 28, 2022 at 12:56:41PM +0800, Jason Wang wrote:
> > > On Fri, Mar 25, 2022 at 6:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Fri, Mar 25, 2022 at 05:20:19PM +0800, Jason Wang wrote:
> > > > > On Fri, Mar 25, 2022 at 5:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Fri, Mar 25, 2022 at 03:52:00PM +0800, Jason Wang wrote:
> > > > > > > On Fri, Mar 25, 2022 at 2:31 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > >
> > > > > > > > Bcc:
> > > > > > > > Subject: Re: [PATCH 3/3] virtio: harden vring IRQ
> > > > > > > > Message-ID: <20220325021422-mutt-send-email-mst@kernel.org>
> > > > > > > > Reply-To:
> > > > > > > > In-Reply-To: <f7046303-7d7d-e39f-3c71-3688126cc812@redhat.com>
> > > > > > > >
> > > > > > > > On Fri, Mar 25, 2022 at 11:04:08AM +0800, Jason Wang wrote:
> > > > > > > > >
> > > > > > > > > 在 2022/3/24 下午7:03, Michael S. Tsirkin 写道:
> > > > > > > > > > On Thu, Mar 24, 2022 at 04:40:04PM +0800, Jason Wang wrote:
> > > > > > > > > > > This is a rework on the previous IRQ hardening that is done for
> > > > > > > > > > > virtio-pci where several drawbacks were found and were reverted:
> > > > > > > > > > >
> > > > > > > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed IRQ
> > > > > > > > > > >     that is used by some device such as virtio-blk
> > > > > > > > > > > 2) done only for PCI transport
> > > > > > > > > > >
> > > > > > > > > > > In this patch, we tries to borrow the idea from the INTX IRQ hardening
> > > > > > > > > > > in the reverted commit 080cd7c3ac87 ("virtio-pci: harden INTX interrupts")
> > > > > > > > > > > by introducing a global irq_soft_enabled variable for each
> > > > > > > > > > > virtio_device. Then we can to toggle it during
> > > > > > > > > > > virtio_reset_device()/virtio_device_ready(). A synchornize_rcu() is
> > > > > > > > > > > used in virtio_reset_device() to synchronize with the IRQ handlers. In
> > > > > > > > > > > the future, we may provide config_ops for the transport that doesn't
> > > > > > > > > > > use IRQ. With this, vring_interrupt() can return check and early if
> > > > > > > > > > > irq_soft_enabled is false. This lead to smp_load_acquire() to be used
> > > > > > > > > > > but the cost should be acceptable.
> > > > > > > > > > Maybe it should be but is it? Can't we use synchronize_irq instead?
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > Even if we allow the transport driver to synchornize through
> > > > > > > > > synchronize_irq() we still need a check in the vring_interrupt().
> > > > > > > > >
> > > > > > > > > We do something like the following previously:
> > > > > > > > >
> > > > > > > > >         if (!READ_ONCE(vp_dev->intx_soft_enabled))
> > > > > > > > >                 return IRQ_NONE;
> > > > > > > > >
> > > > > > > > > But it looks like a bug since speculative read can be done before the check
> > > > > > > > > where the interrupt handler can't see the uncommitted setup which is done by
> > > > > > > > > the driver.
> > > > > > > >
> > > > > > > > I don't think so - if you sync after setting the value then
> > > > > > > > you are guaranteed that any handler running afterwards
> > > > > > > > will see the new value.
> > > > > > >
> > > > > > > The problem is not disabled but the enable.
> > > > > >
> > > > > > So a misbehaving device can lose interrupts? That's not a problem at all
> > > > > > imo.
> > > > >
> > > > > It's the interrupt raised before setting irq_soft_enabled to true:
> > > > >
> > > > > CPU 0 probe) driver specific setup (not commited)
> > > > > CPU 1 IRQ handler) read the uninitialized variable
> > > > > CPU 0 probe) set irq_soft_enabled to true
> > > > > CPU 1 IRQ handler) read irq_soft_enable as true
> > > > > CPU 1 IRQ handler) use the uninitialized variable
> > > > >
> > > > > Thanks
> > > >
> > > > Yea, it hurts if you do it.  So do not do it then ;).
> > > >
> > > > irq_soft_enabled (I think driver_ok or status is a better name)
> > >
> > > I can change it to driver_ok.
> > >
> > > > should be initialized to false *before* irq is requested.
> > > >
> > > > And requesting irq commits all memory otherwise all drivers would be
> > > > broken,
> > >
> > > So I think we might talk different issues:
> > >
> > > 1) Whether request_irq() commits the previous setups, I think the
> > > answer is yes, since the spin_unlock of desc->lock (release) can
> > > guarantee this though there seems no documentation around
> > > request_irq() to say this.
> > >
> > > And I can see at least drivers/video/fbdev/omap2/omapfb/dss/dispc.c is
> > > using smp_wmb() before the request_irq().
> > >
> > > And even if write is ordered we still need read to be ordered to be
> > > paired with that.

IMO it synchronizes with the CPU to which irq is
delivered. Otherwise basically all drivers would be broken,
wouldn't they be?
I don't know whether it's correct on all platforms, but if not
we need to fix request_irq.

> > >
> > > > if it doesn't it just needs to be fixed, not worked around in
> > > > virtio.
> > >
> > > 2) virtio drivers might do a lot of setups between request_irq() and
> > > virtio_device_ready():
> > >
> > > request_irq()
> > > driver specific setups
> > > virtio_device_ready()
> > >
> > > CPU 0 probe) request_irq()
> > > CPU 1 IRQ handler) read the uninitialized variable
> > > CPU 0 probe) driver specific setups
> > > CPU 0 probe) smp_store_release(intr_soft_enabled, true), commit the setups
> > > CPU 1 IRQ handler) read irq_soft_enable as true
> > > CPU 1 IRQ handler) use the uninitialized variable
> > >
> > > Thanks
> >
> >
> > As I said, virtio_device_ready needs to do synchronize_irq.
> > That will guarantee all setup is visible to the specific IRQ,
> 
> Only the interrupt after synchronize_irq() returns.

Anything else is a buggy device though.

> >this
> > is what it's point is.
> 
> What happens if an interrupt is raised in the middle like:
> 
> smp_store_release(dev->irq_soft_enabled, true)
> IRQ handler
> synchornize_irq()
> 
> If we don't enforce a reading order, the IRQ handler may still see the
> uninitialized variable.
> 
> Thanks

IMHO variables should be initialized before request_irq
to a value meaning "not a valid interrupt".
Specifically driver_ok = false.
Handler in the scenario you describe will then see !driver_ok
and exit immediately.


> >
> >
> > > >
> > > >
> > > > > >
> > > > > > > We use smp_store_relase()
> > > > > > > to make sure the driver commits the setup before enabling the irq. It
> > > > > > > means the read needs to be ordered as well in vring_interrupt().
> > > > > > >
> > > > > > > >
> > > > > > > > Although I couldn't find anything about this in memory-barriers.txt
> > > > > > > > which surprises me.
> > > > > > > >
> > > > > > > > CC Paul to help make sure I'm right.
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > > > > > > > > module parameter is introduced to enable the hardening so function
> > > > > > > > > > > hardening is disabled by default.
> > > > > > > > > > Which devices are these? How come they send an interrupt before there
> > > > > > > > > > are any buffers in any queues?
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > I copied this from the commit log for 22b7050a024d7
> > > > > > > > >
> > > > > > > > > "
> > > > > > > > >
> > > > > > > > >     This change will also benefit old hypervisors (before 2009)
> > > > > > > > >     that send interrupts without checking DRIVER_OK: previously,
> > > > > > > > >     the callback could race with driver-specific initialization.
> > > > > > > > > "
> > > > > > > > >
> > > > > > > > > If this is only for config interrupt, I can remove the above log.
> > > > > > > >
> > > > > > > >
> > > > > > > > This is only for config interrupt.
> > > > > > >
> > > > > > > Ok.
> > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > > > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > > > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > > > > > > handler because it uses spinlock to do the synchronization which is
> > > > > > > > > > > expensive.
> > > > > > > > > > >
> > > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > >
> > > > > > > > > > > ---
> > > > > > > > > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > > > > > > > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > > > > > > > > >   include/linux/virtio.h        |  4 ++++
> > > > > > > > > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > > > > > > > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > > > > > > > > >
> > > > > > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > > > > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > > > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > > > > > @@ -7,6 +7,12 @@
> > > > > > > > > > >   #include <linux/of.h>
> > > > > > > > > > >   #include <uapi/linux/virtio_ids.h>
> > > > > > > > > > > +static bool irq_hardening = false;
> > > > > > > > > > > +
> > > > > > > > > > > +module_param(irq_hardening, bool, 0444);
> > > > > > > > > > > +MODULE_PARM_DESC(irq_hardening,
> > > > > > > > > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > > > > > > > > +
> > > > > > > > > > >   /* Unique numbering for virtio devices. */
> > > > > > > > > > >   static DEFINE_IDA(virtio_index_ida);
> > > > > > > > > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > > > > > > > > >    * */
> > > > > > > > > > >   void virtio_reset_device(struct virtio_device *dev)
> > > > > > > > > > >   {
> > > > > > > > > > > + /*
> > > > > > > > > > > +  * The below synchronize_rcu() guarantees that any
> > > > > > > > > > > +  * interrupt for this line arriving after
> > > > > > > > > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > > > > > > > > +  * irq_soft_enabled == false.
> > > > > > > > > > News to me I did not know synchronize_rcu has anything to do
> > > > > > > > > > with interrupts. Did not you intend to use synchronize_irq?
> > > > > > > > > > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > > > > > > > > though it's most likely is ...
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > According to the comment above tree RCU version of synchronize_rcu():
> > > > > > > > >
> > > > > > > > > """
> > > > > > > > >
> > > > > > > > >  * RCU read-side critical sections are delimited by rcu_read_lock()
> > > > > > > > >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> > > > > > > > >  * v5.0 and later, regions of code across which interrupts, preemption,
> > > > > > > > >  * or softirqs have been disabled also serve as RCU read-side critical
> > > > > > > > >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> > > > > > > > >  * and NMI handlers.
> > > > > > > > > """
> > > > > > > > >
> > > > > > > > > So interrupt handlers are treated as read-side critical sections.
> > > > > > > > >
> > > > > > > > > And it has the comment for explain the barrier:
> > > > > > > > >
> > > > > > > > > """
> > > > > > > > >
> > > > > > > > >  * Note that this guarantee implies further memory-ordering guarantees.
> > > > > > > > >  * On systems with more than one CPU, when synchronize_rcu() returns,
> > > > > > > > >  * each CPU is guaranteed to have executed a full memory barrier since
> > > > > > > > >  * the end of its last RCU read-side critical section whose beginning
> > > > > > > > >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > > > > > > > > """
> > > > > > > > >
> > > > > > > > > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > > > > > > > > barrier, if the interrupt come after WRITE_ONCE() it will see the
> > > > > > > > > irq_soft_enabled as false.
> > > > > > > > >
> > > > > > > >
> > > > > > > > You are right. So then
> > > > > > > > 1. I do not think we need load_acquire - why is it needed? Just
> > > > > > > >    READ_ONCE should do.
> > > > > > >
> > > > > > > See above.
> > > > > > >
> > > > > > > > 2. isn't synchronize_irq also doing the same thing?
> > > > > > >
> > > > > > >
> > > > > > > Yes, but it requires a config ops since the IRQ knowledge is transport specific.
> > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > +  */
> > > > > > > > > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > > > > > > > > + synchronize_rcu();
> > > > > > > > > > > +
> > > > > > > > > > >           dev->config->reset(dev);
> > > > > > > > > > >   }
> > > > > > > > > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > > > > > > > > Please add comment explaining where it will be enabled.
> > > > > > > > > > Also, we *really* don't need to synch if it was already disabled,
> > > > > > > > > > let's not add useless overhead to the boot sequence.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > Ok.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > > > > > > > > >           spin_lock_init(&dev->config_lock);
> > > > > > > > > > >           dev->config_enabled = false;
> > > > > > > > > > >           dev->config_change_pending = false;
> > > > > > > > > > > + dev->irq_soft_check = irq_hardening;
> > > > > > > > > > > +
> > > > > > > > > > > + if (dev->irq_soft_check)
> > > > > > > > > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > > > > > > > > >           /* We always start by resetting the device, in case a previous
> > > > > > > > > > >            * driver messed it up.  This also tests that code path a little. */
> > > > > > > > > > one of the points of hardening is it's also helpful for buggy
> > > > > > > > > > devices. this flag defeats the purpose.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > Do you mean:
> > > > > > > > >
> > > > > > > > > 1) we need something like config_enable? This seems not easy to be
> > > > > > > > > implemented without obvious overhead, mainly the synchronize with the
> > > > > > > > > interrupt handlers
> > > > > > > >
> > > > > > > > But synchronize is only on tear-down path. That is not critical for any
> > > > > > > > users at the moment, even less than probe.
> > > > > > >
> > > > > > > I meant if we have vq->irq_pending, we need to call vring_interrupt()
> > > > > > > in the virtio_device_ready() and synchronize the IRQ handlers with
> > > > > > > spinlock or others.
> > > > > > >
> > > > > > > >
> > > > > > > > > 2) enable this by default, so I don't object, but this may have some risk
> > > > > > > > > for old hypervisors
> > > > > > > >
> > > > > > > >
> > > > > > > > The risk if there's a driver adding buffers without setting DRIVER_OK.
> > > > > > >
> > > > > > > Probably not, we have devices that accept random inputs from outside,
> > > > > > > net, console, input etc. I've done a round of audits of the Qemu
> > > > > > > codes. They look all fine since day0.
> > > > > > >
> > > > > > > > So with this approach, how about we rename the flag "driver_ok"?
> > > > > > > > And then add_buf can actually test it and BUG_ON if not there  (at least
> > > > > > > > in the debug build).
> > > > > > >
> > > > > > > This looks like a hardening of the driver in the core instead of the
> > > > > > > device. I think it can be done but in a separate series.
> > > > > > >
> > > > > > > >
> > > > > > > > And going down from there, how about we cache status in the
> > > > > > > > device? Then we don't need to keep re-reading it every time,
> > > > > > > > speeding boot up a tiny bit.
> > > > > > >
> > > > > > > I don't fully understand here, actually spec requires status to be
> > > > > > > read back for validation in many cases.
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > > > > > > > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > > > > > > > >   }
> > > > > > > > > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > > > > > > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > > > > > > > > >   {
> > > > > > > > > > > + struct virtqueue *_vq = v;
> > > > > > > > > > > + struct virtio_device *vdev = _vq->vdev;
> > > > > > > > > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > > > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > > > > > > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > > > > > > > > +         return IRQ_NONE;
> > > > > > > > > > > + }
> > > > > > > > > > > +
> > > > > > > > > > >           if (!more_used(vq)) {
> > > > > > > > > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > > > > > > > > >                   return IRQ_NONE;
> > > > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > > > index 5464f398912a..957d6ad604ac 100644
> > > > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > > > > > > > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > > > > > > > > >    * @config_enabled: configuration change reporting enabled
> > > > > > > > > > >    * @config_change_pending: configuration change reported while disabled
> > > > > > > > > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > > > > > > > > + * @irq_soft_enabled: callbacks enabled
> > > > > > > > > > >    * @config_lock: protects configuration change reporting
> > > > > > > > > > >    * @dev: underlying device.
> > > > > > > > > > >    * @id: the device type identification (used to match it with a driver).
> > > > > > > > > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > > > > > > > > >           bool failed;
> > > > > > > > > > >           bool config_enabled;
> > > > > > > > > > >           bool config_change_pending;
> > > > > > > > > > > + bool irq_soft_check;
> > > > > > > > > > > + bool irq_soft_enabled;
> > > > > > > > > > >           spinlock_t config_lock;
> > > > > > > > > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > > > > > > > > >           struct device dev;
> > > > > > > > > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > > > > > > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > > > > > > > > --- a/include/linux/virtio_config.h
> > > > > > > > > > > +++ b/include/linux/virtio_config.h
> > > > > > > > > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > > > > > > > > >           return __virtio_test_bit(vdev, fbit);
> > > > > > > > > > >   }
> > > > > > > > > > > +/*
> > > > > > > > > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > > > > > > > > + * @vdev: the device
> > > > > > > > > > > + */
> > > > > > > > > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > > > > > > > > +{
> > > > > > > > > > > + if (!vdev->irq_soft_check)
> > > > > > > > > > > +         return true;
> > > > > > > > > > > +
> > > > > > > > > > > + /*
> > > > > > > > > > > +  * Read irq_soft_enabled before reading other device specific
> > > > > > > > > > > +  * data. Paried with smp_store_relase() in
> > > > > > > > > > paired
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > Will fix.
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > > > > > > > > +  * virtio_reset_device().
> > > > > > > > > > > +  */
> > > > > > > > > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > > > > > > > > +}
> > > > > > > > > > > +
> > > > > > > > > > >   /**
> > > > > > > > > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > > > > > > > > >    * @vdev: the device
> > > > > > > > > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > > > > > > > > >           if (dev->config->enable_cbs)
> > > > > > > > > > >                     dev->config->enable_cbs(dev);
> > > > > > > > > > > + /*
> > > > > > > > > > > +  * Commit the driver setup before enabling the virtqueue
> > > > > > > > > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > > > > > > > > +  * virtio_irq_soft_enabled()
> > > > > > > > > > > +  */
> > > > > > > > > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > > > > > > > > +
> > > > > > > > > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > > >   }
> > > > > > > > > > > --
> > > > > > > > > > > 2.25.1
> > > > > > > >
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-28 10:40               ` Re: Michael S. Tsirkin
@ 2022-03-29  7:12                 ` Jason Wang
  2022-03-29 14:08                   ` Re: Michael S. Tsirkin
  2022-03-29  8:35                 ` Re: Thomas Gleixner
  1 sibling, 1 reply; 414+ messages in thread
From: Jason Wang @ 2022-03-29  7:12 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Mon, Mar 28, 2022 at 6:41 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Mar 28, 2022 at 02:18:22PM +0800, Jason Wang wrote:
> > On Mon, Mar 28, 2022 at 1:59 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Mon, Mar 28, 2022 at 12:56:41PM +0800, Jason Wang wrote:
> > > > On Fri, Mar 25, 2022 at 6:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Fri, Mar 25, 2022 at 05:20:19PM +0800, Jason Wang wrote:
> > > > > > On Fri, Mar 25, 2022 at 5:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > >
> > > > > > > On Fri, Mar 25, 2022 at 03:52:00PM +0800, Jason Wang wrote:
> > > > > > > > On Fri, Mar 25, 2022 at 2:31 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > Bcc:
> > > > > > > > > Subject: Re: [PATCH 3/3] virtio: harden vring IRQ
> > > > > > > > > Message-ID: <20220325021422-mutt-send-email-mst@kernel.org>
> > > > > > > > > Reply-To:
> > > > > > > > > In-Reply-To: <f7046303-7d7d-e39f-3c71-3688126cc812@redhat.com>
> > > > > > > > >
> > > > > > > > > On Fri, Mar 25, 2022 at 11:04:08AM +0800, Jason Wang wrote:
> > > > > > > > > >
> > > > > > > > > > 在 2022/3/24 下午7:03, Michael S. Tsirkin 写道:
> > > > > > > > > > > On Thu, Mar 24, 2022 at 04:40:04PM +0800, Jason Wang wrote:
> > > > > > > > > > > > This is a rework on the previous IRQ hardening that is done for
> > > > > > > > > > > > virtio-pci where several drawbacks were found and were reverted:
> > > > > > > > > > > >
> > > > > > > > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed IRQ
> > > > > > > > > > > >     that is used by some device such as virtio-blk
> > > > > > > > > > > > 2) done only for PCI transport
> > > > > > > > > > > >
> > > > > > > > > > > > In this patch, we tries to borrow the idea from the INTX IRQ hardening
> > > > > > > > > > > > in the reverted commit 080cd7c3ac87 ("virtio-pci: harden INTX interrupts")
> > > > > > > > > > > > by introducing a global irq_soft_enabled variable for each
> > > > > > > > > > > > virtio_device. Then we can to toggle it during
> > > > > > > > > > > > virtio_reset_device()/virtio_device_ready(). A synchornize_rcu() is
> > > > > > > > > > > > used in virtio_reset_device() to synchronize with the IRQ handlers. In
> > > > > > > > > > > > the future, we may provide config_ops for the transport that doesn't
> > > > > > > > > > > > use IRQ. With this, vring_interrupt() can return check and early if
> > > > > > > > > > > > irq_soft_enabled is false. This lead to smp_load_acquire() to be used
> > > > > > > > > > > > but the cost should be acceptable.
> > > > > > > > > > > Maybe it should be but is it? Can't we use synchronize_irq instead?
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Even if we allow the transport driver to synchornize through
> > > > > > > > > > synchronize_irq() we still need a check in the vring_interrupt().
> > > > > > > > > >
> > > > > > > > > > We do something like the following previously:
> > > > > > > > > >
> > > > > > > > > >         if (!READ_ONCE(vp_dev->intx_soft_enabled))
> > > > > > > > > >                 return IRQ_NONE;
> > > > > > > > > >
> > > > > > > > > > But it looks like a bug since speculative read can be done before the check
> > > > > > > > > > where the interrupt handler can't see the uncommitted setup which is done by
> > > > > > > > > > the driver.
> > > > > > > > >
> > > > > > > > > I don't think so - if you sync after setting the value then
> > > > > > > > > you are guaranteed that any handler running afterwards
> > > > > > > > > will see the new value.
> > > > > > > >
> > > > > > > > The problem is not disabled but the enable.
> > > > > > >
> > > > > > > So a misbehaving device can lose interrupts? That's not a problem at all
> > > > > > > imo.
> > > > > >
> > > > > > It's the interrupt raised before setting irq_soft_enabled to true:
> > > > > >
> > > > > > CPU 0 probe) driver specific setup (not commited)
> > > > > > CPU 1 IRQ handler) read the uninitialized variable
> > > > > > CPU 0 probe) set irq_soft_enabled to true
> > > > > > CPU 1 IRQ handler) read irq_soft_enable as true
> > > > > > CPU 1 IRQ handler) use the uninitialized variable
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > Yea, it hurts if you do it.  So do not do it then ;).
> > > > >
> > > > > irq_soft_enabled (I think driver_ok or status is a better name)
> > > >
> > > > I can change it to driver_ok.
> > > >
> > > > > should be initialized to false *before* irq is requested.
> > > > >
> > > > > And requesting irq commits all memory otherwise all drivers would be
> > > > > broken,
> > > >
> > > > So I think we might talk different issues:
> > > >
> > > > 1) Whether request_irq() commits the previous setups, I think the
> > > > answer is yes, since the spin_unlock of desc->lock (release) can
> > > > guarantee this though there seems no documentation around
> > > > request_irq() to say this.
> > > >
> > > > And I can see at least drivers/video/fbdev/omap2/omapfb/dss/dispc.c is
> > > > using smp_wmb() before the request_irq().
> > > >
> > > > And even if write is ordered we still need read to be ordered to be
> > > > paired with that.
>
> IMO it synchronizes with the CPU to which irq is
> delivered. Otherwise basically all drivers would be broken,
> wouldn't they be?

I guess it's because most drivers don't care much about
buggy/malicious devices.  And most devices may require an extra
step to enable the device IRQ after request_irq(). Or it's the
responsibility of the driver to do the synchronization.

> I don't know whether it's correct on all platforms, but if not
> we need to fix request_irq.
>
> > > >
> > > > > if it doesn't it just needs to be fixed, not worked around in
> > > > > virtio.
> > > >
> > > > 2) virtio drivers might do a lot of setups between request_irq() and
> > > > virtio_device_ready():
> > > >
> > > > request_irq()
> > > > driver specific setups
> > > > virtio_device_ready()
> > > >
> > > > CPU 0 probe) request_irq()
> > > > CPU 1 IRQ handler) read the uninitialized variable
> > > > CPU 0 probe) driver specific setups
> > > > CPU 0 probe) smp_store_release(intr_soft_enabled, true), commit the setups
> > > > CPU 1 IRQ handler) read irq_soft_enable as true
> > > > CPU 1 IRQ handler) use the uninitialized variable
> > > >
> > > > Thanks
> > >
> > >
> > > As I said, virtio_device_ready needs to do synchronize_irq.
> > > That will guarantee all setup is visible to the specific IRQ,
> >
> > Only the interrupt after synchronize_irq() returns.
>
> Anything else is a buggy device though.

Yes, but the goal of this patch is to prevent possible attacks from
buggy (malicious) devices.

>
> > >this
> > > is what it's point is.
> >
> > What happens if an interrupt is raised in the middle like:
> >
> > smp_store_release(dev->irq_soft_enabled, true)
> > IRQ handler
> > synchornize_irq()
> >
> > If we don't enforce a reading order, the IRQ handler may still see the
> > uninitialized variable.
> >
> > Thanks
>
> IMHO variables should be initialized before request_irq
> to a value meaning "not a valid interrupt".
> Specifically driver_ok = false.
> Handler in the scenario you describe will then see !driver_ok
> and exit immediately.

So just to make sure we're on the same page.

1) virtio_reset_device() will set the driver_ok to false;
2) virtio_device_ready() will set the driver_ok to true

So virtio drivers often do:

1) virtio_reset_device()
2) find_vqs() which will call request_irq()
3) other driver specific setups
4) virtio_device_ready()

In virtio_device_ready(), the patch currently performs the following:

smp_store_release(driver_ok, true);
set_status(DRIVER_OK);

Per your suggestion, we add synchronize_irq() after
smp_store_release(), so we have:

smp_store_release(driver_ok, true);
synchronize_irq()
set_status(DRIVER_OK)

Suppose there's an interrupt raised before the synchronize_irq(). If we do:

if (READ_ONCE(driver_ok)) {
      vq->callback()
}

It will see driver_ok as true, but how can we make sure
vq->callback sees the driver-specific setups (3) above?

And an example is virtio_scsi():

virtio_reset_device()
virtscsi_probe()
    virtscsi_init()
        virtio_find_vqs()
        ...
        virtscsi_init_vq(&vscsi->event_vq, vqs[1])
    ....
    virtio_device_ready()

In virtscsi_event_done():

virtscsi_event_done():
    virtscsi_vq_done(vscsi, &vscsi->event_vq, ...);

We need to make sure virtscsi_event_done() reads driver_ok before reading vscsi->event_vq.

Thanks

>
>
> > >
> > >
> > > > >
> > > > >
> > > > > > >
> > > > > > > > We use smp_store_relase()
> > > > > > > > to make sure the driver commits the setup before enabling the irq. It
> > > > > > > > means the read needs to be ordered as well in vring_interrupt().
> > > > > > > >
> > > > > > > > >
> > > > > > > > > Although I couldn't find anything about this in memory-barriers.txt
> > > > > > > > > which surprises me.
> > > > > > > > >
> > > > > > > > > CC Paul to help make sure I'm right.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > > > > > > > > > module parameter is introduced to enable the hardening so function
> > > > > > > > > > > > hardening is disabled by default.
> > > > > > > > > > > Which devices are these? How come they send an interrupt before there
> > > > > > > > > > > are any buffers in any queues?
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > I copied this from the commit log for 22b7050a024d7
> > > > > > > > > >
> > > > > > > > > > "
> > > > > > > > > >
> > > > > > > > > >     This change will also benefit old hypervisors (before 2009)
> > > > > > > > > >     that send interrupts without checking DRIVER_OK: previously,
> > > > > > > > > >     the callback could race with driver-specific initialization.
> > > > > > > > > > "
> > > > > > > > > >
> > > > > > > > > > If this is only for config interrupt, I can remove the above log.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > This is only for config interrupt.
> > > > > > > >
> > > > > > > > Ok.
> > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > > > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > > > > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > > > > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > > > > > > > handler because it uses spinlock to do the synchronization which is
> > > > > > > > > > > > expensive.
> > > > > > > > > > > >
> > > > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > > >
> > > > > > > > > > > > ---
> > > > > > > > > > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > > > > > > > > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > > > > > > > > > >   include/linux/virtio.h        |  4 ++++
> > > > > > > > > > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > > > > > > > > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > > > > > > > > > >
> > > > > > > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > > > > > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > > > > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > > > > > > @@ -7,6 +7,12 @@
> > > > > > > > > > > >   #include <linux/of.h>
> > > > > > > > > > > >   #include <uapi/linux/virtio_ids.h>
> > > > > > > > > > > > +static bool irq_hardening = false;
> > > > > > > > > > > > +
> > > > > > > > > > > > +module_param(irq_hardening, bool, 0444);
> > > > > > > > > > > > +MODULE_PARM_DESC(irq_hardening,
> > > > > > > > > > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > > > > > > > > > +
> > > > > > > > > > > >   /* Unique numbering for virtio devices. */
> > > > > > > > > > > >   static DEFINE_IDA(virtio_index_ida);
> > > > > > > > > > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > > > > > > > > > >    * */
> > > > > > > > > > > >   void virtio_reset_device(struct virtio_device *dev)
> > > > > > > > > > > >   {
> > > > > > > > > > > > + /*
> > > > > > > > > > > > +  * The below synchronize_rcu() guarantees that any
> > > > > > > > > > > > +  * interrupt for this line arriving after
> > > > > > > > > > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > > > > > > > > > +  * irq_soft_enabled == false.
> > > > > > > > > > > News to me I did not know synchronize_rcu has anything to do
> > > > > > > > > > > with interrupts. Did not you intend to use synchronize_irq?
> > > > > > > > > > > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > > > > > > > > > though it's most likely is ...
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > According to the comment above tree RCU version of synchronize_rcu():
> > > > > > > > > >
> > > > > > > > > > """
> > > > > > > > > >
> > > > > > > > > >  * RCU read-side critical sections are delimited by rcu_read_lock()
> > > > > > > > > >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> > > > > > > > > >  * v5.0 and later, regions of code across which interrupts, preemption,
> > > > > > > > > >  * or softirqs have been disabled also serve as RCU read-side critical
> > > > > > > > > >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> > > > > > > > > >  * and NMI handlers.
> > > > > > > > > > """
> > > > > > > > > >
> > > > > > > > > > So interrupt handlers are treated as read-side critical sections.
> > > > > > > > > >
> > > > > > > > > > And it has the comment for explain the barrier:
> > > > > > > > > >
> > > > > > > > > > """
> > > > > > > > > >
> > > > > > > > > >  * Note that this guarantee implies further memory-ordering guarantees.
> > > > > > > > > >  * On systems with more than one CPU, when synchronize_rcu() returns,
> > > > > > > > > >  * each CPU is guaranteed to have executed a full memory barrier since
> > > > > > > > > >  * the end of its last RCU read-side critical section whose beginning
> > > > > > > > > >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > > > > > > > > > """
> > > > > > > > > >
> > > > > > > > > > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > > > > > > > > > barrier, if the interrupt come after WRITE_ONCE() it will see the
> > > > > > > > > > irq_soft_enabled as false.
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > You are right. So then
> > > > > > > > > 1. I do not think we need load_acquire - why is it needed? Just
> > > > > > > > >    READ_ONCE should do.
> > > > > > > >
> > > > > > > > See above.
> > > > > > > >
> > > > > > > > > 2. isn't synchronize_irq also doing the same thing?
> > > > > > > >
> > > > > > > >
> > > > > > > > Yes, but it requires a config ops since the IRQ knowledge is transport specific.
> > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > +  */
> > > > > > > > > > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > > > > > > > > > + synchronize_rcu();
> > > > > > > > > > > > +
> > > > > > > > > > > >           dev->config->reset(dev);
> > > > > > > > > > > >   }
> > > > > > > > > > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > > > > > > > > > Please add comment explaining where it will be enabled.
> > > > > > > > > > > Also, we *really* don't need to synch if it was already disabled,
> > > > > > > > > > > let's not add useless overhead to the boot sequence.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Ok.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > > > > > > > > > >           spin_lock_init(&dev->config_lock);
> > > > > > > > > > > >           dev->config_enabled = false;
> > > > > > > > > > > >           dev->config_change_pending = false;
> > > > > > > > > > > > + dev->irq_soft_check = irq_hardening;
> > > > > > > > > > > > +
> > > > > > > > > > > > + if (dev->irq_soft_check)
> > > > > > > > > > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > > > > > > > > > >           /* We always start by resetting the device, in case a previous
> > > > > > > > > > > >            * driver messed it up.  This also tests that code path a little. */
> > > > > > > > > > > one of the points of hardening is it's also helpful for buggy
> > > > > > > > > > > devices. this flag defeats the purpose.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Do you mean:
> > > > > > > > > >
> > > > > > > > > > 1) we need something like config_enable? This seems not easy to be
> > > > > > > > > > implemented without obvious overhead, mainly the synchronize with the
> > > > > > > > > > interrupt handlers
> > > > > > > > >
> > > > > > > > > But synchronize is only on tear-down path. That is not critical for any
> > > > > > > > > users at the moment, even less than probe.
> > > > > > > >
> > > > > > > > I meant if we have vq->irq_pending, we need to call vring_interrupt()
> > > > > > > > in the virtio_device_ready() and synchronize the IRQ handlers with
> > > > > > > > spinlock or others.
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > 2) enable this by default, so I don't object, but this may have some risk
> > > > > > > > > > for old hypervisors
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > The risk if there's a driver adding buffers without setting DRIVER_OK.
> > > > > > > >
> > > > > > > > Probably not, we have devices that accept random inputs from outside,
> > > > > > > > net, console, input etc. I've done a round of audits of the Qemu
> > > > > > > > codes. They look all fine since day0.
> > > > > > > >
> > > > > > > > > So with this approach, how about we rename the flag "driver_ok"?
> > > > > > > > > And then add_buf can actually test it and BUG_ON if not there  (at least
> > > > > > > > > in the debug build).
> > > > > > > >
> > > > > > > > This looks like a hardening of the driver in the core instead of the
> > > > > > > > device. I think it can be done but in a separate series.
> > > > > > > >
> > > > > > > > >
> > > > > > > > > And going down from there, how about we cache status in the
> > > > > > > > > device? Then we don't need to keep re-reading it every time,
> > > > > > > > > speeding boot up a tiny bit.
> > > > > > > >
> > > > > > > > I don't fully understand here, actually spec requires status to be
> > > > > > > > read back for validation in many cases.
> > > > > > > >
> > > > > > > > Thanks
> > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > > > > > > > > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > > > > > > > > >   }
> > > > > > > > > > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > > > > > > > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > > > > > > > > > >   {
> > > > > > > > > > > > + struct virtqueue *_vq = v;
> > > > > > > > > > > > + struct virtio_device *vdev = _vq->vdev;
> > > > > > > > > > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > > > > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > > > > > > > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > > > > > > > > > +         return IRQ_NONE;
> > > > > > > > > > > > + }
> > > > > > > > > > > > +
> > > > > > > > > > > >           if (!more_used(vq)) {
> > > > > > > > > > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > > > > > > > > > >                   return IRQ_NONE;
> > > > > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > > > > index 5464f398912a..957d6ad604ac 100644
> > > > > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > > > > > > > > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > > > > > > > > > >    * @config_enabled: configuration change reporting enabled
> > > > > > > > > > > >    * @config_change_pending: configuration change reported while disabled
> > > > > > > > > > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > > > > > > > > > + * @irq_soft_enabled: callbacks enabled
> > > > > > > > > > > >    * @config_lock: protects configuration change reporting
> > > > > > > > > > > >    * @dev: underlying device.
> > > > > > > > > > > >    * @id: the device type identification (used to match it with a driver).
> > > > > > > > > > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > > > > > > > > > >           bool failed;
> > > > > > > > > > > >           bool config_enabled;
> > > > > > > > > > > >           bool config_change_pending;
> > > > > > > > > > > > + bool irq_soft_check;
> > > > > > > > > > > > + bool irq_soft_enabled;
> > > > > > > > > > > >           spinlock_t config_lock;
> > > > > > > > > > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > > > > > > > > > >           struct device dev;
> > > > > > > > > > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > > > > > > > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > > > > > > > > > --- a/include/linux/virtio_config.h
> > > > > > > > > > > > +++ b/include/linux/virtio_config.h
> > > > > > > > > > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > > > > > > > > > >           return __virtio_test_bit(vdev, fbit);
> > > > > > > > > > > >   }
> > > > > > > > > > > > +/*
> > > > > > > > > > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > > > > > > > > > + * @vdev: the device
> > > > > > > > > > > > + */
> > > > > > > > > > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > > > > > > > > > +{
> > > > > > > > > > > > + if (!vdev->irq_soft_check)
> > > > > > > > > > > > +         return true;
> > > > > > > > > > > > +
> > > > > > > > > > > > + /*
> > > > > > > > > > > > +  * Read irq_soft_enabled before reading other device specific
> > > > > > > > > > > > +  * data. Paried with smp_store_relase() in
> > > > > > > > > > > paired
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Will fix.
> > > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > > > > > > > > > +  * virtio_reset_device().
> > > > > > > > > > > > +  */
> > > > > > > > > > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > > > > > > > > > +}
> > > > > > > > > > > > +
> > > > > > > > > > > >   /**
> > > > > > > > > > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > > > > > > > > > >    * @vdev: the device
> > > > > > > > > > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > > > > > > > > > >           if (dev->config->enable_cbs)
> > > > > > > > > > > >                     dev->config->enable_cbs(dev);
> > > > > > > > > > > > + /*
> > > > > > > > > > > > +  * Commit the driver setup before enabling the virtqueue
> > > > > > > > > > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > > > > > > > > > +  * virtio_irq_soft_enabled()
> > > > > > > > > > > > +  */
> > > > > > > > > > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > > > > > > > > > +
> > > > > > > > > > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > > > >   }
> > > > > > > > > > > > --
> > > > > > > > > > > > 2.25.1
> > > > > > > > >
> > > > > > >
> > > > >
> > >
>


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-29  7:12                 ` Re: Jason Wang
@ 2022-03-29 14:08                   ` Michael S. Tsirkin
  2022-03-30  2:40                     ` Re: Jason Wang
  0 siblings, 1 reply; 414+ messages in thread
From: Michael S. Tsirkin @ 2022-03-29 14:08 UTC (permalink / raw)
  To: Jason Wang
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Tue, Mar 29, 2022 at 03:12:14PM +0800, Jason Wang wrote:
> > > > > > And requesting irq commits all memory otherwise all drivers would be
> > > > > > broken,
> > > > >
> > > > > So I think we might talk different issues:
> > > > >
> > > > > 1) Whether request_irq() commits the previous setups, I think the
> > > > > answer is yes, since the spin_unlock of desc->lock (release) can
> > > > > guarantee this though there seems no documentation around
> > > > > request_irq() to say this.
> > > > >
> > > > > And I can see at least drivers/video/fbdev/omap2/omapfb/dss/dispc.c is
> > > > > using smp_wmb() before the request_irq().
> > > > >
> > > > > And even if write is ordered we still need read to be ordered to be
> > > > > paired with that.
> >
> > IMO it synchronizes with the CPU to which irq is
> > delivered. Otherwise basically all drivers would be broken,
> > wouldn't they be?
> 
> I guess it's because most of the drivers don't care much about the
> buggy/malicious device.  And most of the devices may require an extra
> step to enable device IRQ after request_irq(). Or it's the charge of
> the driver to do the synchronization.

It is true that the use-case of malicious devices is somewhat boutique.
But I think most drivers do want to have their hotplug routines to be
robust, yes.

> > I don't know whether it's correct on all platforms, but if not
> > we need to fix request_irq.
> >
> > > > >
> > > > > > if it doesn't it just needs to be fixed, not worked around in
> > > > > > virtio.
> > > > >
> > > > > 2) virtio drivers might do a lot of setups between request_irq() and
> > > > > virtio_device_ready():
> > > > >
> > > > > request_irq()
> > > > > driver specific setups
> > > > > virtio_device_ready()
> > > > >
> > > > > CPU 0 probe) request_irq()
> > > > > CPU 1 IRQ handler) read the uninitialized variable
> > > > > CPU 0 probe) driver specific setups
> > > > > CPU 0 probe) smp_store_release(intr_soft_enabled, true), commit the setups
> > > > > CPU 1 IRQ handler) read irq_soft_enable as true
> > > > > CPU 1 IRQ handler) use the uninitialized variable
> > > > >
> > > > > Thanks
> > > >
> > > >
> > > > As I said, virtio_device_ready needs to do synchronize_irq.
> > > > That will guarantee all setup is visible to the specific IRQ,
> > >
> > > Only the interrupt after synchronize_irq() returns.
> >
> > Anything else is a buggy device though.
> 
> Yes, but the goal of this patch is to prevent the possible attack from
> buggy(malicious) devices.

Right. However if a driver of a *buggy* device somehow sees driver_ok =
false even though it's actually initialized, that is not a deal breaker
as that does not open us up to an attack.

> >
> > > >this
> > > > is what it's point is.
> > >
> > > What happens if an interrupt is raised in the middle like:
> > >
> > > smp_store_release(dev->irq_soft_enabled, true)
> > > IRQ handler
> > > synchornize_irq()
> > >
> > > If we don't enforce a reading order, the IRQ handler may still see the
> > > uninitialized variable.
> > >
> > > Thanks
> >
> > IMHO variables should be initialized before request_irq
> > to a value meaning "not a valid interrupt".
> > Specifically driver_ok = false.
> > Handler in the scenario you describe will then see !driver_ok
> > and exit immediately.
> 
> So just to make sure we're on the same page.
> 
> 1) virtio_reset_device() will set the driver_ok to false;
> 2) virtio_device_ready() will set the driver_ok to true
> 
> So for virtio drivers, it often did:
> 
> 1) virtio_reset_device()
> 2) find_vqs() which will call request_irq()
> 3) other driver specific setups
> 4) virtio_device_ready()
> 
> In virtio_device_ready(), the patch perform the following currently:
> 
> smp_store_release(driver_ok, true);
> set_status(DRIVER_OK);
> 
> Per your suggestion, to add synchronize_irq() after
> smp_store_release() so we had
> 
> smp_store_release(driver_ok, true);
> synchornize_irq()
> set_status(DRIVER_OK)
> 
> Suppose there's a interrupt raised before the synchronize_irq(), if we do:
> 
> if (READ_ONCE(driver_ok)) {
>       vq->callback()
> }
> 
> It will see the driver_ok as true but how can we make sure
> vq->callback sees the driver specific setups (3) above?
> 
> And an example is virtio_scsi():
> 
> virtio_reset_device()
> virtscsi_probe()
>     virtscsi_init()
>         virtio_find_vqs()
>         ...
>         virtscsi_init_vq(&vscsi->event_vq, vqs[1])
>     ....
>     virtio_device_ready()
> 
> In virtscsi_event_done():
> 
> virtscsi_event_done():
>     virtscsi_vq_done(vscsi, &vscsi->event_vq, ...);
> 
> We need to make sure the even_done reads driver_ok before read vscsi->event_vq.
> 
> Thanks


See response by Thomas. A simple if (!dev->driver_ok) should be enough,
it's all under a lock.

> >
> >
> > > >
> > > >
> > > > > >
> > > > > >
> > > > > > > >
> > > > > > > > > We use smp_store_relase()
> > > > > > > > > to make sure the driver commits the setup before enabling the irq. It
> > > > > > > > > means the read needs to be ordered as well in vring_interrupt().
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Although I couldn't find anything about this in memory-barriers.txt
> > > > > > > > > > which surprises me.
> > > > > > > > > >
> > > > > > > > > > CC Paul to help make sure I'm right.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > > > > > > > > > > module parameter is introduced to enable the hardening so function
> > > > > > > > > > > > > hardening is disabled by default.
> > > > > > > > > > > > Which devices are these? How come they send an interrupt before there
> > > > > > > > > > > > are any buffers in any queues?
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > I copied this from the commit log for 22b7050a024d7
> > > > > > > > > > >
> > > > > > > > > > > "
> > > > > > > > > > >
> > > > > > > > > > >     This change will also benefit old hypervisors (before 2009)
> > > > > > > > > > >     that send interrupts without checking DRIVER_OK: previously,
> > > > > > > > > > >     the callback could race with driver-specific initialization.
> > > > > > > > > > > "
> > > > > > > > > > >
> > > > > > > > > > > If this is only for config interrupt, I can remove the above log.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > This is only for config interrupt.
> > > > > > > > >
> > > > > > > > > Ok.
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > > > > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > > > > > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > > > > > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > > > > > > > > handler because it uses spinlock to do the synchronization which is
> > > > > > > > > > > > > expensive.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > > > >
> > > > > > > > > > > > > ---
> > > > > > > > > > > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > > > > > > > > > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > > > > > > > > > > >   include/linux/virtio.h        |  4 ++++
> > > > > > > > > > > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > > > > > > > > > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > > > > > > > > > > >
> > > > > > > > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > > > > > > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > > > > > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > > > > > > > @@ -7,6 +7,12 @@
> > > > > > > > > > > > >   #include <linux/of.h>
> > > > > > > > > > > > >   #include <uapi/linux/virtio_ids.h>
> > > > > > > > > > > > > +static bool irq_hardening = false;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +module_param(irq_hardening, bool, 0444);
> > > > > > > > > > > > > +MODULE_PARM_DESC(irq_hardening,
> > > > > > > > > > > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > > > > > > > > > > +
> > > > > > > > > > > > >   /* Unique numbering for virtio devices. */
> > > > > > > > > > > > >   static DEFINE_IDA(virtio_index_ida);
> > > > > > > > > > > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > > > > > > > > > > >    * */
> > > > > > > > > > > > >   void virtio_reset_device(struct virtio_device *dev)
> > > > > > > > > > > > >   {
> > > > > > > > > > > > > + /*
> > > > > > > > > > > > > +  * The below synchronize_rcu() guarantees that any
> > > > > > > > > > > > > +  * interrupt for this line arriving after
> > > > > > > > > > > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > > > > > > > > > > +  * irq_soft_enabled == false.
> > > > > > > > > > > > News to me I did not know synchronize_rcu has anything to do
> > > > > > > > > > > > with interrupts. Did not you intend to use synchronize_irq?
> > > > > > > > > > > > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > > > > > > > > > > though it's most likely is ...
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > According to the comment above tree RCU version of synchronize_rcu():
> > > > > > > > > > >
> > > > > > > > > > > """
> > > > > > > > > > >
> > > > > > > > > > >  * RCU read-side critical sections are delimited by rcu_read_lock()
> > > > > > > > > > >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> > > > > > > > > > >  * v5.0 and later, regions of code across which interrupts, preemption,
> > > > > > > > > > >  * or softirqs have been disabled also serve as RCU read-side critical
> > > > > > > > > > >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> > > > > > > > > > >  * and NMI handlers.
> > > > > > > > > > > """
> > > > > > > > > > >
> > > > > > > > > > > So interrupt handlers are treated as read-side critical sections.
> > > > > > > > > > >
> > > > > > > > > > > And it has the comment for explain the barrier:
> > > > > > > > > > >
> > > > > > > > > > > """
> > > > > > > > > > >
> > > > > > > > > > >  * Note that this guarantee implies further memory-ordering guarantees.
> > > > > > > > > > >  * On systems with more than one CPU, when synchronize_rcu() returns,
> > > > > > > > > > >  * each CPU is guaranteed to have executed a full memory barrier since
> > > > > > > > > > >  * the end of its last RCU read-side critical section whose beginning
> > > > > > > > > > >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > > > > > > > > > > """
> > > > > > > > > > >
> > > > > > > > > > > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > > > > > > > > > > barrier, if the interrupt come after WRITE_ONCE() it will see the
> > > > > > > > > > > irq_soft_enabled as false.
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > You are right. So then
> > > > > > > > > > 1. I do not think we need load_acquire - why is it needed? Just
> > > > > > > > > >    READ_ONCE should do.
> > > > > > > > >
> > > > > > > > > See above.
> > > > > > > > >
> > > > > > > > > > 2. isn't synchronize_irq also doing the same thing?
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > Yes, but it requires a config ops since the IRQ knowledge is transport specific.
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > +  */
> > > > > > > > > > > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > > > > > > > > > > + synchronize_rcu();
> > > > > > > > > > > > > +
> > > > > > > > > > > > >           dev->config->reset(dev);
> > > > > > > > > > > > >   }
> > > > > > > > > > > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > > > > > > > > > > Please add comment explaining where it will be enabled.
> > > > > > > > > > > > Also, we *really* don't need to synch if it was already disabled,
> > > > > > > > > > > > let's not add useless overhead to the boot sequence.
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Ok.
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > > > > > > > > > > >           spin_lock_init(&dev->config_lock);
> > > > > > > > > > > > >           dev->config_enabled = false;
> > > > > > > > > > > > >           dev->config_change_pending = false;
> > > > > > > > > > > > > + dev->irq_soft_check = irq_hardening;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > + if (dev->irq_soft_check)
> > > > > > > > > > > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > > > > > > > > > > >           /* We always start by resetting the device, in case a previous
> > > > > > > > > > > > >            * driver messed it up.  This also tests that code path a little. */
> > > > > > > > > > > > one of the points of hardening is it's also helpful for buggy
> > > > > > > > > > > > devices. this flag defeats the purpose.
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Do you mean:
> > > > > > > > > > >
> > > > > > > > > > > 1) we need something like config_enable? This seems not easy to be
> > > > > > > > > > > implemented without obvious overhead, mainly the synchronize with the
> > > > > > > > > > > interrupt handlers
> > > > > > > > > >
> > > > > > > > > > But synchronize is only on tear-down path. That is not critical for any
> > > > > > > > > > users at the moment, even less than probe.
> > > > > > > > >
> > > > > > > > > I meant if we have vq->irq_pending, we need to call vring_interrupt()
> > > > > > > > > in the virtio_device_ready() and synchronize the IRQ handlers with
> > > > > > > > > spinlock or others.
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > 2) enable this by default, so I don't object, but this may have some risk
> > > > > > > > > > > for old hypervisors
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > The risk if there's a driver adding buffers without setting DRIVER_OK.
> > > > > > > > >
> > > > > > > > > Probably not, we have devices that accept random inputs from outside,
> > > > > > > > > net, console, input etc. I've done a round of audits of the Qemu
> > > > > > > > > codes. They look all fine since day0.
> > > > > > > > >
> > > > > > > > > > So with this approach, how about we rename the flag "driver_ok"?
> > > > > > > > > > And then add_buf can actually test it and BUG_ON if not there  (at least
> > > > > > > > > > in the debug build).
> > > > > > > > >
> > > > > > > > > This looks like a hardening of the driver in the core instead of the
> > > > > > > > > device. I think it can be done but in a separate series.
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > And going down from there, how about we cache status in the
> > > > > > > > > > device? Then we don't need to keep re-reading it every time,
> > > > > > > > > > speeding boot up a tiny bit.
> > > > > > > > >
> > > > > > > > > I don't fully understand here, actually spec requires status to be
> > > > > > > > > read back for validation in many cases.
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > > > > > > > > > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > > > > > > > > > >   }
> > > > > > > > > > > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > > > > > > > > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > > > > > > > > > > >   {
> > > > > > > > > > > > > + struct virtqueue *_vq = v;
> > > > > > > > > > > > > + struct virtio_device *vdev = _vq->vdev;
> > > > > > > > > > > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > > > > > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > > > > > > > > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > > > > > > > > > > +         return IRQ_NONE;
> > > > > > > > > > > > > + }
> > > > > > > > > > > > > +
> > > > > > > > > > > > >           if (!more_used(vq)) {
> > > > > > > > > > > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > > > > > > > > > > >                   return IRQ_NONE;
> > > > > > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > > > > > index 5464f398912a..957d6ad604ac 100644
> > > > > > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > > > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > > > > > > > > > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > > > > > > > > > > >    * @config_enabled: configuration change reporting enabled
> > > > > > > > > > > > >    * @config_change_pending: configuration change reported while disabled
> > > > > > > > > > > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > > > > > > > > > > + * @irq_soft_enabled: callbacks enabled
> > > > > > > > > > > > >    * @config_lock: protects configuration change reporting
> > > > > > > > > > > > >    * @dev: underlying device.
> > > > > > > > > > > > >    * @id: the device type identification (used to match it with a driver).
> > > > > > > > > > > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > > > > > > > > > > >           bool failed;
> > > > > > > > > > > > >           bool config_enabled;
> > > > > > > > > > > > >           bool config_change_pending;
> > > > > > > > > > > > > + bool irq_soft_check;
> > > > > > > > > > > > > + bool irq_soft_enabled;
> > > > > > > > > > > > >           spinlock_t config_lock;
> > > > > > > > > > > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > > > > > > > > > > >           struct device dev;
> > > > > > > > > > > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > > > > > > > > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > > > > > > > > > > --- a/include/linux/virtio_config.h
> > > > > > > > > > > > > +++ b/include/linux/virtio_config.h
> > > > > > > > > > > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > > > > > > > > > > >           return __virtio_test_bit(vdev, fbit);
> > > > > > > > > > > > >   }
> > > > > > > > > > > > > +/*
> > > > > > > > > > > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > > > > > > > > > > + * @vdev: the device
> > > > > > > > > > > > > + */
> > > > > > > > > > > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > > > > > > > > > > +{
> > > > > > > > > > > > > + if (!vdev->irq_soft_check)
> > > > > > > > > > > > > +         return true;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > + /*
> > > > > > > > > > > > > +  * Read irq_soft_enabled before reading other device specific
> > > > > > > > > > > > > +  * data. Paried with smp_store_relase() in
> > > > > > > > > > > > paired
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Will fix.
> > > > > > > > > > >
> > > > > > > > > > > Thanks
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > > > > > > > > > > +  * virtio_reset_device().
> > > > > > > > > > > > > +  */
> > > > > > > > > > > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > > > > > > > > > > +}
> > > > > > > > > > > > > +
> > > > > > > > > > > > >   /**
> > > > > > > > > > > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > > > > > > > > > > >    * @vdev: the device
> > > > > > > > > > > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > > > > > > > > > > >           if (dev->config->enable_cbs)
> > > > > > > > > > > > >                     dev->config->enable_cbs(dev);
> > > > > > > > > > > > > + /*
> > > > > > > > > > > > > +  * Commit the driver setup before enabling the virtqueue
> > > > > > > > > > > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > > > > > > > > > > +  * virtio_irq_soft_enabled()
> > > > > > > > > > > > > +  */
> > > > > > > > > > > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > > > > > > > > > > +
> > > > > > > > > > > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > > > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > > > > >   }
> > > > > > > > > > > > > --
> > > > > > > > > > > > > 2.25.1
> > > > > > > > > >
> > > > > > > >
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-29 14:08                   ` Re: Michael S. Tsirkin
@ 2022-03-30  2:40                     ` Jason Wang
  2022-03-30  5:14                       ` Re: Michael S. Tsirkin
  0 siblings, 1 reply; 414+ messages in thread
From: Jason Wang @ 2022-03-30  2:40 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Tue, Mar 29, 2022 at 10:09 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Tue, Mar 29, 2022 at 03:12:14PM +0800, Jason Wang wrote:
> > > > > > > And requesting irq commits all memory otherwise all drivers would be
> > > > > > > broken,
> > > > > >
> > > > > > So I think we might talk different issues:
> > > > > >
> > > > > > 1) Whether request_irq() commits the previous setups, I think the
> > > > > > answer is yes, since the spin_unlock of desc->lock (release) can
> > > > > > guarantee this though there seems no documentation around
> > > > > > request_irq() to say this.
> > > > > >
> > > > > > And I can see at least drivers/video/fbdev/omap2/omapfb/dss/dispc.c is
> > > > > > using smp_wmb() before the request_irq().
> > > > > >
> > > > > > And even if write is ordered we still need read to be ordered to be
> > > > > > paired with that.
> > >
> > > IMO it synchronizes with the CPU to which irq is
> > > delivered. Otherwise basically all drivers would be broken,
> > > wouldn't they be?
> >
> > I guess it's because most of the drivers don't care much about the
> > buggy/malicious device.  And most of the devices may require an extra
> > step to enable device IRQ after request_irq(). Or it's the charge of
> > the driver to do the synchronization.
>
> It is true that the use-case of malicious devices is somewhat boutique.
> But I think most drivers do want to have their hotplug routines to be
> robust, yes.
>
> > > I don't know whether it's correct on all platforms, but if not
> > > we need to fix request_irq.
> > >
> > > > > >
> > > > > > > if it doesn't it just needs to be fixed, not worked around in
> > > > > > > virtio.
> > > > > >
> > > > > > 2) virtio drivers might do a lot of setups between request_irq() and
> > > > > > virtio_device_ready():
> > > > > >
> > > > > > request_irq()
> > > > > > driver specific setups
> > > > > > virtio_device_ready()
> > > > > >
> > > > > > CPU 0 probe) request_irq()
> > > > > > CPU 1 IRQ handler) read the uninitialized variable
> > > > > > CPU 0 probe) driver specific setups
> > > > > > CPU 0 probe) smp_store_release(intr_soft_enabled, true), commit the setups
> > > > > > CPU 1 IRQ handler) read irq_soft_enable as true
> > > > > > CPU 1 IRQ handler) use the uninitialized variable
> > > > > >
> > > > > > Thanks
> > > > >
> > > > >
> > > > > As I said, virtio_device_ready needs to do synchronize_irq.
> > > > > That will guarantee all setup is visible to the specific IRQ,
> > > >
> > > > Only the interrupt after synchronize_irq() returns.
> > >
> > > Anything else is a buggy device though.
> >
> > Yes, but the goal of this patch is to prevent the possible attack from
> > buggy(malicious) devices.
>
> Right. However if a driver of a *buggy* device somehow sees driver_ok =
> false even though it's actually initialized, that is not a deal breaker
> as that does not open us up to an attack.
>
> > >
> > > > >this
> > > > > is what it's point is.
> > > >
> > > > What happens if an interrupt is raised in the middle like:
> > > >
> > > > smp_store_release(dev->irq_soft_enabled, true)
> > > > IRQ handler
> > > > > synchronize_irq()
> > > >
> > > > If we don't enforce a reading order, the IRQ handler may still see the
> > > > uninitialized variable.
> > > >
> > > > Thanks
> > >
> > > IMHO variables should be initialized before request_irq
> > > to a value meaning "not a valid interrupt".
> > > Specifically driver_ok = false.
> > > Handler in the scenario you describe will then see !driver_ok
> > > and exit immediately.
> >
> > So just to make sure we're on the same page.
> >
> > 1) virtio_reset_device() will set the driver_ok to false;
> > 2) virtio_device_ready() will set the driver_ok to true
> >
> > So for virtio drivers, it often did:
> >
> > 1) virtio_reset_device()
> > 2) find_vqs() which will call request_irq()
> > 3) other driver specific setups
> > 4) virtio_device_ready()
> >
> > > In virtio_device_ready(), the patch currently performs the following:
> >
> > smp_store_release(driver_ok, true);
> > set_status(DRIVER_OK);
> >
> > Per your suggestion, to add synchronize_irq() after
> > smp_store_release() so we had
> >
> > smp_store_release(driver_ok, true);
> > > synchronize_irq()
> > set_status(DRIVER_OK)
> >
> > > Suppose there's an interrupt raised before the synchronize_irq(), if we do:
> >
> > if (READ_ONCE(driver_ok)) {
> >       vq->callback()
> > }
> >
> > It will see the driver_ok as true but how can we make sure
> > vq->callback sees the driver specific setups (3) above?
> >
> > And an example is virtio_scsi():
> >
> > virtio_reset_device()
> > virtscsi_probe()
> >     virtscsi_init()
> >         virtio_find_vqs()
> >         ...
> >         virtscsi_init_vq(&vscsi->event_vq, vqs[1])
> >     ....
> >     virtio_device_ready()
> >
> > In virtscsi_event_done():
> >
> > virtscsi_event_done():
> >     virtscsi_vq_done(vscsi, &vscsi->event_vq, ...);
> >
> > > We need to make sure the event_done reads driver_ok before reading vscsi->event_vq.
> >
> > Thanks
>
>
> See response by Thomas. A simple if (!dev->driver_ok) should be enough,
> it's all under a lock.

Ordered through ACQUIRE+RELEASE actually since the irq handler is not
running under the lock.

Another question, for synchronize_irq() do you prefer

1) transport specific callbacks
or
2) a simple synchronize_rcu()

Thanks

>
> > >
> > >
> > > > >
> > > > >
> > > > > > >
> > > > > > >
> > > > > > > > >
> > > > > > > > > > > We use smp_store_release()
> > > > > > > > > > to make sure the driver commits the setup before enabling the irq. It
> > > > > > > > > > means the read needs to be ordered as well in vring_interrupt().
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Although I couldn't find anything about this in memory-barriers.txt
> > > > > > > > > > > which surprises me.
> > > > > > > > > > >
> > > > > > > > > > > CC Paul to help make sure I'm right.
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > > > > > > > > > > > module parameter is introduced to enable the hardening so function
> > > > > > > > > > > > > > hardening is disabled by default.
> > > > > > > > > > > > > Which devices are these? How come they send an interrupt before there
> > > > > > > > > > > > > are any buffers in any queues?
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > I copied this from the commit log for 22b7050a024d7
> > > > > > > > > > > >
> > > > > > > > > > > > "
> > > > > > > > > > > >
> > > > > > > > > > > >     This change will also benefit old hypervisors (before 2009)
> > > > > > > > > > > >     that send interrupts without checking DRIVER_OK: previously,
> > > > > > > > > > > >     the callback could race with driver-specific initialization.
> > > > > > > > > > > > "
> > > > > > > > > > > >
> > > > > > > > > > > > If this is only for config interrupt, I can remove the above log.
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > This is only for config interrupt.
> > > > > > > > > >
> > > > > > > > > > Ok.
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > > > > > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > > > > > > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > > > > > > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > > > > > > > > > handler because it uses spinlock to do the synchronization which is
> > > > > > > > > > > > > > expensive.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > > > > >
> > > > > > > > > > > > > > ---
> > > > > > > > > > > > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > > > > > > > > > > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > > > > > > > > > > > >   include/linux/virtio.h        |  4 ++++
> > > > > > > > > > > > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > > > > > > > > > > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > > > > > > > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > > > > > > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > > > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > > > > > > > > @@ -7,6 +7,12 @@
> > > > > > > > > > > > > >   #include <linux/of.h>
> > > > > > > > > > > > > >   #include <uapi/linux/virtio_ids.h>
> > > > > > > > > > > > > > +static bool irq_hardening = false;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +module_param(irq_hardening, bool, 0444);
> > > > > > > > > > > > > > +MODULE_PARM_DESC(irq_hardening,
> > > > > > > > > > > > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > >   /* Unique numbering for virtio devices. */
> > > > > > > > > > > > > >   static DEFINE_IDA(virtio_index_ida);
> > > > > > > > > > > > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > > > > > > > > > > > >    * */
> > > > > > > > > > > > > >   void virtio_reset_device(struct virtio_device *dev)
> > > > > > > > > > > > > >   {
> > > > > > > > > > > > > > + /*
> > > > > > > > > > > > > > +  * The below synchronize_rcu() guarantees that any
> > > > > > > > > > > > > > +  * interrupt for this line arriving after
> > > > > > > > > > > > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > > > > > > > > > > > +  * irq_soft_enabled == false.
> > > > > > > > > > > > > News to me I did not know synchronize_rcu has anything to do
> > > > > > > > > > > > > with interrupts. Did not you intend to use synchronize_irq?
> > > > > > > > > > > > > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > > > > > > > > > > > though it's most likely is ...
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > According to the comment above tree RCU version of synchronize_rcu():
> > > > > > > > > > > >
> > > > > > > > > > > > """
> > > > > > > > > > > >
> > > > > > > > > > > >  * RCU read-side critical sections are delimited by rcu_read_lock()
> > > > > > > > > > > >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> > > > > > > > > > > >  * v5.0 and later, regions of code across which interrupts, preemption,
> > > > > > > > > > > >  * or softirqs have been disabled also serve as RCU read-side critical
> > > > > > > > > > > >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> > > > > > > > > > > >  * and NMI handlers.
> > > > > > > > > > > > """
> > > > > > > > > > > >
> > > > > > > > > > > > So interrupt handlers are treated as read-side critical sections.
> > > > > > > > > > > >
> > > > > > > > > > > > And it has the comment for explain the barrier:
> > > > > > > > > > > >
> > > > > > > > > > > > """
> > > > > > > > > > > >
> > > > > > > > > > > >  * Note that this guarantee implies further memory-ordering guarantees.
> > > > > > > > > > > >  * On systems with more than one CPU, when synchronize_rcu() returns,
> > > > > > > > > > > >  * each CPU is guaranteed to have executed a full memory barrier since
> > > > > > > > > > > >  * the end of its last RCU read-side critical section whose beginning
> > > > > > > > > > > >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > > > > > > > > > > > """
> > > > > > > > > > > >
> > > > > > > > > > > > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > > > > > > > > > > > barrier, if the interrupt come after WRITE_ONCE() it will see the
> > > > > > > > > > > > irq_soft_enabled as false.
> > > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > You are right. So then
> > > > > > > > > > > 1. I do not think we need load_acquire - why is it needed? Just
> > > > > > > > > > >    READ_ONCE should do.
> > > > > > > > > >
> > > > > > > > > > See above.
> > > > > > > > > >
> > > > > > > > > > > 2. isn't synchronize_irq also doing the same thing?
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Yes, but it requires a config ops since the IRQ knowledge is transport specific.
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > > +  */
> > > > > > > > > > > > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > > > > > > > > > > > + synchronize_rcu();
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > >           dev->config->reset(dev);
> > > > > > > > > > > > > >   }
> > > > > > > > > > > > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > > > > > > > > > > > Please add comment explaining where it will be enabled.
> > > > > > > > > > > > > Also, we *really* don't need to synch if it was already disabled,
> > > > > > > > > > > > > let's not add useless overhead to the boot sequence.
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > Ok.
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > > > > > > > > > > > >           spin_lock_init(&dev->config_lock);
> > > > > > > > > > > > > >           dev->config_enabled = false;
> > > > > > > > > > > > > >           dev->config_change_pending = false;
> > > > > > > > > > > > > > + dev->irq_soft_check = irq_hardening;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > + if (dev->irq_soft_check)
> > > > > > > > > > > > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > > > > > > > > > > > >           /* We always start by resetting the device, in case a previous
> > > > > > > > > > > > > >            * driver messed it up.  This also tests that code path a little. */
> > > > > > > > > > > > > one of the points of hardening is it's also helpful for buggy
> > > > > > > > > > > > > devices. this flag defeats the purpose.
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > Do you mean:
> > > > > > > > > > > >
> > > > > > > > > > > > 1) we need something like config_enable? This seems not easy to be
> > > > > > > > > > > > implemented without obvious overhead, mainly the synchronize with the
> > > > > > > > > > > > interrupt handlers
> > > > > > > > > > >
> > > > > > > > > > > But synchronize is only on tear-down path. That is not critical for any
> > > > > > > > > > > users at the moment, even less than probe.
> > > > > > > > > >
> > > > > > > > > > I meant if we have vq->irq_pending, we need to call vring_interrupt()
> > > > > > > > > > in the virtio_device_ready() and synchronize the IRQ handlers with
> > > > > > > > > > spinlock or others.
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > 2) enable this by default, so I don't object, but this may have some risk
> > > > > > > > > > > > for old hypervisors
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > The risk if there's a driver adding buffers without setting DRIVER_OK.
> > > > > > > > > >
> > > > > > > > > > Probably not, we have devices that accept random inputs from outside,
> > > > > > > > > > net, console, input etc. I've done a round of audits of the Qemu
> > > > > > > > > > codes. They look all fine since day0.
> > > > > > > > > >
> > > > > > > > > > > So with this approach, how about we rename the flag "driver_ok"?
> > > > > > > > > > > And then add_buf can actually test it and BUG_ON if not there  (at least
> > > > > > > > > > > in the debug build).
> > > > > > > > > >
> > > > > > > > > > This looks like a hardening of the driver in the core instead of the
> > > > > > > > > > device. I think it can be done but in a separate series.
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > And going down from there, how about we cache status in the
> > > > > > > > > > > device? Then we don't need to keep re-reading it every time,
> > > > > > > > > > > speeding boot up a tiny bit.
> > > > > > > > > >
> > > > > > > > > > I don't fully understand here, actually spec requires status to be
> > > > > > > > > > read back for validation in many cases.
> > > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > > > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > > > > > > > > > > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > > > > > > > > > > >   }
> > > > > > > > > > > > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > > > > > > > > > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > > > > > > > > > > > >   {
> > > > > > > > > > > > > > + struct virtqueue *_vq = v;
> > > > > > > > > > > > > > + struct virtio_device *vdev = _vq->vdev;
> > > > > > > > > > > > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > > > > > > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > > > > > > > > > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > > > > > > > > > > > +         return IRQ_NONE;
> > > > > > > > > > > > > > + }
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > >           if (!more_used(vq)) {
> > > > > > > > > > > > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > > > > > > > > > > > >                   return IRQ_NONE;
> > > > > > > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > > > > > > index 5464f398912a..957d6ad604ac 100644
> > > > > > > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > > > > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > > > > > > > > > > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > > > > > > > > > > > >    * @config_enabled: configuration change reporting enabled
> > > > > > > > > > > > > >    * @config_change_pending: configuration change reported while disabled
> > > > > > > > > > > > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > > > > > > > > > > > + * @irq_soft_enabled: callbacks enabled
> > > > > > > > > > > > > >    * @config_lock: protects configuration change reporting
> > > > > > > > > > > > > >    * @dev: underlying device.
> > > > > > > > > > > > > >    * @id: the device type identification (used to match it with a driver).
> > > > > > > > > > > > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > > > > > > > > > > > >           bool failed;
> > > > > > > > > > > > > >           bool config_enabled;
> > > > > > > > > > > > > >           bool config_change_pending;
> > > > > > > > > > > > > > + bool irq_soft_check;
> > > > > > > > > > > > > > + bool irq_soft_enabled;
> > > > > > > > > > > > > >           spinlock_t config_lock;
> > > > > > > > > > > > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > > > > > > > > > > > >           struct device dev;
> > > > > > > > > > > > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > > > > > > > > > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > > > > > > > > > > > --- a/include/linux/virtio_config.h
> > > > > > > > > > > > > > +++ b/include/linux/virtio_config.h
> > > > > > > > > > > > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > > > > > > > > > > > >           return __virtio_test_bit(vdev, fbit);
> > > > > > > > > > > > > >   }
> > > > > > > > > > > > > > +/*
> > > > > > > > > > > > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > > > > > > > > > > > + * @vdev: the device
> > > > > > > > > > > > > > + */
> > > > > > > > > > > > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > > > > > > > > > > > +{
> > > > > > > > > > > > > > + if (!vdev->irq_soft_check)
> > > > > > > > > > > > > > +         return true;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > + /*
> > > > > > > > > > > > > > +  * Read irq_soft_enabled before reading other device specific
> > > > > > > > > > > > > > +  * data. Paried with smp_store_relase() in
> > > > > > > > > > > > > paired
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > Will fix.
> > > > > > > > > > > >
> > > > > > > > > > > > Thanks
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > > > > > > > > > > > +  * virtio_reset_device().
> > > > > > > > > > > > > > +  */
> > > > > > > > > > > > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > > > > > > > > > > > +}
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > >   /**
> > > > > > > > > > > > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > > > > > > > > > > > >    * @vdev: the device
> > > > > > > > > > > > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > > > > > > > > > > > >           if (dev->config->enable_cbs)
> > > > > > > > > > > > > >                     dev->config->enable_cbs(dev);
> > > > > > > > > > > > > > + /*
> > > > > > > > > > > > > > +  * Commit the driver setup before enabling the virtqueue
> > > > > > > > > > > > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > > > > > > > > > > > +  * virtio_irq_soft_enabled()
> > > > > > > > > > > > > > +  */
> > > > > > > > > > > > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > > > > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > > > > > >   }
> > > > > > > > > > > > > > --
> > > > > > > > > > > > > > 2.25.1
> > > > > > > > > > >
> > > > > > > > >
> > > > > > >
> > > > >
> > >
>


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-30  2:40                     ` Re: Jason Wang
@ 2022-03-30  5:14                       ` Michael S. Tsirkin
  2022-03-30  5:53                         ` Re: Jason Wang
  0 siblings, 1 reply; 414+ messages in thread
From: Michael S. Tsirkin @ 2022-03-30  5:14 UTC (permalink / raw)
  To: Jason Wang
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Wed, Mar 30, 2022 at 10:40:59AM +0800, Jason Wang wrote:
> On Tue, Mar 29, 2022 at 10:09 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Tue, Mar 29, 2022 at 03:12:14PM +0800, Jason Wang wrote:
> > > > > > > > And requesting irq commits all memory otherwise all drivers would be
> > > > > > > > broken,
> > > > > > >
> > > > > > > So I think we might talk different issues:
> > > > > > >
> > > > > > > 1) Whether request_irq() commits the previous setups, I think the
> > > > > > > answer is yes, since the spin_unlock of desc->lock (release) can
> > > > > > > guarantee this though there seems no documentation around
> > > > > > > request_irq() to say this.
> > > > > > >
> > > > > > > And I can see at least drivers/video/fbdev/omap2/omapfb/dss/dispc.c is
> > > > > > > using smp_wmb() before the request_irq().
> > > > > > >
> > > > > > > And even if write is ordered we still need read to be ordered to be
> > > > > > > paired with that.
> > > >
> > > > IMO it synchronizes with the CPU to which irq is
> > > > delivered. Otherwise basically all drivers would be broken,
> > > > wouldn't they be?
> > >
> > > I guess it's because most of the drivers don't care much about the
> > > buggy/malicious device.  And most of the devices may require an extra
> > > step to enable device IRQ after request_irq(). Or it's the charge of
> > > the driver to do the synchronization.
> >
> > It is true that the use-case of malicious devices is somewhat boutique.
> > But I think most drivers do want to have their hotplug routines to be
> > robust, yes.
> >
> > > > I don't know whether it's correct on all platforms, but if not
> > > > we need to fix request_irq.
> > > >
> > > > > > >
> > > > > > > > if it doesn't it just needs to be fixed, not worked around in
> > > > > > > > virtio.
> > > > > > >
> > > > > > > 2) virtio drivers might do a lot of setups between request_irq() and
> > > > > > > virtio_device_ready():
> > > > > > >
> > > > > > > request_irq()
> > > > > > > driver specific setups
> > > > > > > virtio_device_ready()
> > > > > > >
> > > > > > > CPU 0 probe) request_irq()
> > > > > > > CPU 1 IRQ handler) read the uninitialized variable
> > > > > > > CPU 0 probe) driver specific setups
> > > > > > > CPU 0 probe) smp_store_release(intr_soft_enabled, true), commit the setups
> > > > > > > CPU 1 IRQ handler) read irq_soft_enable as true
> > > > > > > CPU 1 IRQ handler) use the uninitialized variable
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > >
> > > > > > As I said, virtio_device_ready needs to do synchronize_irq.
> > > > > > That will guarantee all setup is visible to the specific IRQ,
> > > > >
> > > > > Only the interrupt after synchronize_irq() returns.
> > > >
> > > > Anything else is a buggy device though.
> > >
> > > Yes, but the goal of this patch is to prevent the possible attack from
> > > buggy(malicious) devices.
> >
> > Right. However if a driver of a *buggy* device somehow sees driver_ok =
> > false even though it's actually initialized, that is not a deal breaker
> > as that does not open us up to an attack.
> >
> > > >
> > > > > >this
> > > > > > is what it's point is.
> > > > >
> > > > > What happens if an interrupt is raised in the middle like:
> > > > >
> > > > > smp_store_release(dev->irq_soft_enabled, true)
> > > > > IRQ handler
> > > > > synchornize_irq()
> > > > >
> > > > > If we don't enforce a reading order, the IRQ handler may still see the
> > > > > uninitialized variable.
> > > > >
> > > > > Thanks
> > > >
> > > > IMHO variables should be initialized before request_irq
> > > > to a value meaning "not a valid interrupt".
> > > > Specifically driver_ok = false.
> > > > Handler in the scenario you describe will then see !driver_ok
> > > > and exit immediately.
> > >
> > > So just to make sure we're on the same page.
> > >
> > > 1) virtio_reset_device() will set the driver_ok to false;
> > > 2) virtio_device_ready() will set the driver_ok to true
> > >
> > > So for virtio drivers, it often did:
> > >
> > > 1) virtio_reset_device()
> > > 2) find_vqs() which will call request_irq()
> > > 3) other driver specific setups
> > > 4) virtio_device_ready()
> > >
> > > In virtio_device_ready(), the patch perform the following currently:
> > >
> > > smp_store_release(driver_ok, true);
> > > set_status(DRIVER_OK);
> > >
> > > Per your suggestion, to add synchronize_irq() after
> > > smp_store_release() so we had
> > >
> > > smp_store_release(driver_ok, true);
> > > synchornize_irq()
> > > set_status(DRIVER_OK)
> > >
> > > Suppose there's a interrupt raised before the synchronize_irq(), if we do:
> > >
> > > if (READ_ONCE(driver_ok)) {
> > >       vq->callback()
> > > }
> > >
> > > It will see the driver_ok as true but how can we make sure
> > > vq->callback sees the driver specific setups (3) above?
> > >
> > > And an example is virtio_scsi():
> > >
> > > virtio_reset_device()
> > > virtscsi_probe()
> > >     virtscsi_init()
> > >         virtio_find_vqs()
> > >         ...
> > >         virtscsi_init_vq(&vscsi->event_vq, vqs[1])
> > >     ....
> > >     virtio_device_ready()
> > >
> > > In virtscsi_event_done():
> > >
> > > virtscsi_event_done():
> > >     virtscsi_vq_done(vscsi, &vscsi->event_vq, ...);
> > >
> > > We need to make sure the even_done reads driver_ok before read vscsi->event_vq.
> > >
> > > Thanks
> >
> >
> > See response by Thomas. A simple if (!dev->driver_ok) should be enough,
> > it's all under a lock.
> 
> Ordered through ACQUIRE+RELEASE actually since the irq handler is not
> running under the lock.
> 
> Another question, for synchronize_irq() do you prefer
> 
> 1) transport specific callbacks
> or
> 2) a simple synchornize_rcu()
> 
> Thanks


1) I think, and I'd add a wrapper so we can switch to 2 if we really
want to. But for now synchronizing the specific irq is obviously designed to
make any changes to memory visible to this irq. That
seems cleaner and easier to understand than memory ordering tricks
and relying on side effects of synchronize_rcu, even though
internally this all boils down to memory ordering since
memory is what's used to implement locks :).
Not to mention, synchronize_irq just scales much better from a performance
POV.


> >
> > > >
> > > >
> > > > > >
> > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > We use smp_store_relase()
> > > > > > > > > > > to make sure the driver commits the setup before enabling the irq. It
> > > > > > > > > > > means the read needs to be ordered as well in vring_interrupt().
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > Although I couldn't find anything about this in memory-barriers.txt
> > > > > > > > > > > > which surprises me.
> > > > > > > > > > > >
> > > > > > > > > > > > CC Paul to help make sure I'm right.
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > > > > > > > > > > > > module parameter is introduced to enable the hardening so function
> > > > > > > > > > > > > > > hardening is disabled by default.
> > > > > > > > > > > > > > Which devices are these? How come they send an interrupt before there
> > > > > > > > > > > > > > are any buffers in any queues?
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > I copied this from the commit log for 22b7050a024d7
> > > > > > > > > > > > >
> > > > > > > > > > > > > "
> > > > > > > > > > > > >
> > > > > > > > > > > > >     This change will also benefit old hypervisors (before 2009)
> > > > > > > > > > > > >     that send interrupts without checking DRIVER_OK: previously,
> > > > > > > > > > > > >     the callback could race with driver-specific initialization.
> > > > > > > > > > > > > "
> > > > > > > > > > > > >
> > > > > > > > > > > > > If this is only for config interrupt, I can remove the above log.
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > This is only for config interrupt.
> > > > > > > > > > >
> > > > > > > > > > > Ok.
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > > > > > > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > > > > > > > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > > > > > > > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > > > > > > > > > > handler because it uses spinlock to do the synchronization which is
> > > > > > > > > > > > > > > expensive.
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > > ---
> > > > > > > > > > > > > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > > > > > > > > > > > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > > > > > > > > > > > > >   include/linux/virtio.h        |  4 ++++
> > > > > > > > > > > > > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > > > > > > > > > > > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > > > > > > > > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > > > > > > > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > > > > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > > > > > > > > > @@ -7,6 +7,12 @@
> > > > > > > > > > > > > > >   #include <linux/of.h>
> > > > > > > > > > > > > > >   #include <uapi/linux/virtio_ids.h>
> > > > > > > > > > > > > > > +static bool irq_hardening = false;
> > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > > +module_param(irq_hardening, bool, 0444);
> > > > > > > > > > > > > > > +MODULE_PARM_DESC(irq_hardening,
> > > > > > > > > > > > > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > >   /* Unique numbering for virtio devices. */
> > > > > > > > > > > > > > >   static DEFINE_IDA(virtio_index_ida);
> > > > > > > > > > > > > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > > > > > > > > > > > > >    * */
> > > > > > > > > > > > > > >   void virtio_reset_device(struct virtio_device *dev)
> > > > > > > > > > > > > > >   {
> > > > > > > > > > > > > > > + /*
> > > > > > > > > > > > > > > +  * The below synchronize_rcu() guarantees that any
> > > > > > > > > > > > > > > +  * interrupt for this line arriving after
> > > > > > > > > > > > > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > > > > > > > > > > > > +  * irq_soft_enabled == false.
> > > > > > > > > > > > > > News to me I did not know synchronize_rcu has anything to do
> > > > > > > > > > > > > > with interrupts. Did not you intend to use synchronize_irq?
> > > > > > > > > > > > > > I am not even 100% sure synchronize_rcu is by design a memory barrier
> > > > > > > > > > > > > > though it's most likely is ...
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > According to the comment above tree RCU version of synchronize_rcu():
> > > > > > > > > > > > >
> > > > > > > > > > > > > """
> > > > > > > > > > > > >
> > > > > > > > > > > > >  * RCU read-side critical sections are delimited by rcu_read_lock()
> > > > > > > > > > > > >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> > > > > > > > > > > > >  * v5.0 and later, regions of code across which interrupts, preemption,
> > > > > > > > > > > > >  * or softirqs have been disabled also serve as RCU read-side critical
> > > > > > > > > > > > >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> > > > > > > > > > > > >  * and NMI handlers.
> > > > > > > > > > > > > """
> > > > > > > > > > > > >
> > > > > > > > > > > > > So interrupt handlers are treated as read-side critical sections.
> > > > > > > > > > > > >
> > > > > > > > > > > > > And it has the comment for explain the barrier:
> > > > > > > > > > > > >
> > > > > > > > > > > > > """
> > > > > > > > > > > > >
> > > > > > > > > > > > >  * Note that this guarantee implies further memory-ordering guarantees.
> > > > > > > > > > > > >  * On systems with more than one CPU, when synchronize_rcu() returns,
> > > > > > > > > > > > >  * each CPU is guaranteed to have executed a full memory barrier since
> > > > > > > > > > > > >  * the end of its last RCU read-side critical section whose beginning
> > > > > > > > > > > > >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > > > > > > > > > > > > """
> > > > > > > > > > > > >
> > > > > > > > > > > > > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > > > > > > > > > > > > barrier, if the interrupt come after WRITE_ONCE() it will see the
> > > > > > > > > > > > > irq_soft_enabled as false.
> > > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > You are right. So then
> > > > > > > > > > > > 1. I do not think we need load_acquire - why is it needed? Just
> > > > > > > > > > > >    READ_ONCE should do.
> > > > > > > > > > >
> > > > > > > > > > > See above.
> > > > > > > > > > >
> > > > > > > > > > > > 2. isn't synchronize_irq also doing the same thing?
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Yes, but it requires a config ops since the IRQ knowledge is transport specific.
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > > +  */
> > > > > > > > > > > > > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > > > > > > > > > > > > + synchronize_rcu();
> > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > >           dev->config->reset(dev);
> > > > > > > > > > > > > > >   }
> > > > > > > > > > > > > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > > > > > > > > > > > > Please add comment explaining where it will be enabled.
> > > > > > > > > > > > > > Also, we *really* don't need to synch if it was already disabled,
> > > > > > > > > > > > > > let's not add useless overhead to the boot sequence.
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > Ok.
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > > > > > > > > > > > > >           spin_lock_init(&dev->config_lock);
> > > > > > > > > > > > > > >           dev->config_enabled = false;
> > > > > > > > > > > > > > >           dev->config_change_pending = false;
> > > > > > > > > > > > > > > + dev->irq_soft_check = irq_hardening;
> > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > > + if (dev->irq_soft_check)
> > > > > > > > > > > > > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > > > > > > > > > > > > >           /* We always start by resetting the device, in case a previous
> > > > > > > > > > > > > > >            * driver messed it up.  This also tests that code path a little. */
> > > > > > > > > > > > > > one of the points of hardening is it's also helpful for buggy
> > > > > > > > > > > > > > devices. this flag defeats the purpose.
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > Do you mean:
> > > > > > > > > > > > >
> > > > > > > > > > > > > 1) we need something like config_enable? This seems not easy to be
> > > > > > > > > > > > > implemented without obvious overhead, mainly the synchronize with the
> > > > > > > > > > > > > interrupt handlers
> > > > > > > > > > > >
> > > > > > > > > > > > But synchronize is only on tear-down path. That is not critical for any
> > > > > > > > > > > > users at the moment, even less than probe.
> > > > > > > > > > >
> > > > > > > > > > > I meant if we have vq->irq_pending, we need to call vring_interrupt()
> > > > > > > > > > > in the virtio_device_ready() and synchronize the IRQ handlers with
> > > > > > > > > > > spinlock or others.
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > 2) enable this by default, so I don't object, but this may have some risk
> > > > > > > > > > > > > for old hypervisors
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > The risk if there's a driver adding buffers without setting DRIVER_OK.
> > > > > > > > > > >
> > > > > > > > > > > Probably not, we have devices that accept random inputs from outside,
> > > > > > > > > > > net, console, input etc. I've done a round of audits of the Qemu
> > > > > > > > > > > codes. They look all fine since day0.
> > > > > > > > > > >
> > > > > > > > > > > > So with this approach, how about we rename the flag "driver_ok"?
> > > > > > > > > > > > And then add_buf can actually test it and BUG_ON if not there  (at least
> > > > > > > > > > > > in the debug build).
> > > > > > > > > > >
> > > > > > > > > > > This looks like a hardening of the driver in the core instead of the
> > > > > > > > > > > device. I think it can be done but in a separate series.
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > And going down from there, how about we cache status in the
> > > > > > > > > > > > device? Then we don't need to keep re-reading it every time,
> > > > > > > > > > > > speeding boot up a tiny bit.
> > > > > > > > > > >
> > > > > > > > > > > I don't fully understand here, actually spec requires status to be
> > > > > > > > > > > read back for validation in many cases.
> > > > > > > > > > >
> > > > > > > > > > > Thanks
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > > > > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > > > > > > > > > > > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > > > > > > > > > > > >   }
> > > > > > > > > > > > > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > > > > > > > > > > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > > > > > > > > > > > > >   {
> > > > > > > > > > > > > > > + struct virtqueue *_vq = v;
> > > > > > > > > > > > > > > + struct virtio_device *vdev = _vq->vdev;
> > > > > > > > > > > > > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > > > > > > > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > > > > > > > > > > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > > > > > > > > > > > > +         return IRQ_NONE;
> > > > > > > > > > > > > > > + }
> > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > >           if (!more_used(vq)) {
> > > > > > > > > > > > > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > > > > > > > > > > > > >                   return IRQ_NONE;
> > > > > > > > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > > > > > > > index 5464f398912a..957d6ad604ac 100644
> > > > > > > > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > > > > > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > > > > > > > > > > > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > > > > > > > > > > > > >    * @config_enabled: configuration change reporting enabled
> > > > > > > > > > > > > > >    * @config_change_pending: configuration change reported while disabled
> > > > > > > > > > > > > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > > > > > > > > > > > > + * @irq_soft_enabled: callbacks enabled
> > > > > > > > > > > > > > >    * @config_lock: protects configuration change reporting
> > > > > > > > > > > > > > >    * @dev: underlying device.
> > > > > > > > > > > > > > >    * @id: the device type identification (used to match it with a driver).
> > > > > > > > > > > > > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > > > > > > > > > > > > >           bool failed;
> > > > > > > > > > > > > > >           bool config_enabled;
> > > > > > > > > > > > > > >           bool config_change_pending;
> > > > > > > > > > > > > > > + bool irq_soft_check;
> > > > > > > > > > > > > > > + bool irq_soft_enabled;
> > > > > > > > > > > > > > >           spinlock_t config_lock;
> > > > > > > > > > > > > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > > > > > > > > > > > > >           struct device dev;
> > > > > > > > > > > > > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > > > > > > > > > > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > > > > > > > > > > > > --- a/include/linux/virtio_config.h
> > > > > > > > > > > > > > > +++ b/include/linux/virtio_config.h
> > > > > > > > > > > > > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > > > > > > > > > > > > >           return __virtio_test_bit(vdev, fbit);
> > > > > > > > > > > > > > >   }
> > > > > > > > > > > > > > > +/*
> > > > > > > > > > > > > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > > > > > > > > > > > > + * @vdev: the device
> > > > > > > > > > > > > > > + */
> > > > > > > > > > > > > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > > > > > > > > > > > > +{
> > > > > > > > > > > > > > > + if (!vdev->irq_soft_check)
> > > > > > > > > > > > > > > +         return true;
> > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > > + /*
> > > > > > > > > > > > > > > +  * Read irq_soft_enabled before reading other device specific
> > > > > > > > > > > > > > > +  * data. Paried with smp_store_relase() in
> > > > > > > > > > > > > > paired
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > Will fix.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Thanks
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > > > > > > > > > > > > +  * virtio_reset_device().
> > > > > > > > > > > > > > > +  */
> > > > > > > > > > > > > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > > > > > > > > > > > > +}
> > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > >   /**
> > > > > > > > > > > > > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > > > > > > > > > > > > >    * @vdev: the device
> > > > > > > > > > > > > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > > > > > > > > > > > > >           if (dev->config->enable_cbs)
> > > > > > > > > > > > > > >                     dev->config->enable_cbs(dev);
> > > > > > > > > > > > > > > + /*
> > > > > > > > > > > > > > > +  * Commit the driver setup before enabling the virtqueue
> > > > > > > > > > > > > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > > > > > > > > > > > > +  * virtio_irq_soft_enabled()
> > > > > > > > > > > > > > > +  */
> > > > > > > > > > > > > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > > > > > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > > > > > > >   }
> > > > > > > > > > > > > > > --
> > > > > > > > > > > > > > > 2.25.1
> > > > > > > > > > > >
> > > > > > > > > >
> > > > > > > >
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-30  5:14                       ` Re: Michael S. Tsirkin
@ 2022-03-30  5:53                         ` Jason Wang
  0 siblings, 0 replies; 414+ messages in thread
From: Jason Wang @ 2022-03-30  5:53 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: virtualization, linux-kernel, Marc Zyngier, Thomas Gleixner,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Wed, Mar 30, 2022 at 1:14 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Mar 30, 2022 at 10:40:59AM +0800, Jason Wang wrote:
> > On Tue, Mar 29, 2022 at 10:09 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Tue, Mar 29, 2022 at 03:12:14PM +0800, Jason Wang wrote:
> > > > > > > > > And requesting irq commits all memory otherwise all drivers would be
> > > > > > > > > broken,
> > > > > > > >
> > > > > > > > So I think we might talk different issues:
> > > > > > > >
> > > > > > > > 1) Whether request_irq() commits the previous setups, I think the
> > > > > > > > answer is yes, since the spin_unlock of desc->lock (release) can
> > > > > > > > guarantee this though there seems no documentation around
> > > > > > > > request_irq() to say this.
> > > > > > > >
> > > > > > > > And I can see at least drivers/video/fbdev/omap2/omapfb/dss/dispc.c is
> > > > > > > > using smp_wmb() before the request_irq().
> > > > > > > >
> > > > > > > > And even if write is ordered we still need read to be ordered to be
> > > > > > > > paired with that.
> > > > >
> > > > > IMO it synchronizes with the CPU to which irq is
> > > > > delivered. Otherwise basically all drivers would be broken,
> > > > > wouldn't they be?
> > > >
> > > > I guess it's because most of the drivers don't care much about the
> > > > buggy/malicious device.  And most of the devices may require an extra
> > > > step to enable device IRQ after request_irq(). Or it's the charge of
> > > > the driver to do the synchronization.
> > >
> > > It is true that the use-case of malicious devices is somewhat boutique.
> > > But I think most drivers do want to have their hotplug routines to be
> > > robust, yes.
> > >
> > > > > I don't know whether it's correct on all platforms, but if not
> > > > > we need to fix request_irq.
> > > > >
> > > > > > > >
> > > > > > > > > if it doesn't it just needs to be fixed, not worked around in
> > > > > > > > > virtio.
> > > > > > > >
> > > > > > > > 2) virtio drivers might do a lot of setups between request_irq() and
> > > > > > > > virtio_device_ready():
> > > > > > > >
> > > > > > > > request_irq()
> > > > > > > > driver specific setups
> > > > > > > > virtio_device_ready()
> > > > > > > >
> > > > > > > > CPU 0 probe) request_irq()
> > > > > > > > CPU 1 IRQ handler) read the uninitialized variable
> > > > > > > > CPU 0 probe) driver specific setups
> > > > > > > > CPU 0 probe) smp_store_release(intr_soft_enabled, true), commit the setups
> > > > > > > > CPU 1 IRQ handler) read irq_soft_enable as true
> > > > > > > > CPU 1 IRQ handler) use the uninitialized variable
> > > > > > > >
> > > > > > > > Thanks
> > > > > > >
> > > > > > >
> > > > > > > As I said, virtio_device_ready needs to do synchronize_irq.
> > > > > > > That will guarantee all setup is visible to the specific IRQ,
> > > > > >
> > > > > > Only the interrupt after synchronize_irq() returns.
> > > > >
> > > > > Anything else is a buggy device though.
> > > >
> > > > Yes, but the goal of this patch is to prevent the possible attack from
> > > > buggy(malicious) devices.
> > >
> > > Right. However if a driver of a *buggy* device somehow sees driver_ok =
> > > false even though it's actually initialized, that is not a deal breaker
> > > as that does not open us up to an attack.
> > >
> > > > >
> > > > > > >this
> > > > > > > is what it's point is.
> > > > > >
> > > > > > What happens if an interrupt is raised in the middle like:
> > > > > >
> > > > > > smp_store_release(dev->irq_soft_enabled, true)
> > > > > > IRQ handler
> > > > > > synchornize_irq()
> > > > > >
> > > > > > If we don't enforce a reading order, the IRQ handler may still see the
> > > > > > uninitialized variable.
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > IMHO variables should be initialized before request_irq
> > > > > to a value meaning "not a valid interrupt".
> > > > > Specifically driver_ok = false.
> > > > > Handler in the scenario you describe will then see !driver_ok
> > > > > and exit immediately.
> > > >
> > > > So just to make sure we're on the same page.
> > > >
> > > > 1) virtio_reset_device() will set the driver_ok to false;
> > > > 2) virtio_device_ready() will set the driver_ok to true
> > > >
> > > > So for virtio drivers, it often did:
> > > >
> > > > 1) virtio_reset_device()
> > > > 2) find_vqs() which will call request_irq()
> > > > 3) other driver specific setups
> > > > 4) virtio_device_ready()
> > > >
> > > > In virtio_device_ready(), the patch perform the following currently:
> > > >
> > > > smp_store_release(driver_ok, true);
> > > > set_status(DRIVER_OK);
> > > >
> > > > Per your suggestion, to add synchronize_irq() after
> > > > smp_store_release() so we had
> > > >
> > > > smp_store_release(driver_ok, true);
> > > > synchornize_irq()
> > > > set_status(DRIVER_OK)
> > > >
> > > > Suppose there's a interrupt raised before the synchronize_irq(), if we do:
> > > >
> > > > if (READ_ONCE(driver_ok)) {
> > > >       vq->callback()
> > > > }
> > > >
> > > > It will see the driver_ok as true but how can we make sure
> > > > vq->callback sees the driver specific setups (3) above?
> > > >
> > > > And an example is virtio_scsi():
> > > >
> > > > virtio_reset_device()
> > > > virtscsi_probe()
> > > >     virtscsi_init()
> > > >         virtio_find_vqs()
> > > >         ...
> > > >         virtscsi_init_vq(&vscsi->event_vq, vqs[1])
> > > >     ....
> > > >     virtio_device_ready()
> > > >
> > > > In virtscsi_event_done():
> > > >
> > > > virtscsi_event_done():
> > > >     virtscsi_vq_done(vscsi, &vscsi->event_vq, ...);
> > > >
> > > > We need to make sure the even_done reads driver_ok before read vscsi->event_vq.
> > > >
> > > > Thanks
> > >
> > >
> > > See response by Thomas. A simple if (!dev->driver_ok) should be enough,
> > > it's all under a lock.
> >
> > Ordered through ACQUIRE+RELEASE actually since the irq handler is not
> > running under the lock.
> >
> > Another question, for synchronize_irq() do you prefer
> >
> > 1) transport specific callbacks
> > or
> > 2) a simple synchornize_rcu()
> >
> > Thanks
>
>
> 1) I think, and I'd add a wrapper so we can switch to 2 if we really
> want to. But for now synchronizing the specific irq is obviously designed to
> make any changes to memory visible to this irq. that
> seems cleaner and easier to understand than memory ordering tricks
> and relying on side effects of synchornize_rcu, even though
> internally this all boils down to memory ordering since
> memory is what's used to implement locks :).
> Not to mention, synchronize_irq just scales much better from performance
> POV.

Ok. Let me try to do that in V2.

Thanks

>
>
> > >
> > > > >
> > > > >
> > > > > > >
> > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > We use smp_store_relase()
> > > > > > > > > > > > to make sure the driver commits the setup before enabling the irq. It
> > > > > > > > > > > > means the read needs to be ordered as well in vring_interrupt().
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > Although I couldn't find anything about this in memory-barriers.txt
> > > > > > > > > > > > > which surprises me.
> > > > > > > > > > > > >
> > > > > > > > > > > > > CC Paul to help make sure I'm right.
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > To avoid breaking legacy device which can send IRQ before DRIVER_OK, a
> > > > > > > > > > > > > > > > module parameter is introduced to enable the hardening so function
> > > > > > > > > > > > > > > > hardening is disabled by default.
> > > > > > > > > > > > > > > Which devices are these? How come they send an interrupt before there
> > > > > > > > > > > > > > > are any buffers in any queues?
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > I copied this from the commit log for 22b7050a024d7
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > "
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >     This change will also benefit old hypervisors (before 2009)
> > > > > > > > > > > > > >     that send interrupts without checking DRIVER_OK: previously,
> > > > > > > > > > > > > >     the callback could race with driver-specific initialization.
> > > > > > > > > > > > > > "
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > If this is only for config interrupt, I can remove the above log.
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > This is only for config interrupt.
> > > > > > > > > > > >
> > > > > > > > > > > > Ok.
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > > > > > > > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > > > > > > > > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > > > > > > > > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > > > > > > > > > > > handler because it uses spinlock to do the synchronization which is
> > > > > > > > > > > > > > > > expensive.
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > ---
> > > > > > > > > > > > > > > >   drivers/virtio/virtio.c       | 19 +++++++++++++++++++
> > > > > > > > > > > > > > > >   drivers/virtio/virtio_ring.c  |  9 ++++++++-
> > > > > > > > > > > > > > > >   include/linux/virtio.h        |  4 ++++
> > > > > > > > > > > > > > > >   include/linux/virtio_config.h | 25 +++++++++++++++++++++++++
> > > > > > > > > > > > > > > >   4 files changed, 56 insertions(+), 1 deletion(-)
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > > > > > > > > > > > index 8dde44ea044a..85e331efa9cc 100644
> > > > > > > > > > > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > > > > > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > > > > > > > > > > @@ -7,6 +7,12 @@
> > > > > > > > > > > > > > > >   #include <linux/of.h>
> > > > > > > > > > > > > > > >   #include <uapi/linux/virtio_ids.h>
> > > > > > > > > > > > > > > > +static bool irq_hardening = false;
> > > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > > > +module_param(irq_hardening, bool, 0444);
> > > > > > > > > > > > > > > > +MODULE_PARM_DESC(irq_hardening,
> > > > > > > > > > > > > > > > +          "Disalbe IRQ software processing when it is not expected");
> > > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > > >   /* Unique numbering for virtio devices. */
> > > > > > > > > > > > > > > >   static DEFINE_IDA(virtio_index_ida);
> > > > > > > > > > > > > > > > @@ -220,6 +226,15 @@ static int virtio_features_ok(struct virtio_device *dev)
> > > > > > > > > > > > > > > >    * */
> > > > > > > > > > > > > > > >   void virtio_reset_device(struct virtio_device *dev)
> > > > > > > > > > > > > > > >   {
> > > > > > > > > > > > > > > > + /*
> > > > > > > > > > > > > > > > +  * The below synchronize_rcu() guarantees that any
> > > > > > > > > > > > > > > > +  * interrupt for this line arriving after
> > > > > > > > > > > > > > > > +  * synchronize_rcu() has completed is guaranteed to see
> > > > > > > > > > > > > > > > +  * irq_soft_enabled == false.
> > > > > > > > > > > > > > > > News to me; I did not know synchronize_rcu has anything to do
> > > > > > > > > > > > > > > > with interrupts. Didn't you intend to use synchronize_irq?
> > > > > > > > > > > > > > > > I am not even 100% sure synchronize_rcu is by design a memory barrier,
> > > > > > > > > > > > > > > > though it most likely is ...
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > According to the comment above tree RCU version of synchronize_rcu():
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > """
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >  * RCU read-side critical sections are delimited by rcu_read_lock()
> > > > > > > > > > > > > >  * and rcu_read_unlock(), and may be nested.  In addition, but only in
> > > > > > > > > > > > > >  * v5.0 and later, regions of code across which interrupts, preemption,
> > > > > > > > > > > > > >  * or softirqs have been disabled also serve as RCU read-side critical
> > > > > > > > > > > > > >  * sections.  This includes hardware interrupt handlers, softirq handlers,
> > > > > > > > > > > > > >  * and NMI handlers.
> > > > > > > > > > > > > > """
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > So interrupt handlers are treated as read-side critical sections.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > > And it has a comment explaining the barrier:
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > """
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >  * Note that this guarantee implies further memory-ordering guarantees.
> > > > > > > > > > > > > >  * On systems with more than one CPU, when synchronize_rcu() returns,
> > > > > > > > > > > > > >  * each CPU is guaranteed to have executed a full memory barrier since
> > > > > > > > > > > > > >  * the end of its last RCU read-side critical section whose beginning
> > > > > > > > > > > > > >  * preceded the call to synchronize_rcu().  In addition, each CPU having
> > > > > > > > > > > > > > """
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > So on SMP it provides a full barrier. And for UP/tiny RCU we don't need the
> > > > > > > > > > > > > > > barrier: if the interrupt comes after WRITE_ONCE() it will see the
> > > > > > > > > > > > > > irq_soft_enabled as false.
> > > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > You are right. So then
> > > > > > > > > > > > > 1. I do not think we need load_acquire - why is it needed? Just
> > > > > > > > > > > > >    READ_ONCE should do.
> > > > > > > > > > > >
> > > > > > > > > > > > See above.
> > > > > > > > > > > >
> > > > > > > > > > > > > 2. isn't synchronize_irq also doing the same thing?
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > Yes, but it requires a config ops since the IRQ knowledge is transport specific.
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > +  */
> > > > > > > > > > > > > > > > + WRITE_ONCE(dev->irq_soft_enabled, false);
> > > > > > > > > > > > > > > > + synchronize_rcu();
> > > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > > >           dev->config->reset(dev);
> > > > > > > > > > > > > > > >   }
> > > > > > > > > > > > > > > >   EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > > > > > > > > > > > > > Please add comment explaining where it will be enabled.
> > > > > > > > > > > > > > > Also, we *really* don't need to synch if it was already disabled,
> > > > > > > > > > > > > > > let's not add useless overhead to the boot sequence.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Ok.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > @@ -427,6 +442,10 @@ int register_virtio_device(struct virtio_device *dev)
> > > > > > > > > > > > > > > >           spin_lock_init(&dev->config_lock);
> > > > > > > > > > > > > > > >           dev->config_enabled = false;
> > > > > > > > > > > > > > > >           dev->config_change_pending = false;
> > > > > > > > > > > > > > > > + dev->irq_soft_check = irq_hardening;
> > > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > > > + if (dev->irq_soft_check)
> > > > > > > > > > > > > > > > +         dev_info(&dev->dev, "IRQ hardening is enabled\n");
> > > > > > > > > > > > > > > >           /* We always start by resetting the device, in case a previous
> > > > > > > > > > > > > > > >            * driver messed it up.  This also tests that code path a little. */
> > > > > > > > > > > > > > > one of the points of hardening is it's also helpful for buggy
> > > > > > > > > > > > > > > devices. this flag defeats the purpose.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Do you mean:
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > 1) we need something like config_enable? This seems not easy to be
> > > > > > > > > > > > > > implemented without obvious overhead, mainly the synchronize with the
> > > > > > > > > > > > > > interrupt handlers
> > > > > > > > > > > > >
> > > > > > > > > > > > > But synchronize is only on tear-down path. That is not critical for any
> > > > > > > > > > > > > users at the moment, even less than probe.
> > > > > > > > > > > >
> > > > > > > > > > > > I meant if we have vq->irq_pending, we need to call vring_interrupt()
> > > > > > > > > > > > in the virtio_device_ready() and synchronize the IRQ handlers with
> > > > > > > > > > > > spinlock or others.
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > > 2) enable this by default, so I don't object, but this may have some risk
> > > > > > > > > > > > > > for old hypervisors
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > > The risk is if there's a driver adding buffers without setting DRIVER_OK.
> > > > > > > > > > > >
> > > > > > > > > > > > Probably not, we have devices that accept random inputs from outside,
> > > > > > > > > > > > net, console, input etc. I've done a round of audits of the Qemu
> > > > > > > > > > > > codes. They look all fine since day0.
> > > > > > > > > > > >
> > > > > > > > > > > > > So with this approach, how about we rename the flag "driver_ok"?
> > > > > > > > > > > > > And then add_buf can actually test it and BUG_ON if not there  (at least
> > > > > > > > > > > > > in the debug build).
> > > > > > > > > > > >
> > > > > > > > > > > > This looks like a hardening of the driver in the core instead of the
> > > > > > > > > > > > device. I think it can be done but in a separate series.
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > And going down from there, how about we cache status in the
> > > > > > > > > > > > > device? Then we don't need to keep re-reading it every time,
> > > > > > > > > > > > > speeding boot up a tiny bit.
> > > > > > > > > > > >
> > > > > > > > > > > > I don't fully understand here, actually spec requires status to be
> > > > > > > > > > > > read back for validation in many cases.
> > > > > > > > > > > >
> > > > > > > > > > > > Thanks
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > > > > index 962f1477b1fa..0170f8c784d8 100644
> > > > > > > > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > > > > @@ -2144,10 +2144,17 @@ static inline bool more_used(const struct vring_virtqueue *vq)
> > > > > > > > > > > > > > > >           return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > > > > > > > > > > > > >   }
> > > > > > > > > > > > > > > > -irqreturn_t vring_interrupt(int irq, void *_vq)
> > > > > > > > > > > > > > > > +irqreturn_t vring_interrupt(int irq, void *v)
> > > > > > > > > > > > > > > >   {
> > > > > > > > > > > > > > > > + struct virtqueue *_vq = v;
> > > > > > > > > > > > > > > > + struct virtio_device *vdev = _vq->vdev;
> > > > > > > > > > > > > > > >           struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > > > > > > > > > > + if (!virtio_irq_soft_enabled(vdev)) {
> > > > > > > > > > > > > > > > +         dev_warn_once(&vdev->dev, "virtio vring IRQ raised before DRIVER_OK");
> > > > > > > > > > > > > > > > +         return IRQ_NONE;
> > > > > > > > > > > > > > > > + }
> > > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > > >           if (!more_used(vq)) {
> > > > > > > > > > > > > > > >                   pr_debug("virtqueue interrupt with no work for %p\n", vq);
> > > > > > > > > > > > > > > >                   return IRQ_NONE;
> > > > > > > > > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > > > > > > > > index 5464f398912a..957d6ad604ac 100644
> > > > > > > > > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > > > > > > > > @@ -95,6 +95,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > > > > > > > > > > > > > > >    * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
> > > > > > > > > > > > > > > >    * @config_enabled: configuration change reporting enabled
> > > > > > > > > > > > > > > >    * @config_change_pending: configuration change reported while disabled
> > > > > > > > > > > > > > > > + * @irq_soft_check: whether or not to check @irq_soft_enabled
> > > > > > > > > > > > > > > > + * @irq_soft_enabled: callbacks enabled
> > > > > > > > > > > > > > > >    * @config_lock: protects configuration change reporting
> > > > > > > > > > > > > > > >    * @dev: underlying device.
> > > > > > > > > > > > > > > >    * @id: the device type identification (used to match it with a driver).
> > > > > > > > > > > > > > > > @@ -109,6 +111,8 @@ struct virtio_device {
> > > > > > > > > > > > > > > >           bool failed;
> > > > > > > > > > > > > > > >           bool config_enabled;
> > > > > > > > > > > > > > > >           bool config_change_pending;
> > > > > > > > > > > > > > > > + bool irq_soft_check;
> > > > > > > > > > > > > > > > + bool irq_soft_enabled;
> > > > > > > > > > > > > > > >           spinlock_t config_lock;
> > > > > > > > > > > > > > > >           spinlock_t vqs_list_lock; /* Protects VQs list access */
> > > > > > > > > > > > > > > >           struct device dev;
> > > > > > > > > > > > > > > > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > > > > > > > > > > > > > > > index dafdc7f48c01..9c1b61f2e525 100644
> > > > > > > > > > > > > > > > --- a/include/linux/virtio_config.h
> > > > > > > > > > > > > > > > +++ b/include/linux/virtio_config.h
> > > > > > > > > > > > > > > > @@ -174,6 +174,24 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
> > > > > > > > > > > > > > > >           return __virtio_test_bit(vdev, fbit);
> > > > > > > > > > > > > > > >   }
> > > > > > > > > > > > > > > > +/*
> > > > > > > > > > > > > > > > + * virtio_irq_soft_enabled: whether we can execute callbacks
> > > > > > > > > > > > > > > > + * @vdev: the device
> > > > > > > > > > > > > > > > + */
> > > > > > > > > > > > > > > > +static inline bool virtio_irq_soft_enabled(const struct virtio_device *vdev)
> > > > > > > > > > > > > > > > +{
> > > > > > > > > > > > > > > > + if (!vdev->irq_soft_check)
> > > > > > > > > > > > > > > > +         return true;
> > > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > > > + /*
> > > > > > > > > > > > > > > > +  * Read irq_soft_enabled before reading other device specific
> > > > > > > > > > > > > > > > +  * data. Paried with smp_store_relase() in
> > > > > > > > > > > > > > > paired
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Will fix.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Thanks
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > +  * virtio_device_ready() and WRITE_ONCE()/synchronize_rcu() in
> > > > > > > > > > > > > > > > +  * virtio_reset_device().
> > > > > > > > > > > > > > > > +  */
> > > > > > > > > > > > > > > > + return smp_load_acquire(&vdev->irq_soft_enabled);
> > > > > > > > > > > > > > > > +}
> > > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > > >   /**
> > > > > > > > > > > > > > > >    * virtio_has_dma_quirk - determine whether this device has the DMA quirk
> > > > > > > > > > > > > > > >    * @vdev: the device
> > > > > > > > > > > > > > > > @@ -236,6 +254,13 @@ void virtio_device_ready(struct virtio_device *dev)
> > > > > > > > > > > > > > > >           if (dev->config->enable_cbs)
> > > > > > > > > > > > > > > >                     dev->config->enable_cbs(dev);
> > > > > > > > > > > > > > > > + /*
> > > > > > > > > > > > > > > > +  * Commit the driver setup before enabling the virtqueue
> > > > > > > > > > > > > > > > +  * callbacks. Paried with smp_load_acuqire() in
> > > > > > > > > > > > > > > > +  * virtio_irq_soft_enabled()
> > > > > > > > > > > > > > > > +  */
> > > > > > > > > > > > > > > > + smp_store_release(&dev->irq_soft_enabled, true);
> > > > > > > > > > > > > > > > +
> > > > > > > > > > > > > > > >           BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > > > > > > > >           dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
> > > > > > > > > > > > > > > >   }
> > > > > > > > > > > > > > > > --
> > > > > > > > > > > > > > > > 2.25.1
> > > > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > >
> > > > > > >
> > > > >
> > >
>


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-28 10:40               ` Re: Michael S. Tsirkin
  2022-03-29  7:12                 ` Re: Jason Wang
@ 2022-03-29  8:35                 ` Thomas Gleixner
  2022-03-29 14:37                   ` Re: Michael S. Tsirkin
  2022-04-12  6:55                   ` Re: Michael S. Tsirkin
  1 sibling, 2 replies; 414+ messages in thread
From: Thomas Gleixner @ 2022-03-29  8:35 UTC (permalink / raw)
  To: Michael S. Tsirkin, Jason Wang
  Cc: virtualization, linux-kernel, Marc Zyngier, Peter Zijlstra,
	Stefano Garzarella, Keir Fraser, Paul E. McKenney

On Mon, Mar 28 2022 at 06:40, Michael S. Tsirkin wrote:
> On Mon, Mar 28, 2022 at 02:18:22PM +0800, Jason Wang wrote:
>> > > So I think we might talk different issues:
>> > >
>> > > 1) Whether request_irq() commits the previous setups, I think the
>> > > answer is yes, since the spin_unlock of desc->lock (release) can
>> > > guarantee this though there seems no documentation around
>> > > request_irq() to say this.
>> > >
>> > > And I can see at least drivers/video/fbdev/omap2/omapfb/dss/dispc.c is
>> > > using smp_wmb() before the request_irq().

That's a completely bogus example, especially as there is not a single
smp_rmb() which pairs with the smp_wmb().

>> > > And even if write is ordered we still need read to be ordered to be
>> > > paired with that.
>
> IMO it synchronizes with the CPU to which irq is
> delivered. Otherwise basically all drivers would be broken,
> wouldn't they be?
> I don't know whether it's correct on all platforms, but if not
> we need to fix request_irq.

There is nothing to fix:

request_irq()
   raw_spin_lock_irq(desc->lock);       // ACQUIRE
   ....
   raw_spin_unlock_irq(desc->lock);     // RELEASE

interrupt()
   raw_spin_lock(desc->lock);           // ACQUIRE
   set status to IN_PROGRESS
   raw_spin_unlock(desc->lock);         // RELEASE
   invoke handler()

So anything which the driver set up _before_ request_irq() is visible to
the interrupt handler. No?

>> What happens if an interrupt is raised in the middle like:
>> 
>> smp_store_release(dev->irq_soft_enabled, true)
>> IRQ handler
>> synchornize_irq()

This is bogus. The obvious order of things is:

    dev->ok = false;
    request_irq();

    moar_setup();
    synchronize_irq();  // ACQUIRE + RELEASE
    dev->ok = true;

The reverse operation on teardown:

    dev->ok = false;
    synchronize_irq();  // ACQUIRE + RELEASE

    teardown();

So in both cases a simple check in the handler is sufficient:

handler()
    if (!dev->ok)
    	return;

I'm not understanding what you folks are trying to "fix" here. If any
driver does this in the wrong order, then the driver is broken.

Sure, you can do the same with:

    dev->ok = false;
    request_irq();
    moar_setup();
    smp_wmb();
    dev->ok = true;

for the price of a smp_rmb() in the interrupt handler:

handler()
    if (!dev->ok)
    	return;
    smp_rmb();

but that's only working for the setup case correctly and not for
teardown.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-29  8:35                 ` Re: Thomas Gleixner
@ 2022-03-29 14:37                   ` Michael S. Tsirkin
  2022-03-29 18:13                     ` Re: Thomas Gleixner
  2022-04-12  6:55                   ` Re: Michael S. Tsirkin
  1 sibling, 1 reply; 414+ messages in thread
From: Michael S. Tsirkin @ 2022-03-29 14:37 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Jason Wang, virtualization, linux-kernel, Marc Zyngier,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Tue, Mar 29, 2022 at 10:35:21AM +0200, Thomas Gleixner wrote:
> On Mon, Mar 28 2022 at 06:40, Michael S. Tsirkin wrote:
> > On Mon, Mar 28, 2022 at 02:18:22PM +0800, Jason Wang wrote:
> >> > > So I think we might talk different issues:
> >> > >
> >> > > 1) Whether request_irq() commits the previous setups, I think the
> >> > > answer is yes, since the spin_unlock of desc->lock (release) can
> >> > > guarantee this though there seems no documentation around
> >> > > request_irq() to say this.
> >> > >
> >> > > And I can see at least drivers/video/fbdev/omap2/omapfb/dss/dispc.c is
> >> > > using smp_wmb() before the request_irq().
> 
> That's a complete bogus example especially as there is not a single
> smp_rmb() which pairs with the smp_wmb().
> 
> >> > > And even if write is ordered we still need read to be ordered to be
> >> > > paired with that.
> >
> > IMO it synchronizes with the CPU to which irq is
> > delivered. Otherwise basically all drivers would be broken,
> > wouldn't they be?
> > I don't know whether it's correct on all platforms, but if not
> > we need to fix request_irq.
> 
> There is nothing to fix:
> 
> request_irq()
>    raw_spin_lock_irq(desc->lock);       // ACQUIRE
>    ....
>    raw_spin_unlock_irq(desc->lock);     // RELEASE
> 
> interrupt()
>    raw_spin_lock(desc->lock);           // ACQUIRE
>    set status to IN_PROGRESS
>    raw_spin_unlock(desc->lock);         // RELEASE
>    invoke handler()
> 
> So anything which the driver set up _before_ request_irq() is visible to
> the interrupt handler. No?
> >> What happens if an interrupt is raised in the middle like:
> >> 
> >> smp_store_release(dev->irq_soft_enabled, true)
> >> IRQ handler
> >> synchornize_irq()
> 
> This is bogus. The obvious order of things is:
> 
>     dev->ok = false;
>     request_irq();
> 
>     moar_setup();
>     synchronize_irq();  // ACQUIRE + RELEASE
>     dev->ok = true;
> 
> The reverse operation on teardown:
> 
>     dev->ok = false;
>     synchronize_irq();  // ACQUIRE + RELEASE
> 
>     teardown();
> 
> So in both cases a simple check in the handler is sufficient:
> 
> handler()
>     if (!dev->ok)
>     	return;


Thanks a lot for the analysis Thomas. This is more or less what I was
thinking.

> 
> I'm not understanding what you folks are trying to "fix" here.

We are trying to fix the driver since at the moment it does not
have the dev->ok flag at all.


And I suspect virtio is not alone in that.
So it would have been nice if there was a standard flag
replacing the driver-specific dev->ok above, and ideally
would also handle the case of an interrupt triggering
too early by deferring the interrupt until the flag is set.

And in fact, it does kind of exist: IRQF_NO_AUTOEN, and you would call
enable_irq instead of dev->ok = true, except
- it doesn't work with affinity managed IRQs
- it does not work with shared IRQs

So using dev->ok as you propose above seems better at this point.

> If any
> driver does this in the wrong order, then the driver is broken.

I agree, however:
$ git grep synchronize_irq `git grep -l request_irq drivers/net/`|wc -l
113
$ git grep -l request_irq drivers/net/|wc -l
397

I suspect there are more drivers which in theory need the
synchronize_irq dance but in practice do not execute it.


> Sure, you can do the same with:
> 
>     dev->ok = false;
>     request_irq();
>     moar_setup();
>     smp_wmb();
>     dev->ok = true;
> 
> for the price of a smp_rmb() in the interrupt handler:
> 
> handler()
>     if (!dev->ok)
>     	return;
>     smp_rmb();
> 
> but that's only working for the setup case correctly and not for
> teardown.
> 
> Thanks,
> 
>         tglx


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-29 14:37                   ` Re: Michael S. Tsirkin
@ 2022-03-29 18:13                     ` Thomas Gleixner
  2022-03-29 22:04                       ` Re: Michael S. Tsirkin
  0 siblings, 1 reply; 414+ messages in thread
From: Thomas Gleixner @ 2022-03-29 18:13 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Jason Wang, virtualization, linux-kernel, Marc Zyngier,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Tue, Mar 29 2022 at 10:37, Michael S. Tsirkin wrote:
> On Tue, Mar 29, 2022 at 10:35:21AM +0200, Thomas Gleixner wrote:
> We are trying to fix the driver since at the moment it does not
> have the dev->ok flag at all.
>
> And I suspect virtio is not alone in that.
> So it would have been nice if there was a standard flag
> replacing the driver-specific dev->ok above, and ideally
> would also handle the case of an interrupt triggering
> too early by deferring the interrupt until the flag is set.
>
> And in fact, it does kind of exist: IRQF_NO_AUTOEN, and you would call
> enable_irq instead of dev->ok = true, except
> - it doesn't work with affinity managed IRQs
> - it does not work with shared IRQs
>
> So using dev->ok as you propose above seems better at this point.

Unless there is a large enough number of drivers which could make use of a
generic mechanism for that.

>> If any driver does this in the wrong order, then the driver is
>> broken.
> 
> I agree, however:
> $ git grep synchronize_irq `git grep -l request_irq drivers/net/`|wc -l
> 113
> $ git grep -l request_irq drivers/net/|wc -l
> 397
>
> I suspect there are more drivers which in theory need the
> synchronize_irq dance but in practice do not execute it.

That really depends on when the driver requests the interrupt, when
it actually enables the interrupt in the device itself and how the
interrupt service routine works.

So just doing that grep dance does not tell much. You really have to do
a case by case analysis.

Thanks,

        tglx


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-29 18:13                     ` Re: Thomas Gleixner
@ 2022-03-29 22:04                       ` Michael S. Tsirkin
  2022-03-30  2:38                         ` Re: Jason Wang
  0 siblings, 1 reply; 414+ messages in thread
From: Michael S. Tsirkin @ 2022-03-29 22:04 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Jason Wang, virtualization, linux-kernel, Marc Zyngier,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Tue, Mar 29, 2022 at 08:13:57PM +0200, Thomas Gleixner wrote:
> On Tue, Mar 29 2022 at 10:37, Michael S. Tsirkin wrote:
> > On Tue, Mar 29, 2022 at 10:35:21AM +0200, Thomas Gleixner wrote:
> > We are trying to fix the driver since at the moment it does not
> > have the dev->ok flag at all.
> >
> > And I suspect virtio is not alone in that.
> > So it would have been nice if there was a standard flag
> > replacing the driver-specific dev->ok above, and ideally
> > would also handle the case of an interrupt triggering
> > too early by deferring the interrupt until the flag is set.
> >
> > And in fact, it does kind of exist: IRQF_NO_AUTOEN, and you would call
> > enable_irq instead of dev->ok = true, except
> > - it doesn't work with affinity managed IRQs
> > - it does not work with shared IRQs
> >
> > So using dev->ok as you propose above seems better at this point.
> 
> Unless there is a big enough amount of drivers which could make use of a
> generic mechanism for that.
> 
> >> If any driver does this in the wrong order, then the driver is
> >> broken.
> > 
> > I agree, however:
> > $ git grep synchronize_irq `git grep -l request_irq drivers/net/`|wc -l
> > 113
> > $ git grep -l request_irq drivers/net/|wc -l
> > 397
> >
> > I suspect there are more drivers which in theory need the
> > synchronize_irq dance but in practice do not execute it.
> 
> That really depends on when the driver requests the interrupt, when
> it actually enables the interrupt in the device itself

This last point does not matter since we are talking about protecting
against buggy/malicious devices. They can inject the interrupt anyway
even if driver did not configure it.

> and how the
> interrupt service routine works.
> 
> So just doing that grep dance does not tell much. You really have to do
> a case by case analysis.
> 
> Thanks,
> 
>         tglx


I agree. In fact, at least for network drivers the standard approach is to
request interrupts in the open call; virtio net is unusual
in doing it in probe. We should consider changing that.
Jason?

-- 
MST


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-29 22:04                       ` Re: Michael S. Tsirkin
@ 2022-03-30  2:38                         ` Jason Wang
  2022-03-30  5:09                           ` Re: Michael S. Tsirkin
  0 siblings, 1 reply; 414+ messages in thread
From: Jason Wang @ 2022-03-30  2:38 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Thomas Gleixner, virtualization, linux-kernel, Marc Zyngier,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Wed, Mar 30, 2022 at 6:04 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Tue, Mar 29, 2022 at 08:13:57PM +0200, Thomas Gleixner wrote:
> > On Tue, Mar 29 2022 at 10:37, Michael S. Tsirkin wrote:
> > > On Tue, Mar 29, 2022 at 10:35:21AM +0200, Thomas Gleixner wrote:
> > > We are trying to fix the driver since at the moment it does not
> > > have the dev->ok flag at all.
> > >
> > > And I suspect virtio is not alone in that.
> > > So it would have been nice if there was a standard flag
> > > replacing the driver-specific dev->ok above, and ideally
> > > would also handle the case of an interrupt triggering
> > > too early by deferring the interrupt until the flag is set.
> > >
> > > And in fact, it does kind of exist: IRQF_NO_AUTOEN, and you would call
> > > enable_irq instead of dev->ok = true, except
> > > - it doesn't work with affinity managed IRQs
> > > - it does not work with shared IRQs
> > >
> > > So using dev->ok as you propose above seems better at this point.
> >
> > Unless there is a big enough amount of drivers which could make use of a
> > generic mechanism for that.
> >
> > >> If any driver does this in the wrong order, then the driver is
> > >> broken.
> > >
> > > I agree, however:
> > > $ git grep synchronize_irq `git grep -l request_irq drivers/net/`|wc -l
> > > 113
> > > $ git grep -l request_irq drivers/net/|wc -l
> > > 397
> > >
> > > I suspect there are more drivers which in theory need the
> > > synchronize_irq dance but in practice do not execute it.
> >
> > That really depends on when the driver requests the interrupt, when
> > it actually enables the interrupt in the device itself
>
> This last point does not matter since we are talking about protecting
> against buggy/malicious devices. They can inject the interrupt anyway
> even if driver did not configure it.
>
> > and how the
> > interrupt service routine works.
> >
> > So just doing that grep dance does not tell much. You really have to do
> > a case by case analysis.
> >
> > Thanks,
> >
> >         tglx
>
>
> I agree. In fact, at least for network the standard approach is to
> request interrupts in the open call, virtio net is unusual
> in doing it in probe. We should consider changing that.
> Jason?

This probably works only for virtio-net and it looks like not trivial
since we don't have a specific core API to request interrupts.

Thanks

>
> --
> MST
>


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-30  2:38                         ` Re: Jason Wang
@ 2022-03-30  5:09                           ` Michael S. Tsirkin
  2022-03-30  5:53                             ` Re: Jason Wang
  0 siblings, 1 reply; 414+ messages in thread
From: Michael S. Tsirkin @ 2022-03-30  5:09 UTC (permalink / raw)
  To: Jason Wang
  Cc: Thomas Gleixner, virtualization, linux-kernel, Marc Zyngier,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Wed, Mar 30, 2022 at 10:38:06AM +0800, Jason Wang wrote:
> On Wed, Mar 30, 2022 at 6:04 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Tue, Mar 29, 2022 at 08:13:57PM +0200, Thomas Gleixner wrote:
> > > On Tue, Mar 29 2022 at 10:37, Michael S. Tsirkin wrote:
> > > > On Tue, Mar 29, 2022 at 10:35:21AM +0200, Thomas Gleixner wrote:
> > > > We are trying to fix the driver since at the moment it does not
> > > > have the dev->ok flag at all.
> > > >
> > > > And I suspect virtio is not alone in that.
> > > > So it would have been nice if there was a standard flag
> > > > replacing the driver-specific dev->ok above, and ideally
> > > > would also handle the case of an interrupt triggering
> > > > too early by deferring the interrupt until the flag is set.
> > > >
> > > > And in fact, it does kind of exist: IRQF_NO_AUTOEN, and you would call
> > > > enable_irq instead of dev->ok = true, except
> > > > - it doesn't work with affinity managed IRQs
> > > > - it does not work with shared IRQs
> > > >
> > > > So using dev->ok as you propose above seems better at this point.
> > >
> > > Unless there is a big enough amount of drivers which could make use of a
> > > generic mechanism for that.
> > >
> > > >> If any driver does this in the wrong order, then the driver is
> > > >> broken.
> > > >
> > > > I agree, however:
> > > > $ git grep synchronize_irq `git grep -l request_irq drivers/net/`|wc -l
> > > > 113
> > > > $ git grep -l request_irq drivers/net/|wc -l
> > > > 397
> > > >
> > > > I suspect there are more drivers which in theory need the
> > > > synchronize_irq dance but in practice do not execute it.
> > >
> > > That really depends on when the driver requests the interrupt, when
> > > it actually enables the interrupt in the device itself
> >
> > This last point does not matter since we are talking about protecting
> > against buggy/malicious devices. They can inject the interrupt anyway
> > even if driver did not configure it.
> >
> > > and how the
> > > interrupt service routine works.
> > >
> > > So just doing that grep dance does not tell much. You really have to do
> > > a case by case analysis.
> > >
> > > Thanks,
> > >
> > >         tglx
> >
> >
> > I agree. In fact, at least for network the standard approach is to
> > request interrupts in the open call, virtio net is unusual
> > in doing it in probe. We should consider changing that.
> > Jason?
> 
> This probably works only for virtio-net and it looks like not trivial
> since we don't have a specific core API to request interrupts.
> 
> Thanks

We'll need a new API, for sure. E.g.  find vqs with no
callback on probe, and then virtio_request_vq_callbacks separately.

The existing API that specifies callbacks during find vqs
can be used by other drivers.

> >
> > --
> > MST
> >


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-30  5:09                           ` Re: Michael S. Tsirkin
@ 2022-03-30  5:53                             ` Jason Wang
  0 siblings, 0 replies; 414+ messages in thread
From: Jason Wang @ 2022-03-30  5:53 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Thomas Gleixner, virtualization, linux-kernel, Marc Zyngier,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Wed, Mar 30, 2022 at 1:09 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Mar 30, 2022 at 10:38:06AM +0800, Jason Wang wrote:
> > On Wed, Mar 30, 2022 at 6:04 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Tue, Mar 29, 2022 at 08:13:57PM +0200, Thomas Gleixner wrote:
> > > > On Tue, Mar 29 2022 at 10:37, Michael S. Tsirkin wrote:
> > > > > On Tue, Mar 29, 2022 at 10:35:21AM +0200, Thomas Gleixner wrote:
> > > > > We are trying to fix the driver since at the moment it does not
> > > > > have the dev->ok flag at all.
> > > > >
> > > > > And I suspect virtio is not alone in that.
> > > > > So it would have been nice if there was a standard flag
> > > > > replacing the driver-specific dev->ok above, and ideally
> > > > > would also handle the case of an interrupt triggering
> > > > > too early by deferring the interrupt until the flag is set.
> > > > >
> > > > > And in fact, it does kind of exist: IRQF_NO_AUTOEN, and you would call
> > > > > enable_irq instead of dev->ok = true, except
> > > > > - it doesn't work with affinity managed IRQs
> > > > > - it does not work with shared IRQs
> > > > >
> > > > > So using dev->ok as you propose above seems better at this point.
> > > >
> > > > Unless there is a big enough amount of drivers which could make use of a
> > > > generic mechanism for that.
> > > >
> > > > >> If any driver does this in the wrong order, then the driver is
> > > > >> broken.
> > > > >
> > > > > I agree, however:
> > > > > $ git grep synchronize_irq `git grep -l request_irq drivers/net/`|wc -l
> > > > > 113
> > > > > $ git grep -l request_irq drivers/net/|wc -l
> > > > > 397
> > > > >
> > > > > I suspect there are more drivers which in theory need the
> > > > > synchronize_irq dance but in practice do not execute it.
> > > >
> > > > That really depends on when the driver requests the interrupt, when
> > > > it actually enables the interrupt in the device itself
> > >
> > > This last point does not matter since we are talking about protecting
> > > against buggy/malicious devices. They can inject the interrupt anyway
> > > even if driver did not configure it.
> > >
> > > > and how the
> > > > interrupt service routine works.
> > > >
> > > > So just doing that grep dance does not tell much. You really have to do
> > > > a case by case analysis.
> > > >
> > > > Thanks,
> > > >
> > > >         tglx
> > >
> > >
> > > I agree. In fact, at least for network the standard approach is to
> > > request interrupts in the open call, virtio net is unusual
> > > in doing it in probe. We should consider changing that.
> > > Jason?
> >
> > This probably works only for virtio-net and it looks like not trivial
> > since we don't have a specific core API to request interrupts.
> >
> > Thanks
>
> We'll need a new API, for sure. E.g.  find vqs with no
> callback on probe, and then virtio_request_vq_callbacks separately.
>
> The existing API that specifies callbacks during find vqs
> can be used by other drivers.

Ok, I will do it.

Thanks

>
> > >
> > > --
> > > MST
> > >
>


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-03-29  8:35                 ` Re: Thomas Gleixner
  2022-03-29 14:37                   ` Re: Michael S. Tsirkin
@ 2022-04-12  6:55                   ` Michael S. Tsirkin
  1 sibling, 0 replies; 414+ messages in thread
From: Michael S. Tsirkin @ 2022-04-12  6:55 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Jason Wang, virtualization, linux-kernel, Marc Zyngier,
	Peter Zijlstra, Stefano Garzarella, Keir Fraser,
	Paul E. McKenney

On Tue, Mar 29, 2022 at 10:35:21AM +0200, Thomas Gleixner wrote:
> On Mon, Mar 28 2022 at 06:40, Michael S. Tsirkin wrote:
> > On Mon, Mar 28, 2022 at 02:18:22PM +0800, Jason Wang wrote:
> >> > > So I think we might talk different issues:
> >> > >
> >> > > 1) Whether request_irq() commits the previous setups, I think the
> >> > > answer is yes, since the spin_unlock of desc->lock (release) can
> >> > > guarantee this though there seems no documentation around
> >> > > request_irq() to say this.
> >> > >
> >> > > And I can see at least drivers/video/fbdev/omap2/omapfb/dss/dispc.c is
> >> > > using smp_wmb() before the request_irq().
> 
> That's a complete bogus example especially as there is not a single
> smp_rmb() which pairs with the smp_wmb().
> 
> >> > > And even if write is ordered we still need read to be ordered to be
> >> > > paired with that.
> >
> > IMO it synchronizes with the CPU to which irq is
> > delivered. Otherwise basically all drivers would be broken,
> > wouldn't they be?
> > I don't know whether it's correct on all platforms, but if not
> > we need to fix request_irq.
> 
> There is nothing to fix:
> 
> request_irq()
>    raw_spin_lock_irq(desc->lock);       // ACQUIRE
>    ....
>    raw_spin_unlock_irq(desc->lock);     // RELEASE
> 
> interrupt()
>    raw_spin_lock(desc->lock);           // ACQUIRE
>    set status to IN_PROGRESS
>    raw_spin_unlock(desc->lock);         // RELEASE
>    invoke handler()
> 
> So anything which the driver set up _before_ request_irq() is visible to
> the interrupt handler. No?
> 
> >> What happens if an interrupt is raised in the middle like:
> >> 
> >> smp_store_release(dev->irq_soft_enabled, true)
> >> IRQ handler
> >> synchronize_irq()
> 
> This is bogus. The obvious order of things is:
> 
>     dev->ok = false;
>     request_irq();
> 
>     moar_setup();
>     synchronize_irq();  // ACQUIRE + RELEASE
>     dev->ok = true;
> 
> The reverse operation on teardown:
> 
>     dev->ok = false;
>     synchronize_irq();  // ACQUIRE + RELEASE
> 
>     teardown();
> 
> So in both cases a simple check in the handler is sufficient:
> 
> handler()
>     if (!dev->ok)
>     	return;

Does this need to be if (!READ_ONCE(dev->ok)) ?



> I'm not understanding what you folks are trying to "fix" here. If any
> driver does this in the wrong order, then the driver is broken.
> 
> Sure, you can do the same with:
> 
>     dev->ok = false;
>     request_irq();
>     moar_setup();
>     smp_wmb();
>     dev->ok = true;
> 
> for the price of a smp_rmb() in the interrupt handler:
> 
> handler()
>     if (!dev->ok)
>     	return;
>     smp_rmb();
> 
> but that's only working for the setup case correctly and not for
> teardown.
> 
> Thanks,
> 
>         tglx


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2022-01-20 15:28 Myrtle Shah
  2022-01-20 15:37 ` Vitaly Wool
  0 siblings, 1 reply; 414+ messages in thread
From: Myrtle Shah @ 2022-01-20 15:28 UTC (permalink / raw)
  To: linux-riscv, paul.walmsley, palmer; +Cc: linux-kernel

These are some initial patches to bugs I found attempting to
get a XIP kernel working on hardware:
 - 32-bit VexRiscv processor
 - kernel in SPI flash, at 0x00200000
 - 16MB of RAM at 0x10000000
 - MMU enabled
 
I still have some more debugging to do, but these at least
get the kernel as far as initialising the MMU, and I would
appreciate feedback if anyone else is working on RISC-V XIP.



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-01-20 15:28 Myrtle Shah
@ 2022-01-20 15:37 ` Vitaly Wool
  2022-01-20 23:29   ` Re: Damien Le Moal
  2022-02-04 21:45   ` Re: Palmer Dabbelt
  0 siblings, 2 replies; 414+ messages in thread
From: Vitaly Wool @ 2022-01-20 15:37 UTC (permalink / raw)
  To: Myrtle Shah; +Cc: linux-riscv, Paul Walmsley, Palmer Dabbelt, LKML

Hey,

On Thu, Jan 20, 2022 at 4:30 PM Myrtle Shah <gatecat@ds0.me> wrote:
>
> These are some initial patches to bugs I found attempting to
> get a XIP kernel working on hardware:
>  - 32-bit VexRiscv processor
>  - kernel in SPI flash, at 0x00200000
>  - 16MB of RAM at 0x10000000
>  - MMU enabled
>
> I still have some more debugging to do, but these at least
> get the kernel as far as initialising the MMU, and I would
> appreciate feedback if anyone else is working on RISC-V XIP.

I'll try to support you as much as I can, unfortunately I don't have
any 32-bit RISC-V around so I was rather thinking of extending the
RISC-V XIP support to 64-bit non-MMU targets.
For now just please keep in mind that there might be some inherent
assumptions that a target is 64 bit.

Best regards,
Vitaly

>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-01-20 15:37 ` Vitaly Wool
@ 2022-01-20 23:29   ` Damien Le Moal
  2022-02-04 21:45   ` Re: Palmer Dabbelt
  1 sibling, 0 replies; 414+ messages in thread
From: Damien Le Moal @ 2022-01-20 23:29 UTC (permalink / raw)
  To: Vitaly Wool, Myrtle Shah; +Cc: linux-riscv, Paul Walmsley, Palmer Dabbelt, LKML

On 2022/01/21 0:37, Vitaly Wool wrote:
> Hey,
> 
> On Thu, Jan 20, 2022 at 4:30 PM Myrtle Shah <gatecat@ds0.me> wrote:
>>
>> These are some initial patches to bugs I found attempting to
>> get a XIP kernel working on hardware:
>>  - 32-bit VexRiscv processor
>>  - kernel in SPI flash, at 0x00200000
>>  - 16MB of RAM at 0x10000000
>>  - MMU enabled
>>
>> I still have some more debugging to do, but these at least
>> get the kernel as far as initialising the MMU, and I would
>> appreciate feedback if anyone else is working on RISC-V XIP.
> 
> I'll try to support you as much as I can, unfortunately I don't have
> any 32-bit RISC-V around so I was rather thinking of extending the
> RISC-V XIP support to 64-bit non-MMU targets.

That would be great ! I am completing the buildroot patches for the K210. Got
u-boot almost working for SD card boot too (fighting a problem with rootfs
kernel mount on boot when using u-boot though).

> For now just please keep in mind that there might be some inherent
> assumptions that a target is 64 bit.
> 
> Best regards,
> Vitaly
> 
>>
>> _______________________________________________
>> linux-riscv mailing list
>> linux-riscv@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-riscv
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv


-- 
Damien Le Moal
Western Digital Research

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2022-01-20 15:37 ` Vitaly Wool
  2022-01-20 23:29   ` Re: Damien Le Moal
@ 2022-02-04 21:45   ` Palmer Dabbelt
  1 sibling, 0 replies; 414+ messages in thread
From: Palmer Dabbelt @ 2022-02-04 21:45 UTC (permalink / raw)
  To: vitaly.wool; +Cc: gatecat, linux-riscv, Paul Walmsley, linux-kernel

On Thu, 20 Jan 2022 07:37:00 PST (-0800), vitaly.wool@konsulko.com wrote:
> Hey,
>
> On Thu, Jan 20, 2022 at 4:30 PM Myrtle Shah <gatecat@ds0.me> wrote:
>>
>> These are some initial patches to bugs I found attempting to
>> get a XIP kernel working on hardware:
>>  - 32-bit VexRiscv processor
>>  - kernel in SPI flash, at 0x00200000
>>  - 16MB of RAM at 0x10000000
>>  - MMU enabled
>>
>> I still have some more debugging to do, but these at least
>> get the kernel as far as initialising the MMU, and I would
>> appreciate feedback if anyone else is working on RISC-V XIP.
>
> I'll try to support you as much as I can, unfortunately I don't have
> any 32-bit RISC-V around so I was rather thinking of extending the
> RISC-V XIP support to 64-bit non-MMU targets.
> For now just please keep in mind that there might be some inherent
> assumptions that a target is 64 bit.

I don't test any of the XIP configs, but if you guys have something that's sane
to run in QEMU I'm happy to do so.  Given that there's now some folks finding
boot bugs it's probably worth getting what does boot into a regression test so
it's less likely to break moving forwards.

These are on fixes, with the second one split up so it's got a better chance of
landing in the stable trees.

Thanks!

>
> Best regards,
> Vitaly
>
>>
>> _______________________________________________
>> linux-riscv mailing list
>> linux-riscv@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <20211126221034.21331-1-lukasz.bartosik@semihalf.com--annotate>]

* Re:
       [not found] <20211126221034.21331-1-lukasz.bartosik@semihalf.com--annotate>
@ 2021-11-29 21:59 ` sean.wang
  0 siblings, 0 replies; 414+ messages in thread
From: sean.wang @ 2021-11-29 21:59 UTC (permalink / raw)
  To: lb
  Cc: marcel, johan.hedberg, luiz.dentz, upstream, linux-bluetooth,
	linux-mediatek, linux-kernel, Sean Wang

From: Sean Wang <sean.wang@mediatek.com>

>Enable msft opcode for btmtksdio driver.
>
>Signed-off-by: Łukasz Bartosik <lb@semihalf.com>
>---
> drivers/bluetooth/btmtksdio.c | 1 +
> 1 file changed, 1 insertion(+)
>
>diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index d9cf0c492e29..2a7a615663b9 100644
>--- a/drivers/bluetooth/btmtksdio.c
>+++ b/drivers/bluetooth/btmtksdio.c
>@@ -887,6 +887,7 @@ static int btmtksdio_setup(struct hci_dev *hdev)
>	if (enable_autosuspend)
>		pm_runtime_allow(bdev->dev);
>
>+	hci_set_msft_opcode(hdev, 0xFD30);

Hi Łukasz,

The msft feature is supposed to be supported only on mt7921. Could you help rework the patch to enable the msft opcode only for mt7921?

	Sean

>	bt_dev_info(hdev, "Device setup in %llu usecs", duration);
>
>	return 0;
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* [PATCH v5 00/11] Add support for X86/ACPI camera sensor/PMIC setup with clk and regulator platform data
@ 2021-11-02  9:48 Hans de Goede
  2021-11-02  9:49 ` [PATCH v5 05/11] clk: Introduce clk-tps68470 driver Hans de Goede
  0 siblings, 1 reply; 414+ messages in thread
From: Hans de Goede @ 2021-11-02  9:48 UTC (permalink / raw)
  To: Rafael J . Wysocki, Mark Gross, Andy Shevchenko, Wolfram Sang,
	Mika Westerberg, Daniel Scally, Laurent Pinchart,
	Mauro Carvalho Chehab, Liam Girdwood, Mark Brown,
	Michael Turquette, Stephen Boyd
  Cc: Hans de Goede, Len Brown, linux-acpi, platform-driver-x86,
	linux-kernel, linux-i2c, Sakari Ailus, Kate Hsuan, linux-media,
	linux-clk

Here is v5 of my patch-set adding support for camera sensor connected to a
TPS68470 PMIC on x86/ACPI devices.

Changes in v5:
- Update regulator_init_data in patch 10/11 to include the VCM regulator
- Address various small review remarks from Andy
- Make a couple of functions / vars static in the clk + regulator drivers
  Reported-by: kernel test robot <lkp@intel.com>

Changes in v4:
[PATCH 01/11] ACPI: delay enumeration of devices with a _DEP
              pointing to an INT3472 device:
- Move the acpi_dev_ready_for_enumeration() check to acpi_bus_attach()
  (replacing the acpi_device_is_present() check there)

[PATCH 04/11] regulator: Introduce tps68470-regulator driver:
- Make the top comment block use c++ style comments
- Drop the bogus builtin regulator_init_data
- Make the driver enable the PMIC clk when enabling the Core buck
  regulator, this switching regulator needs the PLL to be on
- Kconfig: add || COMPILE_TEST, fix help text

[PATCH 05/11] clk: Introduce clk-tps68470 driver
- Kconfig: select REGMAP_I2C, add || COMPILE_TEST, fix help text
- tps68470_clk_prepare(): Wait for the PLL to lock before returning
- tps68470_clk_unprepare(): Remove unnecessary clearing of divider regs
- tps68470_clk_probe(): Use devm_clk_hw_register()
- Misc. small cleanups

I'm quite happy with how this works now, so from my pov this is the final
version of the device-instantiation deferral code / approach.

###

The clk and regulator frameworks expect clk/regulator consumer-devices
to have info about the consumed clks/regulators described in the device's
fw_node, but on ACPI this info is missing.

This series works around this by providing platform_data with the info to
the TPS68470 clk/regulator MFD cells.

Patches 1 - 2 deal with a probe-ordering problem this introduces,
since the lookups are only registered when the provider-driver binds,
trying to get these clks/regulators before then results in a -ENOENT
error for clks and a dummy regulator for regulators. See the patches
for more details.

Patch 3 adds a header file which adds tps68470_clk_platform_data and
tps68470_regulator_platform_data structs. The further patches depend on
this new header file.

Patch 4 + 5 add the TPS68470 clk and regulator drivers

Patches 6 - 11 Modify the INT3472 driver which instantiates the MFD cells to
provide the necessary platform-data.

Assuming this series is acceptable to everyone, we need to talk about how
to merge this.

Patch 2 has already been acked by Wolfram for merging by Rafael, so patch
1 + 2 can be merged into linux-pm, independent of the rest of the series
(there are some runtime deps on other changes for everything to work,
but the camera-sensors impacted by this are not fully supported yet in
the mainline kernel anyways).

For "[PATCH 03/13] platform_data: Add linux/platform_data/tps68470.h file",
which all further patches depend on I plan to provide an immutable branch
myself (once it has been reviewed), which the clk / regulator maintainers
can then merge before merging the clk / regulator driver which depends on
this.

And I will merge that IM-branch + patches 6-11 into the pdx86 tree myself.

Regards,

Hans


Daniel Scally (1):
  platform/x86: int3472: Enable I2c daisy chain

Hans de Goede (10):
  ACPI: delay enumeration of devices with a _DEP pointing to an INT3472
    device
  i2c: acpi: Use acpi_dev_ready_for_enumeration() helper
  platform_data: Add linux/platform_data/tps68470.h file
  regulator: Introduce tps68470-regulator driver
  clk: Introduce clk-tps68470 driver
  platform/x86: int3472: Split into 2 drivers
  platform/x86: int3472: Add get_sensor_adev_and_name() helper
  platform/x86: int3472: Pass tps68470_clk_platform_data to the
    tps68470-regulator MFD-cell
  platform/x86: int3472: Pass tps68470_regulator_platform_data to the
    tps68470-regulator MFD-cell
  platform/x86: int3472: Deal with probe ordering issues

 drivers/acpi/scan.c                           |  37 ++-
 drivers/clk/Kconfig                           |   8 +
 drivers/clk/Makefile                          |   1 +
 drivers/clk/clk-tps68470.c                    | 257 ++++++++++++++++++
 drivers/i2c/i2c-core-acpi.c                   |   5 +-
 drivers/platform/x86/intel/int3472/Makefile   |   9 +-
 ...lk_and_regulator.c => clk_and_regulator.c} |   2 +-
 drivers/platform/x86/intel/int3472/common.c   |  82 ++++++
 .../{intel_skl_int3472_common.h => common.h}  |   6 +-
 ...ntel_skl_int3472_discrete.c => discrete.c} |  51 ++--
 .../intel/int3472/intel_skl_int3472_common.c  | 106 --------
 ...ntel_skl_int3472_tps68470.c => tps68470.c} |  99 ++++++-
 drivers/platform/x86/intel/int3472/tps68470.h |  25 ++
 .../x86/intel/int3472/tps68470_board_data.c   | 134 +++++++++
 drivers/regulator/Kconfig                     |   9 +
 drivers/regulator/Makefile                    |   1 +
 drivers/regulator/tps68470-regulator.c        | 212 +++++++++++++++
 include/acpi/acpi_bus.h                       |   5 +-
 include/linux/mfd/tps68470.h                  |  11 +
 include/linux/platform_data/tps68470.h        |  35 +++
 20 files changed, 944 insertions(+), 151 deletions(-)
 create mode 100644 drivers/clk/clk-tps68470.c
 rename drivers/platform/x86/intel/int3472/{intel_skl_int3472_clk_and_regulator.c => clk_and_regulator.c} (99%)
 create mode 100644 drivers/platform/x86/intel/int3472/common.c
 rename drivers/platform/x86/intel/int3472/{intel_skl_int3472_common.h => common.h} (94%)
 rename drivers/platform/x86/intel/int3472/{intel_skl_int3472_discrete.c => discrete.c} (91%)
 delete mode 100644 drivers/platform/x86/intel/int3472/intel_skl_int3472_common.c
 rename drivers/platform/x86/intel/int3472/{intel_skl_int3472_tps68470.c => tps68470.c} (54%)
 create mode 100644 drivers/platform/x86/intel/int3472/tps68470.h
 create mode 100644 drivers/platform/x86/intel/int3472/tps68470_board_data.c
 create mode 100644 drivers/regulator/tps68470-regulator.c
 create mode 100644 include/linux/platform_data/tps68470.h

-- 
2.31.1


^ permalink raw reply	[flat|nested] 414+ messages in thread

* [PATCH v5 05/11] clk: Introduce clk-tps68470 driver
  2021-11-02  9:48 [PATCH v5 00/11] Add support for X86/ACPI camera sensor/PMIC setup with clk and regulator platform data Hans de Goede
@ 2021-11-02  9:49 ` Hans de Goede
       [not found]   ` <163588780885.2993099.2088131017920983969@swboyd.mtv.corp.google.com>
  0 siblings, 1 reply; 414+ messages in thread
From: Hans de Goede @ 2021-11-02  9:49 UTC (permalink / raw)
  To: Rafael J . Wysocki, Mark Gross, Andy Shevchenko, Wolfram Sang,
	Mika Westerberg, Daniel Scally, Laurent Pinchart,
	Mauro Carvalho Chehab, Liam Girdwood, Mark Brown,
	Michael Turquette, Stephen Boyd
  Cc: Hans de Goede, Len Brown, linux-acpi, platform-driver-x86,
	linux-kernel, linux-i2c, Sakari Ailus, Kate Hsuan, linux-media,
	linux-clk

The TPS68470 PMIC provides Clocks, GPIOs and Regulators. At present in
the kernel the Regulators and Clocks are controlled by an OpRegion
driver designed to work with power control methods defined in ACPI, but
some platforms lack those methods, meaning drivers need to be able to
consume the resources of these chips through the usual frameworks.

This commit adds a driver for the clocks provided by the tps68470,
and is designed to bind to the platform_device registered by the
intel_skl_int3472 module.

This is based on this out of tree driver written by Intel:
https://github.com/intel/linux-intel-lts/blob/4.14/base/drivers/clk/clk-tps68470.c
with various cleanups added.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
Changes in v5:
- Small comment cleanups based on review from Andy

Changes in v4:
- Kconfig: select REGMAP_I2C, add || COMPILE_TEST, fix help text
- tps68470_clk_prepare(): Wait for the PLL to lock before returning
- tps68470_clk_unprepare(): Remove unnecessary clearing of divider regs
- tps68470_clk_probe(): Use devm_clk_hw_register()
- Misc. small cleanups

Changes in v2:
- Update the comment on why a subsys_initcall is used to register the drv
- Fix trailing whitespace on line 100
---
 drivers/clk/Kconfig          |   8 ++
 drivers/clk/Makefile         |   1 +
 drivers/clk/clk-tps68470.c   | 257 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/tps68470.h |  11 ++
 4 files changed, 277 insertions(+)
 create mode 100644 drivers/clk/clk-tps68470.c

diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index c5b3dc97396a..4e9098d79249 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -169,6 +169,14 @@ config COMMON_CLK_CDCE706
 	help
 	  This driver supports TI CDCE706 programmable 3-PLL clock synthesizer.
 
+config COMMON_CLK_TPS68470
+	tristate "Clock Driver for TI TPS68470 PMIC"
+	depends on I2C
+	depends on INTEL_SKL_INT3472 || COMPILE_TEST
+	select REGMAP_I2C
+	help
+	  This driver supports the clocks provided by the TPS68470 PMIC.
+
 config COMMON_CLK_CDCE925
 	tristate "Clock driver for TI CDCE913/925/937/949 devices"
 	depends on I2C
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index e42312121e51..6b6a88ae1425 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_COMMON_CLK_SI570)		+= clk-si570.o
 obj-$(CONFIG_COMMON_CLK_STM32F)		+= clk-stm32f4.o
 obj-$(CONFIG_COMMON_CLK_STM32H7)	+= clk-stm32h7.o
 obj-$(CONFIG_COMMON_CLK_STM32MP157)	+= clk-stm32mp1.o
+obj-$(CONFIG_COMMON_CLK_TPS68470)      += clk-tps68470.o
 obj-$(CONFIG_CLK_TWL6040)		+= clk-twl6040.o
 obj-$(CONFIG_ARCH_VT8500)		+= clk-vt8500.o
 obj-$(CONFIG_COMMON_CLK_VC5)		+= clk-versaclock5.o
diff --git a/drivers/clk/clk-tps68470.c b/drivers/clk/clk-tps68470.c
new file mode 100644
index 000000000000..2ad0ac2f4096
--- /dev/null
+++ b/drivers/clk/clk-tps68470.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Clock driver for TPS68470 PMIC
+ *
+ * Copyright (c) 2021 Red Hat Inc.
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ *	Hans de Goede <hdegoede@redhat.com>
+ *	Zaikuo Wang <zaikuo.wang@intel.com>
+ *	Tianshu Qiu <tian.shu.qiu@intel.com>
+ *	Jian Xu Zheng <jian.xu.zheng@intel.com>
+ *	Yuning Pu <yuning.pu@intel.com>
+ *	Antti Laakso <antti.laakso@intel.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/kernel.h>
+#include <linux/mfd/tps68470.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/tps68470.h>
+#include <linux/regmap.h>
+
+#define TPS68470_CLK_NAME "tps68470-clk"
+
+#define to_tps68470_clkdata(clkd) \
+	container_of(clkd, struct tps68470_clkdata, clkout_hw)
+
+static struct tps68470_clkout_freqs {
+	unsigned long freq;
+	unsigned int xtaldiv;
+	unsigned int plldiv;
+	unsigned int postdiv;
+	unsigned int buckdiv;
+	unsigned int boostdiv;
+} clk_freqs[] = {
+/*
+ *  The PLL is used to multiply the crystal oscillator
+ *  frequency range of 3 MHz to 27 MHz by a programmable
+ *  factor of F = (M/N)*(1/P) such that the output
+ *  available at the HCLK_A or HCLK_B pins are in the range
+ *  of 4 MHz to 64 MHz in increments of 0.1 MHz.
+ *
+ * hclk_# = osc_in * (((plldiv*2)+320) / (xtaldiv+30)) * (1 / 2^postdiv)
+ *
+ * PLL_REF_CLK should be as close as possible to 100kHz
+ * PLL_REF_CLK = input clk / (XTALDIV[7:0] + 30)
+ *
+ * PLL_VCO_CLK = (PLL_REF_CLK * (plldiv*2 + 320))
+ *
+ * BOOST should be as close as possible to 2Mhz
+ * BOOST = PLL_VCO_CLK / (BOOSTDIV[4:0] + 16)
+ *
+ * BUCK should be as close as possible to 5.2Mhz
+ * BUCK = PLL_VCO_CLK / (BUCKDIV[3:0] + 5)
+ *
+ * osc_in   xtaldiv  plldiv   postdiv   hclk_#
+ * 20Mhz    170      32       1         19.2Mhz
+ * 20Mhz    170      40       1         20Mhz
+ * 20Mhz    170      80       1         24Mhz
+ */
+	{ 19200000, 170, 32, 1, 2, 3 },
+	{ 20000000, 170, 40, 1, 3, 4 },
+	{ 24000000, 170, 80, 1, 4, 8 },
+};
+
+struct tps68470_clkdata {
+	struct clk_hw clkout_hw;
+	struct regmap *regmap;
+	unsigned int clk_cfg_idx;
+};
+
+static int tps68470_clk_is_prepared(struct clk_hw *hw)
+{
+	struct tps68470_clkdata *clkdata = to_tps68470_clkdata(hw);
+	int val;
+
+	if (regmap_read(clkdata->regmap, TPS68470_REG_PLLCTL, &val))
+		return 0;
+
+	return val & TPS68470_PLL_EN_MASK;
+}
+
+static int tps68470_clk_prepare(struct clk_hw *hw)
+{
+	struct tps68470_clkdata *clkdata = to_tps68470_clkdata(hw);
+	unsigned int idx = clkdata->clk_cfg_idx;
+
+	regmap_write(clkdata->regmap, TPS68470_REG_BOOSTDIV, clk_freqs[idx].boostdiv);
+	regmap_write(clkdata->regmap, TPS68470_REG_BUCKDIV, clk_freqs[idx].buckdiv);
+	regmap_write(clkdata->regmap, TPS68470_REG_PLLSWR, TPS68470_PLLSWR_DEFAULT);
+	regmap_write(clkdata->regmap, TPS68470_REG_XTALDIV, clk_freqs[idx].xtaldiv);
+	regmap_write(clkdata->regmap, TPS68470_REG_PLLDIV, clk_freqs[idx].plldiv);
+	regmap_write(clkdata->regmap, TPS68470_REG_POSTDIV, clk_freqs[idx].postdiv);
+	regmap_write(clkdata->regmap, TPS68470_REG_POSTDIV2, clk_freqs[idx].postdiv);
+	regmap_write(clkdata->regmap, TPS68470_REG_CLKCFG2, TPS68470_CLKCFG2_DRV_STR_2MA);
+
+	regmap_write(clkdata->regmap, TPS68470_REG_PLLCTL,
+		     TPS68470_OSC_EXT_CAP_DEFAULT << TPS68470_OSC_EXT_CAP_SHIFT |
+		     TPS68470_CLK_SRC_XTAL << TPS68470_CLK_SRC_SHIFT);
+
+	regmap_write(clkdata->regmap, TPS68470_REG_CLKCFG1,
+			   (TPS68470_PLL_OUTPUT_ENABLE << TPS68470_OUTPUT_A_SHIFT) |
+			   (TPS68470_PLL_OUTPUT_ENABLE << TPS68470_OUTPUT_B_SHIFT));
+
+	regmap_update_bits(clkdata->regmap, TPS68470_REG_PLLCTL,
+			   TPS68470_PLL_EN_MASK, TPS68470_PLL_EN_MASK);
+
+	/*
+	 * The PLLCTL reg lock bit is set by the PMIC after approx. 4ms and
+	 * does not indicate a true lock, so just wait 4 ms.
+	 */
+	usleep_range(4000, 5000);
+
+	return 0;
+}
+
+static void tps68470_clk_unprepare(struct clk_hw *hw)
+{
+	struct tps68470_clkdata *clkdata = to_tps68470_clkdata(hw);
+
+	/* Disable clock first ... */
+	regmap_update_bits(clkdata->regmap, TPS68470_REG_PLLCTL, TPS68470_PLL_EN_MASK, 0);
+
+	/* ... and then tri-state the clock outputs. */
+	regmap_write(clkdata->regmap, TPS68470_REG_CLKCFG1, 0);
+}
+
+static unsigned long tps68470_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
+{
+	struct tps68470_clkdata *clkdata = to_tps68470_clkdata(hw);
+
+	return clk_freqs[clkdata->clk_cfg_idx].freq;
+}
+
+/*
+ * This returns the index of the clk_freqs[] cfg with the closest rate for
+ * use in tps68470_clk_round_rate(). tps68470_clk_set_rate() checks that
+ * the rate of the returned cfg is an exact match.
+ */
+static unsigned int tps68470_clk_cfg_lookup(unsigned long rate)
+{
+	long diff, best_diff = LONG_MAX;
+	unsigned int i, best_idx = 0;
+
+	for (i = 0; i < ARRAY_SIZE(clk_freqs); i++) {
+		diff = clk_freqs[i].freq - rate;
+		if (diff == 0)
+			return i;
+
+		diff = abs(diff);
+		if (diff < best_diff) {
+			best_diff = diff;
+			best_idx = i;
+		}
+	}
+
+	return best_idx;
+}
+
+static long tps68470_clk_round_rate(struct clk_hw *hw, unsigned long rate,
+				    unsigned long *parent_rate)
+{
+	unsigned int idx = tps68470_clk_cfg_lookup(rate);
+
+	return clk_freqs[idx].freq;
+}
+
+static int tps68470_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+				 unsigned long parent_rate)
+{
+	struct tps68470_clkdata *clkdata = to_tps68470_clkdata(hw);
+	unsigned int idx = tps68470_clk_cfg_lookup(rate);
+
+	if (rate != clk_freqs[idx].freq)
+		return -EINVAL;
+
+	clkdata->clk_cfg_idx = idx;
+
+	return 0;
+}
+
+static const struct clk_ops tps68470_clk_ops = {
+	.is_prepared = tps68470_clk_is_prepared,
+	.prepare = tps68470_clk_prepare,
+	.unprepare = tps68470_clk_unprepare,
+	.recalc_rate = tps68470_clk_recalc_rate,
+	.round_rate = tps68470_clk_round_rate,
+	.set_rate = tps68470_clk_set_rate,
+};
+
+static const struct clk_init_data tps68470_clk_initdata = {
+	.name = TPS68470_CLK_NAME,
+	.ops = &tps68470_clk_ops,
+};
+
+static int tps68470_clk_probe(struct platform_device *pdev)
+{
+	struct tps68470_clk_platform_data *pdata = pdev->dev.platform_data;
+	struct tps68470_clkdata *tps68470_clkdata;
+	int ret;
+
+	tps68470_clkdata = devm_kzalloc(&pdev->dev, sizeof(*tps68470_clkdata),
+					GFP_KERNEL);
+	if (!tps68470_clkdata)
+		return -ENOMEM;
+
+	tps68470_clkdata->regmap = dev_get_drvdata(pdev->dev.parent);
+	tps68470_clkdata->clkout_hw.init = &tps68470_clk_initdata;
+	ret = devm_clk_hw_register(&pdev->dev, &tps68470_clkdata->clkout_hw);
+	if (ret)
+		return ret;
+
+	ret = devm_clk_hw_register_clkdev(&pdev->dev, &tps68470_clkdata->clkout_hw,
+					  TPS68470_CLK_NAME, NULL);
+	if (ret)
+		return ret;
+
+	if (pdata) {
+		ret = devm_clk_hw_register_clkdev(&pdev->dev,
+						  &tps68470_clkdata->clkout_hw,
+						  pdata->consumer_con_id,
+						  pdata->consumer_dev_name);
+	}
+
+	return ret;
+}
+
+static struct platform_driver tps68470_clk_driver = {
+	.driver = {
+		.name = TPS68470_CLK_NAME,
+	},
+	.probe = tps68470_clk_probe,
+};
+
+/*
+ * The ACPI tps68470 probe-ordering depends on the clk/gpio/regulator drivers
+ * registering before the drivers for the camera-sensors which use them bind.
+ * subsys_initcall() ensures this when the drivers are builtin.
+ */
+static int __init tps68470_clk_init(void)
+{
+	return platform_driver_register(&tps68470_clk_driver);
+}
+subsys_initcall(tps68470_clk_init);
+
+static void __exit tps68470_clk_exit(void)
+{
+	platform_driver_unregister(&tps68470_clk_driver);
+}
+module_exit(tps68470_clk_exit);
+
+MODULE_ALIAS("platform:tps68470-clk");
+MODULE_DESCRIPTION("clock driver for TPS68470 pmic");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/tps68470.h b/include/linux/mfd/tps68470.h
index ffe81127d91c..7807fa329db0 100644
--- a/include/linux/mfd/tps68470.h
+++ b/include/linux/mfd/tps68470.h
@@ -75,6 +75,17 @@
 #define TPS68470_CLKCFG1_MODE_A_MASK	GENMASK(1, 0)
 #define TPS68470_CLKCFG1_MODE_B_MASK	GENMASK(3, 2)
 
+#define TPS68470_CLKCFG2_DRV_STR_2MA	0x05
+#define TPS68470_PLL_OUTPUT_ENABLE	0x02
+#define TPS68470_CLK_SRC_XTAL		BIT(0)
+#define TPS68470_PLLSWR_DEFAULT		GENMASK(1, 0)
+#define TPS68470_OSC_EXT_CAP_DEFAULT	0x05
+
+#define TPS68470_OUTPUT_A_SHIFT		0x00
+#define TPS68470_OUTPUT_B_SHIFT		0x02
+#define TPS68470_CLK_SRC_SHIFT		GENMASK(2, 0)
+#define TPS68470_OSC_EXT_CAP_SHIFT	BIT(2)
+
 #define TPS68470_GPIO_CTL_REG_A(x)	(TPS68470_REG_GPCTL0A + (x) * 2)
 #define TPS68470_GPIO_CTL_REG_B(x)	(TPS68470_REG_GPCTL0B + (x) * 2)
 #define TPS68470_GPIO_MODE_MASK		GENMASK(1, 0)
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 414+ messages in thread

[parent not found: <163588780885.2993099.2088131017920983969@swboyd.mtv.corp.google.com>]

* Re:
       [not found]   ` <163588780885.2993099.2088131017920983969@swboyd.mtv.corp.google.com>
@ 2021-11-25 15:01     ` Hans de Goede
  0 siblings, 0 replies; 414+ messages in thread
From: Hans de Goede @ 2021-11-25 15:01 UTC (permalink / raw)
  To: Stephen Boyd, Andy Shevchenko, Daniel Scally, Laurent Pinchart,
	Liam Girdwood, Mark Brown, Mark Gross, Mauro Carvalho Chehab,
	Michael Turquette, Mika Westerberg, Rafael J.Wysocki,
	Wolfram Sang
  Cc: Len Brown, linux-acpi, platform-driver-x86, linux-kernel,
	linux-i2c, Sakari Ailus, Kate Hsuan, linux-media, linux-clk

Hi,

On 11/2/21 22:16, Stephen Boyd wrote:
> Quoting Hans de Goede (2021-11-02 02:49:01)
>> diff --git a/drivers/clk/clk-tps68470.c b/drivers/clk/clk-tps68470.c
>> new file mode 100644
>> index 000000000000..2ad0ac2f4096
>> --- /dev/null
>> +++ b/drivers/clk/clk-tps68470.c
>> @@ -0,0 +1,257 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Clock driver for TPS68470 PMIC
>> + *
>> + * Copyright (c) 2021 Red Hat Inc.
>> + * Copyright (C) 2018 Intel Corporation
>> + *
>> + * Authors:
>> + *     Hans de Goede <hdegoede@redhat.com>
>> + *     Zaikuo Wang <zaikuo.wang@intel.com>
>> + *     Tianshu Qiu <tian.shu.qiu@intel.com>
>> + *     Jian Xu Zheng <jian.xu.zheng@intel.com>
>> + *     Yuning Pu <yuning.pu@intel.com>
>> + *     Antti Laakso <antti.laakso@intel.com>
>> + */
>> +
>> +#include <linux/clk-provider.h>
>> +#include <linux/clkdev.h>
>> +#include <linux/kernel.h>
>> +#include <linux/mfd/tps68470.h>
>> +#include <linux/module.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/platform_data/tps68470.h>
>> +#include <linux/regmap.h>
>> +
>> +#define TPS68470_CLK_NAME "tps68470-clk"
>> +
>> +#define to_tps68470_clkdata(clkd) \
>> +       container_of(clkd, struct tps68470_clkdata, clkout_hw)
>> +
> [...]
>> +
>> +static int tps68470_clk_set_rate(struct clk_hw *hw, unsigned long rate,
>> +                                unsigned long parent_rate)
>> +{
>> +       struct tps68470_clkdata *clkdata = to_tps68470_clkdata(hw);
>> +       unsigned int idx = tps68470_clk_cfg_lookup(rate);
>> +
>> +       if (rate != clk_freqs[idx].freq)
>> +               return -EINVAL;
>> +
>> +       clkdata->clk_cfg_idx = idx;
> 
> It deserves a comment that set_rate can only be called when the clk is
> gated. We have CLK_SET_RATE_GATE flag as well that should be set if the
> clk can't support changing rate while enabled. With that flag set, this
> function should be able to actually change hardware with the assumption
> that the framework won't call down into this clk_op when the clk is
> enabled.

Ok, for v6 I've added the CLK_SET_RATE_GATE flag + a comment explaining why
it is used, and moved the divider programming to tps68470_clk_set_rate(),
while keeping the PLL_EN + output-enable writes in tps68470_clk_prepare().


> 
>> +
>> +       return 0;
>> +}
>> +
>> +static const struct clk_ops tps68470_clk_ops = {
>> +       .is_prepared = tps68470_clk_is_prepared,
>> +       .prepare = tps68470_clk_prepare,
>> +       .unprepare = tps68470_clk_unprepare,
>> +       .recalc_rate = tps68470_clk_recalc_rate,
>> +       .round_rate = tps68470_clk_round_rate,
>> +       .set_rate = tps68470_clk_set_rate,
>> +};
>> +
>> +static const struct clk_init_data tps68470_clk_initdata = {
> 
> Is there a reason to make this a static global? It's probably better to
> throw it on the stack so that a structure isn't sitting around after
> driver probe being unused.

Fixed for v6.

Thanks & Regards,

Hans


> 
>> +       .name = TPS68470_CLK_NAME,
>> +       .ops = &tps68470_clk_ops,
>> +};
> 


^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <CAP7CzPcLhtXDyLudfmR2pWR5fzSQ_jhJSoRheH=cytoDnb_ujg@mail.gmail.com>]

* Re:
       [not found] <CAP7CzPcLhtXDyLudfmR2pWR5fzSQ_jhJSoRheH=cytoDnb_ujg@mail.gmail.com>
@ 2021-09-14 15:37 ` Nick Desaulniers
  0 siblings, 0 replies; 414+ messages in thread
From: Nick Desaulniers @ 2021-09-14 15:37 UTC (permalink / raw)
  To: zhao xc
  Cc: tglx, mingo, bp, x86, hpa, nathan, tony.luck, linux, mpe,
	dan.j.williams, linux-kernel, clang-built-linux

On Sun, Sep 12, 2021 at 10:42 PM zhao xc <xinchao.zhao.kernelz@gmail.com> wrote:
>
> Hi maintainer:
>         This is a patch fix the unused macro definition

Hi Zhao,
Thanks for the patch.  Would you mind following the standard procedure
for submitting patches to the list for review?  I wrote up
https://nickdesaulniers.github.io/blog/2017/05/16/submitting-your-first-patch-to-the-linux-kernel-and-responding-to-feedback/
a while ago, but I think it's still helpful.

-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2021-08-12  9:21 Valdis Klētnieks
  2021-08-12  9:42 ` SeongJae Park
  0 siblings, 1 reply; 414+ messages in thread
From: Valdis Klētnieks @ 2021-08-12  9:21 UTC (permalink / raw)
  To: SeongJae Park, Andrew Morton; +Cc: linux-mm, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 832 bytes --]

In this commit:

commit fedc37448fb1be5d03e420ca7791d4286893d5ec
Author: SeongJae Park <sjpark@amazon.de>
Date:   Tue Aug 10 16:55:51 2021 +1000

    mm/idle_page_tracking: make PG_idle reusable

diff --git a/mm/Kconfig b/mm/Kconfig
index 504336de9a1e..d0b85dc12429 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -739,10 +739,18 @@ config DEFERRED_STRUCT_PAGE_INIT
          lifetime of the system until these kthreads finish the
          initialisation.

+config PAGE_IDLE_FLAG
+       bool "Add PG_idle and PG_young flags"
+       help
+         This feature adds PG_idle and PG_young flags in 'struct page'.  PTE
+         Accessed bit writers can set the state of the bit in the flags to let
+         other PTE Accessed bit readers don't disturbed.

This needs to be converted to proper, or at least comprehensible, English....


[-- Attachment #2: Type: application/pgp-signature, Size: 494 bytes --]

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2021-08-12  9:21 Valdis Klētnieks
@ 2021-08-12  9:42 ` SeongJae Park
  2021-08-12 20:19   ` Re: Andrew Morton
  0 siblings, 1 reply; 414+ messages in thread
From: SeongJae Park @ 2021-08-12  9:42 UTC (permalink / raw)
  To: Valdis Klētnieks
  Cc: SeongJae Park, Andrew Morton, linux-mm, linux-kernel

From: SeongJae Park <sjpark@amazon.de>

Hello Valdis,

On Thu, 12 Aug 2021 05:21:57 -0400 "Valdis =?utf-8?Q?Kl=c4=93tnieks?=" <valdis.kletnieks@vt.edu> wrote:

> In this commit:
> 
> commit fedc37448fb1be5d03e420ca7791d4286893d5ec
> Author: SeongJae Park <sjpark@amazon.de>
> Date:   Tue Aug 10 16:55:51 2021 +1000
> 
>     mm/idle_page_tracking: make PG_idle reusable
> 
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 504336de9a1e..d0b85dc12429 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -739,10 +739,18 @@ config DEFERRED_STRUCT_PAGE_INIT
>           lifetime of the system until these kthreads finish the
>           initialisation.
> 
> +config PAGE_IDLE_FLAG
> +       bool "Add PG_idle and PG_young flags"
> +       help
> +         This feature adds PG_idle and PG_young flags in 'struct page'.  PTE
> +         Accessed bit writers can set the state of the bit in the flags to let
> +         other PTE Accessed bit readers don't disturbed.
> 
> This needs to be converted to proper, or at least comprehensible, English....

Thank you for the comment.

How about below?

--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -743,9 +743,9 @@ config PAGE_IDLE_FLAG
        bool "Add PG_idle and PG_young flags"
        select PAGE_EXTENSION if !64BIT
        help
-         This feature adds PG_idle and PG_young flags in 'struct page'.  PTE
-         Accessed bit writers can set the state of the bit in the flags to let
-         other PTE Accessed bit readers don't disturbed.
+         This feature adds 'PG_idle' and 'PG_young' flags in 'struct page'.
+         PTE Accessed bit writers can save the state of the bit in the flags
+         to let other PTE Accessed bit readers don't get disturbed.


Thanks,
SeongJae Park

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2021-08-12  9:42 ` SeongJae Park
@ 2021-08-12 20:19   ` Andrew Morton
  2021-08-13  8:14     ` Re: SeongJae Park
  0 siblings, 1 reply; 414+ messages in thread
From: Andrew Morton @ 2021-08-12 20:19 UTC (permalink / raw)
  To: SeongJae Park
  Cc:  Valdis Klētnieks , SeongJae Park, linux-mm, linux-kernel

On Thu, 12 Aug 2021 09:42:40 +0000 SeongJae Park <sj38.park@gmail.com> wrote:

> > +config PAGE_IDLE_FLAG
> > +       bool "Add PG_idle and PG_young flags"
> > +       help
> > +         This feature adds PG_idle and PG_young flags in 'struct page'.  PTE
> > +         Accessed bit writers can set the state of the bit in the flags to let
> > +         other PTE Accessed bit readers don't disturbed.
> > 
> > This needs to be converted to proper, or at least comprehensible, English....
> 
> Thank you for the comment.
> 
> How about below?
> 
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -743,9 +743,9 @@ config PAGE_IDLE_FLAG
>         bool "Add PG_idle and PG_young flags"
>         select PAGE_EXTENSION if !64BIT
>         help
> -         This feature adds PG_idle and PG_young flags in 'struct page'.  PTE
> -         Accessed bit writers can set the state of the bit in the flags to let
> -         other PTE Accessed bit readers don't disturbed.
> +         This feature adds 'PG_idle' and 'PG_young' flags in 'struct page'.
> +         PTE Accessed bit writers can save the state of the bit in the flags
> +         to let other PTE Accessed bit readers don't get disturbed.

How about this?

--- a/mm/Kconfig~mm-idle_page_tracking-make-pg_idle-reusable-fix-fix
+++ a/mm/Kconfig
@@ -743,9 +743,9 @@ config PAGE_IDLE_FLAG
 	bool "Add PG_idle and PG_young flags"
 	select PAGE_EXTENSION if !64BIT
 	help
-	  This feature adds PG_idle and PG_young flags in 'struct page'.  PTE
-	  Accessed bit writers can set the state of the bit in the flags to let
-	  other PTE Accessed bit readers don't disturbed.
+	  This adds PG_idle and PG_young flags to 'struct page'.  PTE Accessed
+	  bit writers can set the state of the bit in the flags so that PTE
+	  Accessed bit readers may avoid disturbance.
 
 config IDLE_PAGE_TRACKING
 	bool "Enable idle page tracking"

Also, is there any way in which we can avoid presenting this option to
the user?  Because most users will have real trouble understanding what
this thing is for.  Can we simply select it when needed, as dictated by
other, higher-level config options?


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2021-08-12 20:19   ` Re: Andrew Morton
@ 2021-08-13  8:14     ` SeongJae Park
  0 siblings, 0 replies; 414+ messages in thread
From: SeongJae Park @ 2021-08-13  8:14 UTC (permalink / raw)
  To: Andrew Morton
  Cc: SeongJae Park,  Valdis Klētnieks ,
	SeongJae Park, linux-mm, linux-kernel

From: SeongJae Park <sjpark@amazon.de>

On Thu, 12 Aug 2021 13:19:21 -0700 Andrew Morton <akpm@linux-foundation.org> wrote:

> On Thu, 12 Aug 2021 09:42:40 +0000 SeongJae Park <sj38.park@gmail.com> wrote:
> 
> > > +config PAGE_IDLE_FLAG
> > > +       bool "Add PG_idle and PG_young flags"
> > > +       help
> > > +         This feature adds PG_idle and PG_young flags in 'struct page'.  PTE
> > > +         Accessed bit writers can set the state of the bit in the flags to let
> > > +         other PTE Accessed bit readers don't disturbed.
> > > 
> > > This needs to be converted to proper, or at least comprehensible, English....
> > 
> > Thank you for the comment.
> > 
> > How about below?
> > 
> > --- a/mm/Kconfig
> > +++ b/mm/Kconfig
> > @@ -743,9 +743,9 @@ config PAGE_IDLE_FLAG
> >         bool "Add PG_idle and PG_young flags"
> >         select PAGE_EXTENSION if !64BIT
> >         help
> > -         This feature adds PG_idle and PG_young flags in 'struct page'.  PTE
> > -         Accessed bit writers can set the state of the bit in the flags to let
> > -         other PTE Accessed bit readers don't disturbed.
> > +         This feature adds 'PG_idle' and 'PG_young' flags in 'struct page'.
> > +         PTE Accessed bit writers can save the state of the bit in the flags
> > +         to let other PTE Accessed bit readers don't get disturbed.
> 
> How about this?
> 
> --- a/mm/Kconfig~mm-idle_page_tracking-make-pg_idle-reusable-fix-fix
> +++ a/mm/Kconfig
> @@ -743,9 +743,9 @@ config PAGE_IDLE_FLAG
>  	bool "Add PG_idle and PG_young flags"
>  	select PAGE_EXTENSION if !64BIT
>  	help
> -	  This feature adds PG_idle and PG_young flags in 'struct page'.  PTE
> -	  Accessed bit writers can set the state of the bit in the flags to let
> -	  other PTE Accessed bit readers don't disturbed.
> +	  This adds PG_idle and PG_young flags to 'struct page'.  PTE Accessed
> +	  bit writers can set the state of the bit in the flags so that PTE
> +	  Accessed bit readers may avoid disturbance.
>  
>  config IDLE_PAGE_TRACKING
>  	bool "Enable idle page tracking"

So good, thank you!

> 
> Also, is there any way in which we can avoid presenting this option to
> the user?  Because most users will have real trouble understanding what
> this thing is for.  Can we simply select it when needed, as dictated by
> other, higher-level config options?

I believe this is the right way to go!  I sent a patch for removing the prompt
of this option:
https://lore.kernel.org/linux-mm/20210813081238.34705-1-sj38.park@gmail.com/


Thanks,
SeongJae Park

^ permalink raw reply	[flat|nested] 414+ messages in thread

* [PATCH v9] iomap: Support file tail packing
@ 2021-07-27  2:59 Gao Xiang
  2021-07-27 15:10 ` Darrick J. Wong
  0 siblings, 1 reply; 414+ messages in thread
From: Gao Xiang @ 2021-07-27  2:59 UTC (permalink / raw)
  To: linux-erofs, linux-fsdevel
  Cc: LKML, Huang Jianan, Joseph Qi, Gao Xiang, Darrick J . Wong,
	Christoph Hellwig, Matthew Wilcox, Andreas Gruenbacher

The existing inline data support only works for cases where the entire
file is stored as inline data.  For larger files, EROFS stores the
initial blocks separately and then can pack a small tail adjacent to the
inode.  Generalise inline data to allow for tail packing.  Tails may not
cross a page boundary in memory.

We currently have no filesystems that support tail packing and writing,
so that case is currently disabled (see iomap_write_begin_inline).

Cc: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
v8: https://lore.kernel.org/r/20210726145734.214295-1-hsiangkao@linux.alibaba.com
changes since v8:
 - update the subject to 'iomap: Support file tail packing' as there
   are clearly a number of ways to make the inline data support more
   flexible (Matthew);

 - add one extra safety check (Darrick):
	if (WARN_ON_ONCE(size > iomap->length))
		return -EIO;

 fs/iomap/buffered-io.c | 42 ++++++++++++++++++++++++++++++------------
 fs/iomap/direct-io.c   | 10 ++++++----
 include/linux/iomap.h  | 18 ++++++++++++++++++
 3 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 87ccb3438bec..f429b9d87dbe 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -205,25 +205,32 @@ struct iomap_readpage_ctx {
 	struct readahead_control *rac;
 };
 
-static void
-iomap_read_inline_data(struct inode *inode, struct page *page,
+static int iomap_read_inline_data(struct inode *inode, struct page *page,
 		struct iomap *iomap)
 {
-	size_t size = i_size_read(inode);
+	size_t size = i_size_read(inode) - iomap->offset;
 	void *addr;
 
 	if (PageUptodate(page))
-		return;
+		return 0;
 
-	BUG_ON(page_has_private(page));
-	BUG_ON(page->index);
-	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	/* inline data must start page aligned in the file */
+	if (WARN_ON_ONCE(offset_in_page(iomap->offset)))
+		return -EIO;
+	if (WARN_ON_ONCE(size > PAGE_SIZE -
+			 offset_in_page(iomap->inline_data)))
+		return -EIO;
+	if (WARN_ON_ONCE(size > iomap->length))
+		return -EIO;
+	if (WARN_ON_ONCE(page_has_private(page)))
+		return -EIO;
 
 	addr = kmap_atomic(page);
 	memcpy(addr, iomap->inline_data, size);
 	memset(addr + size, 0, PAGE_SIZE - size);
 	kunmap_atomic(addr);
 	SetPageUptodate(page);
+	return 0;
 }
 
 static inline bool iomap_block_needs_zeroing(struct inode *inode,
@@ -247,8 +254,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	sector_t sector;
 
 	if (iomap->type == IOMAP_INLINE) {
-		WARN_ON_ONCE(pos);
-		iomap_read_inline_data(inode, page, iomap);
+		int ret = iomap_read_inline_data(inode, page, iomap);
+
+		if (ret)
+			return ret;
 		return PAGE_SIZE;
 	}
 
@@ -589,6 +598,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
 	return 0;
 }
 
+static int iomap_write_begin_inline(struct inode *inode,
+		struct page *page, struct iomap *srcmap)
+{
+	/* needs more work for the tailpacking case, disable for now */
+	if (WARN_ON_ONCE(srcmap->offset != 0))
+		return -EIO;
+	return iomap_read_inline_data(inode, page, srcmap);
+}
+
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
@@ -618,7 +636,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	}
 
 	if (srcmap->type == IOMAP_INLINE)
-		iomap_read_inline_data(inode, page, srcmap);
+		status = iomap_write_begin_inline(inode, page, srcmap);
 	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
 		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
 	else
@@ -671,11 +689,11 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
 	void *addr;
 
 	WARN_ON_ONCE(!PageUptodate(page));
-	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	BUG_ON(!iomap_inline_data_valid(iomap));
 
 	flush_dcache_page(page);
 	addr = kmap_atomic(page);
-	memcpy(iomap->inline_data + pos, addr + pos, copied);
+	memcpy(iomap_inline_data(iomap, pos), addr + pos, copied);
 	kunmap_atomic(addr);
 
 	mark_inode_dirty(inode);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 9398b8c31323..41ccbfc9dc82 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -378,23 +378,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
 		struct iomap_dio *dio, struct iomap *iomap)
 {
 	struct iov_iter *iter = dio->submit.iter;
+	void *inline_data = iomap_inline_data(iomap, pos);
 	size_t copied;
 
-	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap)))
+		return -EIO;
 
 	if (dio->flags & IOMAP_DIO_WRITE) {
 		loff_t size = inode->i_size;
 
 		if (pos > size)
-			memset(iomap->inline_data + size, 0, pos - size);
-		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
+			memset(iomap_inline_data(iomap, size), 0, pos - size);
+		copied = copy_from_iter(inline_data, length, iter);
 		if (copied) {
 			if (pos + copied > size)
 				i_size_write(inode, pos + copied);
 			mark_inode_dirty(inode);
 		}
 	} else {
-		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
+		copied = copy_to_iter(inline_data, length, iter);
 	}
 	dio->size += copied;
 	return copied;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 479c1da3e221..b8ec145b2975 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -97,6 +97,24 @@ iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+/*
+ * Returns the inline data pointer for logical offset @pos.
+ */
+static inline void *iomap_inline_data(struct iomap *iomap, loff_t pos)
+{
+	return iomap->inline_data + pos - iomap->offset;
+}
+
+/*
+ * Check if the mapping's length is within the valid range for inline data.
+ * This is used to guard against accessing data beyond the page inline_data
+ * points at.
+ */
+static inline bool iomap_inline_data_valid(struct iomap *iomap)
+{
+	return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
+}
+
 /*
  * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
  * and page_done will be called for each page written to.  This only applies to
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* (no subject)
  2021-07-27  2:59 [PATCH v9] iomap: Support file tail packing Gao Xiang
@ 2021-07-27 15:10 ` Darrick J. Wong
  2021-07-27 15:23   ` Andreas Grünbacher
  2021-07-27 15:30   ` Re: Gao Xiang
  0 siblings, 2 replies; 414+ messages in thread
From: Darrick J. Wong @ 2021-07-27 15:10 UTC (permalink / raw)
  To: Gao Xiang
  Cc: linux-erofs, linux-fsdevel, LKML, Huang Jianan, Joseph Qi,
	Christoph Hellwig, Matthew Wilcox, Andreas Gruenbacher

I'll change the subject to:

iomap: support reading inline data from non-zero pos

The existing inline data support only works for cases where the entire
file is stored as inline data.  For larger files, EROFS stores the
initial blocks separately and the remainder of the file ("file tail")
adjacent to the inode.  Generalise inline data to allow reading the
inline file tail.  Tails may not cross a page boundary in memory.

We currently have no filesystems that support tails and writing,
so that case is currently disabled (see iomap_write_begin_inline).

If that's ok with everyone,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D


On Tue, Jul 27, 2021 at 10:59:56AM +0800, Gao Xiang wrote:
> The existing inline data support only works for cases where the entire
> file is stored as inline data.  For larger files, EROFS stores the
> initial blocks separately and then can pack a small tail adjacent to the
> inode.  Generalise inline data to allow for tail packing.  Tails may not
> cross a page boundary in memory.
> 
> We currently have no filesystems that support tail packing and writing,
> so that case is currently disabled (see iomap_write_begin_inline).
> 
> Cc: Darrick J. Wong <djwong@kernel.org>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> ---
> v8: https://lore.kernel.org/r/20210726145734.214295-1-hsiangkao@linux.alibaba.com
> changes since v8:
>  - update the subject to 'iomap: Support file tail packing' as there
>    are clearly a number of ways to make the inline data support more
>    flexible (Matthew);
> 
>  - add one extra safety check (Darrick):
> 	if (WARN_ON_ONCE(size > iomap->length))
> 		return -EIO;
> 
>  fs/iomap/buffered-io.c | 42 ++++++++++++++++++++++++++++++------------
>  fs/iomap/direct-io.c   | 10 ++++++----
>  include/linux/iomap.h  | 18 ++++++++++++++++++
>  3 files changed, 54 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 87ccb3438bec..f429b9d87dbe 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -205,25 +205,32 @@ struct iomap_readpage_ctx {
>  	struct readahead_control *rac;
>  };
>  
> -static void
> -iomap_read_inline_data(struct inode *inode, struct page *page,
> +static int iomap_read_inline_data(struct inode *inode, struct page *page,
>  		struct iomap *iomap)
>  {
> -	size_t size = i_size_read(inode);
> +	size_t size = i_size_read(inode) - iomap->offset;
>  	void *addr;
>  
>  	if (PageUptodate(page))
> -		return;
> +		return 0;
>  
> -	BUG_ON(page_has_private(page));
> -	BUG_ON(page->index);
> -	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +	/* inline data must start page aligned in the file */
> +	if (WARN_ON_ONCE(offset_in_page(iomap->offset)))
> +		return -EIO;
> +	if (WARN_ON_ONCE(size > PAGE_SIZE -
> +			 offset_in_page(iomap->inline_data)))
> +		return -EIO;
> +	if (WARN_ON_ONCE(size > iomap->length))
> +		return -EIO;
> +	if (WARN_ON_ONCE(page_has_private(page)))
> +		return -EIO;
>  
>  	addr = kmap_atomic(page);
>  	memcpy(addr, iomap->inline_data, size);
>  	memset(addr + size, 0, PAGE_SIZE - size);
>  	kunmap_atomic(addr);
>  	SetPageUptodate(page);
> +	return 0;
>  }
>  
>  static inline bool iomap_block_needs_zeroing(struct inode *inode,
> @@ -247,8 +254,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  	sector_t sector;
>  
>  	if (iomap->type == IOMAP_INLINE) {
> -		WARN_ON_ONCE(pos);
> -		iomap_read_inline_data(inode, page, iomap);
> +		int ret = iomap_read_inline_data(inode, page, iomap);
> +
> +		if (ret)
> +			return ret;
>  		return PAGE_SIZE;
>  	}
>  
> @@ -589,6 +598,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
>  	return 0;
>  }
>  
> +static int iomap_write_begin_inline(struct inode *inode,
> +		struct page *page, struct iomap *srcmap)
> +{
> +	/* needs more work for the tailpacking case, disable for now */
> +	if (WARN_ON_ONCE(srcmap->offset != 0))
> +		return -EIO;
> +	return iomap_read_inline_data(inode, page, srcmap);
> +}
> +
>  static int
>  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
> @@ -618,7 +636,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  	}
>  
>  	if (srcmap->type == IOMAP_INLINE)
> -		iomap_read_inline_data(inode, page, srcmap);
> +		status = iomap_write_begin_inline(inode, page, srcmap);
>  	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
>  		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
>  	else
> @@ -671,11 +689,11 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
>  	void *addr;
>  
>  	WARN_ON_ONCE(!PageUptodate(page));
> -	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +	BUG_ON(!iomap_inline_data_valid(iomap));
>  
>  	flush_dcache_page(page);
>  	addr = kmap_atomic(page);
> -	memcpy(iomap->inline_data + pos, addr + pos, copied);
> +	memcpy(iomap_inline_data(iomap, pos), addr + pos, copied);
>  	kunmap_atomic(addr);
>  
>  	mark_inode_dirty(inode);
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 9398b8c31323..41ccbfc9dc82 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -378,23 +378,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
>  		struct iomap_dio *dio, struct iomap *iomap)
>  {
>  	struct iov_iter *iter = dio->submit.iter;
> +	void *inline_data = iomap_inline_data(iomap, pos);
>  	size_t copied;
>  
> -	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +	if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap)))
> +		return -EIO;
>  
>  	if (dio->flags & IOMAP_DIO_WRITE) {
>  		loff_t size = inode->i_size;
>  
>  		if (pos > size)
> -			memset(iomap->inline_data + size, 0, pos - size);
> -		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
> +			memset(iomap_inline_data(iomap, size), 0, pos - size);
> +		copied = copy_from_iter(inline_data, length, iter);
>  		if (copied) {
>  			if (pos + copied > size)
>  				i_size_write(inode, pos + copied);
>  			mark_inode_dirty(inode);
>  		}
>  	} else {
> -		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
> +		copied = copy_to_iter(inline_data, length, iter);
>  	}
>  	dio->size += copied;
>  	return copied;
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 479c1da3e221..b8ec145b2975 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -97,6 +97,24 @@ iomap_sector(struct iomap *iomap, loff_t pos)
>  	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
>  }
>  
> +/*
> + * Returns the inline data pointer for logical offset @pos.
> + */
> +static inline void *iomap_inline_data(struct iomap *iomap, loff_t pos)
> +{
> +	return iomap->inline_data + pos - iomap->offset;
> +}
> +
> +/*
> + * Check if the mapping's length is within the valid range for inline data.
> + * This is used to guard against accessing data beyond the page inline_data
> + * points at.
> + */
> +static inline bool iomap_inline_data_valid(struct iomap *iomap)
> +{
> +	return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
> +}
> +
>  /*
>   * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
>   * and page_done will be called for each page written to.  This only applies to
> -- 
> 2.24.4
> 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2021-07-27 15:10 ` Darrick J. Wong
@ 2021-07-27 15:23   ` Andreas Grünbacher
  2021-07-27 15:30   ` Re: Gao Xiang
  1 sibling, 0 replies; 414+ messages in thread
From: Andreas Grünbacher @ 2021-07-27 15:23 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Gao Xiang, linux-erofs, Linux FS-devel Mailing List, LKML,
	Huang Jianan, Joseph Qi, Christoph Hellwig, Matthew Wilcox,
	Andreas Gruenbacher

Am Di., 27. Juli 2021 um 17:11 Uhr schrieb Darrick J. Wong <djwong@kernel.org>:
> I'll change the subject to:
>
> iomap: support reading inline data from non-zero pos

That surely works for me.

Thanks,
Andreas

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2021-07-27 15:10 ` Darrick J. Wong
  2021-07-27 15:23   ` Andreas Grünbacher
@ 2021-07-27 15:30   ` Gao Xiang
  1 sibling, 0 replies; 414+ messages in thread
From: Gao Xiang @ 2021-07-27 15:30 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: linux-erofs, linux-fsdevel, LKML, Huang Jianan, Joseph Qi,
	Christoph Hellwig, Matthew Wilcox, Andreas Gruenbacher

On Tue, Jul 27, 2021 at 08:10:51AM -0700, Darrick J. Wong wrote:
> I'll change the subject to:
> 
> iomap: support reading inline data from non-zero pos

I'm fine with this too. Many thanks for updating!

Thanks,
Gao Xiang


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2021-06-06 19:19 Davidlohr Bueso
  2021-06-07 16:02 ` André Almeida
  0 siblings, 1 reply; 414+ messages in thread
From: Davidlohr Bueso @ 2021-06-06 19:19 UTC (permalink / raw)
  To: André Almeida
  Cc: Thomas Gleixner, Ingo Molnar, Peter Zijlstra, Darren Hart,
	linux-kernel, Steven Rostedt, Sebastian Andrzej Siewior, kernel,
	krisman, pgriffais, z.figura12, joel, malteskarupke, linux-api,
	fweimer, libc-alpha, linux-kselftest, shuah, acme, corbet,
	Peter Oskolkov, Andrey Semashev, mtk.manpages

Bcc:
Subject: Re: [PATCH v4 07/15] docs: locking: futex2: Add documentation
Reply-To:
In-Reply-To: <20210603195924.361327-8-andrealmeid@collabora.com>

On Thu, 03 Jun 2021, André Almeida wrote:

>Add a new documentation file specifying both userspace API and internal
>implementation details of futex2 syscalls.

I think equally important would be to provide a manpage for each new
syscall you are introducing, and keep mtk in the loop as in the past he
extensively documented and improved futex manpages, and overall has a
lot of experience with dealing with kernel interfaces.

Thanks,
Davidlohr

>
>Signed-off-by: André Almeida <andrealmeid@collabora.com>
>---
> Documentation/locking/futex2.rst | 198 +++++++++++++++++++++++++++++++
> Documentation/locking/index.rst  |   1 +
> 2 files changed, 199 insertions(+)
> create mode 100644 Documentation/locking/futex2.rst
>
>diff --git a/Documentation/locking/futex2.rst b/Documentation/locking/futex2.rst
>new file mode 100644
>index 000000000000..2f74d7c97a55
>--- /dev/null
>+++ b/Documentation/locking/futex2.rst
>@@ -0,0 +1,198 @@
>+.. SPDX-License-Identifier: GPL-2.0
>+
>+======
>+futex2
>+======
>+
>+:Author: André Almeida <andrealmeid@collabora.com>
>+
>+futex, or fast user mutex, is a set of syscalls to allow userspace to create
>+performant synchronization mechanisms, such as mutexes, semaphores and
>+condition variables in userspace. C standard libraries, like glibc, use it
>+as a means to implement more high level interfaces like pthreads.
>+
>+The interface
>+=============
>+
>+uAPI functions
>+--------------
>+
>+.. kernel-doc:: kernel/futex2.c
>+   :identifiers: sys_futex_wait sys_futex_wake sys_futex_waitv sys_futex_requeue
>+
>+uAPI structures
>+---------------
>+
>+.. kernel-doc:: include/uapi/linux/futex.h
>+
>+The ``flag`` argument
>+---------------------
>+
>+The flag is used to specify the size of the futex word
>+(FUTEX_[8, 16, 32, 64]). It's mandatory to define one, since there's no
>+default size.
>+
>+By default, the timeout uses a monotonic clock, but can be used as a realtime
>+one by using the FUTEX_REALTIME_CLOCK flag.
>+
>+By default, futexes are of the private type, that means that this user address
>+will be accessed by threads that share the same memory region. This allows for
>+some internal optimizations, so they are faster. However, if the address needs
>+to be shared with different processes (like using ``mmap()`` or ``shm()``), they
>+need to be defined as shared and the flag FUTEX_SHARED_FLAG is used to set that.
>+
>+By default, the operation has no NUMA-awareness, meaning that the user can't
>+choose the memory node where the kernel side futex data will be stored. The
>+user can choose the node where it wants to operate by setting the
>+FUTEX_NUMA_FLAG and using the following structure (where X can be 8, 16, 32 or
>+64)::
>+
>+ struct futexX_numa {
>+         __uX value;
>+         __sX hint;
>+ };
>+
>+This structure should be passed at the ``void *uaddr`` of futex functions. The
>+address of the structure will be used to be waited on/waken on, and the
>+``value`` will be compared to ``val`` as usual. The ``hint`` member is used to
>+define which node the futex will use. When waiting, the futex will be
>+registered on a kernel-side table stored on that node; when waking, the futex
>+will be searched for on that given table. That means that there's no redundancy
>+between tables, and the wrong ``hint`` value will lead to undesired behavior.
>+Userspace is responsible for dealing with node migrations issues that may
>+occur. ``hint`` can range from [0, MAX_NUMA_NODES), for specifying a node, or
>+-1, to use the same node the current process is using.
>+
>+When not using FUTEX_NUMA_FLAG on a NUMA system, the futex will be stored on a
>+global table allocated on the first node.
>+
>+The ``timo`` argument
>+---------------------
>+
>+As per the Y2038 work done in the kernel, new interfaces shouldn't add timeout
>+options known to be buggy. Given that, ``timo`` should be a 64-bit timeout at
>+all platforms, using an absolute timeout value.
>+
>+Implementation
>+==============
>+
>+The internal implementation follows a similar design to the original futex.
>+Given that we want to replicate the same external behavior of current futex,
>+this should be somewhat expected.
>+
>+Waiting
>+-------
>+
>+For the wait operations, they are all treated as if you want to wait on N
>+futexes, so the path for futex_wait and futex_waitv is basically the same.
>+For both syscalls, the first step is to prepare an internal list for the list
>+of futexes to wait for (using struct futexv_head). For futex_wait() calls, this
>+list will have a single object.
>+
>+We have a hash table, where waiters register themselves before sleeping. Then
>+the wake function checks this table looking for waiters at uaddr.  The hash
>+bucket to be used is determined by a struct futex_key, that stores information
>+to uniquely identify an address from a given process. Given the huge address
>+space, there'll be hash collisions, so we store information to be later used on
>+collision treatment.
>+
>+First, for every futex we want to wait on, we check if (``*uaddr == val``).
>+This check is done holding the bucket lock, so we are correctly serialized with
>+any futex_wake() calls. If any waiter fails the check above, we dequeue all
>+futexes. The check (``*uaddr == val``) can fail for two reasons:
>+
>+- The values are different, and we return -EAGAIN. However, if while
>+  dequeueing we found that some futexes were awakened, we prioritize this
>+  and return success.
>+
>+- When trying to access the user address, we do so with page faults
>+  disabled because we are holding a bucket's spin lock (and can't sleep
>+  while holding a spin lock). If there's an error, it might be a page
>+  fault, or an invalid address. We release the lock, dequeue everyone
>+  (because it's illegal to sleep while there are futexes enqueued, we
>+  could lose wakeups) and try again with page fault enabled. If we
>+  succeed, this means that the address is valid, but we need to do
>+  all the work again. For serialization reasons, we need to have the
>+  spin lock when getting the user value. Additionally, for shared
>+  futexes, we also need to recalculate the hash, since the underlying
>+  mapping mechanisms could have changed when dealing with page fault.
>+  If, even with page fault enabled, we can't access the address, it
>+  means it's an invalid user address, and we return -EFAULT. For this
>+  case, we prioritize the error, even if some futexes were awaken.
>+
>+If the check is OK, they are enqueued on a linked list in our bucket, and
>+proceed to the next one. If all waiters succeed, we put the thread to sleep
>+until a futex_wake() call, timeout expires or we get a signal. After waking up,
>+we dequeue everyone, and check if some futex was awakened. This dequeue is done
>+by iteratively walking at each element of struct futex_head list.
>+
>+All enqueuing/dequeuing operations requires to hold the bucket lock, to avoid
>+racing while modifying the list.
>+
>+Waking
>+------
>+
>+We get the bucket that's storing the waiters at uaddr, and wake the required
>+number of waiters, checking for hash collision.
>+
>+There's an optimization that makes futex_wake() not take the bucket lock if
>+there's no one to be woken on that bucket. It checks an atomic counter that each
>+bucket has, if it says 0, then the syscall exits. In order for this to work, the
>+waiter thread increases it before taking the lock, so the wake thread will
>+correctly see that there's someone waiting and will continue the path to take
>+the bucket lock. To get the correct serialization, the waiter issues a memory
>+barrier after increasing the bucket counter and the waker issues a memory
>+barrier before checking it.
>+
>+Requeuing
>+---------
>+
>+The requeue path first checks for each struct futex_requeue and their flags.
>+Then, it will compare the expected value with the one at uaddr1::uaddr.
>+Following the same serialization explained at Waking_, we increase the atomic
>+counter for the bucket of uaddr2 before taking the lock. We need to have both
>+buckets locks at same time so we don't race with other futex operation. To
>+ensure the locks are taken in the same order for all threads (and thus avoiding
>+deadlocks), every requeue operation takes the "smaller" bucket first, when
>+comparing both addresses.
>+
>+If the compare with user value succeeds, we proceed by waking ``nr_wake``
>+futexes, and then requeuing ``nr_requeue`` from bucket of uaddr1 to the uaddr2.
>+This consists in a simple list deletion/addition and replacing the old futex key
>+with the new one.
>+
>+Futex keys
>+----------
>+
>+There are two types of futexes: private and shared ones. The private are futexes
>+meant to be used by threads that share the same memory space, are easier to be
>+uniquely identified and thus can have some performance optimization. The
>+elements for identifying one are: the start address of the page where the
>+address is, the address offset within the page and the current->mm pointer.
>+
>+Now, for uniquely identifying a shared futex:
>+
>+- If the page containing the user address is an anonymous page, we can
>+  just use the same data used for private futexes (the start address of
>+  the page, the address offset within the page and the current->mm
>+  pointer); that will be enough for uniquely identifying such futex. We
>+  also set one bit at the key to differentiate if a private futex is
>+  used on the same address (mixing shared and private calls does not
>+  work).
>+
>+- If the page is file-backed, current->mm maybe isn't the same one for
>+  every user of this futex, so we need to use other data: the
>+  page->index, a UUID for the struct inode and the offset within the
>+  page.
>+
>+Note that members of futex_key don't have any particular meaning after they
>+are part of the struct - they are just bytes to identify a futex.  Given that,
>+we don't need to use a particular name or type that matches the original data,
>+we only need to care about the bitsize of each component and make both private
>+and shared fit in the same memory space.
>+
>+Source code documentation
>+=========================
>+
>+.. kernel-doc:: kernel/futex2.c
>+   :no-identifiers: sys_futex_wait sys_futex_wake sys_futex_waitv sys_futex_requeue
>diff --git a/Documentation/locking/index.rst b/Documentation/locking/index.rst
>index 7003bd5aeff4..9bf03c7fa1ec 100644
>--- a/Documentation/locking/index.rst
>+++ b/Documentation/locking/index.rst
>@@ -24,6 +24,7 @@ locking
>     percpu-rw-semaphore
>     robust-futexes
>     robust-futex-ABI
>+    futex2
>
> .. only::  subproject and html
>
>--
>2.31.1
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2021-06-06 19:19 Davidlohr Bueso
@ 2021-06-07 16:02 ` André Almeida
  0 siblings, 0 replies; 414+ messages in thread
From: André Almeida @ 2021-06-07 16:02 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: Thomas Gleixner, Ingo Molnar, Peter Zijlstra, Darren Hart,
	linux-kernel, Steven Rostedt, Sebastian Andrzej Siewior, kernel,
	krisman, pgriffais, z.figura12, joel, malteskarupke, linux-api,
	fweimer, libc-alpha, linux-kselftest, shuah, acme, corbet,
	Peter Oskolkov, Andrey Semashev, mtk.manpages

Às 16:19 de 06/06/21, Davidlohr Bueso escreveu:
> Bcc:
> Subject: Re: [PATCH v4 07/15] docs: locking: futex2: Add documentation
> Reply-To:
> In-Reply-To: <20210603195924.361327-8-andrealmeid@collabora.com>
> 
> On Thu, 03 Jun 2021, André Almeida wrote:
> 
>> Add a new documentation file specifying both userspace API and internal
>> implementation details of futex2 syscalls.
> 
> I think equally important would be to provide a manpage for each new
> syscall you are introducing, and keep mtk in the loop as in the past he
> extensively documented and improved futex manpages, and overall has a
> lot of experience with dealing with kernel interfaces.

Right, I'll add the man pages in a future version and make sure to have
mtk in the loop, thanks for the tip.

> 
> Thanks,
> Davidlohr
> 
>>
>> Signed-off-by: André Almeida <andrealmeid@collabora.com>
>> ---
>> Documentation/locking/futex2.rst | 198 +++++++++++++++++++++++++++++++
>> Documentation/locking/index.rst  |   1 +
>> 2 files changed, 199 insertions(+)
>> create mode 100644 Documentation/locking/futex2.rst
>>
>> diff --git a/Documentation/locking/futex2.rst
>> b/Documentation/locking/futex2.rst
>> new file mode 100644
>> index 000000000000..2f74d7c97a55
>> --- /dev/null
>> +++ b/Documentation/locking/futex2.rst
>> @@ -0,0 +1,198 @@
>> +.. SPDX-License-Identifier: GPL-2.0
>> +
>> +======
>> +futex2
>> +======
>> +
>> +:Author: André Almeida <andrealmeid@collabora.com>
>> +
>> +futex, or fast user mutex, is a set of syscalls to allow userspace to
>> create
>> +performant synchronization mechanisms, such as mutexes, semaphores and
>> +conditional variables in userspace. C standard libraries, like glibc,
>> uses it
>> +as a means to implement more high level interfaces like pthreads.
>> +
>> +The interface
>> +=============
>> +
>> +uAPI functions
>> +--------------
>> +
>> +.. kernel-doc:: kernel/futex2.c
>> +   :identifiers: sys_futex_wait sys_futex_wake sys_futex_waitv
>> sys_futex_requeue
>> +
>> +uAPI structures
>> +---------------
>> +
>> +.. kernel-doc:: include/uapi/linux/futex.h
>> +
>> +The ``flag`` argument
>> +---------------------
>> +
>> +The flag is used to specify the size of the futex word
>> +(FUTEX_[8, 16, 32, 64]). It's mandatory to define one, since there's no
>> +default size.
>> +
>> +By default, the timeout uses a monotonic clock, but can be used as a
>> realtime
>> +one by using the FUTEX_REALTIME_CLOCK flag.
>> +
>> +By default, futexes are of the private type, that means that this
>> user address
>> +will be accessed by threads that share the same memory region. This
>> allows for
>> +some internal optimizations, so they are faster. However, if the
>> address needs
>> +to be shared with different processes (like using ``mmap()`` or
>> ``shm()``), they
>> +need to be defined as shared and the flag FUTEX_SHARED_FLAG is used
>> to set that.
>> +
>> +By default, the operation has no NUMA-awareness, meaning that the
>> user can't
>> +choose the memory node where the kernel side futex data will be
>> stored. The
>> +user can choose the node where it wants to operate by setting the
>> +FUTEX_NUMA_FLAG and using the following structure (where X can be 8,
>> 16, 32 or
>> +64)::
>> +
>> + struct futexX_numa {
>> +         __uX value;
>> +         __sX hint;
>> + };
>> +
>> +This structure should be passed at the ``void *uaddr`` of futex
>> functions. The
>> +address of the structure will be used to be waited on/waken on, and the
>> +``value`` will be compared to ``val`` as usual. The ``hint`` member
>> is used to
>> +define which node the futex will use. When waiting, the futex will be
>> +registered on a kernel-side table stored on that node; when waking,
>> the futex
>> +will be searched for on that given table. That means that there's no
>> redundancy
>> +between tables, and the wrong ``hint`` value will lead to undesired
>> behavior.
>> +Userspace is responsible for dealing with node migrations issues that
>> may
>> +occur. ``hint`` can range from [0, MAX_NUMA_NODES), for specifying a
>> node, or
>> +-1, to use the same node the current process is using.
>> +
>> +When not using FUTEX_NUMA_FLAG on a NUMA system, the futex will be
>> stored on a
>> +global table on allocated on the first node.
>> +
>> +The ``timo`` argument
>> +---------------------
>> +
>> +As per the Y2038 work done in the kernel, new interfaces shouldn't
>> add timeout
>> +options known to be buggy. Given that, ``timo`` should be a 64-bit
>> timeout at
>> +all platforms, using an absolute timeout value.
>> +
>> +Implementation
>> +==============
>> +
>> +The internal implementation follows a similar design to the original
>> futex.
>> +Given that we want to replicate the same external behavior of current
>> futex,
>> +this should be somewhat expected.
>> +
>> +Waiting
>> +-------
>> +
>> +For the wait operations, they are all treated as if you want to wait
>> on N
>> +futexes, so the path for futex_wait and futex_waitv is the basically
>> the same.
>> +For both syscalls, the first step is to prepare an internal list for
>> the list
>> +of futexes to wait for (using struct futexv_head). For futex_wait()
>> calls, this
>> +list will have a single object.
>> +
>> +We have a hash table, where waiters register themselves before
>> sleeping. Then
>> +the wake function checks this table looking for waiters at uaddr. 
>> The hash
>> +bucket to be used is determined by a struct futex_key, that stores
>> information
>> +to uniquely identify an address from a given process. Given the huge
>> address
>> +space, there'll be hash collisions, so we store information to be
>> later used on
>> +collision treatment.
>> +
>> +First, for every futex we want to wait on, we check if (``*uaddr ==
>> val``).
>> +This check is done holding the bucket lock, so we are correctly
>> serialized with
>> +any futex_wake() calls. If any waiter fails the check above, we
>> dequeue all
>> +futexes. The check (``*uaddr == val``) can fail for two reasons:
>> +
>> +- The values are different, and we return -EAGAIN. However, if while
>> +  dequeueing we found that some futexes were awakened, we prioritize
>> this
>> +  and return success.
>> +
>> +- When trying to access the user address, we do so with page faults
>> +  disabled because we are holding a bucket's spin lock (and can't sleep
>> +  while holding a spin lock). If there's an error, it might be a page
>> +  fault, or an invalid address. We release the lock, dequeue everyone
>> +  (because it's illegal to sleep while there are futexes enqueued, we
>> +  could lose wakeups) and try again with page fault enabled. If we
>> +  succeed, this means that the address is valid, but we need to do
>> +  all the work again. For serialization reasons, we need to have the
>> +  spin lock when getting the user value. Additionally, for shared
>> +  futexes, we also need to recalculate the hash, since the underlying
>> +  mapping mechanisms could have changed when dealing with page fault.
>> +  If, even with page fault enabled, we can't access the address, it
>> +  means it's an invalid user address, and we return -EFAULT. For this
>> +  case, we prioritize the error, even if some futexes were awaken.
>> +
>> +If the check is OK, they are enqueued on a linked list in our bucket,
>> and
>> +proceed to the next one. If all waiters succeed, we put the thread to
>> sleep
>> +until a futex_wake() call, timeout expires or we get a signal. After
>> waking up,
>> +we dequeue everyone, and check if some futex was awakened. This
>> dequeue is done
>> +by iteratively walking at each element of struct futex_head list.
>> +
>> +All enqueuing/dequeuing operations requires to hold the bucket lock,
>> to avoid
>> +racing while modifying the list.
>> +
>> +Waking
>> +------
>> +
>> +We get the bucket that's storing the waiters at uaddr, and wake the
>> required
>> +number of waiters, checking for hash collision.
>> +
>> +There's an optimization that makes futex_wake() not take the bucket
>> lock if
>> +there's no one to be woken on that bucket. It checks an atomic
>> counter that each
>> +bucket has, if it says 0, then the syscall exits. In order for this
>> to work, the
>> +waiter thread increases it before taking the lock, so the wake thread
>> will
>> +correctly see that there's someone waiting and will continue the path
>> to take
>> +the bucket lock. To get the correct serialization, the waiter issues
>> a memory
>> +barrier after increasing the bucket counter and the waker issues a
>> memory
>> +barrier before checking it.
>> +
>> +Requeuing
>> +---------
>> +
>> +The requeue path first checks for each struct futex_requeue and their
>> flags.
>> +Then, it will compare the expected value with the one at uaddr1::uaddr.
>> +Following the same serialization explained at Waking_, we increase
>> the atomic
>> +counter for the bucket of uaddr2 before taking the lock. We need to
>> have both
>> +buckets locks at same time so we don't race with other futex
>> operation. To
>> +ensure the locks are taken in the same order for all threads (and
>> thus avoiding
>> +deadlocks), every requeue operation takes the "smaller" bucket first,
>> when
>> +comparing both addresses.
>> +
>> +If the compare with user value succeeds, we proceed by waking
>> ``nr_wake``
>> +futexes, and then requeuing ``nr_requeue`` from bucket of uaddr1 to
>> the uaddr2.
>> +This consists in a simple list deletion/addition and replacing the
>> old futex key
>> +with the new one.
>> +
>> +Futex keys
>> +----------
>> +
>> +There are two types of futexes: private and shared ones. The private
>> are futexes
>> +meant to be used by threads that share the same memory space, are
>> easier to be
>> +uniquely identified and thus can have some performance optimization. The
>> +elements for identifying one are: the start address of the page where
>> the
>> +address is, the address offset within the page and the current->mm
>> pointer.
>> +
>> +Now, for uniquely identifying a shared futex:
>> +
>> +- If the page containing the user address is an anonymous page, we can
>> +  just use the same data used for private futexes (the start address of
>> +  the page, the address offset within the page and the current->mm
>> +  pointer); that will be enough for uniquely identifying such futex. We
>> +  also set one bit at the key to differentiate if a private futex is
>> +  used on the same address (mixing shared and private calls does not
>> +  work).
>> +
>> +- If the page is file-backed, current->mm maybe isn't the same one for
>> +  every user of this futex, so we need to use other data: the
>> +  page->index, a UUID for the struct inode and the offset within the
>> +  page.
>> +
>> +Note that members of futex_key don't have any particular meaning
>> after they
>> +are part of the struct - they are just bytes to identify a futex. 
>> Given that,
>> +we don't need to use a particular name or type that matches the
>> original data,
>> +we only need to care about the bitsize of each component and make
>> both private
>> +and shared fit in the same memory space.
>> +
>> +Source code documentation
>> +=========================
>> +
>> +.. kernel-doc:: kernel/futex2.c
>> +   :no-identifiers: sys_futex_wait sys_futex_wake sys_futex_waitv
>> sys_futex_requeue
>> diff --git a/Documentation/locking/index.rst
>> b/Documentation/locking/index.rst
>> index 7003bd5aeff4..9bf03c7fa1ec 100644
>> --- a/Documentation/locking/index.rst
>> +++ b/Documentation/locking/index.rst
>> @@ -24,6 +24,7 @@ locking
>>     percpu-rw-semaphore
>>     robust-futexes
>>     robust-futex-ABI
>> +    futex2
>>
>> .. only::  subproject and html
>>
>> -- 
>> 2.31.1
>>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2021-04-05  0:01 Mitali Borkar
  2021-04-06  7:03 ` Arnd Bergmann
  0 siblings, 1 reply; 414+ messages in thread
From: Mitali Borkar @ 2021-04-05  0:01 UTC (permalink / raw)
  To: manish, GR-Linux-NIC-Dev, gregkh; +Cc: linux-staging, linux-kernel

outreachy-kernel@googlegroups.com, mitaliborkar810@gmail.com 
Bcc: 
Subject: [PATCH] staging: qlge:remove else after break
Reply-To: 

Fixed Warning:- else is not needed after break
break terminates the loop if encountered. else is unnecessary and
increases indentation

Signed-off-by: Mitali Borkar <mitaliborkar810@gmail.com>
---
 drivers/staging/qlge/qlge_mpi.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/qlge/qlge_mpi.c b/drivers/staging/qlge/qlge_mpi.c
index 2630ebf50341..3a49f187203b 100644
--- a/drivers/staging/qlge/qlge_mpi.c
+++ b/drivers/staging/qlge/qlge_mpi.c
@@ -935,13 +935,11 @@ static int qlge_idc_wait(struct qlge_adapter *qdev)
 			netif_err(qdev, drv, qdev->ndev, "IDC Success.\n");
 			status = 0;
 			break;
-		} else {
-			netif_err(qdev, drv, qdev->ndev,
+		}	netif_err(qdev, drv, qdev->ndev,
 				  "IDC: Invalid State 0x%.04x.\n",
 				  mbcp->mbox_out[0]);
 			status = -EIO;
 			break;
-		}
 	}
 
 	return status;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2021-04-05  0:01 Mitali Borkar
@ 2021-04-06  7:03 ` Arnd Bergmann
  0 siblings, 0 replies; 414+ messages in thread
From: Arnd Bergmann @ 2021-04-06  7:03 UTC (permalink / raw)
  To: Mitali Borkar
  Cc: manish, GR-Linux-NIC-Dev, gregkh, linux-staging,
	Linux Kernel Mailing List

On Mon, Apr 5, 2021 at 2:03 AM Mitali Borkar <mitaliborkar810@gmail.com> wrote:
>
> outreachy-kernel@googlegroups.com, mitaliborkar810@gmail.com
> Bcc:
> Subject: [PATCH] staging: qlge:remove else after break
> Reply-To:
>
> Fixed Warning:- else is not needed after break
> break terminates the loop if encountered. else is unnecessary and
> increases indentation
>
> Signed-off-by: Mitali Borkar <mitaliborkar810@gmail.com>
> ---
>  drivers/staging/qlge/qlge_mpi.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/drivers/staging/qlge/qlge_mpi.c b/drivers/staging/qlge/qlge_mpi.c
> index 2630ebf50341..3a49f187203b 100644
> --- a/drivers/staging/qlge/qlge_mpi.c
> +++ b/drivers/staging/qlge/qlge_mpi.c
> @@ -935,13 +935,11 @@ static int qlge_idc_wait(struct qlge_adapter *qdev)
>                         netif_err(qdev, drv, qdev->ndev, "IDC Success.\n");
>                         status = 0;
>                         break;
> -               } else {
> -                       netif_err(qdev, drv, qdev->ndev,
> +               }       netif_err(qdev, drv, qdev->ndev,
>                                   "IDC: Invalid State 0x%.04x.\n",
>                                   mbcp->mbox_out[0]);
>                         status = -EIO;
>                         break;
> -               }
>         }

It looks like you got this one wrong in multiple ways:

- This is not an equivalent transformation, since the error is now
  printed in the first part of the 'if()' block as well.

- The indentation is wrong now, with the netif_err() starting in the
  same line as the '}'.

- The description mentions a change in indentation, but you did not
   actually change it.

- The changelog text appears mangled.

        Arnd

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <CAPncsNOFoUt7uEDEdihDTZY4pJsuPxt146W-L+Ju53SgZ6ezYw@mail.gmail.com>]

[parent not found: <CAPncsNMWCim1kozMyJaT7_suEnWyGadf1Kg1fzjyWfdGDVMZ3A@mail.gmail.com>]

[parent not found: <CAPncsNOpMhn=N+9+uC8hx0shRE-5uhvHCmZKJ8X3=aAeja1sag@mail.gmail.com>]

* Re:
       [not found]   ` <CAPncsNOpMhn=N+9+uC8hx0shRE-5uhvHCmZKJ8X3=aAeja1sag@mail.gmail.com>
@ 2021-03-18  6:51     ` Jarvis Jiang
  0 siblings, 0 replies; 414+ messages in thread
From: Jarvis Jiang @ 2021-03-18  6:51 UTC (permalink / raw)
  To: linux-kernel

jarvis.w.jiang@gmail.com

On Thu, Mar 18, 2021 at 2:49 PM Jarvis Jiang <jarvis.w.jiang@gmail.com> wrote:
> subscribe linex-kernel jarvis.w.jiang@gmail.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2021-01-19  0:10 David Howells
  2021-01-20 14:46 ` Jarkko Sakkinen
  0 siblings, 1 reply; 414+ messages in thread
From: David Howells @ 2021-01-19  0:10 UTC (permalink / raw)
  To: torvalds
  Cc: Tobias Markus, Tianjia Zhang, dhowells, keyrings, linux-crypto,
	linux-security-module, stable, linux-kernel


From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>

On the following call path, `sig->pkey_algo` is not assigned
in asymmetric_key_verify_signature(), which causes runtime
crash in public_key_verify_signature().

  keyctl_pkey_verify
    asymmetric_key_verify_signature
      verify_signature
        public_key_verify_signature

This patch simply checks for this situation and fixes the crash
caused by a NULL pointer.

Fixes: 215525639631 ("X.509: support OSCCA SM2-with-SM3 certificate verification")
Reported-by: Tobias Markus <tobias@markus-regensburg.de>
Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-and-tested-by: Toke Høiland-Jørgensen <toke@redhat.com>
Tested-by: João Fonseca <jpedrofonseca@ua.pt>
Cc: stable@vger.kernel.org # v5.10+
---

 crypto/asymmetric_keys/public_key.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index 8892908ad58c..788a4ba1e2e7 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -356,7 +356,8 @@ int public_key_verify_signature(const struct public_key *pkey,
 	if (ret)
 		goto error_free_key;
 
-	if (strcmp(sig->pkey_algo, "sm2") == 0 && sig->data_size) {
+	if (sig->pkey_algo && strcmp(sig->pkey_algo, "sm2") == 0 &&
+	    sig->data_size) {
 		ret = cert_sig_digest_update(sig, tfm);
 		if (ret)
 			goto error_free_key;


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2021-01-19  0:10 David Howells
@ 2021-01-20 14:46 ` Jarkko Sakkinen
  0 siblings, 0 replies; 414+ messages in thread
From: Jarkko Sakkinen @ 2021-01-20 14:46 UTC (permalink / raw)
  To: David Howells
  Cc: torvalds, Tobias Markus, Tianjia Zhang, keyrings, linux-crypto,
	linux-security-module, stable, linux-kernel

On Tue, Jan 19, 2021 at 12:10:33AM +0000, David Howells wrote:
> 
> From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
> 
> On the following call path, `sig->pkey_algo` is not assigned
> in asymmetric_key_verify_signature(), which causes runtime
> crash in public_key_verify_signature().
> 
>   keyctl_pkey_verify
>     asymmetric_key_verify_signature
>       verify_signature
>         public_key_verify_signature
> 
> This patch simply check this situation and fixes the crash
> caused by NULL pointer.
> 
> Fixes: 215525639631 ("X.509: support OSCCA SM2-with-SM3 certificate verification")
> Reported-by: Tobias Markus <tobias@markus-regensburg.de>
> Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
> Signed-off-by: David Howells <dhowells@redhat.com>
> Reviewed-and-tested-by: Toke Høiland-Jørgensen <toke@redhat.com>
> Tested-by: João Fonseca <jpedrofonseca@ua.pt>
> Cc: stable@vger.kernel.org # v5.10+
> ---

For what it's worth

Acked-by: Jarkko Sakkinen <jarkko@kernel.org>

/Jarkko

> 
>  crypto/asymmetric_keys/public_key.c |    3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
> index 8892908ad58c..788a4ba1e2e7 100644
> --- a/crypto/asymmetric_keys/public_key.c
> +++ b/crypto/asymmetric_keys/public_key.c
> @@ -356,7 +356,8 @@ int public_key_verify_signature(const struct public_key *pkey,
>  	if (ret)
>  		goto error_free_key;
>  
> -	if (strcmp(sig->pkey_algo, "sm2") == 0 && sig->data_size) {
> +	if (sig->pkey_algo && strcmp(sig->pkey_algo, "sm2") == 0 &&
> +	    sig->data_size) {
>  		ret = cert_sig_digest_update(sig, tfm);
>  		if (ret)
>  			goto error_free_key;
> 
> 

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <CAGMNF6W8baS_zLYL8DwVsbfPWTP2ohzRB7xutW0X=MUzv93pbA@mail.gmail.com>]

* Re:
       [not found] <CAGMNF6W8baS_zLYL8DwVsbfPWTP2ohzRB7xutW0X=MUzv93pbA@mail.gmail.com>
@ 2020-12-02 17:09 ` Kun Yi
  0 siblings, 0 replies; 414+ messages in thread
From: Kun Yi @ 2020-12-02 17:09 UTC (permalink / raw)
  To: Kun Yi, Guenter Roeck, robh+dt, Venkatesh, Supreeth
  Cc: OpenBMC Maillist, linux-hwmon, linux-kernel

Many apologies for the super late reply. I was out for an extended
period of time due to personal circumstances.
I have now addressed most of the comments in the v4 series.

Also cc'ed Supreeth who works on the AMD System Manageability stack.

On Wed, Dec 2, 2020 at 8:57 AM Kun Yi <kunyi@google.com> wrote:
>
> On Sat, Apr 04, 2020 at 08:01:16PM -0700, Kun Yi wrote:
> > SB Temperature Sensor Interface (SB-TSI) is an SMBus compatible
> > interface that reports AMD SoC's Ttcl (normalized temperature),
> > and resembles a typical 8-pin remote temperature sensor's I2C interface
> > to BMC.
> >
> > This commit adds basic support using this interface to read CPU
> > temperature, and read/write high/low CPU temp thresholds.
> >
> > To instantiate this driver on an AMD CPU with SB-TSI
> > support, the i2c bus number would be the bus connected from the board
> > management controller (BMC) to the CPU. The i2c address is specified in
> > Section 6.3.1 of the spec [1]: The SB-TSI address is normally 98h for socket 0
> > and 90h for socket 1, but it could vary based on hardware address select pins.
> >
> > [1]: https://www.amd.com/system/files/TechDocs/56255_OSRR.pdf
> >
> > Test status: tested reading temp1_input, and reading/writing
> > temp1_max/min.
> >
> > Signed-off-by: Kun Yi <kunyi at google.com>
> > ---
> >  drivers/hwmon/Kconfig      |  10 ++
> >  drivers/hwmon/Makefile     |   1 +
> >  drivers/hwmon/sbtsi_temp.c | 259 +++++++++++++++++++++++++++++++++++++
> >  3 files changed, 270 insertions(+)
> >  create mode 100644 drivers/hwmon/sbtsi_temp.c
> >
> > diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> > index 05a30832c6ba..9585dcd01d1b 100644
> > --- a/drivers/hwmon/Kconfig
> > +++ b/drivers/hwmon/Kconfig
> > @@ -1412,6 +1412,16 @@ config SENSORS_RASPBERRYPI_HWMON
> >    This driver can also be built as a module. If so, the module
> >    will be called raspberrypi-hwmon.
> >
> > +config SENSORS_SBTSI
> > + tristate "Emulated SB-TSI temperature sensor"
> > + depends on I2C
> > + help
> > +  If you say yes here you get support for emulated temperature
> > +  sensors on AMD SoCs with SB-TSI interface connected to a BMC device.
> > +
> > +  This driver can also be built as a module. If so, the module will
> > +  be called sbtsi_temp.
> > +
> >  config SENSORS_SHT15
> >   tristate "Sensiron humidity and temperature sensors. SHT15 and compat."
> >   depends on GPIOLIB || COMPILE_TEST
> > diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> > index b0b9c8e57176..cd109f003ce4 100644
> > --- a/drivers/hwmon/Makefile
> > +++ b/drivers/hwmon/Makefile
> > @@ -152,6 +152,7 @@ obj-$(CONFIG_SENSORS_POWR1220)  += powr1220.o
> >  obj-$(CONFIG_SENSORS_PWM_FAN) += pwm-fan.o
> >  obj-$(CONFIG_SENSORS_RASPBERRYPI_HWMON) += raspberrypi-hwmon.o
> >  obj-$(CONFIG_SENSORS_S3C) += s3c-hwmon.o
> > +obj-$(CONFIG_SENSORS_SBTSI) += sbtsi_temp.o
> >  obj-$(CONFIG_SENSORS_SCH56XX_COMMON)+= sch56xx-common.o
> >  obj-$(CONFIG_SENSORS_SCH5627) += sch5627.o
> >  obj-$(CONFIG_SENSORS_SCH5636) += sch5636.o
> > diff --git a/drivers/hwmon/sbtsi_temp.c b/drivers/hwmon/sbtsi_temp.c
> > new file mode 100644
> > index 000000000000..e3ad6a9f7ec1
> > --- /dev/null
> > +++ b/drivers/hwmon/sbtsi_temp.c
> > @@ -0,0 +1,259 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * sbtsi_temp.c - hwmon driver for a SBI Temperature Sensor Interface (SB-TSI)
> > + *                compliant AMD SoC temperature device.
> > + *
> > + * Copyright (c) 2020, Google Inc.
> > + * Copyright (c) 2020, Kun Yi <kunyi at google.com>
> > + */
> > +
> > +#include <linux/err.h>
> > +#include <linux/i2c.h>
> > +#include <linux/init.h>
> > +#include <linux/hwmon.h>
> > +#include <linux/module.h>
> > +#include <linux/mutex.h>
> > +#include <linux/of_device.h>
> > +#include <linux/of.h>
> > +
> > +/*
> > + * SB-TSI registers only support SMBus byte data access. "_INT" registers are
> > + * the integer part of a temperature value or limit, and "_DEC" registers are
> > + * corresponding decimal parts.
> > + */
> > +#define SBTSI_REG_TEMP_INT 0x01 /* RO */
> > +#define SBTSI_REG_STATUS 0x02 /* RO */
> > +#define SBTSI_REG_CONFIG 0x03 /* RO */
> > +#define SBTSI_REG_TEMP_HIGH_INT 0x07 /* RW */
> > +#define SBTSI_REG_TEMP_LOW_INT 0x08 /* RW */
> > +#define SBTSI_REG_TEMP_DEC 0x10 /* RW */
> > +#define SBTSI_REG_TEMP_HIGH_DEC 0x13 /* RW */
> > +#define SBTSI_REG_TEMP_LOW_DEC 0x14 /* RW */
> > +#define SBTSI_REG_REV 0xFF /* RO */
>
> The revision register is not actually used.
Thanks. Removed. I agree that the register is not well documented, at
least publicly.
It shouldn't affect functionality of this driver, so I removed the
definition altogether.
>
> > +
> > +#define SBTSI_CONFIG_READ_ORDER_SHIFT 5
> > +
> > +#define SBTSI_TEMP_MIN 0
> > +#define SBTSI_TEMP_MAX 255875
> > +#define SBTSI_REV_MAX_VALID_ID 4
>
> Not actually used, and I am not sure if it would make sense to check it.
> If at all, it would only make sense if you also check SBTSIxFE (Manufacture
> ID). Unfortunately, the actual SB-TSI specification seems to be non-public,
> so I can't check if the driver as-is supports versions 0..3 (assuming those
> exist).

Thanks. Removed.

>
> > +
> > +/* Each client has this additional data */
> > +struct sbtsi_data {
> > + struct i2c_client *client;
> > + struct mutex lock;
> > +};
> > +
> > +/*
> > + * From SB-TSI spec: CPU temperature readings and limit registers encode the
> > + * temperature in increments of 0.125 from 0 to 255.875. The "high byte"
> > + * register encodes the base-2 of the integer portion, and the upper 3 bits of
> > + * the "low byte" encode in base-2 the decimal portion.
> > + *
> > + * e.g. INT=0x19, DEC=0x20 represents 25.125 degrees Celsius
> > + *
> > + * Therefore temperature in millidegree Celsius =
> > + *   (INT + DEC / 256) * 1000 = (INT * 8 + DEC / 32) * 125
> > + */
> > +static inline int sbtsi_reg_to_mc(s32 integer, s32 decimal)
> > +{
> > + return ((integer << 3) + (decimal >> 5)) * 125;
> > +}
> > +
> > +/*
> > + * Inversely, given temperature in millidegree Celsius
> > + *   INT = (TEMP / 125) / 8
> > + *   DEC = ((TEMP / 125) % 8) * 32
> > + * Caller have to make sure temp doesn't exceed 255875, the max valid value.
> > + */
> > +static inline void sbtsi_mc_to_reg(s32 temp, u8 *integer, u8 *decimal)
> > +{
> > + temp /= 125;
> > + *integer = temp >> 3;
> > + *decimal = (temp & 0x7) << 5;
> > +}
> > +
> > +static int sbtsi_read(struct device *dev, enum hwmon_sensor_types type,
> > +      u32 attr, int channel, long *val)
> > +{
> > + struct sbtsi_data *data = dev_get_drvdata(dev);
> > + s32 temp_int, temp_dec;
> > + int err, reg_int, reg_dec;
> > + u8 read_order;
> > +
> > + if (type != hwmon_temp)
> > + return -EINVAL;
> > +
> > + read_order = 0;
> > + switch (attr) {
> > + case hwmon_temp_input:
> > + /*
> > + * ReadOrder bit specifies the reading order of integer and
> > + * decimal part of CPU temp for atomic reads. If bit == 0,
> > + * reading integer part triggers latching of the decimal part,
> > + * so integer part should be read first. If bit == 1, read
> > + * order should be reversed.
> > + */
> > + err = i2c_smbus_read_byte_data(data->client, SBTSI_REG_CONFIG);
> > + if (err < 0)
> > + return err;
> > +
> As I understand it, the idea is to set this configuration bit once and then
> just use it. Any chance to do that ? This would save an i2c read operation
> each time the temperature is read, and the if/else complexity below.

Unfortunately, the read-order register bit is read-only.

>
> > + read_order = (u8)err & BIT(SBTSI_CONFIG_READ_ORDER_SHIFT);
>
> Nit: typecast is unnecessary.

Done.

>
> > + reg_int = SBTSI_REG_TEMP_INT;
> > + reg_dec = SBTSI_REG_TEMP_DEC;
> > + break;
> > + case hwmon_temp_max:
> > + reg_int = SBTSI_REG_TEMP_HIGH_INT;
> > + reg_dec = SBTSI_REG_TEMP_HIGH_DEC;
> > + break;
> > + case hwmon_temp_min:
> > + reg_int = SBTSI_REG_TEMP_LOW_INT;
> > + reg_dec = SBTSI_REG_TEMP_LOW_DEC;
> > + break;
> > + default:
> > + return -EINVAL;
> > + }
> > +
> > + if (read_order == 0) {
> > + temp_int = i2c_smbus_read_byte_data(data->client, reg_int);
> > + temp_dec = i2c_smbus_read_byte_data(data->client, reg_dec);
> > + } else {
> > + temp_dec = i2c_smbus_read_byte_data(data->client, reg_dec);
> > + temp_int = i2c_smbus_read_byte_data(data->client, reg_int);
> > + }
>
> Just a thought: if you use regmap and tell it that the limit registers
> are non-volatile, this wouldn't actually read from the chip more than once.

That's a great suggestion, although in our normal use cases the limit
values are read and cached by the
userspace application. It seems changing to regmap would require some
massaging of the code. Would it
be acceptable to keep the initial driver as-is and do that in a following patch?

>
> Also, since the read involves reading two registers, and the first read
> locks the value for the second, you'll need mutex protection when reading
> the current temperature (not for limits, though).

Added mutex locking before/after the temp input reading.

>
> > +
> > + if (temp_int < 0)
> > + return temp_int;
> > + if (temp_dec < 0)
> > + return temp_dec;
> > +
> > + *val = sbtsi_reg_to_mc(temp_int, temp_dec);
> > +
> > + return 0;
> > +}
> > +
> > +static int sbtsi_write(struct device *dev, enum hwmon_sensor_types type,
> > +       u32 attr, int channel, long val)
> > +{
> > + struct sbtsi_data *data = dev_get_drvdata(dev);
> > + int reg_int, reg_dec, err;
> > + u8 temp_int, temp_dec;
> > +
> > + if (type != hwmon_temp)
> > + return -EINVAL;
> > +
> > + switch (attr) {
> > + case hwmon_temp_max:
> > + reg_int = SBTSI_REG_TEMP_HIGH_INT;
> > + reg_dec = SBTSI_REG_TEMP_HIGH_DEC;
> > + break;
> > + case hwmon_temp_min:
> > + reg_int = SBTSI_REG_TEMP_LOW_INT;
> > + reg_dec = SBTSI_REG_TEMP_LOW_DEC;
> > + break;
> > + default:
> > + return -EINVAL;
> > + }
> > +
> > + val = clamp_val(val, SBTSI_TEMP_MIN, SBTSI_TEMP_MAX);
> > + mutex_lock(&data->lock);
> > + sbtsi_mc_to_reg(val, &temp_int, &temp_dec);
> > + err = i2c_smbus_write_byte_data(data->client, reg_int, temp_int);
> > + if (err)
> > + goto exit;
> > +
> > + err = i2c_smbus_write_byte_data(data->client, reg_dec, temp_dec);
> > +exit:
> > + mutex_unlock(&data->lock);
> > + return err;
> > +}
> > +
> > +static umode_t sbtsi_is_visible(const void *data,
> > + enum hwmon_sensor_types type,
> > + u32 attr, int channel)
> > +{
> > + switch (type) {
> > + case hwmon_temp:
> > + switch (attr) {
> > + case hwmon_temp_input:
> > + return 0444;
> > + case hwmon_temp_min:
> > + return 0644;
> > + case hwmon_temp_max:
> > + return 0644;
> > + }
> > + break;
> > + default:
> > + break;
> > + }
> > + return 0;
> > +}
> > +
> > +static const struct hwmon_channel_info *sbtsi_info[] = {
> > + HWMON_CHANNEL_INFO(chip,
> > +   HWMON_C_REGISTER_TZ),
> > + HWMON_CHANNEL_INFO(temp,
> > +   HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX),
>
> For your consideration: SB-TSI supports reporting high/low alerts.
> With this, it would be possible to implement respective alarm attributes.
> In conjunction with https://patchwork.kernel.org/patch/11277347/mbox/,
> it should also be possible to add interrupt and thus userspace notification
> for those attributes.
>
> SBTSI also supports setting the update rate (SBTSIx04) and setting
> the temperature offset (SBTSIx11, SBTSIx12), which could also be
> implemented as standard attributes.
>
> I won't require that for the initial version, just something to keep
> in mind.

Ack and thanks for the suggestions. I will keep them in mind for future improvements.


>
> > + NULL
> > +};
> > +
> > +static const struct hwmon_ops sbtsi_hwmon_ops = {
> > + .is_visible = sbtsi_is_visible,
> > + .read = sbtsi_read,
> > + .write = sbtsi_write,
> > +};
> > +
> > +static const struct hwmon_chip_info sbtsi_chip_info = {
> > + .ops = &sbtsi_hwmon_ops,
> > + .info = sbtsi_info,
> > +};
> > +
> > +static int sbtsi_probe(struct i2c_client *client,
> > +       const struct i2c_device_id *id)
> > +{
> > + struct device *dev = &client->dev;
> > + struct device *hwmon_dev;
> > + struct sbtsi_data *data;
> > +
> > + data = devm_kzalloc(dev, sizeof(struct sbtsi_data), GFP_KERNEL);
> > + if (!data)
> > + return -ENOMEM;
> > +
> > + data->client = client;
> > + mutex_init(&data->lock);
> > +
> > + hwmon_dev =
> > + devm_hwmon_device_register_with_info(dev, client->name, data,
> > +     &sbtsi_chip_info, NULL);
> > +
> > + return PTR_ERR_OR_ZERO(hwmon_dev);
> > +}
> > +
> > +static const struct i2c_device_id sbtsi_id[] = {
> > + {"sbtsi", 0},
> > + {}
> > +};
> > +MODULE_DEVICE_TABLE(i2c, sbtsi_id);
> > +
> > +static const struct of_device_id __maybe_unused sbtsi_of_match[] = {
> > + {
> > + .compatible = "amd,sbtsi",
> > + },
> > + { },
> > +};
> > +MODULE_DEVICE_TABLE(of, sbtsi_of_match);
> > +
> > +static struct i2c_driver sbtsi_driver = {
> > + .class = I2C_CLASS_HWMON,
> > + .driver = {
> > + .name = "sbtsi",
> > + .of_match_table = of_match_ptr(sbtsi_of_match),
> > + },
> > + .probe = sbtsi_probe,
> > + .id_table = sbtsi_id,
> > +};
> > +
> > +module_i2c_driver(sbtsi_driver);
> > +
> > +MODULE_AUTHOR("Kun Yi <kunyi at google.com>");
> > +MODULE_DESCRIPTION("Hwmon driver for AMD SB-TSI emulated sensor");
> > +MODULE_LICENSE("GPL");



--
Regards,
Kun

^ permalink raw reply	[flat|nested] 414+ messages in thread

* [PATCH] lib/find_bit: Add find_prev_*_bit functions.
@ 2020-12-02  1:10 Yun Levi
  2020-12-02  9:47 ` Andy Shevchenko
  0 siblings, 1 reply; 414+ messages in thread
From: Yun Levi @ 2020-12-02  1:10 UTC (permalink / raw)
  To: dushistov, arnd, akpm, gustavo, vilhelm.gray, richard.weiyang,
	andriy.shevchenko, joseph.qi, skalluru, yury.norov, jpoimboe
  Cc: linux-kernel, linux-arch

Inspired by the find_next_*bit function series, add a find_prev_*_bit series.
I'm not sure whether they will be used right away, but I am adding these
functions for future use.

Signed-off-by: Levi Yun <ppbuk5246@gmail.com>
---
 fs/ufs/util.h                     |  24 +++---
 include/asm-generic/bitops/find.h |  69 ++++++++++++++++
 include/asm-generic/bitops/le.h   |  33 ++++++++
 include/linux/bitops.h            |  34 +++++---
 lib/find_bit.c                    | 126 +++++++++++++++++++++++++++++-
 5 files changed, 260 insertions(+), 26 deletions(-)

diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 4931bec1a01c..7c87c77d10ca 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -2,7 +2,7 @@
 /*
  *  linux/fs/ufs/util.h
  *
- * Copyright (C) 1998
+ * Copyright (C) 1998
  * Daniel Pirkl <daniel.pirkl@email.cz>
  * Charles University, Faculty of Mathematics and Physics
  */
@@ -263,7 +263,7 @@ extern int ufs_prepare_chunk(struct page *page,
loff_t pos, unsigned len);
 /*
  * These functions manipulate ufs buffers
  */
-#define ubh_bread(sb,fragment,size) _ubh_bread_(uspi,sb,fragment,size)
+#define ubh_bread(sb,fragment,size) _ubh_bread_(uspi,sb,fragment,size)
 extern struct ufs_buffer_head * _ubh_bread_(struct
ufs_sb_private_info *, struct super_block *, u64 , u64);
 extern struct ufs_buffer_head * ubh_bread_uspi(struct
ufs_sb_private_info *, struct super_block *, u64, u64);
 extern void ubh_brelse (struct ufs_buffer_head *);
@@ -296,7 +296,7 @@ static inline void *get_usb_offset(struct
ufs_sb_private_info *uspi,
                                   unsigned int offset)
 {
        unsigned int index;
-
+
        index = offset >> uspi->s_fshift;
        offset &= ~uspi->s_fmask;
        return uspi->s_ubh.bh[index]->b_data + offset;
@@ -411,9 +411,9 @@ static inline unsigned _ubh_find_next_zero_bit_(
                offset = 0;
        }
        return (base << uspi->s_bpfshift) + pos - begin;
-}
+}

-static inline unsigned find_last_zero_bit (unsigned char * bitmap,
+static inline unsigned __ubh_find_last_zero_bit(unsigned char * bitmap,
        unsigned size, unsigned offset)
 {
        unsigned bit, i;
@@ -453,7 +453,7 @@ static inline unsigned _ubh_find_last_zero_bit_(
                            size + (uspi->s_bpf - start), uspi->s_bpf)
                        - (uspi->s_bpf - start);
                size -= count;
-               pos = find_last_zero_bit (ubh->bh[base]->b_data,
+               pos = __ubh_find_last_zero_bit(ubh->bh[base]->b_data,
                        start, start - count);
                if (pos > start - count || !size)
                        break;
@@ -461,7 +461,7 @@ static inline unsigned _ubh_find_last_zero_bit_(
                start = uspi->s_bpf;
        }
        return (base << uspi->s_bpfshift) + pos - begin;
-}
+}

 #define ubh_isblockclear(ubh,begin,block)
(!_ubh_isblockset_(uspi,ubh,begin,block))

@@ -483,7 +483,7 @@ static inline int _ubh_isblockset_(struct
ufs_sb_private_info * uspi,
                mask = 0x01 << (block & 0x07);
                return (*ubh_get_addr (ubh, begin + (block >> 3)) &
mask) == mask;
        }
-       return 0;
+       return 0;
 }

 #define ubh_clrblock(ubh,begin,block) _ubh_clrblock_(uspi,ubh,begin,block)
@@ -492,8 +492,8 @@ static inline void _ubh_clrblock_(struct
ufs_sb_private_info * uspi,
 {
        switch (uspi->s_fpb) {
        case 8:
-               *ubh_get_addr (ubh, begin + block) = 0x00;
-               return;
+               *ubh_get_addr (ubh, begin + block) = 0x00;
+               return;
        case 4:
                *ubh_get_addr (ubh, begin + (block >> 1)) &= ~(0x0f <<
((block & 0x01) << 2));
                return;
@@ -531,9 +531,9 @@ static inline void ufs_fragacct (struct
super_block * sb, unsigned blockmap,
 {
        struct ufs_sb_private_info * uspi;
        unsigned fragsize, pos;
-
+
        uspi = UFS_SB(sb)->s_uspi;
-
+
        fragsize = 0;
        for (pos = 0; pos < uspi->s_fpb; pos++) {
                if (blockmap & (1 << pos)) {
diff --git a/include/asm-generic/bitops/find.h
b/include/asm-generic/bitops/find.h
index 9fdf21302fdf..ca18b2ec954c 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/asm-generic/bitops/find.h
@@ -16,6 +16,20 @@ extern unsigned long find_next_bit(const unsigned
long *addr, unsigned long
                size, unsigned long offset);
 #endif

+#ifndef find_prev_bit
+/**
+ * find_prev_bit - find the prev set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the prev set bit
+ * If no bits are set, returns @size.
+ */
+extern unsigned long find_prev_bit(const unsigned long *addr, unsigned long
+               size, unsigned long offset);
+#endif
+
 #ifndef find_next_and_bit
 /**
  * find_next_and_bit - find the next set bit in both memory regions
@@ -32,6 +46,22 @@ extern unsigned long find_next_and_bit(const
unsigned long *addr1,
                unsigned long offset);
 #endif

+#ifndef find_prev_and_bit
+/**
+ * find_prev_and_bit - find the prev set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the prev set bit
+ * If no bits are set, returns @size.
+ */
+extern unsigned long find_prev_and_bit(const unsigned long *addr1,
+               const unsigned long *addr2, unsigned long size,
+               unsigned long offset);
+#endif
+
 #ifndef find_next_zero_bit
 /**
  * find_next_zero_bit - find the next cleared bit in a memory region
@@ -46,6 +76,20 @@ extern unsigned long find_next_zero_bit(const
unsigned long *addr, unsigned
                long size, unsigned long offset);
 #endif

+#ifndef find_prev_zero_bit
+/**
+ * find_prev_zero_bit - find the prev cleared bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number of the prev zero bit
+ * If no bits are zero, returns @size.
+ */
+extern unsigned long find_prev_zero_bit(const unsigned long *addr, unsigned
+               long size, unsigned long offset);
+#endif
+
 #ifdef CONFIG_GENERIC_FIND_FIRST_BIT

 /**
@@ -80,6 +124,31 @@ extern unsigned long find_first_zero_bit(const
unsigned long *addr,

 #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */

+#ifndef find_last_bit
+/**
+ * find_last_bit - find the last set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The number of bits to search
+ *
+ * Returns the bit number of the last set bit, or size.
+ */
+extern unsigned long find_last_bit(const unsigned long *addr,
+                                  unsigned long size);
+#endif
+
+#ifndef find_last_zero_bit
+/**
+ * find_last_zero_bit - find the last cleared bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ *
+ * Returns the bit number of the last cleared bit.
+ * If no bits are zero, returns @size.
+ */
+extern unsigned long find_last_zero_bit(const unsigned long *addr,
+                                        unsigned long size);
+#endif
+
 /**
  * find_next_clump8 - find next 8-bit clump with set bits in a memory region
  * @clump: location to store copy of found clump
diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h
index 188d3eba3ace..d0bd15bc34d9 100644
--- a/include/asm-generic/bitops/le.h
+++ b/include/asm-generic/bitops/le.h
@@ -27,6 +27,24 @@ static inline unsigned long
find_first_zero_bit_le(const void *addr,
        return find_first_zero_bit(addr, size);
 }

+static inline unsigned long find_prev_zero_bit_le(const void *addr,
+               unsigned long size, unsigned long offset)
+{
+       return find_prev_zero_bit(addr, size, offset);
+}
+
+static inline unsigned long find_prev_bit_le(const void *addr,
+               unsigned long size, unsigned long offset)
+{
+       return find_prev_bit(addr, size, offset);
+}
+
+static inline unsigned long find_last_zero_bit_le(const void *addr,
+               unsigned long size)
+{
+       return find_last_zero_bit(addr, size);
+}
+
 #elif defined(__BIG_ENDIAN)

 #define BITOP_LE_SWIZZLE       ((BITS_PER_LONG-1) & ~0x7)
@@ -41,11 +59,26 @@ extern unsigned long find_next_bit_le(const void *addr,
                unsigned long size, unsigned long offset);
 #endif

+#ifndef find_prev_zero_bit_le
+extern unsigned long find_prev_zero_bit_le(const void *addr,
+               unsigned long size, unsigned long offset);
+#endif
+
+#ifndef find_prev_bit_le
+extern unsigned long find_prev_bit_le(const void *addr,
+               unsigned long size, unsigned long offset);
+#endif
+
 #ifndef find_first_zero_bit_le
 #define find_first_zero_bit_le(addr, size) \
        find_next_zero_bit_le((addr), (size), 0)
 #endif

+#ifndef find_last_zero_bit_le
+#define find_last_zero_bit_le(addr, size) \
+       find_prev_zero_bit_le((addr), (size), (size - 1))
+#endif
+
 #else
 #error "Please fix <asm/byteorder.h>"
 #endif
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 5b74bdf159d6..124c68929861 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -50,6 +50,28 @@ extern unsigned long __sw_hweight64(__u64 w);
             (bit) < (size);                                    \
             (bit) = find_next_zero_bit((addr), (size), (bit) + 1))

+#define for_each_set_bit_reverse(bit, addr, size) \
+       for ((bit) = find_last_bit((addr), (size));             \
+            (bit) < (size);                                    \
+            (bit) = find_prev_bit((addr), (size), (bit) - 1))
+
+/* as for_each_set_bit_reverse(), but the walk starts at (and includes) bit */
+#define for_each_set_bit_from_reverse(bit, addr, size) \
+       for ((bit) = find_prev_bit((addr), (size), (bit));      \
+            (bit) < (size);                                    \
+            (bit) = find_prev_bit((addr), (size), (bit) - 1))
+
+#define for_each_clear_bit_reverse(bit, addr, size) \
+       for ((bit) = find_last_zero_bit((addr), (size));        \
+            (bit) < (size);                                    \
+            (bit) = find_prev_zero_bit((addr), (size), (bit) - 1))
+
+/* as for_each_clear_bit_reverse(), but the walk starts at (and includes) bit */
+#define for_each_clear_bit_from_reverse(bit, addr, size) \
+       for ((bit) = find_prev_zero_bit((addr), (size), (bit)); \
+            (bit) < (size);                                    \
+            (bit) = find_prev_zero_bit((addr), (size), (bit) - 1))
+
 /**
  * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
  * @start: bit offset to start search and to store the current iteration offset
@@ -283,17 +305,5 @@ static __always_inline void __assign_bit(long nr,
volatile unsigned long *addr,
 })
 #endif

-#ifndef find_last_bit
-/**
- * find_last_bit - find the last set bit in a memory region
- * @addr: The address to start the search at
- * @size: The number of bits to search
- *
- * Returns the bit number of the last set bit, or size.
- */
-extern unsigned long find_last_bit(const unsigned long *addr,
-                                  unsigned long size);
-#endif
-
 #endif /* __KERNEL__ */
 #endif
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 4a8751010d59..cbe06abd3d21 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -69,6 +69,58 @@ static unsigned long _find_next_bit(const unsigned
long *addr1,
 }
 #endif

+#if !defined(find_prev_bit) || !defined(find_prev_zero_bit) ||                 \
+       !defined(find_prev_bit_le) || !defined(find_prev_zero_bit_le) ||        \
+       !defined(find_prev_and_bit)
+/*
+ * This is a common helper function for find_prev_bit, find_prev_zero_bit, and
+ * find_prev_and_bit. The differences are:
+ *  - The "invert" argument, which is XORed with each fetched word before
+ *    searching it for one bits.
+ *  - The optional "addr2", which is anded with "addr1" if present.
+ */
+static unsigned long _find_prev_bit(const unsigned long *addr1,
+               const unsigned long *addr2, unsigned long nbits,
+               unsigned long start, unsigned long invert, unsigned long le)
+{
+       unsigned long tmp, mask;
+
+       if (unlikely(start >= nbits))
+               return nbits;
+
+       tmp = addr1[start / BITS_PER_LONG];
+       if (addr2)
+               tmp &= addr2[start / BITS_PER_LONG];
+       tmp ^= invert;
+
+       /* Handle 1st word. */
+       mask = BITMAP_LAST_WORD_MASK(start + 1);
+       if (le)
+               mask = swab(mask);
+
+       tmp &= mask;
+
+       start = round_down(start, BITS_PER_LONG);
+
+       while (!tmp) {
+               start -= BITS_PER_LONG;
+               if (start >= nbits)
+                       return nbits;
+
+               tmp = addr1[start / BITS_PER_LONG];
+               if (addr2)
+                       tmp &= addr2[start / BITS_PER_LONG];
+               tmp ^= invert;
+       }
+
+       if (le)
+               tmp = swab(tmp);
+
+       return start + __fls(tmp);
+}
+#endif
+
+
 #ifndef find_next_bit
 /*
  * Find the next set bit in a memory region.
@@ -81,6 +133,18 @@ unsigned long find_next_bit(const unsigned long
*addr, unsigned long size,
 EXPORT_SYMBOL(find_next_bit);
 #endif

+#ifndef find_prev_bit
+/*
+ * Find the prev set bit in a memory region.
+ */
+unsigned long find_prev_bit(const unsigned long *addr, unsigned long size,
+                           unsigned long offset)
+{
+       return _find_prev_bit(addr, NULL, size, offset, 0UL, 0);
+}
+EXPORT_SYMBOL(find_prev_bit);
+#endif
+
 #ifndef find_next_zero_bit
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
                                 unsigned long offset)
@@ -90,7 +154,16 @@ unsigned long find_next_zero_bit(const unsigned
long *addr, unsigned long size,
 EXPORT_SYMBOL(find_next_zero_bit);
 #endif

-#if !defined(find_next_and_bit)
+#ifndef find_prev_zero_bit
+unsigned long find_prev_zero_bit(const unsigned long *addr, unsigned long size,
+                                unsigned long offset)
+{
+       return _find_prev_bit(addr, NULL, size, offset, ~0UL, 0);
+}
+EXPORT_SYMBOL(find_prev_zero_bit);
+#endif
+
+#ifndef find_next_and_bit
 unsigned long find_next_and_bit(const unsigned long *addr1,
                const unsigned long *addr2, unsigned long size,
                unsigned long offset)
@@ -100,6 +173,16 @@ unsigned long find_next_and_bit(const unsigned long *addr1,
 EXPORT_SYMBOL(find_next_and_bit);
 #endif

+#ifndef find_prev_and_bit
+unsigned long find_prev_and_bit(const unsigned long *addr1,
+               const unsigned long *addr2, unsigned long size,
+               unsigned long offset)
+{
+       return _find_prev_bit(addr1, addr2, size, offset, 0UL, 0);
+}
+EXPORT_SYMBOL(find_prev_and_bit);
+#endif
+
 #ifndef find_first_bit
 /*
  * Find the first set bit in a memory region.
@@ -141,7 +224,7 @@ unsigned long find_last_bit(const unsigned long
*addr, unsigned long size)
 {
        if (size) {
                unsigned long val = BITMAP_LAST_WORD_MASK(size);
-               unsigned long idx = (size-1) / BITS_PER_LONG;
+               unsigned long idx = (size - 1) / BITS_PER_LONG;

                do {
                        val &= addr[idx];
@@ -156,6 +239,27 @@ unsigned long find_last_bit(const unsigned long
*addr, unsigned long size)
 EXPORT_SYMBOL(find_last_bit);
 #endif

+#ifndef find_last_zero_bit
+unsigned long find_last_zero_bit(const unsigned long *addr, unsigned long size)
+{
+       if (size) {
+               unsigned long val = BITMAP_LAST_WORD_MASK(size);
+               unsigned long idx = (size - 1) / BITS_PER_LONG;
+
+               do {
+                       val &= ~addr[idx];
+                       if (val)
+                               return idx * BITS_PER_LONG + __fls(val);
+
+                       val = ~0ul;
+               } while (idx--);
+       }
+
+       return size;
+}
+EXPORT_SYMBOL(find_last_zero_bit);
+#endif
+
 #ifdef __BIG_ENDIAN

 #ifndef find_next_zero_bit_le
@@ -167,6 +271,15 @@ unsigned long find_next_zero_bit_le(const void
*addr, unsigned
 EXPORT_SYMBOL(find_next_zero_bit_le);
 #endif

+#ifndef find_prev_zero_bit_le
+unsigned long find_prev_zero_bit_le(const void *addr, unsigned
+               long size, unsigned long offset)
+{
+       return _find_prev_bit(addr, NULL, size, offset, ~0UL, 1);
+}
+EXPORT_SYMBOL(find_prev_zero_bit_le);
+#endif
+
 #ifndef find_next_bit_le
 unsigned long find_next_bit_le(const void *addr, unsigned
                long size, unsigned long offset)
@@ -176,6 +289,15 @@ unsigned long find_next_bit_le(const void *addr, unsigned
 EXPORT_SYMBOL(find_next_bit_le);
 #endif

+#ifndef find_prev_bit_le
+unsigned long find_prev_bit_le(const void *addr, unsigned
+               long size, unsigned long offset)
+{
+       return _find_prev_bit(addr, NULL, size, offset, 0UL, 1);
+}
+EXPORT_SYMBOL(find_prev_bit_le);
+#endif
+
 #endif /* __BIG_ENDIAN */

 unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr,
--
2.29.2

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re: [PATCH] lib/find_bit: Add find_prev_*_bit functions.
  2020-12-02  1:10 [PATCH] lib/find_bit: Add find_prev_*_bit functions Yun Levi
@ 2020-12-02  9:47 ` Andy Shevchenko
  2020-12-02 10:04   ` Rasmus Villemoes
  0 siblings, 1 reply; 414+ messages in thread
From: Andy Shevchenko @ 2020-12-02  9:47 UTC (permalink / raw)
  To: Yun Levi
  Cc: dushistov, arnd, akpm, gustavo, vilhelm.gray, richard.weiyang,
	joseph.qi, skalluru, yury.norov, jpoimboe, linux-kernel,
	linux-arch

On Wed, Dec 02, 2020 at 10:10:09AM +0900, Yun Levi wrote:
> Inspired find_next_*bit function series, add find_prev_*_bit series.
> I'm not sure whether it'll be used right now But, I add these functions
> for future usage.

This patch has few issues:
- it has more things than described (should be several patches instead)
- new functionality can be split logically to couple or more pieces as well
- it proposes functionality w/o user (dead code)

-- 
With Best Regards,
Andy Shevchenko



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH] lib/find_bit: Add find_prev_*_bit functions.
  2020-12-02  9:47 ` Andy Shevchenko
@ 2020-12-02 10:04   ` Rasmus Villemoes
  2020-12-02 11:50     ` Yun Levi
  0 siblings, 1 reply; 414+ messages in thread
From: Rasmus Villemoes @ 2020-12-02 10:04 UTC (permalink / raw)
  To: Andy Shevchenko, Yun Levi
  Cc: dushistov, arnd, akpm, gustavo, vilhelm.gray, richard.weiyang,
	joseph.qi, skalluru, yury.norov, jpoimboe, linux-kernel,
	linux-arch

On 02/12/2020 10.47, Andy Shevchenko wrote:
> On Wed, Dec 02, 2020 at 10:10:09AM +0900, Yun Levi wrote:
>> Inspired find_next_*bit function series, add find_prev_*_bit series.
>> I'm not sure whether it'll be used right now But, I add these functions
>> for future usage.
> 
> This patch has few issues:
> - it has more things than described (should be several patches instead)
> - new functionality can be split logically to couple or more pieces as well
> - it proposes functionality w/o user (dead code)

Yeah, the last point means it can't be applied - please submit it again
if and when you have an actual use case. And I'll add

- it lacks extension of the bitmap test module to cover the new
functions (that also wants to be a separate patch).

Rasmus

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH] lib/find_bit: Add find_prev_*_bit functions.
  2020-12-02 10:04   ` Rasmus Villemoes
@ 2020-12-02 11:50     ` Yun Levi
       [not found]       ` <CAAH8bW-jUeFVU-0OrJzK-MuGgKJgZv38RZugEQzFRJHSXFRRDA@mail.gmail.com>
  0 siblings, 1 reply; 414+ messages in thread
From: Yun Levi @ 2020-12-02 11:50 UTC (permalink / raw)
  To: Rasmus Villemoes
  Cc: Andy Shevchenko, dushistov, arnd, akpm, gustavo, vilhelm.gray,
	richard.weiyang, joseph.qi, skalluru, yury.norov, jpoimboe,
	linux-kernel, linux-arch

Thanks for the kind advice. I'm afraid I still have a few questions, below:

 > - it proposes functionality w/o user (dead code)
     Actually, I add these series functions to rewrite some of the
resource clean-up routine.
     A typical case is ethtool_set_per_queue_coalesce 's rollback label.
     Could this usage be an actual use case?

 >- it lacks extension of the bitmap test module to cover the new
 > functions (that also wants to be a separate patch).
     I see. Then could I add some test cases to lib/test_bitops.c for testing?






On Wed, Dec 2, 2020 at 7:04 PM Rasmus Villemoes
<linux@rasmusvillemoes.dk> wrote:
>
> On 02/12/2020 10.47, Andy Shevchenko wrote:
> > On Wed, Dec 02, 2020 at 10:10:09AM +0900, Yun Levi wrote:
> >> Inspired find_next_*bit function series, add find_prev_*_bit series.
> >> I'm not sure whether it'll be used right now But, I add these functions
> >> for future usage.
> >
> > This patch has few issues:
> > - it has more things than described (should be several patches instead)
> > - new functionality can be split logically to couple or more pieces as well
> > - it proposes functionality w/o user (dead code)
>
> Yeah, the last point means it can't be applied - please submit it again
> if and when you have an actual use case. And I'll add
>
> - it lacks extension of the bitmap test module to cover the new
> functions (that also wants to be a separate patch).
>
> Rasmus

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <CAAH8bW-jUeFVU-0OrJzK-MuGgKJgZv38RZugEQzFRJHSXFRRDA@mail.gmail.com>]

* (no subject)
       [not found]       ` <CAAH8bW-jUeFVU-0OrJzK-MuGgKJgZv38RZugEQzFRJHSXFRRDA@mail.gmail.com>
@ 2020-12-02 18:22         ` Yun Levi
  2020-12-02 21:26           ` Yury Norov
  0 siblings, 1 reply; 414+ messages in thread
From: Yun Levi @ 2020-12-02 18:22 UTC (permalink / raw)
  To: Yury Norov
  Cc: Rasmus Villemoes, dushistov, Arnd Bergmann, Andrew Morton,
	Gustavo A. R. Silva, William Breathitt Gray, richard.weiyang,
	joseph.qi, skalluru, Josh Poimboeuf, Linux Kernel Mailing List,
	linux-arch, Andy Shevchenko

On Thu, Dec 3, 2020 at 2:26 AM Yury Norov <yury.norov@gmail.com> wrote:

> Also look at lib/find_bit_benchmark.c
Thanks. I'll see.

> We need find_next_*_bit() because find_first_*_bit() can start searching only at word-aligned
> bits. In the case of find_last_*_bit(), we can start at any bit. So, if my understanding is correct,
> for the purpose of reverse traversing we can go with already existing find_last_bit(),

Thank you. I haven't thought that way.
But I think if we implement reverse traversing using find_last_bit(),
we have a problem.
Suppose bits 0, 1 and 2 are set.
If we start
    find_last_bit(bitmap, 3) ==> return 2;
    find_last_bit(bitmap, 2) ==> return 1;
    find_last_bit(bitmap, 1) ==> return 0;
    find_last_bit(bitmap, 0) ===> return 0? // here we couldn't
distinguish size 0 input or 0 is set

The forward for_each traversal avoids this case because find_next_bit
returns size (nbits) when nothing is found.
So, for compatibility and to get the same return convention as forward traversal,
I think we need a find_prev_*_bit routine, if my understanding is correct.


>  I think this patch has some good catches. We definitely need to implement
> find_last_zero_bit(), as it is used by fs/ufs, and their local implementation is not optimal.
>
> We also should consider adding reverse traversing macros based on find_last_*_bit(),
> if there are proposed users.

Not only this, I think 'steal_from_bitmap_to_front' can be improved
using find_prev_zero_bit(), like this:

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index af0013d3df63..9debb9707390 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2372,7 +2372,6 @@ static bool steal_from_bitmap_to_front(struct
btrfs_free_space_ctl *ctl,
  u64 bitmap_offset;
  unsigned long i;
  unsigned long j;
- unsigned long prev_j;
  u64 bytes;

  bitmap_offset = offset_to_bitmap(ctl, info->offset);
@@ -2388,20 +2387,15 @@ static bool steal_from_bitmap_to_front(struct
btrfs_free_space_ctl *ctl,
  return false;

  i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
- j = 0;
- prev_j = (unsigned long)-1;
- for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
- if (j > i)
- break;
- prev_j = j;
- }
- if (prev_j == i)
+ j = find_prev_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
+
+ if (j == i)
  return false;

- if (prev_j == (unsigned long)-1)
+ if (j == BITS_PER_BITMAP)
  bytes = (i + 1) * ctl->unit;
  else
- bytes = (i - prev_j) * ctl->unit;
+ bytes = (i - j) * ctl->unit;

  info->offset -= bytes;
  info->bytes += bytes;

Thanks.

HTH
Levi.

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2020-12-02 18:22         ` Yun Levi
@ 2020-12-02 21:26           ` Yury Norov
  2020-12-02 22:51             ` Yun Levi
  0 siblings, 1 reply; 414+ messages in thread
From: Yury Norov @ 2020-12-02 21:26 UTC (permalink / raw)
  To: Yun Levi
  Cc: Rasmus Villemoes, dushistov, Arnd Bergmann, Andrew Morton,
	Gustavo A. R. Silva, William Breathitt Gray, richard.weiyang,
	joseph.qi, skalluru, Josh Poimboeuf, Linux Kernel Mailing List,
	linux-arch, Andy Shevchenko

On Wed, Dec 2, 2020 at 10:22 AM Yun Levi <ppbuk5246@gmail.com> wrote:
>
> On Thu, Dec 3, 2020 at 2:26 AM Yury Norov <yury.norov@gmail.com> wrote:
>
> > Also look at lib/find_bit_benchmark.c
> Thanks. I'll see.
>
> > We need find_next_*_bit() because find_first_*_bit() can start searching only at word-aligned
> > bits. In the case of find_last_*_bit(), we can start at any bit. So, if my understanding is correct,
> > for the purpose of reverse traversing we can go with already existing find_last_bit(),
>
> Thank you. I haven't thought that way.
> But I think if we implement reverse traversing using find_last_bit(),
> we have a problem.
> Suppose the last bit 0, 1, 2, is set.
> If we start
>     find_last_bit(bitmap, 3) ==> return 2;
>     find_last_bit(bitmap, 2) ==> return 1;
>     find_last_bit(bitmap, 1) ==> return 0;
>     find_last_bit(bitmap, 0) ===> return 0? // here we couldn't
> distinguish size 0 input or 0 is set

If you traverse backward and reach bit #0, you're done. No need to continue.

>
> and the for_each traverse routine prevent above case by returning size
> (nbits) using find_next_bit.
> So, for compatibility and the same expected return value like next traversing,
> I think we need to find_prev_*_bit routine. if my understanding is correct.
>
>
> >  I think this patch has some good catches. We definitely need to implement
> > find_last_zero_bit(), as it is used by fs/ufs, and their local implementation is not optimal.
> >
> > We also should consider adding reverse traversing macros based on find_last_*_bit(),
> > if there are proposed users.
>
> Not only this, I think 'steal_from_bitmap_to_front' can be improved
> using ffind_prev_zero_bit
> like
>
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index af0013d3df63..9debb9707390 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -2372,7 +2372,6 @@ static bool steal_from_bitmap_to_front(struct
> btrfs_free_space_ctl *ctl,
>   u64 bitmap_offset;
>   unsigned long i;
>   unsigned long j;
> - unsigned long prev_j;
>   u64 bytes;
>
>   bitmap_offset = offset_to_bitmap(ctl, info->offset);
> @@ -2388,20 +2387,15 @@ static bool steal_from_bitmap_to_front(struct
> btrfs_free_space_ctl *ctl,
>   return false;
>
>   i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
> - j = 0;
> - prev_j = (unsigned long)-1;
> - for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
> - if (j > i)
> - break;
> - prev_j = j;
> - }
> - if (prev_j == i)
> + j = find_prev_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);

This one may be implemented with find_last_zero_bit() as well:

unsigned long j = find_last_zero_bit(bitmap, BITS_PER_BITMAP);
if (j <= i || j >= BITS_PER_BITMAP)
        return false;

I believe the latter version is better because find_last_*_bit() is simpler in
implementation (and partially exists), has less parameters, and therefore
simpler for users, and doesn't introduce functionality duplication.

The only consideration I can imagine to advocate find_prev*() is the performance
advantage in the scenario when we know for sure that first N bits of
bitmap are all
set/clear, and we can bypass traversing that area. But again, in this
case we can pass the
bitmap address with the appropriate offset, and stay with find_last_*()

> +
> + if (j == i)
>   return false;
>
> - if (prev_j == (unsigned long)-1)
> + if (j == BITS_PER_BITMAP)
>   bytes = (i + 1) * ctl->unit;
>   else
> - bytes = (i - prev_j) * ctl->unit;
> + bytes = (i - j) * ctl->unit;
>
>   info->offset -= bytes;
>   info->bytes += bytes;
>
> Thanks.
>
> HTH
> Levi.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
  2020-12-02 21:26           ` Yury Norov
@ 2020-12-02 22:51             ` Yun Levi
  2020-12-03  1:23               ` Yun Levi
  0 siblings, 1 reply; 414+ messages in thread
From: Yun Levi @ 2020-12-02 22:51 UTC (permalink / raw)
  To: Yury Norov
  Cc: Rasmus Villemoes, dushistov, Arnd Bergmann, Andrew Morton,
	Gustavo A. R. Silva, William Breathitt Gray, richard.weiyang,
	joseph.qi, skalluru, Josh Poimboeuf, Linux Kernel Mailing List,
	linux-arch, Andy Shevchenko

On Thu, Dec 3, 2020 at 6:26 AM Yury Norov <yury.norov@gmail.com> wrote:
>
> On Wed, Dec 2, 2020 at 10:22 AM Yun Levi <ppbuk5246@gmail.com> wrote:
> >
> > On Thu, Dec 3, 2020 at 2:26 AM Yury Norov <yury.norov@gmail.com> wrote:
> >
> > > Also look at lib/find_bit_benchmark.c
> > Thanks. I'll see.
> >
> > > We need find_next_*_bit() because find_first_*_bit() can start searching only at word-aligned
> > > bits. In the case of find_last_*_bit(), we can start at any bit. So, if my understanding is correct,
> > > for the purpose of reverse traversing we can go with already existing find_last_bit(),
> >
> > Thank you. I haven't thought that way.
> > But I think if we implement reverse traversing using find_last_bit(),
> > we have a problem.
> > Suppose the last bit 0, 1, 2, is set.
> > If we start
> >     find_last_bit(bitmap, 3) ==> return 2;
> >     find_last_bit(bitmap, 2) ==> return 1;
> >     find_last_bit(bitmap, 1) ==> return 0;
> >     find_last_bit(bitmap, 0) ===> return 0? // here we couldn't
> > distinguish size 0 input or 0 is set
>
> If you traverse backward and reach bit #0, you're done. No need to continue.
The case I have in mind is a macro like this:

#define for_each_clear_bit_reverse(bit, addr, size)
    for ((bit) = find_last_zero_bit((addr), (size))
          (bit) < (size);
          (bit) = find_prev_zero_bit((addr), (size), (bit)))

If we implement the above macro with find_last_zero_bit() only,
I think there is no way to terminate the loop without an additional variable.
But I don't want to add a variable, so that the macro keeps the same format
as for_each_clear_bit.
That's why I decided to implement the find_prev_*_bit series.

I don't know it's correct thinking or biased. Am I wrong?

>
> >
> > and the for_each traverse routine prevent above case by returning size
> > (nbits) using find_next_bit.
> > So, for compatibility and the same expected return value like next traversing,
> > I think we need to find_prev_*_bit routine. if my understanding is correct.
> >
> >
> > >  I think this patch has some good catches. We definitely need to implement
> > > find_last_zero_bit(), as it is used by fs/ufs, and their local implementation is not optimal.
> > >
> > > We also should consider adding reverse traversing macros based on find_last_*_bit(),
> > > if there are proposed users.
> >
> > Not only this, I think 'steal_from_bitmap_to_front' can be improved
> > using ffind_prev_zero_bit
> > like
> >
> > diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> > index af0013d3df63..9debb9707390 100644
> > --- a/fs/btrfs/free-space-cache.c
> > +++ b/fs/btrfs/free-space-cache.c
> > @@ -2372,7 +2372,6 @@ static bool steal_from_bitmap_to_front(struct
> > btrfs_free_space_ctl *ctl,
> >   u64 bitmap_offset;
> >   unsigned long i;
> >   unsigned long j;
> > - unsigned long prev_j;
> >   u64 bytes;
> >
> >   bitmap_offset = offset_to_bitmap(ctl, info->offset);
> > @@ -2388,20 +2387,15 @@ static bool steal_from_bitmap_to_front(struct
> > btrfs_free_space_ctl *ctl,
> >   return false;
> >
> >   i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
> > - j = 0;
> > - prev_j = (unsigned long)-1;
> > - for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
> > - if (j > i)
> > - break;
> > - prev_j = j;
> > - }
> > - if (prev_j == i)
> > + j = find_prev_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
>
> This one may be implemented with find_last_zero_bit() as well:
>
> unsigned log j = find_last_zero_bit(bitmap, BITS_PER_BITMAP);
> if (j <= i || j >= BITS_PER_BITMAP)
>         return false;
>
Actually, in that code, we don't need to check the bit after i.
Originally, if my understanding is correct, former code tries to find
the last 0 bit before i.
and if all bits are fully set before i, it use next one as i + 1

that's why i think the if condition should be
   if (j >= i)

But the above condition can't discern the case when all bits before i
are set.
Also, I think we don't need to check the bits after i; in this case
find_prev_zero_bit(), which specifies the start point, shows the meaning
of the code more clearly.


> I believe the latter version is better because find_last_*_bit() is simpler in
> implementation (and partially exists), has less parameters, and therefore
> simpler for users, and doesn't introduce functionality duplication.
>
> The only consideration I can imagine to advocate find_prev*() is the performance
> advantage in the scenario when we know for sure that first N bits of
> bitmap are all
> set/clear, and we can bypass traversing that area. But again, in this
> case we can pass the
> bitmap address with the appropriate offset, and stay with find_last_*()
>
> > +
> > + if (j == i)
> >   return false;
> >
> > - if (prev_j == (unsigned long)-1)
> > + if (j == BITS_PER_BITMAP)
> >   bytes = (i + 1) * ctl->unit;
> >   else
> > - bytes = (i - prev_j) * ctl->unit;
> > + bytes = (i - j) * ctl->unit;
> >
> >   info->offset -= bytes;
> >   info->bytes += bytes;
> >
> > Thanks.
> >
> > HTH
> > Levi.

Thanks but

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
  2020-12-02 22:51             ` Yun Levi
@ 2020-12-03  1:23               ` Yun Levi
  2020-12-03  8:33                 ` Rasmus Villemoes
  0 siblings, 1 reply; 414+ messages in thread
From: Yun Levi @ 2020-12-03  1:23 UTC (permalink / raw)
  To: Yury Norov
  Cc: Rasmus Villemoes, dushistov, Arnd Bergmann, Andrew Morton,
	Gustavo A. R. Silva, William Breathitt Gray, richard.weiyang,
	joseph.qi, skalluru, Josh Poimboeuf, Linux Kernel Mailing List,
	linux-arch, Andy Shevchenko

On Thu, Dec 3, 2020 at 7:51 AM Yun Levi <ppbuk5246@gmail.com> wrote:
>
> On Thu, Dec 3, 2020 at 6:26 AM Yury Norov <yury.norov@gmail.com> wrote:
> >
> > On Wed, Dec 2, 2020 at 10:22 AM Yun Levi <ppbuk5246@gmail.com> wrote:
> > >
> > > On Thu, Dec 3, 2020 at 2:26 AM Yury Norov <yury.norov@gmail.com> wrote:
> > >
> > > > Also look at lib/find_bit_benchmark.c
> > > Thanks. I'll see.
> > >
> > > > We need find_next_*_bit() because find_first_*_bit() can start searching only at word-aligned
> > > > bits. In the case of find_last_*_bit(), we can start at any bit. So, if my understanding is correct,
> > > > for the purpose of reverse traversing we can go with already existing find_last_bit(),
> > >
> > > Thank you. I haven't thought that way.
> > > But I think if we implement reverse traversing using find_last_bit(),
> > > we have a problem.
> > > Suppose the last bit 0, 1, 2, is set.
> > > If we start
> > >     find_last_bit(bitmap, 3) ==> return 2;
> > >     find_last_bit(bitmap, 2) ==> return 1;
> > >     find_last_bit(bitmap, 1) ==> return 0;
> > >     find_last_bit(bitmap, 0) ===> return 0? // here we couldn't
> > > distinguish size 0 input or 0 is set
> >
> > If you traverse backward and reach bit #0, you're done. No need to continue.
> I think the case when I consider the this macro like
>
> #define for_each_clear_bit_reverse(bit, addr, size)
>     for ((bit) = find_last_zero_bit((addr), (size))
>           (bit) < (size);
>           (bit) = find_prev_zero_bit((addr), (size), (bit)))
>
> If we implement the above macro only with find_last_zero_bit,
> I think there is no way without adding any additional variable to finish loop.
> But I don't want to add additional variable to sustain same format
> with for_each_clear_bit,
> That's why i decide to implement find_prev_*_bit series.
>
> I don't know it's correct thinking or biased. Am I wrong?
>
> >
> > >
> > > and the for_each traverse routine prevent above case by returning size
> > > (nbits) using find_next_bit.
> > > So, for compatibility and the same expected return value like next traversing,
> > > I think we need to find_prev_*_bit routine. if my understanding is correct.
> > >
> > >
> > > >  I think this patch has some good catches. We definitely need to implement
> > > > find_last_zero_bit(), as it is used by fs/ufs, and their local implementation is not optimal.
> > > >
> > > > We also should consider adding reverse traversing macros based on find_last_*_bit(),
> > > > if there are proposed users.
> > >
> > > Not only this, I think 'steal_from_bitmap_to_front' can be improved
> > > using ffind_prev_zero_bit
> > > like
> > >
> > > diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> > > index af0013d3df63..9debb9707390 100644
> > > --- a/fs/btrfs/free-space-cache.c
> > > +++ b/fs/btrfs/free-space-cache.c
> > > @@ -2372,7 +2372,6 @@ static bool steal_from_bitmap_to_front(struct
> > > btrfs_free_space_ctl *ctl,
> > >   u64 bitmap_offset;
> > >   unsigned long i;
> > >   unsigned long j;
> > > - unsigned long prev_j;
> > >   u64 bytes;
> > >
> > >   bitmap_offset = offset_to_bitmap(ctl, info->offset);
> > > @@ -2388,20 +2387,15 @@ static bool steal_from_bitmap_to_front(struct
> > > btrfs_free_space_ctl *ctl,
> > >   return false;
> > >
> > >   i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
> > > - j = 0;
> > > - prev_j = (unsigned long)-1;
> > > - for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
> > > - if (j > i)
> > > - break;
> > > - prev_j = j;
> > > - }
> > > - if (prev_j == i)
> > > + j = find_prev_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
> >
> > This one may be implemented with find_last_zero_bit() as well:
> >
> > unsigned log j = find_last_zero_bit(bitmap, BITS_PER_BITMAP);
> > if (j <= i || j >= BITS_PER_BITMAP)
> >         return false;
> >
> Actually, in that code, we don't need to check the bit after i.
> Originally, if my understanding is correct, former code tries to find
> the last 0 bit before i.
> and if all bits are fully set before i, it use next one as i + 1
>
> that's why i think the if condition should be
>    if (j >= i)
>
> But above condition couldn't the discern the case when all bits are
> fully set before i.
> Also, I think we don't need to check the bit after i and In this case,
> find_prev_zero_bit which
> specifies the start point is clear to show the meaning of the code.
>
>
> > I believe the latter version is better because find_last_*_bit() is simpler in
> > implementation (and partially exists), has less parameters, and therefore
> > simpler for users, and doesn't introduce functionality duplication.

I think it's not duplication.
Earlier you taught me that find_first_*_bit should start at a word-aligned bit.
But find_first_*_bit declares its argument as the "size of bitmap", not a start offset.
Though the bitmap size is word-aligned, that doesn't matter for finding the
first bit within the specified size of bitmap (if not found, it just returns
the size of the bitmap).

Likewise, find_last_*_bit is also similar in context.
Fundamentally, it's not a start offset of bitmap but I think it just
size of bitmap.

That's the reason why we need to find_next_*_bit to start at the
specified offset.
In this matter, I think it's better to have find_prev_*_bit.

So, I think we can use both of these functions to be used to achieve a goal.
But each function actually has a different concept; that's why I
think it's not duplication.

if my understanding is wrong.. Forgive me. and let me know..

Thanks.



> >
> > The only consideration I can imagine to advocate find_prev*() is the performance
> > advantage in the scenario when we know for sure that first N bits of
> > bitmap are all
> > set/clear, and we can bypass traversing that area. But again, in this
> > case we can pass the
> > bitmap address with the appropriate offset, and stay with find_last_*()
> >
> > > +
> > > + if (j == i)
> > >   return false;
> > >
> > > - if (prev_j == (unsigned long)-1)
> > > + if (j == BITS_PER_BITMAP)
> > >   bytes = (i + 1) * ctl->unit;
> > >   else
> > > - bytes = (i - prev_j) * ctl->unit;
> > > + bytes = (i - j) * ctl->unit;
> > >
> > >   info->offset -= bytes;
> > >   info->bytes += bytes;
> > >
> > > Thanks.
> > >
> > > HTH
> > > Levi.
>
> Thanks but

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-12-03  1:23               ` Yun Levi
@ 2020-12-03  8:33                 ` Rasmus Villemoes
  2020-12-03  9:47                   ` Re: Yun Levi
  0 siblings, 1 reply; 414+ messages in thread
From: Rasmus Villemoes @ 2020-12-03  8:33 UTC (permalink / raw)
  To: Yun Levi, Yury Norov
  Cc: dushistov, Arnd Bergmann, Andrew Morton, Gustavo A. R. Silva,
	William Breathitt Gray, richard.weiyang, joseph.qi, skalluru,
	Josh Poimboeuf, Linux Kernel Mailing List, linux-arch,
	Andy Shevchenko

On 03/12/2020 02.23, Yun Levi wrote:
> On Thu, Dec 3, 2020 at 7:51 AM Yun Levi <ppbuk5246@gmail.com> wrote:
>>
>> On Thu, Dec 3, 2020 at 6:26 AM Yury Norov <yury.norov@gmail.com> wrote:
>>>
>>> On Wed, Dec 2, 2020 at 10:22 AM Yun Levi <ppbuk5246@gmail.com> wrote:
>>>>
>>>> On Thu, Dec 3, 2020 at 2:26 AM Yury Norov <yury.norov@gmail.com> wrote:
>>>>
>>>>> Also look at lib/find_bit_benchmark.c
>>>> Thanks. I'll see.
>>>>
>>>>> We need find_next_*_bit() because find_first_*_bit() can start searching only at word-aligned
>>>>> bits. In the case of find_last_*_bit(), we can start at any bit. So, if my understanding is correct,
>>>>> for the purpose of reverse traversing we can go with already existing find_last_bit(),
>>>>
>>>> Thank you. I haven't thought that way.
>>>> But I think if we implement reverse traversing using find_last_bit(),
>>>> we have a problem.
>>>> Suppose the last bit 0, 1, 2, is set.
>>>> If we start
>>>>     find_last_bit(bitmap, 3) ==> return 2;
>>>>     find_last_bit(bitmap, 2) ==> return 1;
>>>>     find_last_bit(bitmap, 1) ==> return 0;
>>>>     find_last_bit(bitmap, 0) ===> return 0? // here we couldn't

Either just make the return type of all find_prev/find_last be signed
int and use -1 as the sentinel to indicate "no such position exists", so
the loop condition would be foo >= 0. Or, change the condition from
"stop if we get the size returned" to "only continue if we get something
strictly less than the size we passed in (i.e., something which can
possibly be a valid bit index)". In the latter case, both (unsigned)-1
aka UINT_MAX and the actual size value passed work equally well as a
sentinel.

If one uses UINT_MAX, a for_each_bit_reverse() macro would just be
something like

for (i = find_last_bit(bitmap, size); i < size; i =
find_last_bit(bitmap, i))

if one wants to use the size argument as the sentinel, the caller would
have to supply a scratch variable to keep track of the last i value:

for (j = size, i = find_last_bit(bitmap, j); i < j; j = i, i =
find_last_bit(bitmap, j))

which is probably a little less ergonomic.

Rasmus

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-12-03  8:33                 ` Rasmus Villemoes
@ 2020-12-03  9:47                   ` Yun Levi
  2020-12-03 18:46                     ` Re: Yury Norov
  0 siblings, 1 reply; 414+ messages in thread
From: Yun Levi @ 2020-12-03  9:47 UTC (permalink / raw)
  To: Rasmus Villemoes
  Cc: Yury Norov, dushistov, Arnd Bergmann, Andrew Morton,
	Gustavo A. R. Silva, William Breathitt Gray, richard.weiyang,
	joseph.qi, skalluru, Josh Poimboeuf, Linux Kernel Mailing List,
	linux-arch, Andy Shevchenko

> If one uses UINT_MAX, a for_each_bit_reverse() macro would just be
> something like
>
> for (i = find_last_bit(bitmap, size); i < size; i =
> find_last_bit(bitmap, i))
>
> if one wants to use the size argument as the sentinel, the caller would
> have to supply a scratch variable to keep track of the last i value:
>
> for (j = size, i = find_last_bit(bitmap, j); i < j; j = i, i =
> find_last_bit(bitmap, j))
>
> which is probably a little less ergonomic.

Actually Because I want to avoid the modification of return type of
find_last_*_bit for new sentinel,
I add find_prev_*_bit.
the big difference between find_last_bit and find_prev_bit is
   find_last_bit doesn't check the size bit and use sentinel with size.
   but find_prev_bit check the offset bit and use sentinel with size
which passed by another argument.
   So if we use find_prev_bit, we could have a clear iteration if
using find_prev_bit like.

  #define for_each_set_bit_reverse(bit, addr, size) \
      for ((bit) = find_last_bit((addr), (size));    \
            (bit) < (size);                                     \
            (bit) = find_prev_bit((addr), (size), (bit - 1)))

  #define for_each_set_bit_from_reverse(bit, addr, size) \
      for ((bit) = find_prev_bit((addr), (size), (bit)); \
             (bit) < (size);                                           \
             (bit) = find_prev_bit((addr), (size), (bit - 1)))

Though find_prev_*_bit / find_last_*_bit have the same functionality.
But they also have a small difference.
I think this small difference doesn't cause any
confusion for users, but it helps to solve the problem
in a simple way (just like the iteration above).

So I think I need, find_prev_*_bit series.

Am I missing anything?

Thanks.

Levi.

On Thu, Dec 3, 2020 at 5:33 PM Rasmus Villemoes
<linux@rasmusvillemoes.dk> wrote:
>
> On 03/12/2020 02.23, Yun Levi wrote:
> > On Thu, Dec 3, 2020 at 7:51 AM Yun Levi <ppbuk5246@gmail.com> wrote:
> >>
> >> On Thu, Dec 3, 2020 at 6:26 AM Yury Norov <yury.norov@gmail.com> wrote:
> >>>
> >>> On Wed, Dec 2, 2020 at 10:22 AM Yun Levi <ppbuk5246@gmail.com> wrote:
> >>>>
> >>>> On Thu, Dec 3, 2020 at 2:26 AM Yury Norov <yury.norov@gmail.com> wrote:
> >>>>
> >>>>> Also look at lib/find_bit_benchmark.c
> >>>> Thanks. I'll see.
> >>>>
> >>>>> We need find_next_*_bit() because find_first_*_bit() can start searching only at word-aligned
> >>>>> bits. In the case of find_last_*_bit(), we can start at any bit. So, if my understanding is correct,
> >>>>> for the purpose of reverse traversing we can go with already existing find_last_bit(),
> >>>>
> >>>> Thank you. I haven't thought that way.
> >>>> But I think if we implement reverse traversing using find_last_bit(),
> >>>> we have a problem.
> >>>> Suppose the last bit 0, 1, 2, is set.
> >>>> If we start
> >>>>     find_last_bit(bitmap, 3) ==> return 2;
> >>>>     find_last_bit(bitmap, 2) ==> return 1;
> >>>>     find_last_bit(bitmap, 1) ==> return 0;
> >>>>     find_last_bit(bitmap, 0) ===> return 0? // here we couldn't
>
> Either just make the return type of all find_prev/find_last be signed
> int and use -1 as the sentinel to indicate "no such position exists", so
> the loop condition would be foo >= 0. Or, change the condition from
> "stop if we get the size returned" to "only continue if we get something
> strictly less than the size we passed in (i.e., something which can
> possibly be a valid bit index). In the latter case, both (unsigned)-1
> aka UINT_MAX and the actual size value passed work equally well as a
> sentinel.
>
> If one uses UINT_MAX, a for_each_bit_reverse() macro would just be
> something like
>
> for (i = find_last_bit(bitmap, size); i < size; i =
> find_last_bit(bitmap, i))
>
> if one wants to use the size argument as the sentinel, the caller would
> have to supply a scratch variable to keep track of the last i value:
>
> for (j = size, i = find_last_bit(bitmap, j); i < j; j = i, i =
> find_last_bit(bitmap, j))
>
> which is probably a little less ergonomic.
>
> Rasmus

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-12-03  9:47                   ` Re: Yun Levi
@ 2020-12-03 18:46                     ` Yury Norov
  2020-12-03 18:52                       ` Re: Willy Tarreau
  2020-12-05 11:10                       ` Re: Rasmus Villemoes
  0 siblings, 2 replies; 414+ messages in thread
From: Yury Norov @ 2020-12-03 18:46 UTC (permalink / raw)
  To: Yun Levi
  Cc: Rasmus Villemoes, dushistov, Arnd Bergmann, Andrew Morton,
	Gustavo A. R. Silva, William Breathitt Gray, richard.weiyang,
	joseph.qi, skalluru, Josh Poimboeuf, Linux Kernel Mailing List,
	linux-arch, Andy Shevchenko

Yun, could you please stop top-posting and excessive trimming in the thread?

On Thu, Dec 3, 2020 at 1:47 AM Yun Levi <ppbuk5246@gmail.com> wrote:
> > Either just make the return type of all find_prev/find_last be signed
> > int and use -1 as the sentinel to indicate "no such position exists", so
> > the loop condition would be foo >= 0. Or, change the condition from
> > "stop if we get the size returned" to "only continue if we get something
> > strictly less than the size we passed in (i.e., something which can
> > possibly be a valid bit index). In the latter case, both (unsigned)-1
> > aka UINT_MAX and the actual size value passed work equally well as a
> > sentinel.
> >
> > If one uses UINT_MAX, a for_each_bit_reverse() macro would just be
> > something like
> >
> > for (i = find_last_bit(bitmap, size); i < size; i =
> > find_last_bit(bitmap, i))
> >
> > if one wants to use the size argument as the sentinel, the caller would
> > have to supply a scratch variable to keep track of the last i value:
> >
> > for (j = size, i = find_last_bit(bitmap, j); i < j; j = i, i =
> > find_last_bit(bitmap, j))
> >
> > which is probably a little less ergonomic.
> >
> > Rasmus

I would prefer to avoid changing the find*bit() semantics. As for now,
if any of find_*_bit()
finds nothing, it returns the size of the bitmap it was passed.
Changing this for
a single function would break the consistency, and may cause problems
for those who
rely on existing behaviour.

Passing non-positive size to find_*_bit() should produce undefined
behaviour, because we cannot dereference a pointer to the bitmap in
this case; this is most probably a sign of a problem on a caller side
anyways.

Let's keep this logic unchanged?

> Actually Because I want to avoid the modification of return type of
> find_last_*_bit for new sentinel,
> I add find_prev_*_bit.
> the big difference between find_last_bit and find_prev_bit is
>    find_last_bit doesn't check the size bit and use sentinel with size.
>    but find_prev_bit check the offset bit and use sentinel with size
> which passed by another argument.
>    So if we use find_prev_bit, we could have a clear iteration if
> using find_prev_bit like.
>
>   #define for_each_set_bit_reverse(bit, addr, size) \
>       for ((bit) = find_last_bit((addr), (size));    \
>             (bit) < (size);                                     \
>             (bit) = find_prev_bit((addr), (size), (bit - 1)))
>
>   #define for_each_set_bit_from_reverse(bit, addr, size) \
>       for ((bit) = find_prev_bit((addr), (size), (bit)); \
>              (bit) < (size);                                           \
>              (bit) = find_prev_bit((addr), (size), (bit - 1)))
>
> Though find_prev_*_bit / find_last_*_bit have the same functionality.
> But they also have a small difference.
> I think this small this small difference doesn't make some of
> confusion to user but it help to solve problem
> with a simple way (just like the iteration above).
>
> So I think I need, find_prev_*_bit series.
>
> Am I missing anything?
>
> Thanks.
>
> Levi.

As you said, find_last_bit() and proposed find_prev_*_bit() have the
same functionality.
If you really want to have find_prev_*_bit(), could you please at
least write it using find_last_bit(), otherwise it would be just a
blottering.

Regarding reverse search, we can probably do like this (not tested,
just an idea):

#define for_each_set_bit_reverse(bit, addr, size) \
    for ((bit) = find_last_bit((addr), (size));    \
          (bit) < (size);                                     \
          (size) = (bit), (bit) = find_last_bit((addr), (bit)))

Thanks,
Yury

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-12-03 18:46                     ` Re: Yury Norov
@ 2020-12-03 18:52                       ` Willy Tarreau
  2020-12-04  1:36                         ` Re: Yun Levi
  2020-12-05 11:10                       ` Re: Rasmus Villemoes
  1 sibling, 1 reply; 414+ messages in thread
From: Willy Tarreau @ 2020-12-03 18:52 UTC (permalink / raw)
  To: Yury Norov
  Cc: Yun Levi, Rasmus Villemoes, dushistov, Arnd Bergmann,
	Andrew Morton, Gustavo A. R. Silva, William Breathitt Gray,
	richard.weiyang, joseph.qi, skalluru, Josh Poimboeuf,
	Linux Kernel Mailing List, linux-arch, Andy Shevchenko

On Thu, Dec 03, 2020 at 10:46:25AM -0800, Yury Norov wrote:
> Yun, could you please stop top-posting and excessive trimming in the thread?

And re-configure the mail agent to make the "Subject" field appear and
fill it.

Willy

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-12-03 18:52                       ` Re: Willy Tarreau
@ 2020-12-04  1:36                         ` Yun Levi
  2020-12-04 18:14                           ` Re: Yury Norov
  0 siblings, 1 reply; 414+ messages in thread
From: Yun Levi @ 2020-12-04  1:36 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: Yury Norov, Rasmus Villemoes, dushistov, Arnd Bergmann,
	Andrew Morton, Gustavo A. R. Silva, William Breathitt Gray,
	richard.weiyang, joseph.qi, skalluru, Josh Poimboeuf,
	Linux Kernel Mailing List, linux-arch, Andy Shevchenko

>On Fri, Dec 4, 2020 at 3:53 AM Willy Tarreau <w@1wt.eu> wrote:
>
> On Thu, Dec 03, 2020 at 10:46:25AM -0800, Yury Norov wrote:
> > Yun, could you please stop top-posting and excessive trimming in the thread?
>
> And re-configure the mail agent to make the "Subject" field appear and
> fill it.

>On Thu, Dec 03, 2020 at 10:46:25AM -0800, Yury Norov wrote:
> Yun, could you please stop top-posting and excessive trimming in the thread?
Sorry to make you uncomfortable... Thanks for advice.

>On Thu, Dec 03, 2020 at 10:46:25AM -0800, Yury Norov wrote:
> As you said, find_last_bit() and proposed find_prev_*_bit() have the
> same functionality.
> If you really want to have find_prev_*_bit(), could you please at
> least write it using find_last_bit(), otherwise it would be just a
> blottering.

Actually find_prev_*_bit call _find_prev_bit which is a common helper function
like _find_next_bit.
As you know, this function is required to support __BIG_ENDIAN's little
endian search.
find_prev_bit actually wrapper of _find_prev_bit which have a feature
the find_last_bit.

That makes the semantics difference between find_last_bit and find_prev_bit.
-- specify where you find from and
   In loop, find_last_bit couldn't sustain original size as sentinel
return value
    (we should change the size argument for next searching
     But it means whenever we call, "NOT SET or NOT CLEAR"'s sentinel
return value is changed per call).

Because we should have _find_prev_bit,
I think it's a matter of choosing which is better to use in
find_prev_bit (find_last_bit? or _find_prev_bit?)
sustaining find_prev_bit feature (give size as sentinel return, from
where I start).
if my understanding is correct.

In my view, I prefer to use _find_prev_bit like find_next_bit for
integrated format.

But In some of the benchmarking, find_last_bit is better than _find_prev_bit,
here what I tested (look similar but sometimes have some difference).

              Start testing find_bit() with random-filled bitmap
[  +0.001850] find_next_bit:                  842792 ns, 163788 iterations
[  +0.000873] find_prev_bit:                  870914 ns, 163788 iterations
[  +0.000824] find_next_zero_bit:             821959 ns, 163894 iterations
[  +0.000677] find_prev_zero_bit:             676240 ns, 163894 iterations
[  +0.000777] find_last_bit:                  659103 ns, 163788 iterations
[  +0.001822] find_first_bit:                1708041 ns,  16250 iterations
[  +0.000539] find_next_and_bit:              492182 ns,  73871 iterations
[  +0.000001]
              Start testing find_bit() with sparse bitmap
[  +0.000222] find_next_bit:                   13227 ns,    654 iterations
[  +0.000013] find_prev_bit:                   11652 ns,    654 iterations
[  +0.001845] find_next_zero_bit:            1723869 ns, 327028 iterations
[  +0.001538] find_prev_zero_bit:            1355808 ns, 327028 iterations
[  +0.000010] find_last_bit:                    8114 ns,    654 iterations
[  +0.000867] find_first_bit:                 710639 ns,    654 iterations
[  +0.000006] find_next_and_bit:                4273 ns,      1 iterations
[  +0.000004] find_next_and_bit:                3278 ns,      1 iterations

              Start testing find_bit() with random-filled bitmap
[  +0.001784] find_next_bit:                  805553 ns, 164240 iterations
[  +0.000643] find_prev_bit:                  632474 ns, 164240 iterations
[  +0.000950] find_next_zero_bit:             877215 ns, 163442 iterations
[  +0.000664] find_prev_zero_bit:             662339 ns, 163442 iterations
[  +0.000680] find_last_bit:                  602204 ns, 164240 iterations
[  +0.001912] find_first_bit:                1758208 ns,  16408 iterations
[  +0.000760] find_next_and_bit:              531033 ns,  73798 iterations
[  +0.000002]
              Start testing find_bit() with sparse bitmap
[  +0.000203] find_next_bit:                   12468 ns,    656 iterations
[  +0.000205] find_prev_bit:                   10948 ns,    656 iterations
[  +0.001759] find_next_zero_bit:            1579447 ns, 327026 iterations
[  +0.001935] find_prev_zero_bit:            1931961 ns, 327026 iterations
[  +0.000013] find_last_bit:                    9543 ns,    656 iterations
[  +0.000732] find_first_bit:                 562009 ns,    656 iterations
[  +0.000217] find_next_and_bit:                6804 ns,      1 iterations
[  +0.000007] find_next_and_bit:                4367 ns,      1 iterations

Is it better to write find_prev_bit using find_last_bit?
I question again.

Thanks for your great advice, But please forgive my fault and lackness.

HTH.
Levi.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-12-04  1:36                         ` Re: Yun Levi
@ 2020-12-04 18:14                           ` Yury Norov
  2020-12-05  0:45                             ` Re: Yun Levi
  0 siblings, 1 reply; 414+ messages in thread
From: Yury Norov @ 2020-12-04 18:14 UTC (permalink / raw)
  To: Yun Levi
  Cc: Willy Tarreau, Rasmus Villemoes, dushistov, Arnd Bergmann,
	Andrew Morton, Gustavo A. R. Silva, William Breathitt Gray,
	richard.weiyang, joseph.qi, skalluru, Josh Poimboeuf,
	Linux Kernel Mailing List, linux-arch, Andy Shevchenko

On Thu, Dec 3, 2020 at 5:36 PM Yun Levi <ppbuk5246@gmail.com> wrote:
>
> >On Fri, Dec 4, 2020 at 3:53 AM Willy Tarreau <w@1wt.eu> wrote:
> >
> > On Thu, Dec 03, 2020 at 10:46:25AM -0800, Yury Norov wrote:
> > > Yun, could you please stop top-posting and excessive trimming in the thread?
> >
> > And re-configure the mail agent to make the "Subject" field appear and
> > fill it.
>
> >On Thu, Dec 03, 2020 at 10:46:25AM -0800, Yury Norov wrote:
> > Yun, could you please stop top-posting and excessive trimming in the thread?
> Sorry to make you uncomfortable... Thanks for advice.
>
> >On Thu, Dec 03, 2020 at 10:46:25AM -0800, Yury Norov wrote:
> > As you said, find_last_bit() and proposed find_prev_*_bit() have the
> > same functionality.
> > If you really want to have find_prev_*_bit(), could you please at
> > least write it using find_last_bit(), otherwise it would be just a
> > blottering.
>
> Actually find_prev_*_bit call _find_prev_bit which is a common helper function
> like _find_next_bit.
> As you know this function is required to support __BIGEDIAN's little
> endian search.
> find_prev_bit actually wrapper of _find_prev_bit which have a feature
> the find_last_bit.
>
> That makes the semantics difference between find_last_bit and find_prev_bit.
> -- specify where you find from and
>    In loop, find_last_bit couldn't sustain original size as sentinel
> return value
>     (we should change the size argument for next searching
>      But it means whenever we call, "NOT SET or NOT CLEAR"'s sentinel
> return value is changed per call).
>
> Because we should have _find_prev_bit,
> I think it's the matter to choose which is better to usein
> find_prev_bit (find_last_bit? or _find_prev_bit?)
> sustaining find_prev_bit feature (give size as sentinel return, from
> where I start).
> if my understanding is correct.
>
> In my view, I prefer to use _find_prev_bit like find_next_bit for
> integrated format.
>
> But In some of the benchmarking, find_last_bit is better than _find_prev_bit,
> here what I tested (look similar but sometimes have some difference).
>
>               Start testing find_bit() with random-filled bitmap
> [  +0.001850] find_next_bit:                  842792 ns, 163788 iterations
> [  +0.000873] find_prev_bit:                  870914 ns, 163788 iterations
> [  +0.000824] find_next_zero_bit:             821959 ns, 163894 iterations
> [  +0.000677] find_prev_zero_bit:             676240 ns, 163894 iterations
> [  +0.000777] find_last_bit:                  659103 ns, 163788 iterations
> [  +0.001822] find_first_bit:                1708041 ns,  16250 iterations
> [  +0.000539] find_next_and_bit:              492182 ns,  73871 iterations
> [  +0.000001]
>               Start testing find_bit() with sparse bitmap
> [  +0.000222] find_next_bit:                   13227 ns,    654 iterations
> [  +0.000013] find_prev_bit:                   11652 ns,    654 iterations
> [  +0.001845] find_next_zero_bit:            1723869 ns, 327028 iterations
> [  +0.001538] find_prev_zero_bit:            1355808 ns, 327028 iterations
> [  +0.000010] find_last_bit:                    8114 ns,    654 iterations
> [  +0.000867] find_first_bit:                 710639 ns,    654 iterations
> [  +0.000006] find_next_and_bit:                4273 ns,      1 iterations
> [  +0.000004] find_next_and_bit:                3278 ns,      1 iterations
>
>               Start testing find_bit() with random-filled bitmap
> [  +0.001784] find_next_bit:                  805553 ns, 164240 iterations
> [  +0.000643] find_prev_bit:                  632474 ns, 164240 iterations
> [  +0.000950] find_next_zero_bit:             877215 ns, 163442 iterations
> [  +0.000664] find_prev_zero_bit:             662339 ns, 163442 iterations
> [  +0.000680] find_last_bit:                  602204 ns, 164240 iterations
> [  +0.001912] find_first_bit:                1758208 ns,  16408 iterations
> [  +0.000760] find_next_and_bit:              531033 ns,  73798 iterations
> [  +0.000002]
>               Start testing find_bit() with sparse bitmap
> [  +0.000203] find_next_bit:                   12468 ns,    656 iterations
> [  +0.000205] find_prev_bit:                   10948 ns,    656 iterations
> [  +0.001759] find_next_zero_bit:            1579447 ns, 327026 iterations
> [  +0.001935] find_prev_zero_bit:            1931961 ns, 327026 iterations
> [  +0.000013] find_last_bit:                    9543 ns,    656 iterations
> [  +0.000732] find_first_bit:                 562009 ns,    656 iterations
> [  +0.000217] find_next_and_bit:                6804 ns,      1 iterations
> [  +0.000007] find_next_and_bit:                4367 ns,      1 iterations
>
> Is it better to write find_prev_bit using find_last_bit?
> I question again.

I answer again. It's better not to write find_prev_bit at all and
learn how to use existing functionality.

Yury

> Thanks for your great advice, But please forgive my fault and lackness.
>
> HTH.
> Levi.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-12-04 18:14                           ` Re: Yury Norov
@ 2020-12-05  0:45                             ` Yun Levi
  0 siblings, 0 replies; 414+ messages in thread
From: Yun Levi @ 2020-12-05  0:45 UTC (permalink / raw)
  To: Yury Norov
  Cc: Willy Tarreau, Rasmus Villemoes, dushistov, Arnd Bergmann,
	Andrew Morton, Gustavo A. R. Silva, William Breathitt Gray,
	richard.weiyang, joseph.qi, skalluru, Josh Poimboeuf,
	Linux Kernel Mailing List, linux-arch, Andy Shevchenko

> I answer again. It's better not to write find_prev_bit at all and
> learn how to use existing functionality.

Thanks for the answer I'll fix and send the patch again :)

On Sat, Dec 5, 2020 at 3:14 AM Yury Norov <yury.norov@gmail.com> wrote:
>
> On Thu, Dec 3, 2020 at 5:36 PM Yun Levi <ppbuk5246@gmail.com> wrote:
> >
> > >On Fri, Dec 4, 2020 at 3:53 AM Willy Tarreau <w@1wt.eu> wrote:
> > >
> > > On Thu, Dec 03, 2020 at 10:46:25AM -0800, Yury Norov wrote:
> > > > Yun, could you please stop top-posting and excessive trimming in the thread?
> > >
> > > And re-configure the mail agent to make the "Subject" field appear and
> > > fill it.
> >
> > >On Thu, Dec 03, 2020 at 10:46:25AM -0800, Yury Norov wrote:
> > > Yun, could you please stop top-posting and excessive trimming in the thread?
> > Sorry to make you uncomfortable... Thanks for advice.
> >
> > >On Thu, Dec 03, 2020 at 10:46:25AM -0800, Yury Norov wrote:
> > > As you said, find_last_bit() and proposed find_prev_*_bit() have the
> > > same functionality.
> > > If you really want to have find_prev_*_bit(), could you please at
> > > least write it using find_last_bit(), otherwise it would be just a
> > > blottering.
> >
> > Actually find_prev_*_bit call _find_prev_bit which is a common helper function
> > like _find_next_bit.
> > As you know this function is required to support __BIGEDIAN's little
> > endian search.
> > find_prev_bit actually wrapper of _find_prev_bit which have a feature
> > the find_last_bit.
> >
> > That makes the semantics difference between find_last_bit and find_prev_bit.
> > -- specify where you find from and
> >    In loop, find_last_bit couldn't sustain original size as sentinel
> > return value
> >     (we should change the size argument for next searching
> >      But it means whenever we call, "NOT SET or NOT CLEAR"'s sentinel
> > return value is changed per call).
> >
> > Because we should have _find_prev_bit,
> > I think it's the matter to choose which is better to usein
> > find_prev_bit (find_last_bit? or _find_prev_bit?)
> > sustaining find_prev_bit feature (give size as sentinel return, from
> > where I start).
> > if my understanding is correct.
> >
> > In my view, I prefer to use _find_prev_bit like find_next_bit for
> > integrated format.
> >
> > But In some of the benchmarking, find_last_bit is better than _find_prev_bit,
> > here what I tested (look similar but sometimes have some difference).
> >
> >               Start testing find_bit() with random-filled bitmap
> > [  +0.001850] find_next_bit:                  842792 ns, 163788 iterations
> > [  +0.000873] find_prev_bit:                  870914 ns, 163788 iterations
> > [  +0.000824] find_next_zero_bit:             821959 ns, 163894 iterations
> > [  +0.000677] find_prev_zero_bit:             676240 ns, 163894 iterations
> > [  +0.000777] find_last_bit:                  659103 ns, 163788 iterations
> > [  +0.001822] find_first_bit:                1708041 ns,  16250 iterations
> > [  +0.000539] find_next_and_bit:              492182 ns,  73871 iterations
> > [  +0.000001]
> >               Start testing find_bit() with sparse bitmap
> > [  +0.000222] find_next_bit:                   13227 ns,    654 iterations
> > [  +0.000013] find_prev_bit:                   11652 ns,    654 iterations
> > [  +0.001845] find_next_zero_bit:            1723869 ns, 327028 iterations
> > [  +0.001538] find_prev_zero_bit:            1355808 ns, 327028 iterations
> > [  +0.000010] find_last_bit:                    8114 ns,    654 iterations
> > [  +0.000867] find_first_bit:                 710639 ns,    654 iterations
> > [  +0.000006] find_next_and_bit:                4273 ns,      1 iterations
> > [  +0.000004] find_next_and_bit:                3278 ns,      1 iterations
> >
> >               Start testing find_bit() with random-filled bitmap
> > [  +0.001784] find_next_bit:                  805553 ns, 164240 iterations
> > [  +0.000643] find_prev_bit:                  632474 ns, 164240 iterations
> > [  +0.000950] find_next_zero_bit:             877215 ns, 163442 iterations
> > [  +0.000664] find_prev_zero_bit:             662339 ns, 163442 iterations
> > [  +0.000680] find_last_bit:                  602204 ns, 164240 iterations
> > [  +0.001912] find_first_bit:                1758208 ns,  16408 iterations
> > [  +0.000760] find_next_and_bit:              531033 ns,  73798 iterations
> > [  +0.000002]
> >               Start testing find_bit() with sparse bitmap
> > [  +0.000203] find_next_bit:                   12468 ns,    656 iterations
> > [  +0.000205] find_prev_bit:                   10948 ns,    656 iterations
> > [  +0.001759] find_next_zero_bit:            1579447 ns, 327026 iterations
> > [  +0.001935] find_prev_zero_bit:            1931961 ns, 327026 iterations
> > [  +0.000013] find_last_bit:                    9543 ns,    656 iterations
> > [  +0.000732] find_first_bit:                 562009 ns,    656 iterations
> > [  +0.000217] find_next_and_bit:                6804 ns,      1 iterations
> > [  +0.000007] find_next_and_bit:                4367 ns,      1 iterations
> >
> > Is it better to write find_prev_bit using find_last_bit?
> > I question again.
>
> I answer again. It's better not to write find_prev_bit at all and
> learn how to use existing functionality.
>
> Yury
>
> > Thanks for your great advice, But please forgive my fault and lackness.
> >
> > HTH.
> > Levi.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-12-03 18:46                     ` Re: Yury Norov
  2020-12-03 18:52                       ` Re: Willy Tarreau
@ 2020-12-05 11:10                       ` Rasmus Villemoes
  2020-12-05 18:20                         ` Re: Yury Norov
  1 sibling, 1 reply; 414+ messages in thread
From: Rasmus Villemoes @ 2020-12-05 11:10 UTC (permalink / raw)
  To: Yury Norov, Yun Levi
  Cc: dushistov, Arnd Bergmann, Andrew Morton, Gustavo A. R. Silva,
	William Breathitt Gray, richard.weiyang, joseph.qi, skalluru,
	Josh Poimboeuf, Linux Kernel Mailing List, linux-arch,
	Andy Shevchenko

On 03/12/2020 19.46, Yury Norov wrote:

> I would prefer to avoid changing the find*bit() semantics. As for now,
> if any of find_*_bit()
> finds nothing, it returns the size of the bitmap it was passed.

Yeah, we should actually try to fix that, it causes bad code generation.
It's hard, because callers of course do that "if ret == size" check. But
it's really silly that something like find_first_bit needs to do that
"min(i*BPL + __ffs(word), size)" - the caller does a comparison anyway,
that comparison might as well be "ret >= size" rather than "ret ==
size", and then we could get rid of that branch (which min() necessarily
becomes) at the end of find_next_bit.

I haven't dug very deep into this, but I could also imagine the
arch-specific parts of this might become a little easier to do if the
semantics were just "if no such bit, return an indeterminate value >=
the size".

> Changing this for
> a single function would break the consistency, and may cause problems
> for those who
> rely on existing behaviour.

True. But I think it should be possible - I suppose most users are via
the iterator macros, which could all be updated at once. Changing ret ==
size to ret >= size will still work even if the implementations have not
been switched over, so it should be doable.

> 
> Passing non-positive size to find_*_bit() should produce undefined
> behaviour, because we cannot dereference a pointer to the bitmap in
> this case; this is most probably a sign of a problem on a caller side
> anyways.

No, the out-of-line bitmap functions should all handle the case of a
zero-size bitmap sensibly.

Is bitmap full? Yes (all the 0 bits are set).
Is bitmap empty? Yes, (none of the 0 bits are set).
Find the first bit set (returns 0, there's no such bit)

Etc. The static inlines for small_const_nbits do assume that the pointer
can be dereferenced, which is why small_const_nbits was updated to mean
1<=bits<=BITS_PER_LONG rather than just bits<=BITS_PER_LONG.

Rasmus

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-12-05 11:10                       ` Re: Rasmus Villemoes
@ 2020-12-05 18:20                         ` Yury Norov
  0 siblings, 0 replies; 414+ messages in thread
From: Yury Norov @ 2020-12-05 18:20 UTC (permalink / raw)
  To: Rasmus Villemoes
  Cc: Yun Levi, dushistov, Arnd Bergmann, Andrew Morton,
	Gustavo A. R. Silva, William Breathitt Gray, richard.weiyang,
	joseph.qi, skalluru, Josh Poimboeuf, Linux Kernel Mailing List,
	linux-arch, Andy Shevchenko

On Sat, Dec 5, 2020 at 3:10 AM Rasmus Villemoes
<linux@rasmusvillemoes.dk> wrote:
>
> On 03/12/2020 19.46, Yury Norov wrote:
>
> > I would prefer to avoid changing the find*bit() semantics. As for now,
> > if any of find_*_bit()
> > finds nothing, it returns the size of the bitmap it was passed.
>
> Yeah, we should actually try to fix that, it causes bad code generation.
> It's hard, because callers of course do that "if ret == size" check. But
> it's really silly that something like find_first_bit needs to do that
> "min(i*BPL + __ffs(word), size)" - the caller does a comparison anyway,
> that comparison might as well be "ret >= size" rather than "ret ==
> size", and then we could get rid of that branch (which min() necessarily
> becomes) at the end of find_next_bit.

We didn't do that 5 years ago because it's too invasive and the improvement
is barely measurable; the difference is 2 instructions (on arm64).
Has something changed since then?

0000000000000000 <find_first_bit_better>:
   0:   aa0003e3        mov     x3, x0
   4:   aa0103e0        mov     x0, x1
   8:   b4000181        cbz     x1, 38 <find_first_bit_better+0x38>
   c:   f9400064        ldr     x4, [x3]
  10:   d2800802        mov     x2, #0x40                       // #64
  14:   91002063        add     x3, x3, #0x8
  18:   b40000c4        cbz     x4, 30 <find_first_bit_better+0x30>
  1c:   14000008        b       3c <find_first_bit_better+0x3c>
  20:   f8408464        ldr     x4, [x3], #8
  24:   91010045        add     x5, x2, #0x40
  28:   b50000c4        cbnz    x4, 40 <find_first_bit_better+0x40>
  2c:   aa0503e2        mov     x2, x5
  30:   eb00005f        cmp     x2, x0
  34:   54ffff63        b.cc    20 <find_first_bit_better+0x20>  // b.lo, b.ul, b.last
  38:   d65f03c0        ret
  3c:   d2800002        mov     x2, #0x0                        // #0
  40:   dac00084        rbit    x4, x4
  44:   dac01084        clz     x4, x4
  48:   8b020080        add     x0, x4, x2
  4c:   d65f03c0        ret

0000000000000050 <find_first_bit_worse>:
  50:   aa0003e4        mov     x4, x0
  54:   aa0103e0        mov     x0, x1
  58:   b4000181        cbz     x1, 88 <find_first_bit_worse+0x38>
  5c:   f9400083        ldr     x3, [x4]
  60:   d2800802        mov     x2, #0x40                       // #64
  64:   91002084        add     x4, x4, #0x8
  68:   b40000c3        cbz     x3, 80 <find_first_bit_worse+0x30>
  6c:   14000008        b       8c <find_first_bit_worse+0x3c>
  70:   f8408483        ldr     x3, [x4], #8
  74:   91010045        add     x5, x2, #0x40
  78:   b50000c3        cbnz    x3, 90 <find_first_bit_worse+0x40>
  7c:   aa0503e2        mov     x2, x5
  80:   eb02001f        cmp     x0, x2
  84:   54ffff68        b.hi    70 <find_first_bit_worse+0x20>  // b.pmore
  88:   d65f03c0        ret
  8c:   d2800002        mov     x2, #0x0                        // #0
  90:   dac00063        rbit    x3, x3
  94:   dac01063        clz     x3, x3
  98:   8b020062        add     x2, x3, x2
  9c:   eb02001f        cmp     x0, x2
  a0:   9a829000        csel    x0, x0, x2, ls  // ls = plast
  a4:   d65f03c0        ret

> I haven't dug very deep into this, but I could also imagine the
> arch-specific parts of this might become a little easier to do if the
> semantics were just "if no such bit, return an indeterminate value >=
> the size".
>
> > Changing this for
> > a single function would break the consistency, and may cause problems
> > for those who
> > rely on existing behaviour.
>
> True. But I think it should be possible - I suppose most users are via
> the iterator macros, which could all be updated at once. Changing ret ==
> size to ret >= size will still work even if the implementations have not
> been switched over, so it should be doable.

Since there are no assembler users for it, we can just do:
#define find_first_bit(bitmap, size) \
        min(better_find_first_bit((bitmap), (size)), (size))

... and deprecate find_first_bit.

> > Passing non-positive size to find_*_bit() should produce undefined
> > behaviour, because we cannot dereference a pointer to the bitmap in
> > this case; this is most probably a sign of a problem on a caller side
> > anyways.
>
> No, the out-of-line bitmap functions should all handle the case of a
> zero-size bitmap sensibly.

I could be more specific, the behaviour is defined: don't dereference
the address and return undefined value (which now is always 0).

> Is bitmap full? Yes (all the 0 bits are set).
> Is bitmap empty? Yes, (none of the 0 bits are set).
> Find the first bit set (returns 0, there's no such bit)

I can't answer because this object is not a map of bits - there's no room for
bits inside.

> Etc. The static inlines for small_const_nbits do assume that the pointer
> can be dereferenced, which is why small_const_nbits was updated to mean
> 1<=bits<=BITS_PER_LONG rather than just bits<=BITS_PER_LONG.

I don't want to do something like

if (size == 0)
        return -1;

... because it legitimizes this kind of usage and hides problems on
callers' side.
Instead, I'd add WARN_ON(size == 0), but I don't think it's so
critical to bother with it.

Yury

> Rasmus

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH v4] arm64: dts: qcom: Add support for Xiaomi Poco F1 (Beryllium)
@ 2020-08-05 11:02 Amit Pundir
  2020-08-06 22:31 ` Konrad Dybcio
  0 siblings, 1 reply; 414+ messages in thread
From: Amit Pundir @ 2020-08-05 11:02 UTC (permalink / raw)
  To: Andy Gross, Bjorn Andersson, Rob Herring, John Stultz, Sumit Semwal
  Cc: linux-arm-msm, dt, lkml

On Wed, 5 Aug 2020 at 16:21, Amit Pundir <amit.pundir@linaro.org> wrote:
>
> Add initial dts support for Xiaomi Poco F1 (Beryllium).
>
> This initial support is based on upstream Dragonboard 845c
> (sdm845) device. With this dts, Beryllium boots AOSP up to
> ADB shell over USB-C.
>
> Supported functionality includes UFS, USB-C (peripheral),
> microSD card and Vol+/Vol-/power keys. Bluetooth should work
> too but couldn't be verified from adb command line, it is
> verified when enabled from UI with few WIP display patches.
>
> Just like initial db845c support, initializing the SMMU is
> clearing the mapping used for the splash screen framebuffer,
> which causes the device to hang during boot and recovery
> needs a hard power reset. This can be worked around using:
>
>     fastboot oem select-display-panel none
>
> To switch ON the display back run:
>
>     fastboot oem select-display-panel
>
> But this only works on Beryllium devices running bootloader
> version BOOT.XF.2.0-00369-SDM845LZB-1 that shipped with
> Android-9 based release. Newer bootloader version do not
> support switching OFF the display panel at all. So we need
> a few additional smmu patches (under review) from here to
> boot to shell:
> https://github.com/pundiramit/linux/commits/beryllium-mainline
>
> Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
> ---
> v4: Added more downstream reserved memory regions. It probably
>     need more work, but for now I see adsp/cdsp/wlan remoteprocs
>     powering up properly. Also removed the regulator nodes not
>     required for the device, as suggested by Bjorn.

Forgot to mention that I added a couple of clocks to the protected clocks
in v4, which are needed for the display to work.

> v3: Added a reserved-memory region from downstream kernel to fix
>     a boot regression with recent dma-pool changes in v5.8-rc6.
> v2: Updated machine compatible string for seemingly inevitable
>     future quirks.
>
>  arch/arm64/boot/dts/qcom/Makefile             |   1 +
>  arch/arm64/boot/dts/qcom/sdm845-beryllium.dts | 383 ++++++++++++++++++++++++++
>  2 files changed, 384 insertions(+)
>  create mode 100644 arch/arm64/boot/dts/qcom/sdm845-beryllium.dts
>
> diff --git a/arch/arm64/boot/dts/qcom/Makefile b/arch/arm64/boot/dts/qcom/Makefile
> index 0f2c33d611df..3ef1b48bc0cb 100644
> --- a/arch/arm64/boot/dts/qcom/Makefile
> +++ b/arch/arm64/boot/dts/qcom/Makefile
> @@ -21,6 +21,7 @@ dtb-$(CONFIG_ARCH_QCOM)       += sdm845-cheza-r1.dtb
>  dtb-$(CONFIG_ARCH_QCOM)        += sdm845-cheza-r2.dtb
>  dtb-$(CONFIG_ARCH_QCOM)        += sdm845-cheza-r3.dtb
>  dtb-$(CONFIG_ARCH_QCOM)        += sdm845-db845c.dtb
> +dtb-$(CONFIG_ARCH_QCOM)        += sdm845-beryllium.dtb
>  dtb-$(CONFIG_ARCH_QCOM)        += sdm845-mtp.dtb
>  dtb-$(CONFIG_ARCH_QCOM)        += sdm850-lenovo-yoga-c630.dtb
>  dtb-$(CONFIG_ARCH_QCOM)        += sm8150-mtp.dtb
> diff --git a/arch/arm64/boot/dts/qcom/sdm845-beryllium.dts b/arch/arm64/boot/dts/qcom/sdm845-beryllium.dts
> new file mode 100644
> index 000000000000..0f9f61bf9fa4
> --- /dev/null
> +++ b/arch/arm64/boot/dts/qcom/sdm845-beryllium.dts
> @@ -0,0 +1,383 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +/dts-v1/;
> +
> +#include <dt-bindings/gpio/gpio.h>
> +#include <dt-bindings/pinctrl/qcom,pmic-gpio.h>
> +#include <dt-bindings/regulator/qcom,rpmh-regulator.h>
> +#include "sdm845.dtsi"
> +#include "pm8998.dtsi"
> +#include "pmi8998.dtsi"
> +
> +/ {
> +       model = "Xiaomi Technologies Inc. Beryllium";
> +       compatible = "xiaomi,beryllium", "qcom,sdm845";
> +
> +       /* required for bootloader to select correct board */
> +       qcom,board-id = <69 0>;
> +       qcom,msm-id = <321 0x20001>;
> +
> +       aliases {
> +               hsuart0 = &uart6;
> +       };
> +
> +       gpio-keys {
> +               compatible = "gpio-keys";
> +               autorepeat;
> +
> +               pinctrl-names = "default";
> +               pinctrl-0 = <&vol_up_pin_a>;
> +
> +               vol-up {
> +                       label = "Volume Up";
> +                       linux,code = <KEY_VOLUMEUP>;
> +                       gpios = <&pm8998_gpio 6 GPIO_ACTIVE_LOW>;
> +               };
> +       };
> +
> +       vreg_s4a_1p8: vreg-s4a-1p8 {
> +               compatible = "regulator-fixed";
> +               regulator-name = "vreg_s4a_1p8";
> +
> +               regulator-min-microvolt = <1800000>;
> +               regulator-max-microvolt = <1800000>;
> +               regulator-always-on;
> +       };
> +};
> +
> +&adsp_pas {
> +       status = "okay";
> +       firmware-name = "qcom/sdm845/adsp.mdt";
> +};
> +
> +&apps_rsc {
> +       pm8998-rpmh-regulators {
> +               compatible = "qcom,pm8998-rpmh-regulators";
> +               qcom,pmic-id = "a";
> +
> +               vreg_l1a_0p875: ldo1 {
> +                       regulator-min-microvolt = <880000>;
> +                       regulator-max-microvolt = <880000>;
> +                       regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
> +               };
> +
> +               vreg_l5a_0p8: ldo5 {
> +                       regulator-min-microvolt = <800000>;
> +                       regulator-max-microvolt = <800000>;
> +                       regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
> +               };
> +
> +               vreg_l7a_1p8: ldo7 {
> +                       regulator-min-microvolt = <1800000>;
> +                       regulator-max-microvolt = <1800000>;
> +                       regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
> +               };
> +
> +               vreg_l12a_1p8: ldo12 {
> +                       regulator-min-microvolt = <1800000>;
> +                       regulator-max-microvolt = <1800000>;
> +                       regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
> +               };
> +
> +               vreg_l13a_2p95: ldo13 {
> +                       regulator-min-microvolt = <1800000>;
> +                       regulator-max-microvolt = <2960000>;
> +                       regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
> +               };
> +
> +               vreg_l17a_1p3: ldo17 {
> +                       regulator-min-microvolt = <1304000>;
> +                       regulator-max-microvolt = <1304000>;
> +                       regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
> +               };
> +
> +               vreg_l20a_2p95: ldo20 {
> +                       regulator-min-microvolt = <2960000>;
> +                       regulator-max-microvolt = <2968000>;
> +                       regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
> +               };
> +
> +               vreg_l21a_2p95: ldo21 {
> +                       regulator-min-microvolt = <2960000>;
> +                       regulator-max-microvolt = <2968000>;
> +                       regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
> +               };
> +
> +               vreg_l24a_3p075: ldo24 {
> +                       regulator-min-microvolt = <3088000>;
> +                       regulator-max-microvolt = <3088000>;
> +                       regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
> +               };
> +
> +               vreg_l25a_3p3: ldo25 {
> +                       regulator-min-microvolt = <3300000>;
> +                       regulator-max-microvolt = <3312000>;
> +                       regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
> +               };
> +
> +               vreg_l26a_1p2: ldo26 {
> +                       regulator-min-microvolt = <1200000>;
> +                       regulator-max-microvolt = <1200000>;
> +                       regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
> +               };
> +       };
> +};
> +
> +&cdsp_pas {
> +       status = "okay";
> +       firmware-name = "qcom/sdm845/cdsp.mdt";
> +};
> +
> +&gcc {
> +       protected-clocks = <GCC_QSPI_CORE_CLK>,
> +                          <GCC_QSPI_CORE_CLK_SRC>,
> +                          <GCC_QSPI_CNOC_PERIPH_AHB_CLK>,
> +                          <GCC_LPASS_Q6_AXI_CLK>,
> +                          <GCC_LPASS_SWAY_CLK>;
> +};
> +
> +&gpu {
> +       zap-shader {
> +               memory-region = <&gpu_mem>;
> +               firmware-name = "qcom/sdm845/a630_zap.mbn";
> +       };
> +};
> +
> +/* Reserved memory changes from downstream */
> +/delete-node/ &adsp_mem;
> +/delete-node/ &wlan_msa_mem;
> +/delete-node/ &mpss_region;
> +/delete-node/ &venus_mem;
> +/delete-node/ &cdsp_mem;
> +/delete-node/ &mba_region;
> +/delete-node/ &slpi_mem;
> +/delete-node/ &spss_mem;
> +/delete-node/ &rmtfs_mem;
> +/ {
> +       reserved-memory {
> +               // This removed_region is needed to boot the device
> +               // TODO: Find out the user of this reserved memory
> +               removed_region: memory@88f00000 {
> +                       reg = <0 0x88f00000 0 0x1a00000>;
> +                       no-map;
> +               };
> +
> +               adsp_mem: memory@8c500000 {
> +                       reg = <0 0x8c500000 0 0x1e00000>;
> +                       no-map;
> +               };
> +
> +               wlan_msa_mem: memory@8e300000 {
> +                       reg = <0 0x8e300000 0 0x100000>;
> +                       no-map;
> +               };
> +
> +               mpss_region: memory@8e400000 {
> +                       reg = <0 0x8e400000 0 0x7800000>;
> +                       no-map;
> +               };
> +
> +               venus_mem: memory@95c00000 {
> +                       reg = <0 0x95c00000 0 0x500000>;
> +                       no-map;
> +               };
> +
> +               cdsp_mem: memory@96100000 {
> +                       reg = <0 0x96100000 0 0x800000>;
> +                       no-map;
> +               };
> +
> +               mba_region: memory@96900000 {
> +                       reg = <0 0x96900000 0 0x200000>;
> +                       no-map;
> +               };
> +
> +               slpi_mem: memory@96b00000 {
> +                       reg = <0 0x96b00000 0 0x1400000>;
> +                       no-map;
> +               };
> +
> +               spss_mem: memory@97f00000 {
> +                       reg = <0 0x97f00000 0 0x100000>;
> +                       no-map;
> +               };
> +
> +               rmtfs_mem: memory@f6301000 {
> +                       compatible = "qcom,rmtfs-mem";
> +                       reg = <0 0xf6301000 0 0x200000>;
> +                       no-map;
> +
> +                       qcom,client-id = <1>;
> +                       qcom,vmid = <15>;
> +               };
> +       };
> +};
> +
> +&mss_pil {
> +       status = "okay";
> +       firmware-name = "qcom/sdm845/mba.mbn", "qcom/sdm845/modem.mdt";
> +};
> +
> +&pm8998_gpio {
> +       vol_up_pin_a: vol-up-active {
> +               pins = "gpio6";
> +               function = "normal";
> +               input-enable;
> +               bias-pull-up;
> +               qcom,drive-strength = <PMIC_GPIO_STRENGTH_NO>;
> +       };
> +};
> +
> +&pm8998_pon {
> +       resin {
> +               compatible = "qcom,pm8941-resin";
> +               interrupts = <0x0 0x8 1 IRQ_TYPE_EDGE_BOTH>;
> +               debounce = <15625>;
> +               bias-pull-up;
> +               linux,code = <KEY_VOLUMEDOWN>;
> +       };
> +};
> +
> +&qupv3_id_0 {
> +       status = "okay";
> +};
> +
> +&sdhc_2 {
> +       status = "okay";
> +
> +       pinctrl-names = "default";
> +       pinctrl-0 = <&sdc2_default_state &sdc2_card_det_n>;
> +
> +       vmmc-supply = <&vreg_l21a_2p95>;
> +       vqmmc-supply = <&vreg_l13a_2p95>;
> +
> +       bus-width = <4>;
> +       cd-gpios = <&tlmm 126 GPIO_ACTIVE_HIGH>;
> +};
> +
> +&tlmm {
> +       gpio-reserved-ranges = <0 4>, <81 4>;
> +
> +       sdc2_default_state: sdc2-default {
> +               clk {
> +                       pins = "sdc2_clk";
> +                       bias-disable;
> +
> +                       /*
> +                        * It seems that mmc_test reports errors if drive
> +                        * strength is not 16 on clk, cmd, and data pins.
> +                        */
> +                       drive-strength = <16>;
> +               };
> +
> +               cmd {
> +                       pins = "sdc2_cmd";
> +                       bias-pull-up;
> +                       drive-strength = <10>;
> +               };
> +
> +               data {
> +                       pins = "sdc2_data";
> +                       bias-pull-up;
> +                       drive-strength = <10>;
> +               };
> +       };
> +
> +       sdc2_card_det_n: sd-card-det-n {
> +               pins = "gpio126";
> +               function = "gpio";
> +               bias-pull-up;
> +       };
> +};
> +
> +&uart6 {
> +       status = "okay";
> +
> +       bluetooth {
> +               compatible = "qcom,wcn3990-bt";
> +
> +               vddio-supply = <&vreg_s4a_1p8>;
> +               vddxo-supply = <&vreg_l7a_1p8>;
> +               vddrf-supply = <&vreg_l17a_1p3>;
> +               vddch0-supply = <&vreg_l25a_3p3>;
> +               max-speed = <3200000>;
> +       };
> +};
> +
> +&usb_1 {
> +       status = "okay";
> +};
> +
> +&usb_1_dwc3 {
> +       dr_mode = "peripheral";
> +};
> +
> +&usb_1_hsphy {
> +       status = "okay";
> +
> +       vdd-supply = <&vreg_l1a_0p875>;
> +       vdda-pll-supply = <&vreg_l12a_1p8>;
> +       vdda-phy-dpdm-supply = <&vreg_l24a_3p075>;
> +
> +       qcom,imp-res-offset-value = <8>;
> +       qcom,hstx-trim-value = <QUSB2_V2_HSTX_TRIM_21_6_MA>;
> +       qcom,preemphasis-level = <QUSB2_V2_PREEMPHASIS_5_PERCENT>;
> +       qcom,preemphasis-width = <QUSB2_V2_PREEMPHASIS_WIDTH_HALF_BIT>;
> +};
> +
> +&usb_1_qmpphy {
> +       status = "okay";
> +
> +       vdda-phy-supply = <&vreg_l26a_1p2>;
> +       vdda-pll-supply = <&vreg_l1a_0p875>;
> +};
> +
> +&ufs_mem_hc {
> +       status = "okay";
> +
> +       reset-gpios = <&tlmm 150 GPIO_ACTIVE_LOW>;
> +
> +       vcc-supply = <&vreg_l20a_2p95>;
> +       vcc-max-microamp = <800000>;
> +};
> +
> +&ufs_mem_phy {
> +       status = "okay";
> +
> +       vdda-phy-supply = <&vreg_l1a_0p875>;
> +       vdda-pll-supply = <&vreg_l26a_1p2>;
> +};
> +
> +&wifi {
> +       status = "okay";
> +
> +       vdd-0.8-cx-mx-supply = <&vreg_l5a_0p8>;
> +       vdd-1.8-xo-supply = <&vreg_l7a_1p8>;
> +       vdd-1.3-rfa-supply = <&vreg_l17a_1p3>;
> +       vdd-3.3-ch0-supply = <&vreg_l25a_3p3>;
> +};
> +
> +/* PINCTRL - additions to nodes defined in sdm845.dtsi */
> +
> +&qup_uart6_default {
> +       pinmux {
> +               pins = "gpio45", "gpio46", "gpio47", "gpio48";
> +               function = "qup6";
> +       };
> +
> +       cts {
> +               pins = "gpio45";
> +               bias-disable;
> +       };
> +
> +       rts-tx {
> +               pins = "gpio46", "gpio47";
> +               drive-strength = <2>;
> +               bias-disable;
> +       };
> +
> +       rx {
> +               pins = "gpio48";
> +               bias-pull-up;
> +       };
> +};
> --
> 2.7.4
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
  2020-08-05 11:02 [PATCH v4] arm64: dts: qcom: Add support for Xiaomi Poco F1 (Beryllium) Amit Pundir
@ 2020-08-06 22:31 ` Konrad Dybcio
  2020-08-12 13:37   ` Amit Pundir
  0 siblings, 1 reply; 414+ messages in thread
From: Konrad Dybcio @ 2020-08-06 22:31 UTC (permalink / raw)
  To: amit.pundir
  Cc: agross, bjorn.andersson, devicetree, john.stultz, linux-arm-msm,
	linux-kernel, robh+dt, sumit.semwal, Konrad Dybcio

Subject: Re: [PATCH v4] arm64: dts: qcom: Add support for Xiaomi Poco F1 (Beryllium)

>// This removed_region is needed to boot the device
>               // TODO: Find out the user of this reserved memory
>               removed_region: memory@88f00000 {

This region seems to belong to the Trust Zone. When Linux tries to access it, TZ bites and shuts the device down.

Konrad

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-08-06 22:31 ` Konrad Dybcio
@ 2020-08-12 13:37   ` Amit Pundir
  0 siblings, 0 replies; 414+ messages in thread
From: Amit Pundir @ 2020-08-12 13:37 UTC (permalink / raw)
  To: Konrad Dybcio
  Cc: Andy Gross, Bjorn Andersson, dt, John Stultz, linux-arm-msm,
	lkml, Rob Herring, Sumit Semwal

On Fri, 7 Aug 2020 at 04:02, Konrad Dybcio <konradybcio@gmail.com> wrote:
>
> Subject: Re: [PATCH v4] arm64: dts: qcom: Add support for Xiaomi Poco F1 (Beryllium)
>
> >// This removed_region is needed to boot the device
> >               // TODO: Find out the user of this reserved memory
> >               removed_region: memory@88f00000 {
>
> This region seems to belong to the Trust Zone. When Linux tries to access it, TZ bites and shuts the device down.

That is totally possible. Plus it falls right in between TZ and QSEE
reserved-memory regions. However, I do not find any credible source
of information which can confirm this. So I'm hesitant to update the
TODO item in the above comment.

>
> Konrad

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2020-06-30 17:56 Vasiliy Kupriakov
  2020-07-10 20:36 ` Andy Shevchenko
  0 siblings, 1 reply; 414+ messages in thread
From: Vasiliy Kupriakov @ 2020-06-30 17:56 UTC (permalink / raw)
  To: Corentin Chary, Darren Hart, Andy Shevchenko
  Cc: Vasiliy Kupriakov,
	open list:ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS,
	open list:ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS,
	open list

Subject: [PATCH] platform/x86: asus-wmi: allow BAT1 battery name

The battery on my laptop ASUS TUF Gaming FX706II is named BAT1.
This patch allows battery extension to load.

Signed-off-by: Vasiliy Kupriakov <rublag-ns@yandex.ru>
---
 drivers/platform/x86/asus-wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 877aade19497..8f4acdc06b13 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -441,6 +441,7 @@ static int asus_wmi_battery_add(struct power_supply *battery)
 	 * battery is named BATT.
 	 */
 	if (strcmp(battery->desc->name, "BAT0") != 0 &&
+	    strcmp(battery->desc->name, "BAT1") != 0 &&
 	    strcmp(battery->desc->name, "BATT") != 0)
 		return -ENODEV;
 
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2020-06-30 17:56 Vasiliy Kupriakov
@ 2020-07-10 20:36 ` Andy Shevchenko
  0 siblings, 0 replies; 414+ messages in thread
From: Andy Shevchenko @ 2020-07-10 20:36 UTC (permalink / raw)
  To: Vasiliy Kupriakov
  Cc: Corentin Chary, Darren Hart, Andy Shevchenko,
	open list:ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS,
	open list:ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS,
	open list

On Tue, Jun 30, 2020 at 8:57 PM Vasiliy Kupriakov <rublag-ns@yandex.ru> wrote:
>
> Subject: [PATCH] platform/x86: asus-wmi: allow BAT1 battery name
>
> The battery on my laptop ASUS TUF Gaming FX706II is named BAT1.
> This patch allows battery extension to load.
>

Pushed to my review and testing queue, thanks!

> Signed-off-by: Vasiliy Kupriakov <rublag-ns@yandex.ru>
> ---
>  drivers/platform/x86/asus-wmi.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
> index 877aade19497..8f4acdc06b13 100644
> --- a/drivers/platform/x86/asus-wmi.c
> +++ b/drivers/platform/x86/asus-wmi.c
> @@ -441,6 +441,7 @@ static int asus_wmi_battery_add(struct power_supply *battery)
>          * battery is named BATT.
>          */
>         if (strcmp(battery->desc->name, "BAT0") != 0 &&
> +           strcmp(battery->desc->name, "BAT1") != 0 &&
>             strcmp(battery->desc->name, "BATT") != 0)
>                 return -ENODEV;
>
> --
> 2.27.0
>


-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2020-05-06  5:52 Jiaxun Yang
  2020-05-06 17:17 ` Nick Desaulniers
  0 siblings, 1 reply; 414+ messages in thread
From: Jiaxun Yang @ 2020-05-06  5:52 UTC (permalink / raw)
  To: linux-mips
  Cc: Jiaxun Yang, clang-built-linux, Maciej W . Rozycki, Fangrui Song,
	Kees Cook, Nathan Chancellor, Thomas Bogendoerfer, Paul Burton,
	Masahiro Yamada, Jouni Hogander, Kevin Darbyshire-Bryant,
	Borislav Petkov, Heiko Carstens, linux-kernel

Subject: [PATCH v6] MIPS: Truncate link address into 32bit for 32bit kernel
In-Reply-To: <20200413062651.3992652-1-jiaxun.yang@flygoat.com>

LLD failed to link vmlinux with 64bit load address for 32bit ELF
while bfd will strip 64bit address into 32bit silently.
To fix LLD build, we should truncate load address provided by platform
into 32bit for 32bit kernel.

Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Link: https://github.com/ClangBuiltLinux/linux/issues/786
Link: https://sourceware.org/bugzilla/show_bug.cgi?id=25784
Reviewed-by: Fangrui Song <maskray@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Cc: Maciej W. Rozycki <macro@linux-mips.org>
---
V2: Take MaskRay's shell magic.

V3: After spending an hour dealing with a special-character issue in the
Makefile, I gave up on shell hacks and wrote a util in C instead.
Thanks Maciej for pointing out Makefile variable problem.

v4: Finally we managed to find a Makefile method to do it properly
thanks to Kees. As it's too far from the initial version, I removed
Review & Test tag from Nick and Fangrui and Cc instead.

v5: Care vmlinuz as well.

v6: Rename to LINKER_LOAD_ADDRESS
---
 arch/mips/Makefile                 | 13 ++++++++++++-
 arch/mips/boot/compressed/Makefile |  2 +-
 arch/mips/kernel/vmlinux.lds.S     |  2 +-
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index e1c44aed8156..68c0f22fefc0 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -288,12 +288,23 @@ ifdef CONFIG_64BIT
   endif
 endif
 
+# When linking a 32-bit executable the LLVM linker cannot cope with a
+# 32-bit load address that has been sign-extended to 64 bits.  Simply
+# remove the upper 32 bits then, as it is safe to do so with other
+# linkers.
+ifdef CONFIG_64BIT
+	load-ld			= $(load-y)
+else
+	load-ld			= $(subst 0xffffffff,0x,$(load-y))
+endif
+
 KBUILD_AFLAGS	+= $(cflags-y)
 KBUILD_CFLAGS	+= $(cflags-y)
-KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y)
+KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) -DLINKER_LOAD_ADDRESS=$(load-ld)
 KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)
 
 bootvars-y	= VMLINUX_LOAD_ADDRESS=$(load-y) \
+		  LINKER_LOAD_ADDRESS=$(load-ld) \
 		  VMLINUX_ENTRY_ADDRESS=$(entry-y) \
 		  PLATFORM="$(platform-y)" \
 		  ITS_INPUTS="$(its-y)"
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 0df0ee8a298d..3d391256ab7e 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -90,7 +90,7 @@ ifneq ($(zload-y),)
 VMLINUZ_LOAD_ADDRESS := $(zload-y)
 else
 VMLINUZ_LOAD_ADDRESS = $(shell $(obj)/calc_vmlinuz_load_addr \
-		$(obj)/vmlinux.bin $(VMLINUX_LOAD_ADDRESS))
+		$(obj)/vmlinux.bin $(LINKER_LOAD_ADDRESS))
 endif
 UIMAGE_LOADADDR = $(VMLINUZ_LOAD_ADDRESS)
 
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index a5f00ec73ea6..5226cd8e4bee 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -55,7 +55,7 @@ SECTIONS
 	/* . = 0xa800000000300000; */
 	. = 0xffffffff80300000;
 #endif
-	. = VMLINUX_LOAD_ADDRESS;
+	. = LINKER_LOAD_ADDRESS;
 	/* read-only */
 	_text = .;	/* Text and read-only data */
 	.text : {

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2020-05-06  5:52 Jiaxun Yang
@ 2020-05-06 17:17 ` Nick Desaulniers
  0 siblings, 0 replies; 414+ messages in thread
From: Nick Desaulniers @ 2020-05-06 17:17 UTC (permalink / raw)
  To: Jiaxun Yang
  Cc: linux-mips, clang-built-linux, Maciej W . Rozycki, Fangrui Song,
	Kees Cook, Nathan Chancellor, Thomas Bogendoerfer, Paul Burton,
	Masahiro Yamada, Jouni Hogander, Kevin Darbyshire-Bryant,
	Borislav Petkov, Heiko Carstens, LKML

On Tue, May 5, 2020 at 10:52 PM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
>
> Subject: [PATCH v6] MIPS: Truncate link address into 32bit for 32bit kernel
> In-Reply-To: <20200413062651.3992652-1-jiaxun.yang@flygoat.com>
>
> LLD failed to link vmlinux with 64bit load address for 32bit ELF
> while bfd will strip 64bit address into 32bit silently.
> To fix LLD build, we should truncate load address provided by platform
> into 32bit for 32bit kernel.
>
> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> Link: https://github.com/ClangBuiltLinux/linux/issues/786
> Link: https://sourceware.org/bugzilla/show_bug.cgi?id=25784
> Reviewed-by: Fangrui Song <maskray@google.com>
> Reviewed-by: Kees Cook <keescook@chromium.org>
> Tested-by: Nathan Chancellor <natechancellor@gmail.com>
> Cc: Maciej W. Rozycki <macro@linux-mips.org>

Cool, this revision looks a bit simpler. Thanks for chasing this.
Tested-by: Nick Desaulniers <ndesaulniers@google.com>

> ---
> V2: Take MaskRay's shell magic.
>
> V3: After spending an hour dealing with special character issues in the
> Makefile, I gave up on shell hacks and wrote a util in C instead.
> Thanks Maciej for pointing out Makefile variable problem.
>
> v4: Finally we managed to find a Makefile method to do it properly
> thanks to Kees. As it's too far from the initial version, I removed
> Review & Test tag from Nick and Fangrui and Cc instead.
>
> v5: Care vmlinuz as well.
>
> v6: Rename to LINKER_LOAD_ADDRESS
> ---
>  arch/mips/Makefile                 | 13 ++++++++++++-
>  arch/mips/boot/compressed/Makefile |  2 +-
>  arch/mips/kernel/vmlinux.lds.S     |  2 +-
>  3 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/arch/mips/Makefile b/arch/mips/Makefile
> index e1c44aed8156..68c0f22fefc0 100644
> --- a/arch/mips/Makefile
> +++ b/arch/mips/Makefile
> @@ -288,12 +288,23 @@ ifdef CONFIG_64BIT
>    endif
>  endif
>
> +# When linking a 32-bit executable the LLVM linker cannot cope with a
> +# 32-bit load address that has been sign-extended to 64 bits.  Simply
> +# remove the upper 32 bits then, as it is safe to do so with other
> +# linkers.
> +ifdef CONFIG_64BIT
> +       load-ld                 = $(load-y)
> +else
> +       load-ld                 = $(subst 0xffffffff,0x,$(load-y))
> +endif
> +
>  KBUILD_AFLAGS  += $(cflags-y)
>  KBUILD_CFLAGS  += $(cflags-y)
> -KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y)
> +KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) -DLINKER_LOAD_ADDRESS=$(load-ld)
>  KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)
>
>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y) \
> +                 LINKER_LOAD_ADDRESS=$(load-ld) \
>                   VMLINUX_ENTRY_ADDRESS=$(entry-y) \
>                   PLATFORM="$(platform-y)" \
>                   ITS_INPUTS="$(its-y)"
> diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
> index 0df0ee8a298d..3d391256ab7e 100644
> --- a/arch/mips/boot/compressed/Makefile
> +++ b/arch/mips/boot/compressed/Makefile
> @@ -90,7 +90,7 @@ ifneq ($(zload-y),)
>  VMLINUZ_LOAD_ADDRESS := $(zload-y)
>  else
>  VMLINUZ_LOAD_ADDRESS = $(shell $(obj)/calc_vmlinuz_load_addr \
> -               $(obj)/vmlinux.bin $(VMLINUX_LOAD_ADDRESS))
> +               $(obj)/vmlinux.bin $(LINKER_LOAD_ADDRESS))
>  endif
>  UIMAGE_LOADADDR = $(VMLINUZ_LOAD_ADDRESS)
>
> diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
> index a5f00ec73ea6..5226cd8e4bee 100644
> --- a/arch/mips/kernel/vmlinux.lds.S
> +++ b/arch/mips/kernel/vmlinux.lds.S
> @@ -55,7 +55,7 @@ SECTIONS
>         /* . = 0xa800000000300000; */
>         . = 0xffffffff80300000;
>  #endif
> -       . = VMLINUX_LOAD_ADDRESS;
> +       . = LINKER_LOAD_ADDRESS;
>         /* read-only */
>         _text = .;      /* Text and read-only data */
>         .text : {
>
> --

-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <5e7dc543.vYG3wru8B/me1sOV%chenanqing@oppo.com>]

* Re:
       [not found] <5e7dc543.vYG3wru8B/me1sOV%chenanqing@oppo.com>
@ 2020-03-27 15:53 ` Lee Duncan
  0 siblings, 0 replies; 414+ messages in thread
From: Lee Duncan @ 2020-03-27 15:53 UTC (permalink / raw)
  To: chenanqing, linux-kernel, linux-scsi, open-iscsi, ceph-devel,
	martin.petersen, jejb, cleech

On 3/27/20 2:20 AM, chenanqing@oppo.com wrote:
> From: Chen Anqing <chenanqing@oppo.com>
> To: Lee Duncan <lduncan@suse.com>
> Cc: Chris Leech <cleech@redhat.com>,
>         "James E . J . Bottomley" <jejb@linux.ibm.com>,
>         "Martin K . Petersen" <martin.petersen@oracle.com>,
>         ceph-devel@vger.kernel.org,
>         open-iscsi@googlegroups.com,
>         linux-scsi@vger.kernel.org,
>         linux-kernel@vger.kernel.org,
>         chenanqing@oppo.com
> Subject: [PATCH] scsi: libiscsi: we should take compound page into account also
> Date: Fri, 27 Mar 2020 05:20:01 -0400
> Message-Id: <20200327092001.56879-1-chenanqing@oppo.com>
> X-Mailer: git-send-email 2.18.2
> 
> This patch comes from a real crash in which the slab came from a
> compound page, so we need to take compound pages into account as well.
> This fixes commit 08b11eaccfcf ("scsi: libiscsi: fall back to
> sendmsg for slab pages").
> 
> Signed-off-by: Chen Anqing <chenanqing@oppo.com>
> ---
>  drivers/scsi/libiscsi_tcp.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
> index 6ef93c7af954..98304e5e1f6f 100644
> --- a/drivers/scsi/libiscsi_tcp.c
> +++ b/drivers/scsi/libiscsi_tcp.c
> @@ -128,7 +128,8 @@ static void iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv)
>          * coalescing neighboring slab objects into a single frag which
>          * triggers one of hardened usercopy checks.
>          */
> -       if (!recv && page_count(sg_page(sg)) >= 1 && !PageSlab(sg_page(sg)))
> +       if (!recv && page_count(sg_page(sg)) >= 1 &&
> +           !PageSlab(compound_head(sg_page(sg))))
>                 return;
> 
>         if (recv) {
> --
> 2.18.2
> 


This is missing a proper subject ...


^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <5e7dbb10.ulraq/ljeOm297+z%chenanqing@oppo.com>]

* Re:
       [not found] <5e7dbb10.ulraq/ljeOm297+z%chenanqing@oppo.com>
@ 2020-03-27  8:59 ` Ilya Dryomov
  0 siblings, 0 replies; 414+ messages in thread
From: Ilya Dryomov @ 2020-03-27  8:59 UTC (permalink / raw)
  To: chenanqing; +Cc: LKML, netdev, Ceph Development, kuba, Sage Weil, Jeff Layton

On Fri, Mar 27, 2020 at 9:36 AM <chenanqing@oppo.com> wrote:
>
> From: Chen Anqing <chenanqing@oppo.com>
> To: Ilya Dryomov <idryomov@gmail.com>
> Cc: Jeff Layton <jlayton@kernel.org>,
>         Sage Weil <sage@redhat.com>,
>         Jakub Kicinski <kuba@kernel.org>,
>         ceph-devel@vger.kernel.org,
>         netdev@vger.kernel.org,
>         linux-kernel@vger.kernel.org,
>         chenanqing@oppo.com
> Subject: [PATCH] libceph: we should take compound page into account also
> Date: Fri, 27 Mar 2020 04:36:30 -0400
> Message-Id: <20200327083630.36296-1-chenanqing@oppo.com>
> X-Mailer: git-send-email 2.18.2
>
> This patch comes from a real crash in which the slab came from a
> compound page, so we need to take compound pages into account as well.
> This fixes commit 7e241f647dc7 ("libceph: fall back to sendmsg for slab pages").
>
> Signed-off-by: Chen Anqing <chenanqing@oppo.com>
> ---
>  net/ceph/messenger.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
> index f8ca5edc5f2c..e08c1c334cd9 100644
> --- a/net/ceph/messenger.c
> +++ b/net/ceph/messenger.c
> @@ -582,7 +582,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
>          * coalescing neighboring slab objects into a single frag which
>          * triggers one of hardened usercopy checks.
>          */
> -       if (page_count(page) >= 1 && !PageSlab(page))
> +       if (page_count(page) >= 1 && !PageSlab(compound_head(page)))
>                 sendpage = sock->ops->sendpage;
>         else
>                 sendpage = sock_no_sendpage;

Hi Chen,

AFAICT compound pages should already be taken into account, because
PageSlab is defined as:

  __PAGEFLAG(Slab, slab, PF_NO_TAIL)

  #define __PAGEFLAG(uname, lname, policy)                       \
      TESTPAGEFLAG(uname, lname, policy)                         \
      __SETPAGEFLAG(uname, lname, policy)                        \
      __CLEARPAGEFLAG(uname, lname, policy)

  #define TESTPAGEFLAG(uname, lname, policy)                     \
  static __always_inline int Page##uname(struct page *page)      \
      { return test_bit(PG_##lname, &policy(page, 0)->flags); }

and PF_NO_TAIL policy is defined as:

  #define PF_NO_TAIL(page, enforce) ({                        \
      VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page);     \
      PF_POISONED_CHECK(compound_head(page)); })

So compound_head() is called behind the scenes.

Could you please explain in more detail what crash you observed?
Perhaps you backported this patch to an older kernel?

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2020-03-03 15:27 Gene Chen
  2020-03-04 14:56 ` Matthias Brugger
  0 siblings, 1 reply; 414+ messages in thread
From: Gene Chen @ 2020-03-03 15:27 UTC (permalink / raw)
  To: lee.jones, matthias.bgg
  Cc: linux-arm-kernel, linux-mediatek, linux-kernel, gene_chen,
	Wilma.Wu, shufan_lee, cy_huang


Add an MFD driver for the MT6360 PMIC chip, which includes
Battery Charger/USB_PD/Flash LED/RGB LED/LDO/Buck

Signed-off-by: Gene Chen <gene_chen@richtek.com>
---
 drivers/mfd/Kconfig        |  12 ++
 drivers/mfd/Makefile       |   1 +
 drivers/mfd/mt6360-core.c  | 425 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/mt6360.h | 240 +++++++++++++++++++++++++
 4 files changed, 678 insertions(+)
 create mode 100644 drivers/mfd/mt6360-core.c
 create mode 100644 include/linux/mfd/mt6360.h

changelogs between v1 & v2
- include missing header file

changelogs between v2 & v3
- add changelogs

changelogs between v3 & v4
- fix Kconfig description
- replace mt6360_pmu_info with mt6360_pmu_data
- replace probe with probe_new
- remove unnecessary irq_chip variable
- remove annotation
- replace MT6360_MFD_CELL with OF_MFD_CELL

changelogs between v4 & v5
- remove unnecessary parse dt function
- use devm_i2c_new_dummy_device
- add base-commit message

changelogs between v5 & v6
- review return value
- remove i2c id_table
- use GPL license v2

changelogs between v6 & v7
- add author description
- replace MT6360_REGMAP_IRQ_REG by REGMAP_IRQ_REG_LINE
- remove mt6360-private.h

changelogs between v7 & v8
- fix kbuild auto reboot by include interrupt header

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 2b20329..0f8c341 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -857,6 +857,18 @@ config MFD_MAX8998
 	  additional drivers must be enabled in order to use the functionality
 	  of the device.
 
+config MFD_MT6360
+	tristate "Mediatek MT6360 SubPMIC"
+	select MFD_CORE
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	depends on I2C
+	help
+	  Say Y here to enable MT6360 PMU/PMIC/LDO functional support.
+	  PMU part includes Charger, Flashlight, RGB LED
+	  PMIC part includes 2-channel BUCKs and 2-channel LDOs
+	  LDO part includes 4-channel LDOs
+
 config MFD_MT6397
 	tristate "MediaTek MT6397 PMIC Support"
 	select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index b83f172..8c35816 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -238,6 +238,7 @@ obj-$(CONFIG_INTEL_SOC_PMIC)	+= intel-soc-pmic.o
 obj-$(CONFIG_INTEL_SOC_PMIC_BXTWC)	+= intel_soc_pmic_bxtwc.o
 obj-$(CONFIG_INTEL_SOC_PMIC_CHTWC)	+= intel_soc_pmic_chtwc.o
 obj-$(CONFIG_INTEL_SOC_PMIC_CHTDC_TI)	+= intel_soc_pmic_chtdc_ti.o
+obj-$(CONFIG_MFD_MT6360)	+= mt6360-core.o
 mt6397-objs	:= mt6397-core.o mt6397-irq.o
 obj-$(CONFIG_MFD_MT6397)	+= mt6397.o
 obj-$(CONFIG_INTEL_SOC_PMIC_MRFLD)	+= intel_soc_pmic_mrfld.o
diff --git a/drivers/mfd/mt6360-core.c b/drivers/mfd/mt6360-core.c
new file mode 100644
index 0000000..d1168f8
--- /dev/null
+++ b/drivers/mfd/mt6360-core.c
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ *
+ * Author: Gene Chen <gene_chen@richtek.com>
+ */
+
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/version.h>
+
+#include <linux/mfd/mt6360.h>
+
+/* reg 0 -> 0 ~ 7 */
+#define MT6360_CHG_TREG_EVT		(4)
+#define MT6360_CHG_AICR_EVT		(5)
+#define MT6360_CHG_MIVR_EVT		(6)
+#define MT6360_PWR_RDY_EVT		(7)
+/* REG 1 -> 8 ~ 15 */
+#define MT6360_CHG_BATSYSUV_EVT		(9)
+#define MT6360_FLED_CHG_VINOVP_EVT	(11)
+#define MT6360_CHG_VSYSUV_EVT		(12)
+#define MT6360_CHG_VSYSOV_EVT		(13)
+#define MT6360_CHG_VBATOV_EVT		(14)
+#define MT6360_CHG_VBUSOV_EVT		(15)
+/* REG 2 -> 16 ~ 23 */
+/* REG 3 -> 24 ~ 31 */
+#define MT6360_WD_PMU_DET		(25)
+#define MT6360_WD_PMU_DONE		(26)
+#define MT6360_CHG_TMRI			(27)
+#define MT6360_CHG_ADPBADI		(29)
+#define MT6360_CHG_RVPI			(30)
+#define MT6360_OTPI			(31)
+/* REG 4 -> 32 ~ 39 */
+#define MT6360_CHG_AICCMEASL		(32)
+#define MT6360_CHGDET_DONEI		(34)
+#define MT6360_WDTMRI			(35)
+#define MT6360_SSFINISHI		(36)
+#define MT6360_CHG_RECHGI		(37)
+#define MT6360_CHG_TERMI		(38)
+#define MT6360_CHG_IEOCI		(39)
+/* REG 5 -> 40 ~ 47 */
+#define MT6360_PUMPX_DONEI		(40)
+#define MT6360_BAT_OVP_ADC_EVT		(41)
+#define MT6360_TYPEC_OTP_EVT		(42)
+#define MT6360_ADC_WAKEUP_EVT		(43)
+#define MT6360_ADC_DONEI		(44)
+#define MT6360_BST_BATUVI		(45)
+#define MT6360_BST_VBUSOVI		(46)
+#define MT6360_BST_OLPI			(47)
+/* REG 6 -> 48 ~ 55 */
+#define MT6360_ATTACH_I			(48)
+#define MT6360_DETACH_I			(49)
+#define MT6360_QC30_STPDONE		(51)
+#define MT6360_QC_VBUSDET_DONE		(52)
+#define MT6360_HVDCP_DET		(53)
+#define MT6360_CHGDETI			(54)
+#define MT6360_DCDTI			(55)
+/* REG 7 -> 56 ~ 63 */
+#define MT6360_FOD_DONE_EVT		(56)
+#define MT6360_FOD_OV_EVT		(57)
+#define MT6360_CHRDET_UVP_EVT		(58)
+#define MT6360_CHRDET_OVP_EVT		(59)
+#define MT6360_CHRDET_EXT_EVT		(60)
+#define MT6360_FOD_LR_EVT		(61)
+#define MT6360_FOD_HR_EVT		(62)
+#define MT6360_FOD_DISCHG_FAIL_EVT	(63)
+/* REG 8 -> 64 ~ 71 */
+#define MT6360_USBID_EVT		(64)
+#define MT6360_APWDTRST_EVT		(65)
+#define MT6360_EN_EVT			(66)
+#define MT6360_QONB_RST_EVT		(67)
+#define MT6360_MRSTB_EVT		(68)
+#define MT6360_OTP_EVT			(69)
+#define MT6360_VDDAOV_EVT		(70)
+#define MT6360_SYSUV_EVT		(71)
+/* REG 9 -> 72 ~ 79 */
+#define MT6360_FLED_STRBPIN_EVT		(72)
+#define MT6360_FLED_TORPIN_EVT		(73)
+#define MT6360_FLED_TX_EVT		(74)
+#define MT6360_FLED_LVF_EVT		(75)
+#define MT6360_FLED2_SHORT_EVT		(78)
+#define MT6360_FLED1_SHORT_EVT		(79)
+/* REG 10 -> 80 ~ 87 */
+#define MT6360_FLED2_STRB_EVT		(80)
+#define MT6360_FLED1_STRB_EVT		(81)
+#define MT6360_FLED2_STRB_TO_EVT	(82)
+#define MT6360_FLED1_STRB_TO_EVT	(83)
+#define MT6360_FLED2_TOR_EVT		(84)
+#define MT6360_FLED1_TOR_EVT		(85)
+/* REG 11 -> 88 ~ 95 */
+/* REG 12 -> 96 ~ 103 */
+#define MT6360_BUCK1_PGB_EVT		(96)
+#define MT6360_BUCK1_OC_EVT		(100)
+#define MT6360_BUCK1_OV_EVT		(101)
+#define MT6360_BUCK1_UV_EVT		(102)
+/* REG 13 -> 104 ~ 111 */
+#define MT6360_BUCK2_PGB_EVT		(104)
+#define MT6360_BUCK2_OC_EVT		(108)
+#define MT6360_BUCK2_OV_EVT		(109)
+#define MT6360_BUCK2_UV_EVT		(110)
+/* REG 14 -> 112 ~ 119 */
+#define MT6360_LDO1_OC_EVT		(113)
+#define MT6360_LDO2_OC_EVT		(114)
+#define MT6360_LDO3_OC_EVT		(115)
+#define MT6360_LDO5_OC_EVT		(117)
+#define MT6360_LDO6_OC_EVT		(118)
+#define MT6360_LDO7_OC_EVT		(119)
+/* REG 15 -> 120 ~ 127 */
+#define MT6360_LDO1_PGB_EVT		(121)
+#define MT6360_LDO2_PGB_EVT		(122)
+#define MT6360_LDO3_PGB_EVT		(123)
+#define MT6360_LDO5_PGB_EVT		(125)
+#define MT6360_LDO6_PGB_EVT		(126)
+#define MT6360_LDO7_PGB_EVT		(127)
+
+static const struct regmap_irq mt6360_pmu_irqs[] =  {
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_TREG_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_AICR_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_MIVR_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_PWR_RDY_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_BATSYSUV_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED_CHG_VINOVP_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_VSYSUV_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_VSYSOV_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_VBATOV_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_VBUSOV_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_WD_PMU_DET, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_WD_PMU_DONE, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_TMRI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_ADPBADI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_RVPI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_OTPI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_AICCMEASL, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHGDET_DONEI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_WDTMRI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_SSFINISHI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_RECHGI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_TERMI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_IEOCI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_PUMPX_DONEI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHG_TREG_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_BAT_OVP_ADC_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_TYPEC_OTP_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_ADC_WAKEUP_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_ADC_DONEI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_BST_BATUVI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_BST_VBUSOVI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_BST_OLPI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_ATTACH_I, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_DETACH_I, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_QC30_STPDONE, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_QC_VBUSDET_DONE, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_HVDCP_DET, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHGDETI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_DCDTI, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FOD_DONE_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FOD_OV_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHRDET_UVP_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHRDET_OVP_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_CHRDET_EXT_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FOD_LR_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FOD_HR_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FOD_DISCHG_FAIL_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_USBID_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_APWDTRST_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_EN_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_QONB_RST_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_MRSTB_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_OTP_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_VDDAOV_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_SYSUV_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED_STRBPIN_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED_TORPIN_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED_TX_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED_LVF_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED2_SHORT_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED1_SHORT_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED2_STRB_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED1_STRB_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED2_STRB_TO_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED1_STRB_TO_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED2_TOR_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_FLED1_TOR_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_BUCK1_PGB_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_BUCK1_OC_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_BUCK1_OV_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_BUCK1_UV_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_BUCK2_PGB_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_BUCK2_OC_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_BUCK2_OV_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_BUCK2_UV_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_LDO1_OC_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_LDO2_OC_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_LDO3_OC_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_LDO5_OC_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_LDO6_OC_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_LDO7_OC_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_LDO1_PGB_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_LDO2_PGB_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_LDO3_PGB_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_LDO5_PGB_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_LDO6_PGB_EVT, 8),
+	REGMAP_IRQ_REG_LINE(MT6360_LDO7_PGB_EVT, 8),
+};
+
+static int mt6360_pmu_handle_post_irq(void *irq_drv_data)
+{
+	struct mt6360_pmu_data *mpd = irq_drv_data;
+
+	return regmap_update_bits(mpd->regmap,
+		MT6360_PMU_IRQ_SET, MT6360_IRQ_RETRIG, MT6360_IRQ_RETRIG);
+}
+
+static struct regmap_irq_chip mt6360_pmu_irq_chip = {
+	.irqs = mt6360_pmu_irqs,
+	.num_irqs = ARRAY_SIZE(mt6360_pmu_irqs),
+	.num_regs = MT6360_PMU_IRQ_REGNUM,
+	.mask_base = MT6360_PMU_CHG_MASK1,
+	.status_base = MT6360_PMU_CHG_IRQ1,
+	.ack_base = MT6360_PMU_CHG_IRQ1,
+	.init_ack_masked = true,
+	.use_ack = true,
+	.handle_post_irq = mt6360_pmu_handle_post_irq,
+};
+
+static const struct regmap_config mt6360_pmu_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = MT6360_PMU_MAXREG,
+};
+
+static const struct resource mt6360_adc_resources[] = {
+	DEFINE_RES_IRQ_NAMED(MT6360_ADC_DONEI, "adc_donei"),
+};
+
+static const struct resource mt6360_chg_resources[] = {
+	DEFINE_RES_IRQ_NAMED(MT6360_CHG_TREG_EVT, "chg_treg_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_PWR_RDY_EVT, "pwr_rdy_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_CHG_BATSYSUV_EVT, "chg_batsysuv_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_CHG_VSYSUV_EVT, "chg_vsysuv_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_CHG_VSYSOV_EVT, "chg_vsysov_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_CHG_VBATOV_EVT, "chg_vbatov_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_CHG_VBUSOV_EVT, "chg_vbusov_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_CHG_AICCMEASL, "chg_aiccmeasl"),
+	DEFINE_RES_IRQ_NAMED(MT6360_WDTMRI, "wdtmri"),
+	DEFINE_RES_IRQ_NAMED(MT6360_CHG_RECHGI, "chg_rechgi"),
+	DEFINE_RES_IRQ_NAMED(MT6360_CHG_TERMI, "chg_termi"),
+	DEFINE_RES_IRQ_NAMED(MT6360_CHG_IEOCI, "chg_ieoci"),
+	DEFINE_RES_IRQ_NAMED(MT6360_PUMPX_DONEI, "pumpx_donei"),
+	DEFINE_RES_IRQ_NAMED(MT6360_ATTACH_I, "attach_i"),
+	DEFINE_RES_IRQ_NAMED(MT6360_CHRDET_EXT_EVT, "chrdet_ext_evt"),
+};
+
+static const struct resource mt6360_led_resources[] = {
+	DEFINE_RES_IRQ_NAMED(MT6360_FLED_CHG_VINOVP_EVT, "fled_chg_vinovp_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_FLED_LVF_EVT, "fled_lvf_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_FLED2_SHORT_EVT, "fled2_short_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_FLED1_SHORT_EVT, "fled1_short_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_FLED2_STRB_TO_EVT, "fled2_strb_to_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_FLED1_STRB_TO_EVT, "fled1_strb_to_evt"),
+};
+
+static const struct resource mt6360_pmic_resources[] = {
+	DEFINE_RES_IRQ_NAMED(MT6360_BUCK1_PGB_EVT, "buck1_pgb_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_BUCK1_OC_EVT, "buck1_oc_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_BUCK1_OV_EVT, "buck1_ov_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_BUCK1_UV_EVT, "buck1_uv_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_BUCK2_PGB_EVT, "buck2_pgb_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_BUCK2_OC_EVT, "buck2_oc_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_BUCK2_OV_EVT, "buck2_ov_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_BUCK2_UV_EVT, "buck2_uv_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO6_OC_EVT, "ldo6_oc_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO7_OC_EVT, "ldo7_oc_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO6_PGB_EVT, "ldo6_pgb_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO7_PGB_EVT, "ldo7_pgb_evt"),
+};
+
+static const struct resource mt6360_ldo_resources[] = {
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO1_OC_EVT, "ldo1_oc_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO2_OC_EVT, "ldo2_oc_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO3_OC_EVT, "ldo3_oc_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO5_OC_EVT, "ldo5_oc_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO1_PGB_EVT, "ldo1_pgb_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO2_PGB_EVT, "ldo2_pgb_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO3_PGB_EVT, "ldo3_pgb_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO5_PGB_EVT, "ldo5_pgb_evt"),
+};
+
+static const struct mfd_cell mt6360_devs[] = {
+	OF_MFD_CELL("mt6360_adc", mt6360_adc_resources,
+		    NULL, 0, 0, "mediatek,mt6360_adc"),
+	OF_MFD_CELL("mt6360_chg", mt6360_chg_resources,
+		    NULL, 0, 0, "mediatek,mt6360_chg"),
+	OF_MFD_CELL("mt6360_led", mt6360_led_resources,
+		    NULL, 0, 0, "mediatek,mt6360_led"),
+	OF_MFD_CELL("mt6360_pmic", mt6360_pmic_resources,
+		    NULL, 0, 0, "mediatek,mt6360_pmic"),
+	OF_MFD_CELL("mt6360_ldo", mt6360_ldo_resources,
+		    NULL, 0, 0, "mediatek,mt6360_ldo"),
+	OF_MFD_CELL("mt6360_tcpc", NULL,
+		    NULL, 0, 0, "mediatek,mt6360_tcpc"),
+};
+
+static const unsigned short mt6360_slave_addr[MT6360_SLAVE_MAX] = {
+	MT6360_PMU_SLAVEID,
+	MT6360_PMIC_SLAVEID,
+	MT6360_LDO_SLAVEID,
+	MT6360_TCPC_SLAVEID,
+};
+
+static int mt6360_pmu_probe(struct i2c_client *client)
+{
+	struct mt6360_pmu_data *mpd;
+	unsigned int reg_data;
+	int i, ret;
+
+	mpd = devm_kzalloc(&client->dev, sizeof(*mpd), GFP_KERNEL);
+	if (!mpd)
+		return -ENOMEM;
+
+	mpd->dev = &client->dev;
+	i2c_set_clientdata(client, mpd);
+
+	mpd->regmap = devm_regmap_init_i2c(client, &mt6360_pmu_regmap_config);
+	if (IS_ERR(mpd->regmap)) {
+		dev_err(&client->dev, "Failed to register regmap\n");
+		return PTR_ERR(mpd->regmap);
+	}
+
+	ret = regmap_read(mpd->regmap, MT6360_PMU_DEV_INFO, &reg_data);
+	if (ret) {
+		dev_err(&client->dev, "Device not found\n");
+		return ret;
+	}
+
+	mpd->chip_rev = reg_data & CHIP_REV_MASK;
+	if (mpd->chip_rev != CHIP_VEN_MT6360) {
+		dev_err(&client->dev, "Device not supported\n");
+		return -ENODEV;
+	}
+
+	mt6360_pmu_irq_chip.irq_drv_data = mpd;
+	ret = devm_regmap_add_irq_chip(&client->dev, mpd->regmap, client->irq,
+				       IRQF_TRIGGER_FALLING, 0,
+				       &mt6360_pmu_irq_chip, &mpd->irq_data);
+	if (ret) {
+		dev_err(&client->dev, "Failed to add Regmap IRQ Chip\n");
+		return ret;
+	}
+
+	mpd->i2c[0] = client;
+	for (i = 1; i < MT6360_SLAVE_MAX; i++) {
+		mpd->i2c[i] = devm_i2c_new_dummy_device(&client->dev,
+							client->adapter,
+							mt6360_slave_addr[i]);
+		if (IS_ERR(mpd->i2c[i])) {
+			dev_err(&client->dev,
+				"Failed to get new dummy I2C device for address 0x%x",
+				mt6360_slave_addr[i]);
+			return PTR_ERR(mpd->i2c[i]);
+		}
+		i2c_set_clientdata(mpd->i2c[i], mpd);
+	}
+
+	ret = devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_AUTO,
+				   mt6360_devs, ARRAY_SIZE(mt6360_devs), NULL,
+				   0, regmap_irq_get_domain(mpd->irq_data));
+	if (ret) {
+		dev_err(&client->dev,
+			"Failed to register subordinate devices\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __maybe_unused mt6360_pmu_suspend(struct device *dev)
+{
+	struct i2c_client *i2c = to_i2c_client(dev);
+
+	if (device_may_wakeup(dev))
+		enable_irq_wake(i2c->irq);
+
+	return 0;
+}
+
+static int __maybe_unused mt6360_pmu_resume(struct device *dev)
+{
+
+	struct i2c_client *i2c = to_i2c_client(dev);
+
+	if (device_may_wakeup(dev))
+		disable_irq_wake(i2c->irq);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(mt6360_pmu_pm_ops,
+			 mt6360_pmu_suspend, mt6360_pmu_resume);
+
+static const struct of_device_id __maybe_unused mt6360_pmu_of_id[] = {
+	{ .compatible = "mediatek,mt6360_pmu", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mt6360_pmu_of_id);
+
+static struct i2c_driver mt6360_pmu_driver = {
+	.driver = {
+		.pm = &mt6360_pmu_pm_ops,
+		.of_match_table = of_match_ptr(mt6360_pmu_of_id),
+	},
+	.probe_new = mt6360_pmu_probe,
+};
+module_i2c_driver(mt6360_pmu_driver);
+
+MODULE_AUTHOR("Gene Chen <gene_chen@richtek.com>");
+MODULE_DESCRIPTION("MT6360 PMU I2C Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/mt6360.h b/include/linux/mfd/mt6360.h
new file mode 100644
index 0000000..c03e6d1
--- /dev/null
+++ b/include/linux/mfd/mt6360.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ */
+
+#ifndef __MT6360_H__
+#define __MT6360_H__
+
+#include <linux/regmap.h>
+
+enum {
+	MT6360_SLAVE_PMU = 0,
+	MT6360_SLAVE_PMIC,
+	MT6360_SLAVE_LDO,
+	MT6360_SLAVE_TCPC,
+	MT6360_SLAVE_MAX,
+};
+
+#define MT6360_PMU_SLAVEID	(0x34)
+#define MT6360_PMIC_SLAVEID	(0x1A)
+#define MT6360_LDO_SLAVEID	(0x64)
+#define MT6360_TCPC_SLAVEID	(0x4E)
+
+struct mt6360_pmu_data {
+	struct i2c_client *i2c[MT6360_SLAVE_MAX];
+	struct device *dev;
+	struct regmap *regmap;
+	struct regmap_irq_chip_data *irq_data;
+	unsigned int chip_rev;
+};
+
+/* PMU register definition */
+#define MT6360_PMU_DEV_INFO			(0x00)
+#define MT6360_PMU_CORE_CTRL1			(0x01)
+#define MT6360_PMU_RST1				(0x02)
+#define MT6360_PMU_CRCEN			(0x03)
+#define MT6360_PMU_RST_PAS_CODE1		(0x04)
+#define MT6360_PMU_RST_PAS_CODE2		(0x05)
+#define MT6360_PMU_CORE_CTRL2			(0x06)
+#define MT6360_PMU_TM_PAS_CODE1			(0x07)
+#define MT6360_PMU_TM_PAS_CODE2			(0x08)
+#define MT6360_PMU_TM_PAS_CODE3			(0x09)
+#define MT6360_PMU_TM_PAS_CODE4			(0x0A)
+#define MT6360_PMU_IRQ_IND			(0x0B)
+#define MT6360_PMU_IRQ_MASK			(0x0C)
+#define MT6360_PMU_IRQ_SET			(0x0D)
+#define MT6360_PMU_SHDN_CTRL			(0x0E)
+#define MT6360_PMU_TM_INF			(0x0F)
+#define MT6360_PMU_I2C_CTRL			(0x10)
+#define MT6360_PMU_CHG_CTRL1			(0x11)
+#define MT6360_PMU_CHG_CTRL2			(0x12)
+#define MT6360_PMU_CHG_CTRL3			(0x13)
+#define MT6360_PMU_CHG_CTRL4			(0x14)
+#define MT6360_PMU_CHG_CTRL5			(0x15)
+#define MT6360_PMU_CHG_CTRL6			(0x16)
+#define MT6360_PMU_CHG_CTRL7			(0x17)
+#define MT6360_PMU_CHG_CTRL8			(0x18)
+#define MT6360_PMU_CHG_CTRL9			(0x19)
+#define MT6360_PMU_CHG_CTRL10			(0x1A)
+#define MT6360_PMU_CHG_CTRL11			(0x1B)
+#define MT6360_PMU_CHG_CTRL12			(0x1C)
+#define MT6360_PMU_CHG_CTRL13			(0x1D)
+#define MT6360_PMU_CHG_CTRL14			(0x1E)
+#define MT6360_PMU_CHG_CTRL15			(0x1F)
+#define MT6360_PMU_CHG_CTRL16			(0x20)
+#define MT6360_PMU_CHG_AICC_RESULT		(0x21)
+#define MT6360_PMU_DEVICE_TYPE			(0x22)
+#define MT6360_PMU_QC_CONTROL1			(0x23)
+#define MT6360_PMU_QC_CONTROL2			(0x24)
+#define MT6360_PMU_QC30_CONTROL1		(0x25)
+#define MT6360_PMU_QC30_CONTROL2		(0x26)
+#define MT6360_PMU_USB_STATUS1			(0x27)
+#define MT6360_PMU_QC_STATUS1			(0x28)
+#define MT6360_PMU_QC_STATUS2			(0x29)
+#define MT6360_PMU_CHG_PUMP			(0x2A)
+#define MT6360_PMU_CHG_CTRL17			(0x2B)
+#define MT6360_PMU_CHG_CTRL18			(0x2C)
+#define MT6360_PMU_CHRDET_CTRL1			(0x2D)
+#define MT6360_PMU_CHRDET_CTRL2			(0x2E)
+#define MT6360_PMU_DPDN_CTRL			(0x2F)
+#define MT6360_PMU_CHG_HIDDEN_CTRL1		(0x30)
+#define MT6360_PMU_CHG_HIDDEN_CTRL2		(0x31)
+#define MT6360_PMU_CHG_HIDDEN_CTRL3		(0x32)
+#define MT6360_PMU_CHG_HIDDEN_CTRL4		(0x33)
+#define MT6360_PMU_CHG_HIDDEN_CTRL5		(0x34)
+#define MT6360_PMU_CHG_HIDDEN_CTRL6		(0x35)
+#define MT6360_PMU_CHG_HIDDEN_CTRL7		(0x36)
+#define MT6360_PMU_CHG_HIDDEN_CTRL8		(0x37)
+#define MT6360_PMU_CHG_HIDDEN_CTRL9		(0x38)
+#define MT6360_PMU_CHG_HIDDEN_CTRL10		(0x39)
+#define MT6360_PMU_CHG_HIDDEN_CTRL11		(0x3A)
+#define MT6360_PMU_CHG_HIDDEN_CTRL12		(0x3B)
+#define MT6360_PMU_CHG_HIDDEN_CTRL13		(0x3C)
+#define MT6360_PMU_CHG_HIDDEN_CTRL14		(0x3D)
+#define MT6360_PMU_CHG_HIDDEN_CTRL15		(0x3E)
+#define MT6360_PMU_CHG_HIDDEN_CTRL16		(0x3F)
+#define MT6360_PMU_CHG_HIDDEN_CTRL17		(0x40)
+#define MT6360_PMU_CHG_HIDDEN_CTRL18		(0x41)
+#define MT6360_PMU_CHG_HIDDEN_CTRL19		(0x42)
+#define MT6360_PMU_CHG_HIDDEN_CTRL20		(0x43)
+#define MT6360_PMU_CHG_HIDDEN_CTRL21		(0x44)
+#define MT6360_PMU_CHG_HIDDEN_CTRL22		(0x45)
+#define MT6360_PMU_CHG_HIDDEN_CTRL23		(0x46)
+#define MT6360_PMU_CHG_HIDDEN_CTRL24		(0x47)
+#define MT6360_PMU_CHG_HIDDEN_CTRL25		(0x48)
+#define MT6360_PMU_BC12_CTRL			(0x49)
+#define MT6360_PMU_CHG_STAT			(0x4A)
+#define MT6360_PMU_RESV1			(0x4B)
+#define MT6360_PMU_TYPEC_OTP_TH_SEL_CODEH	(0x4E)
+#define MT6360_PMU_TYPEC_OTP_TH_SEL_CODEL	(0x4F)
+#define MT6360_PMU_TYPEC_OTP_HYST_TH		(0x50)
+#define MT6360_PMU_TYPEC_OTP_CTRL		(0x51)
+#define MT6360_PMU_ADC_BAT_DATA_H		(0x52)
+#define MT6360_PMU_ADC_BAT_DATA_L		(0x53)
+#define MT6360_PMU_IMID_BACKBST_ON		(0x54)
+#define MT6360_PMU_IMID_BACKBST_OFF		(0x55)
+#define MT6360_PMU_ADC_CONFIG			(0x56)
+#define MT6360_PMU_ADC_EN2			(0x57)
+#define MT6360_PMU_ADC_IDLE_T			(0x58)
+#define MT6360_PMU_ADC_RPT_1			(0x5A)
+#define MT6360_PMU_ADC_RPT_2			(0x5B)
+#define MT6360_PMU_ADC_RPT_3			(0x5C)
+#define MT6360_PMU_ADC_RPT_ORG1			(0x5D)
+#define MT6360_PMU_ADC_RPT_ORG2			(0x5E)
+#define MT6360_PMU_BAT_OVP_TH_SEL_CODEH		(0x5F)
+#define MT6360_PMU_BAT_OVP_TH_SEL_CODEL		(0x60)
+#define MT6360_PMU_CHG_CTRL19			(0x61)
+#define MT6360_PMU_VDDASUPPLY			(0x62)
+#define MT6360_PMU_BC12_MANUAL			(0x63)
+#define MT6360_PMU_CHGDET_FUNC			(0x64)
+#define MT6360_PMU_FOD_CTRL			(0x65)
+#define MT6360_PMU_CHG_CTRL20			(0x66)
+#define MT6360_PMU_CHG_HIDDEN_CTRL26		(0x67)
+#define MT6360_PMU_CHG_HIDDEN_CTRL27		(0x68)
+#define MT6360_PMU_RESV2			(0x69)
+#define MT6360_PMU_USBID_CTRL1			(0x6D)
+#define MT6360_PMU_USBID_CTRL2			(0x6E)
+#define MT6360_PMU_USBID_CTRL3			(0x6F)
+#define MT6360_PMU_FLED_CFG			(0x70)
+#define MT6360_PMU_RESV3			(0x71)
+#define MT6360_PMU_FLED1_CTRL			(0x72)
+#define MT6360_PMU_FLED_STRB_CTRL		(0x73)
+#define MT6360_PMU_FLED1_STRB_CTRL2		(0x74)
+#define MT6360_PMU_FLED1_TOR_CTRL		(0x75)
+#define MT6360_PMU_FLED2_CTRL			(0x76)
+#define MT6360_PMU_RESV4			(0x77)
+#define MT6360_PMU_FLED2_STRB_CTRL2		(0x78)
+#define MT6360_PMU_FLED2_TOR_CTRL		(0x79)
+#define MT6360_PMU_FLED_VMIDTRK_CTRL1		(0x7A)
+#define MT6360_PMU_FLED_VMID_RTM		(0x7B)
+#define MT6360_PMU_FLED_VMIDTRK_CTRL2		(0x7C)
+#define MT6360_PMU_FLED_PWSEL			(0x7D)
+#define MT6360_PMU_FLED_EN			(0x7E)
+#define MT6360_PMU_FLED_Hidden1			(0x7F)
+#define MT6360_PMU_RGB_EN			(0x80)
+#define MT6360_PMU_RGB1_ISNK			(0x81)
+#define MT6360_PMU_RGB2_ISNK			(0x82)
+#define MT6360_PMU_RGB3_ISNK			(0x83)
+#define MT6360_PMU_RGB_ML_ISNK			(0x84)
+#define MT6360_PMU_RGB1_DIM			(0x85)
+#define MT6360_PMU_RGB2_DIM			(0x86)
+#define MT6360_PMU_RGB3_DIM			(0x87)
+#define MT6360_PMU_RESV5			(0x88)
+#define MT6360_PMU_RGB12_Freq			(0x89)
+#define MT6360_PMU_RGB34_Freq			(0x8A)
+#define MT6360_PMU_RGB1_Tr			(0x8B)
+#define MT6360_PMU_RGB1_Tf			(0x8C)
+#define MT6360_PMU_RGB1_TON_TOFF		(0x8D)
+#define MT6360_PMU_RGB2_Tr			(0x8E)
+#define MT6360_PMU_RGB2_Tf			(0x8F)
+#define MT6360_PMU_RGB2_TON_TOFF		(0x90)
+#define MT6360_PMU_RGB3_Tr			(0x91)
+#define MT6360_PMU_RGB3_Tf			(0x92)
+#define MT6360_PMU_RGB3_TON_TOFF		(0x93)
+#define MT6360_PMU_RGB_Hidden_CTRL1		(0x94)
+#define MT6360_PMU_RGB_Hidden_CTRL2		(0x95)
+#define MT6360_PMU_RESV6			(0x97)
+#define MT6360_PMU_SPARE1			(0x9A)
+#define MT6360_PMU_SPARE2			(0xA0)
+#define MT6360_PMU_SPARE3			(0xB0)
+#define MT6360_PMU_SPARE4			(0xC0)
+#define MT6360_PMU_CHG_IRQ1			(0xD0)
+#define MT6360_PMU_CHG_IRQ2			(0xD1)
+#define MT6360_PMU_CHG_IRQ3			(0xD2)
+#define MT6360_PMU_CHG_IRQ4			(0xD3)
+#define MT6360_PMU_CHG_IRQ5			(0xD4)
+#define MT6360_PMU_CHG_IRQ6			(0xD5)
+#define MT6360_PMU_QC_IRQ			(0xD6)
+#define MT6360_PMU_FOD_IRQ			(0xD7)
+#define MT6360_PMU_BASE_IRQ			(0xD8)
+#define MT6360_PMU_FLED_IRQ1			(0xD9)
+#define MT6360_PMU_FLED_IRQ2			(0xDA)
+#define MT6360_PMU_RGB_IRQ			(0xDB)
+#define MT6360_PMU_BUCK1_IRQ			(0xDC)
+#define MT6360_PMU_BUCK2_IRQ			(0xDD)
+#define MT6360_PMU_LDO_IRQ1			(0xDE)
+#define MT6360_PMU_LDO_IRQ2			(0xDF)
+#define MT6360_PMU_CHG_STAT1			(0xE0)
+#define MT6360_PMU_CHG_STAT2			(0xE1)
+#define MT6360_PMU_CHG_STAT3			(0xE2)
+#define MT6360_PMU_CHG_STAT4			(0xE3)
+#define MT6360_PMU_CHG_STAT5			(0xE4)
+#define MT6360_PMU_CHG_STAT6			(0xE5)
+#define MT6360_PMU_QC_STAT			(0xE6)
+#define MT6360_PMU_FOD_STAT			(0xE7)
+#define MT6360_PMU_BASE_STAT			(0xE8)
+#define MT6360_PMU_FLED_STAT1			(0xE9)
+#define MT6360_PMU_FLED_STAT2			(0xEA)
+#define MT6360_PMU_RGB_STAT			(0xEB)
+#define MT6360_PMU_BUCK1_STAT			(0xEC)
+#define MT6360_PMU_BUCK2_STAT			(0xED)
+#define MT6360_PMU_LDO_STAT1			(0xEE)
+#define MT6360_PMU_LDO_STAT2			(0xEF)
+#define MT6360_PMU_CHG_MASK1			(0xF0)
+#define MT6360_PMU_CHG_MASK2			(0xF1)
+#define MT6360_PMU_CHG_MASK3			(0xF2)
+#define MT6360_PMU_CHG_MASK4			(0xF3)
+#define MT6360_PMU_CHG_MASK5			(0xF4)
+#define MT6360_PMU_CHG_MASK6			(0xF5)
+#define MT6360_PMU_QC_MASK			(0xF6)
+#define MT6360_PMU_FOD_MASK			(0xF7)
+#define MT6360_PMU_BASE_MASK			(0xF8)
+#define MT6360_PMU_FLED_MASK1			(0xF9)
+#define MT6360_PMU_FLED_MASK2			(0xFA)
+#define MT6360_PMU_FAULTB_MASK			(0xFB)
+#define MT6360_PMU_BUCK1_MASK			(0xFC)
+#define MT6360_PMU_BUCK2_MASK			(0xFD)
+#define MT6360_PMU_LDO_MASK1			(0xFE)
+#define MT6360_PMU_LDO_MASK2			(0xFF)
+#define MT6360_PMU_MAXREG			(MT6360_PMU_LDO_MASK2)
+
+/* MT6360_PMU_IRQ_SET */
+#define MT6360_PMU_IRQ_REGNUM	(MT6360_PMU_LDO_IRQ2 - MT6360_PMU_CHG_IRQ1 + 1)
+#define MT6360_IRQ_RETRIG	BIT(2)
+
+#define CHIP_VEN_MASK				(0xF0)
+#define CHIP_VEN_MT6360				(0x50)
+#define CHIP_REV_MASK				(0x0F)
+
+#endif /* __MT6360_H__ */
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2020-03-03 15:27 Gene Chen
@ 2020-03-04 14:56 ` Matthias Brugger
  2020-03-04 15:15   ` Re: Lee Jones
  0 siblings, 1 reply; 414+ messages in thread
From: Matthias Brugger @ 2020-03-04 14:56 UTC (permalink / raw)
  To: Gene Chen, lee.jones
  Cc: linux-arm-kernel, linux-mediatek, linux-kernel, gene_chen,
	Wilma.Wu, shufan_lee, cy_huang

Please resend with appropriate commit message.

On 03/03/2020 16:27, Gene Chen wrote:
> Add mfd driver for mt6360 pmic chip include
> Battery Charger/USB_PD/Flash LED/RGB LED/LDO/Buck
> 
> Signed-off-by: Gene Chen <gene_chen@richtek.com>
> ---
>  drivers/mfd/Kconfig        |  12 ++
>  drivers/mfd/Makefile       |   1 +
>  drivers/mfd/mt6360-core.c  | 425 +++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/mfd/mt6360.h | 240 +++++++++++++++++++++++++
>  4 files changed, 678 insertions(+)
>  create mode 100644 drivers/mfd/mt6360-core.c
>  create mode 100644 include/linux/mfd/mt6360.h
> 
> changelogs between v1 & v2
> - include missing header file
> 
> changelogs between v2 & v3
> - add changelogs
> 
> changelogs between v3 & v4
> - fix Kconfig description
> - replace mt6360_pmu_info with mt6360_pmu_data
> - replace probe with probe_new
> - remove unnecessary irq_chip variable
> - remove annotation
> - replace MT6360_MFD_CELL with OF_MFD_CELL
> 
> changelogs between v4 & v5
> - remove unnecessary parse dt function
> - use devm_i2c_new_dummy_device
> - add base-commit message
> 
> changelogs between v5 & v6
> - review return value
> - remove i2c id_table
> - use GPL license v2
> 
> changelogs between v6 & v7
> - add author description
> - replace MT6360_REGMAP_IRQ_REG by REGMAP_IRQ_REG_LINE
> - remove mt6360-private.h
> 
> changelogs between v7 & v8
> - fix kbuild auto reboot by include interrupt header
> 
> diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
> index 2b20329..0f8c341 100644
> --- a/drivers/mfd/Kconfig
> +++ b/drivers/mfd/Kconfig
> @@ -857,6 +857,18 @@ config MFD_MAX8998
>  	  additional drivers must be enabled in order to use the functionality
>  	  of the device.
>  
> +config MFD_MT6360
> +	tristate "Mediatek MT6360 SubPMIC"
> +	select MFD_CORE
> +	select REGMAP_I2C
> +	select REGMAP_IRQ
> +	depends on I2C
> +	help
> +	  Say Y here to enable MT6360 PMU/PMIC/LDO functional support.
> +	  PMU part includes Charger, Flashlight, RGB LED
> +	  PMIC part includes 2-channel BUCKs and 2-channel LDOs
> +	  LDO part includes 4-channel LDOs
> +
>  config MFD_MT6397
>  	tristate "MediaTek MT6397 PMIC Support"
>  	select MFD_CORE
> diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
> index b83f172..8c35816 100644
> --- a/drivers/mfd/Makefile
> +++ b/drivers/mfd/Makefile
> @@ -238,6 +238,7 @@ obj-$(CONFIG_INTEL_SOC_PMIC)	+= intel-soc-pmic.o
>  obj-$(CONFIG_INTEL_SOC_PMIC_BXTWC)	+= intel_soc_pmic_bxtwc.o
>  obj-$(CONFIG_INTEL_SOC_PMIC_CHTWC)	+= intel_soc_pmic_chtwc.o
>  obj-$(CONFIG_INTEL_SOC_PMIC_CHTDC_TI)	+= intel_soc_pmic_chtdc_ti.o
> +obj-$(CONFIG_MFD_MT6360)	+= mt6360-core.o
>  mt6397-objs	:= mt6397-core.o mt6397-irq.o
>  obj-$(CONFIG_MFD_MT6397)	+= mt6397.o
>  obj-$(CONFIG_INTEL_SOC_PMIC_MRFLD)	+= intel_soc_pmic_mrfld.o
> diff --git a/drivers/mfd/mt6360-core.c b/drivers/mfd/mt6360-core.c
> new file mode 100644
> index 0000000..d1168f8
> --- /dev/null
> +++ b/drivers/mfd/mt6360-core.c
> @@ -0,0 +1,425 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2019 MediaTek Inc.
> + *
> + * Author: Gene Chen <gene_chen@richtek.com>
> + */
> +
> +#include <linux/i2c.h>
> +#include <linux/init.h>
> +#include <linux/interrupt.h>
> +#include <linux/kernel.h>
> +#include <linux/mfd/core.h>
> +#include <linux/module.h>
> +#include <linux/of_irq.h>
> +#include <linux/of_platform.h>
> +#include <linux/version.h>
> +
> +#include <linux/mfd/mt6360.h>
> +
> +/* reg 0 -> 0 ~ 7 */
> +#define MT6360_CHG_TREG_EVT		(4)
> +#define MT6360_CHG_AICR_EVT		(5)
> +#define MT6360_CHG_MIVR_EVT		(6)
> +#define MT6360_PWR_RDY_EVT		(7)
> +/* REG 1 -> 8 ~ 15 */
> +#define MT6360_CHG_BATSYSUV_EVT		(9)
> +#define MT6360_FLED_CHG_VINOVP_EVT	(11)
> +#define MT6360_CHG_VSYSUV_EVT		(12)
> +#define MT6360_CHG_VSYSOV_EVT		(13)
> +#define MT6360_CHG_VBATOV_EVT		(14)
> +#define MT6360_CHG_VBUSOV_EVT		(15)
> +/* REG 2 -> 16 ~ 23 */
> +/* REG 3 -> 24 ~ 31 */
> +#define MT6360_WD_PMU_DET		(25)
> +#define MT6360_WD_PMU_DONE		(26)
> +#define MT6360_CHG_TMRI			(27)
> +#define MT6360_CHG_ADPBADI		(29)
> +#define MT6360_CHG_RVPI			(30)
> +#define MT6360_OTPI			(31)
> +/* REG 4 -> 32 ~ 39 */
> +#define MT6360_CHG_AICCMEASL		(32)
> +#define MT6360_CHGDET_DONEI		(34)
> +#define MT6360_WDTMRI			(35)
> +#define MT6360_SSFINISHI		(36)
> +#define MT6360_CHG_RECHGI		(37)
> +#define MT6360_CHG_TERMI		(38)
> +#define MT6360_CHG_IEOCI		(39)
> +/* REG 5 -> 40 ~ 47 */
> +#define MT6360_PUMPX_DONEI		(40)
> +#define MT6360_BAT_OVP_ADC_EVT		(41)
> +#define MT6360_TYPEC_OTP_EVT		(42)
> +#define MT6360_ADC_WAKEUP_EVT		(43)
> +#define MT6360_ADC_DONEI		(44)
> +#define MT6360_BST_BATUVI		(45)
> +#define MT6360_BST_VBUSOVI		(46)
> +#define MT6360_BST_OLPI			(47)
> +/* REG 6 -> 48 ~ 55 */
> +#define MT6360_ATTACH_I			(48)
> +#define MT6360_DETACH_I			(49)
> +#define MT6360_QC30_STPDONE		(51)
> +#define MT6360_QC_VBUSDET_DONE		(52)
> +#define MT6360_HVDCP_DET		(53)
> +#define MT6360_CHGDETI			(54)
> +#define MT6360_DCDTI			(55)
> +/* REG 7 -> 56 ~ 63 */
> +#define MT6360_FOD_DONE_EVT		(56)
> +#define MT6360_FOD_OV_EVT		(57)
> +#define MT6360_CHRDET_UVP_EVT		(58)
> +#define MT6360_CHRDET_OVP_EVT		(59)
> +#define MT6360_CHRDET_EXT_EVT		(60)
> +#define MT6360_FOD_LR_EVT		(61)
> +#define MT6360_FOD_HR_EVT		(62)
> +#define MT6360_FOD_DISCHG_FAIL_EVT	(63)
> +/* REG 8 -> 64 ~ 71 */
> +#define MT6360_USBID_EVT		(64)
> +#define MT6360_APWDTRST_EVT		(65)
> +#define MT6360_EN_EVT			(66)
> +#define MT6360_QONB_RST_EVT		(67)
> +#define MT6360_MRSTB_EVT		(68)
> +#define MT6360_OTP_EVT			(69)
> +#define MT6360_VDDAOV_EVT		(70)
> +#define MT6360_SYSUV_EVT		(71)
> +/* REG 9 -> 72 ~ 79 */
> +#define MT6360_FLED_STRBPIN_EVT		(72)
> +#define MT6360_FLED_TORPIN_EVT		(73)
> +#define MT6360_FLED_TX_EVT		(74)
> +#define MT6360_FLED_LVF_EVT		(75)
> +#define MT6360_FLED2_SHORT_EVT		(78)
> +#define MT6360_FLED1_SHORT_EVT		(79)
> +/* REG 10 -> 80 ~ 87 */
> +#define MT6360_FLED2_STRB_EVT		(80)
> +#define MT6360_FLED1_STRB_EVT		(81)
> +#define MT6360_FLED2_STRB_TO_EVT	(82)
> +#define MT6360_FLED1_STRB_TO_EVT	(83)
> +#define MT6360_FLED2_TOR_EVT		(84)
> +#define MT6360_FLED1_TOR_EVT		(85)
> +/* REG 11 -> 88 ~ 95 */
> +/* REG 12 -> 96 ~ 103 */
> +#define MT6360_BUCK1_PGB_EVT		(96)
> +#define MT6360_BUCK1_OC_EVT		(100)
> +#define MT6360_BUCK1_OV_EVT		(101)
> +#define MT6360_BUCK1_UV_EVT		(102)
> +/* REG 13 -> 104 ~ 111 */
> +#define MT6360_BUCK2_PGB_EVT		(104)
> +#define MT6360_BUCK2_OC_EVT		(108)
> +#define MT6360_BUCK2_OV_EVT		(109)
> +#define MT6360_BUCK2_UV_EVT		(110)
> +/* REG 14 -> 112 ~ 119 */
> +#define MT6360_LDO1_OC_EVT		(113)
> +#define MT6360_LDO2_OC_EVT		(114)
> +#define MT6360_LDO3_OC_EVT		(115)
> +#define MT6360_LDO5_OC_EVT		(117)
> +#define MT6360_LDO6_OC_EVT		(118)
> +#define MT6360_LDO7_OC_EVT		(119)
> +/* REG 15 -> 120 ~ 127 */
> +#define MT6360_LDO1_PGB_EVT		(121)
> +#define MT6360_LDO2_PGB_EVT		(122)
> +#define MT6360_LDO3_PGB_EVT		(123)
> +#define MT6360_LDO5_PGB_EVT		(125)
> +#define MT6360_LDO6_PGB_EVT		(126)
> +#define MT6360_LDO7_PGB_EVT		(127)
> +
> +static const struct regmap_irq mt6360_pmu_irqs[] =  {
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_TREG_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_AICR_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_MIVR_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_PWR_RDY_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_BATSYSUV_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED_CHG_VINOVP_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_VSYSUV_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_VSYSOV_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_VBATOV_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_VBUSOV_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_WD_PMU_DET, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_WD_PMU_DONE, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_TMRI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_ADPBADI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_RVPI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_OTPI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_AICCMEASL, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHGDET_DONEI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_WDTMRI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_SSFINISHI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_RECHGI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_TERMI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_IEOCI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_PUMPX_DONEI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHG_TREG_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_BAT_OVP_ADC_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_TYPEC_OTP_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_ADC_WAKEUP_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_ADC_DONEI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_BST_BATUVI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_BST_VBUSOVI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_BST_OLPI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_ATTACH_I, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_DETACH_I, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_QC30_STPDONE, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_QC_VBUSDET_DONE, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_HVDCP_DET, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHGDETI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_DCDTI, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FOD_DONE_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FOD_OV_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHRDET_UVP_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHRDET_OVP_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_CHRDET_EXT_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FOD_LR_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FOD_HR_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FOD_DISCHG_FAIL_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_USBID_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_APWDTRST_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_EN_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_QONB_RST_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_MRSTB_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_OTP_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_VDDAOV_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_SYSUV_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED_STRBPIN_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED_TORPIN_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED_TX_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED_LVF_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED2_SHORT_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED1_SHORT_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED2_STRB_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED1_STRB_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED2_STRB_TO_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED1_STRB_TO_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED2_TOR_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_FLED1_TOR_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_BUCK1_PGB_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_BUCK1_OC_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_BUCK1_OV_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_BUCK1_UV_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_BUCK2_PGB_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_BUCK2_OC_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_BUCK2_OV_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_BUCK2_UV_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_LDO1_OC_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_LDO2_OC_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_LDO3_OC_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_LDO5_OC_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_LDO6_OC_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_LDO7_OC_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_LDO1_PGB_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_LDO2_PGB_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_LDO3_PGB_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_LDO5_PGB_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_LDO6_PGB_EVT, 8),
> +	REGMAP_IRQ_REG_LINE(MT6360_LDO7_PGB_EVT, 8),
> +};
> +
> +static int mt6360_pmu_handle_post_irq(void *irq_drv_data)
> +{
> +	struct mt6360_pmu_data *mpd = irq_drv_data;
> +
> +	return regmap_update_bits(mpd->regmap,
> +		MT6360_PMU_IRQ_SET, MT6360_IRQ_RETRIG, MT6360_IRQ_RETRIG);
> +}
> +
> +static struct regmap_irq_chip mt6360_pmu_irq_chip = {
> +	.irqs = mt6360_pmu_irqs,
> +	.num_irqs = ARRAY_SIZE(mt6360_pmu_irqs),
> +	.num_regs = MT6360_PMU_IRQ_REGNUM,
> +	.mask_base = MT6360_PMU_CHG_MASK1,
> +	.status_base = MT6360_PMU_CHG_IRQ1,
> +	.ack_base = MT6360_PMU_CHG_IRQ1,
> +	.init_ack_masked = true,
> +	.use_ack = true,
> +	.handle_post_irq = mt6360_pmu_handle_post_irq,
> +};
> +
> +static const struct regmap_config mt6360_pmu_regmap_config = {
> +	.reg_bits = 8,
> +	.val_bits = 8,
> +	.max_register = MT6360_PMU_MAXREG,
> +};
> +
> +static const struct resource mt6360_adc_resources[] = {
> +	DEFINE_RES_IRQ_NAMED(MT6360_ADC_DONEI, "adc_donei"),
> +};
> +
> +static const struct resource mt6360_chg_resources[] = {
> +	DEFINE_RES_IRQ_NAMED(MT6360_CHG_TREG_EVT, "chg_treg_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_PWR_RDY_EVT, "pwr_rdy_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_CHG_BATSYSUV_EVT, "chg_batsysuv_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_CHG_VSYSUV_EVT, "chg_vsysuv_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_CHG_VSYSOV_EVT, "chg_vsysov_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_CHG_VBATOV_EVT, "chg_vbatov_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_CHG_VBUSOV_EVT, "chg_vbusov_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_CHG_AICCMEASL, "chg_aiccmeasl"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_WDTMRI, "wdtmri"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_CHG_RECHGI, "chg_rechgi"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_CHG_TERMI, "chg_termi"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_CHG_IEOCI, "chg_ieoci"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_PUMPX_DONEI, "pumpx_donei"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_ATTACH_I, "attach_i"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_CHRDET_EXT_EVT, "chrdet_ext_evt"),
> +};
> +
> +static const struct resource mt6360_led_resources[] = {
> +	DEFINE_RES_IRQ_NAMED(MT6360_FLED_CHG_VINOVP_EVT, "fled_chg_vinovp_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_FLED_LVF_EVT, "fled_lvf_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_FLED2_SHORT_EVT, "fled2_short_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_FLED1_SHORT_EVT, "fled1_short_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_FLED2_STRB_TO_EVT, "fled2_strb_to_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_FLED1_STRB_TO_EVT, "fled1_strb_to_evt"),
> +};
> +
> +static const struct resource mt6360_pmic_resources[] = {
> +	DEFINE_RES_IRQ_NAMED(MT6360_BUCK1_PGB_EVT, "buck1_pgb_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_BUCK1_OC_EVT, "buck1_oc_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_BUCK1_OV_EVT, "buck1_ov_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_BUCK1_UV_EVT, "buck1_uv_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_BUCK2_PGB_EVT, "buck2_pgb_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_BUCK2_OC_EVT, "buck2_oc_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_BUCK2_OV_EVT, "buck2_ov_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_BUCK2_UV_EVT, "buck2_uv_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_LDO6_OC_EVT, "ldo6_oc_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_LDO7_OC_EVT, "ldo7_oc_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_LDO6_PGB_EVT, "ldo6_pgb_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_LDO7_PGB_EVT, "ldo7_pgb_evt"),
> +};
> +
> +static const struct resource mt6360_ldo_resources[] = {
> +	DEFINE_RES_IRQ_NAMED(MT6360_LDO1_OC_EVT, "ldo1_oc_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_LDO2_OC_EVT, "ldo2_oc_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_LDO3_OC_EVT, "ldo3_oc_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_LDO5_OC_EVT, "ldo5_oc_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_LDO1_PGB_EVT, "ldo1_pgb_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_LDO2_PGB_EVT, "ldo2_pgb_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_LDO3_PGB_EVT, "ldo3_pgb_evt"),
> +	DEFINE_RES_IRQ_NAMED(MT6360_LDO5_PGB_EVT, "ldo5_pgb_evt"),
> +};
> +
> +static const struct mfd_cell mt6360_devs[] = {
> +	OF_MFD_CELL("mt6360_adc", mt6360_adc_resources,
> +		    NULL, 0, 0, "mediatek,mt6360_adc"),
> +	OF_MFD_CELL("mt6360_chg", mt6360_chg_resources,
> +		    NULL, 0, 0, "mediatek,mt6360_chg"),
> +	OF_MFD_CELL("mt6360_led", mt6360_led_resources,
> +		    NULL, 0, 0, "mediatek,mt6360_led"),
> +	OF_MFD_CELL("mt6360_pmic", mt6360_pmic_resources,
> +		    NULL, 0, 0, "mediatek,mt6360_pmic"),
> +	OF_MFD_CELL("mt6360_ldo", mt6360_ldo_resources,
> +		    NULL, 0, 0, "mediatek,mt6360_ldo"),
> +	OF_MFD_CELL("mt6360_tcpc", NULL,
> +		    NULL, 0, 0, "mediatek,mt6360_tcpc"),
> +};
> +
> +static const unsigned short mt6360_slave_addr[MT6360_SLAVE_MAX] = {
> +	MT6360_PMU_SLAVEID,
> +	MT6360_PMIC_SLAVEID,
> +	MT6360_LDO_SLAVEID,
> +	MT6360_TCPC_SLAVEID,
> +};
> +
> +static int mt6360_pmu_probe(struct i2c_client *client)
> +{
> +	struct mt6360_pmu_data *mpd;
> +	unsigned int reg_data;
> +	int i, ret;
> +
> +	mpd = devm_kzalloc(&client->dev, sizeof(*mpd), GFP_KERNEL);
> +	if (!mpd)
> +		return -ENOMEM;
> +
> +	mpd->dev = &client->dev;
> +	i2c_set_clientdata(client, mpd);
> +
> +	mpd->regmap = devm_regmap_init_i2c(client, &mt6360_pmu_regmap_config);
> +	if (IS_ERR(mpd->regmap)) {
> +		dev_err(&client->dev, "Failed to register regmap\n");
> +		return PTR_ERR(mpd->regmap);
> +	}
> +
> +	ret = regmap_read(mpd->regmap, MT6360_PMU_DEV_INFO, &reg_data);
> +	if (ret) {
> +		dev_err(&client->dev, "Device not found\n");
> +		return ret;
> +	}
> +
> +	mpd->chip_rev = reg_data & CHIP_REV_MASK;
> +	if (mpd->chip_rev != CHIP_VEN_MT6360) {
> +		dev_err(&client->dev, "Device not supported\n");
> +		return -ENODEV;
> +	}
> +
> +	mt6360_pmu_irq_chip.irq_drv_data = mpd;
> +	ret = devm_regmap_add_irq_chip(&client->dev, mpd->regmap, client->irq,
> +				       IRQF_TRIGGER_FALLING, 0,
> +				       &mt6360_pmu_irq_chip, &mpd->irq_data);
> +	if (ret) {
> +		dev_err(&client->dev, "Failed to add Regmap IRQ Chip\n");
> +		return ret;
> +	}
> +
> +	mpd->i2c[0] = client;
> +	for (i = 1; i < MT6360_SLAVE_MAX; i++) {
> +		mpd->i2c[i] = devm_i2c_new_dummy_device(&client->dev,
> +							client->adapter,
> +							mt6360_slave_addr[i]);
> +		if (IS_ERR(mpd->i2c[i])) {
> +			dev_err(&client->dev,
> +				"Failed to get new dummy I2C device for address 0x%x",
> +				mt6360_slave_addr[i]);
> +			return PTR_ERR(mpd->i2c[i]);
> +		}
> +		i2c_set_clientdata(mpd->i2c[i], mpd);
> +	}
> +
> +	ret = devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_AUTO,
> +				   mt6360_devs, ARRAY_SIZE(mt6360_devs), NULL,
> +				   0, regmap_irq_get_domain(mpd->irq_data));
> +	if (ret) {
> +		dev_err(&client->dev,
> +			"Failed to register subordinate devices\n");
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int __maybe_unused mt6360_pmu_suspend(struct device *dev)
> +{
> +	struct i2c_client *i2c = to_i2c_client(dev);
> +
> +	if (device_may_wakeup(dev))
> +		enable_irq_wake(i2c->irq);
> +
> +	return 0;
> +}
> +
> +static int __maybe_unused mt6360_pmu_resume(struct device *dev)
> +{
> +
> +	struct i2c_client *i2c = to_i2c_client(dev);
> +
> +	if (device_may_wakeup(dev))
> +		disable_irq_wake(i2c->irq);
> +
> +	return 0;
> +}
> +
> +static SIMPLE_DEV_PM_OPS(mt6360_pmu_pm_ops,
> +			 mt6360_pmu_suspend, mt6360_pmu_resume);
> +
> +static const struct of_device_id __maybe_unused mt6360_pmu_of_id[] = {
> +	{ .compatible = "mediatek,mt6360_pmu", },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, mt6360_pmu_of_id);
> +
> +static struct i2c_driver mt6360_pmu_driver = {
> +	.driver = {
> +		.pm = &mt6360_pmu_pm_ops,
> +		.of_match_table = of_match_ptr(mt6360_pmu_of_id),
> +	},
> +	.probe_new = mt6360_pmu_probe,
> +};
> +module_i2c_driver(mt6360_pmu_driver);
> +
> +MODULE_AUTHOR("Gene Chen <gene_chen@richtek.com>");
> +MODULE_DESCRIPTION("MT6360 PMU I2C Driver");
> +MODULE_LICENSE("GPL v2");
> diff --git a/include/linux/mfd/mt6360.h b/include/linux/mfd/mt6360.h
> new file mode 100644
> index 0000000..c03e6d1
> --- /dev/null
> +++ b/include/linux/mfd/mt6360.h
> @@ -0,0 +1,240 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2019 MediaTek Inc.
> + */
> +
> +#ifndef __MT6360_H__
> +#define __MT6360_H__
> +
> +#include <linux/regmap.h>
> +
> +enum {
> +	MT6360_SLAVE_PMU = 0,
> +	MT6360_SLAVE_PMIC,
> +	MT6360_SLAVE_LDO,
> +	MT6360_SLAVE_TCPC,
> +	MT6360_SLAVE_MAX,
> +};
> +
> +#define MT6360_PMU_SLAVEID	(0x34)
> +#define MT6360_PMIC_SLAVEID	(0x1A)
> +#define MT6360_LDO_SLAVEID	(0x64)
> +#define MT6360_TCPC_SLAVEID	(0x4E)
> +
> +struct mt6360_pmu_data {
> +	struct i2c_client *i2c[MT6360_SLAVE_MAX];
> +	struct device *dev;
> +	struct regmap *regmap;
> +	struct regmap_irq_chip_data *irq_data;
> +	unsigned int chip_rev;
> +};
> +
> +/* PMU register definition */
> +#define MT6360_PMU_DEV_INFO			(0x00)
> +#define MT6360_PMU_CORE_CTRL1			(0x01)
> +#define MT6360_PMU_RST1				(0x02)
> +#define MT6360_PMU_CRCEN			(0x03)
> +#define MT6360_PMU_RST_PAS_CODE1		(0x04)
> +#define MT6360_PMU_RST_PAS_CODE2		(0x05)
> +#define MT6360_PMU_CORE_CTRL2			(0x06)
> +#define MT6360_PMU_TM_PAS_CODE1			(0x07)
> +#define MT6360_PMU_TM_PAS_CODE2			(0x08)
> +#define MT6360_PMU_TM_PAS_CODE3			(0x09)
> +#define MT6360_PMU_TM_PAS_CODE4			(0x0A)
> +#define MT6360_PMU_IRQ_IND			(0x0B)
> +#define MT6360_PMU_IRQ_MASK			(0x0C)
> +#define MT6360_PMU_IRQ_SET			(0x0D)
> +#define MT6360_PMU_SHDN_CTRL			(0x0E)
> +#define MT6360_PMU_TM_INF			(0x0F)
> +#define MT6360_PMU_I2C_CTRL			(0x10)
> +#define MT6360_PMU_CHG_CTRL1			(0x11)
> +#define MT6360_PMU_CHG_CTRL2			(0x12)
> +#define MT6360_PMU_CHG_CTRL3			(0x13)
> +#define MT6360_PMU_CHG_CTRL4			(0x14)
> +#define MT6360_PMU_CHG_CTRL5			(0x15)
> +#define MT6360_PMU_CHG_CTRL6			(0x16)
> +#define MT6360_PMU_CHG_CTRL7			(0x17)
> +#define MT6360_PMU_CHG_CTRL8			(0x18)
> +#define MT6360_PMU_CHG_CTRL9			(0x19)
> +#define MT6360_PMU_CHG_CTRL10			(0x1A)
> +#define MT6360_PMU_CHG_CTRL11			(0x1B)
> +#define MT6360_PMU_CHG_CTRL12			(0x1C)
> +#define MT6360_PMU_CHG_CTRL13			(0x1D)
> +#define MT6360_PMU_CHG_CTRL14			(0x1E)
> +#define MT6360_PMU_CHG_CTRL15			(0x1F)
> +#define MT6360_PMU_CHG_CTRL16			(0x20)
> +#define MT6360_PMU_CHG_AICC_RESULT		(0x21)
> +#define MT6360_PMU_DEVICE_TYPE			(0x22)
> +#define MT6360_PMU_QC_CONTROL1			(0x23)
> +#define MT6360_PMU_QC_CONTROL2			(0x24)
> +#define MT6360_PMU_QC30_CONTROL1		(0x25)
> +#define MT6360_PMU_QC30_CONTROL2		(0x26)
> +#define MT6360_PMU_USB_STATUS1			(0x27)
> +#define MT6360_PMU_QC_STATUS1			(0x28)
> +#define MT6360_PMU_QC_STATUS2			(0x29)
> +#define MT6360_PMU_CHG_PUMP			(0x2A)
> +#define MT6360_PMU_CHG_CTRL17			(0x2B)
> +#define MT6360_PMU_CHG_CTRL18			(0x2C)
> +#define MT6360_PMU_CHRDET_CTRL1			(0x2D)
> +#define MT6360_PMU_CHRDET_CTRL2			(0x2E)
> +#define MT6360_PMU_DPDN_CTRL			(0x2F)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL1		(0x30)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL2		(0x31)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL3		(0x32)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL4		(0x33)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL5		(0x34)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL6		(0x35)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL7		(0x36)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL8		(0x37)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL9		(0x38)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL10		(0x39)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL11		(0x3A)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL12		(0x3B)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL13		(0x3C)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL14		(0x3D)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL15		(0x3E)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL16		(0x3F)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL17		(0x40)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL18		(0x41)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL19		(0x42)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL20		(0x43)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL21		(0x44)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL22		(0x45)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL23		(0x46)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL24		(0x47)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL25		(0x48)
> +#define MT6360_PMU_BC12_CTRL			(0x49)
> +#define MT6360_PMU_CHG_STAT			(0x4A)
> +#define MT6360_PMU_RESV1			(0x4B)
> +#define MT6360_PMU_TYPEC_OTP_TH_SEL_CODEH	(0x4E)
> +#define MT6360_PMU_TYPEC_OTP_TH_SEL_CODEL	(0x4F)
> +#define MT6360_PMU_TYPEC_OTP_HYST_TH		(0x50)
> +#define MT6360_PMU_TYPEC_OTP_CTRL		(0x51)
> +#define MT6360_PMU_ADC_BAT_DATA_H		(0x52)
> +#define MT6360_PMU_ADC_BAT_DATA_L		(0x53)
> +#define MT6360_PMU_IMID_BACKBST_ON		(0x54)
> +#define MT6360_PMU_IMID_BACKBST_OFF		(0x55)
> +#define MT6360_PMU_ADC_CONFIG			(0x56)
> +#define MT6360_PMU_ADC_EN2			(0x57)
> +#define MT6360_PMU_ADC_IDLE_T			(0x58)
> +#define MT6360_PMU_ADC_RPT_1			(0x5A)
> +#define MT6360_PMU_ADC_RPT_2			(0x5B)
> +#define MT6360_PMU_ADC_RPT_3			(0x5C)
> +#define MT6360_PMU_ADC_RPT_ORG1			(0x5D)
> +#define MT6360_PMU_ADC_RPT_ORG2			(0x5E)
> +#define MT6360_PMU_BAT_OVP_TH_SEL_CODEH		(0x5F)
> +#define MT6360_PMU_BAT_OVP_TH_SEL_CODEL		(0x60)
> +#define MT6360_PMU_CHG_CTRL19			(0x61)
> +#define MT6360_PMU_VDDASUPPLY			(0x62)
> +#define MT6360_PMU_BC12_MANUAL			(0x63)
> +#define MT6360_PMU_CHGDET_FUNC			(0x64)
> +#define MT6360_PMU_FOD_CTRL			(0x65)
> +#define MT6360_PMU_CHG_CTRL20			(0x66)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL26		(0x67)
> +#define MT6360_PMU_CHG_HIDDEN_CTRL27		(0x68)
> +#define MT6360_PMU_RESV2			(0x69)
> +#define MT6360_PMU_USBID_CTRL1			(0x6D)
> +#define MT6360_PMU_USBID_CTRL2			(0x6E)
> +#define MT6360_PMU_USBID_CTRL3			(0x6F)
> +#define MT6360_PMU_FLED_CFG			(0x70)
> +#define MT6360_PMU_RESV3			(0x71)
> +#define MT6360_PMU_FLED1_CTRL			(0x72)
> +#define MT6360_PMU_FLED_STRB_CTRL		(0x73)
> +#define MT6360_PMU_FLED1_STRB_CTRL2		(0x74)
> +#define MT6360_PMU_FLED1_TOR_CTRL		(0x75)
> +#define MT6360_PMU_FLED2_CTRL			(0x76)
> +#define MT6360_PMU_RESV4			(0x77)
> +#define MT6360_PMU_FLED2_STRB_CTRL2		(0x78)
> +#define MT6360_PMU_FLED2_TOR_CTRL		(0x79)
> +#define MT6360_PMU_FLED_VMIDTRK_CTRL1		(0x7A)
> +#define MT6360_PMU_FLED_VMID_RTM		(0x7B)
> +#define MT6360_PMU_FLED_VMIDTRK_CTRL2		(0x7C)
> +#define MT6360_PMU_FLED_PWSEL			(0x7D)
> +#define MT6360_PMU_FLED_EN			(0x7E)
> +#define MT6360_PMU_FLED_Hidden1			(0x7F)
> +#define MT6360_PMU_RGB_EN			(0x80)
> +#define MT6360_PMU_RGB1_ISNK			(0x81)
> +#define MT6360_PMU_RGB2_ISNK			(0x82)
> +#define MT6360_PMU_RGB3_ISNK			(0x83)
> +#define MT6360_PMU_RGB_ML_ISNK			(0x84)
> +#define MT6360_PMU_RGB1_DIM			(0x85)
> +#define MT6360_PMU_RGB2_DIM			(0x86)
> +#define MT6360_PMU_RGB3_DIM			(0x87)
> +#define MT6360_PMU_RESV5			(0x88)
> +#define MT6360_PMU_RGB12_Freq			(0x89)
> +#define MT6360_PMU_RGB34_Freq			(0x8A)
> +#define MT6360_PMU_RGB1_Tr			(0x8B)
> +#define MT6360_PMU_RGB1_Tf			(0x8C)
> +#define MT6360_PMU_RGB1_TON_TOFF		(0x8D)
> +#define MT6360_PMU_RGB2_Tr			(0x8E)
> +#define MT6360_PMU_RGB2_Tf			(0x8F)
> +#define MT6360_PMU_RGB2_TON_TOFF		(0x90)
> +#define MT6360_PMU_RGB3_Tr			(0x91)
> +#define MT6360_PMU_RGB3_Tf			(0x92)
> +#define MT6360_PMU_RGB3_TON_TOFF		(0x93)
> +#define MT6360_PMU_RGB_Hidden_CTRL1		(0x94)
> +#define MT6360_PMU_RGB_Hidden_CTRL2		(0x95)
> +#define MT6360_PMU_RESV6			(0x97)
> +#define MT6360_PMU_SPARE1			(0x9A)
> +#define MT6360_PMU_SPARE2			(0xA0)
> +#define MT6360_PMU_SPARE3			(0xB0)
> +#define MT6360_PMU_SPARE4			(0xC0)
> +#define MT6360_PMU_CHG_IRQ1			(0xD0)
> +#define MT6360_PMU_CHG_IRQ2			(0xD1)
> +#define MT6360_PMU_CHG_IRQ3			(0xD2)
> +#define MT6360_PMU_CHG_IRQ4			(0xD3)
> +#define MT6360_PMU_CHG_IRQ5			(0xD4)
> +#define MT6360_PMU_CHG_IRQ6			(0xD5)
> +#define MT6360_PMU_QC_IRQ			(0xD6)
> +#define MT6360_PMU_FOD_IRQ			(0xD7)
> +#define MT6360_PMU_BASE_IRQ			(0xD8)
> +#define MT6360_PMU_FLED_IRQ1			(0xD9)
> +#define MT6360_PMU_FLED_IRQ2			(0xDA)
> +#define MT6360_PMU_RGB_IRQ			(0xDB)
> +#define MT6360_PMU_BUCK1_IRQ			(0xDC)
> +#define MT6360_PMU_BUCK2_IRQ			(0xDD)
> +#define MT6360_PMU_LDO_IRQ1			(0xDE)
> +#define MT6360_PMU_LDO_IRQ2			(0xDF)
> +#define MT6360_PMU_CHG_STAT1			(0xE0)
> +#define MT6360_PMU_CHG_STAT2			(0xE1)
> +#define MT6360_PMU_CHG_STAT3			(0xE2)
> +#define MT6360_PMU_CHG_STAT4			(0xE3)
> +#define MT6360_PMU_CHG_STAT5			(0xE4)
> +#define MT6360_PMU_CHG_STAT6			(0xE5)
> +#define MT6360_PMU_QC_STAT			(0xE6)
> +#define MT6360_PMU_FOD_STAT			(0xE7)
> +#define MT6360_PMU_BASE_STAT			(0xE8)
> +#define MT6360_PMU_FLED_STAT1			(0xE9)
> +#define MT6360_PMU_FLED_STAT2			(0xEA)
> +#define MT6360_PMU_RGB_STAT			(0xEB)
> +#define MT6360_PMU_BUCK1_STAT			(0xEC)
> +#define MT6360_PMU_BUCK2_STAT			(0xED)
> +#define MT6360_PMU_LDO_STAT1			(0xEE)
> +#define MT6360_PMU_LDO_STAT2			(0xEF)
> +#define MT6360_PMU_CHG_MASK1			(0xF0)
> +#define MT6360_PMU_CHG_MASK2			(0xF1)
> +#define MT6360_PMU_CHG_MASK3			(0xF2)
> +#define MT6360_PMU_CHG_MASK4			(0xF3)
> +#define MT6360_PMU_CHG_MASK5			(0xF4)
> +#define MT6360_PMU_CHG_MASK6			(0xF5)
> +#define MT6360_PMU_QC_MASK			(0xF6)
> +#define MT6360_PMU_FOD_MASK			(0xF7)
> +#define MT6360_PMU_BASE_MASK			(0xF8)
> +#define MT6360_PMU_FLED_MASK1			(0xF9)
> +#define MT6360_PMU_FLED_MASK2			(0xFA)
> +#define MT6360_PMU_FAULTB_MASK			(0xFB)
> +#define MT6360_PMU_BUCK1_MASK			(0xFC)
> +#define MT6360_PMU_BUCK2_MASK			(0xFD)
> +#define MT6360_PMU_LDO_MASK1			(0xFE)
> +#define MT6360_PMU_LDO_MASK2			(0xFF)
> +#define MT6360_PMU_MAXREG			(MT6360_PMU_LDO_MASK2)
> +
> +/* MT6360_PMU_IRQ_SET */
> +#define MT6360_PMU_IRQ_REGNUM	(MT6360_PMU_LDO_IRQ2 - MT6360_PMU_CHG_IRQ1 + 1)
> +#define MT6360_IRQ_RETRIG	BIT(2)
> +
> +#define CHIP_VEN_MASK				(0xF0)
> +#define CHIP_VEN_MT6360				(0x50)
> +#define CHIP_REV_MASK				(0x0F)
> +
> +#endif /* __MT6360_H__ */
> 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-03-04 14:56 ` Matthias Brugger
@ 2020-03-04 15:15   ` Lee Jones
  2020-03-04 18:00     ` Re: Matthias Brugger
  0 siblings, 1 reply; 414+ messages in thread
From: Lee Jones @ 2020-03-04 15:15 UTC (permalink / raw)
  To: Matthias Brugger
  Cc: Gene Chen, linux-arm-kernel, linux-mediatek, linux-kernel,
	gene_chen, Wilma.Wu, shufan_lee, cy_huang

On Wed, 04 Mar 2020, Matthias Brugger wrote:

> Please resend with appropriate commit message.

Please refrain from top-posting and don't forget to snip.

> On 03/03/2020 16:27, Gene Chen wrote:
> > Add mfd driver for mt6360 pmic chip include

Looks like your formatting is off.

How was this patch sent?

Best practice is to use `git send-email`.

> > Battery Charger/USB_PD/Flash LED/RGB LED/LDO/Buck
> > 
> > Signed-off-by: Gene Chen <gene_chen@richtek.com>
> > ---
> >  drivers/mfd/Kconfig        |  12 ++
> >  drivers/mfd/Makefile       |   1 +
> >  drivers/mfd/mt6360-core.c  | 425 +++++++++++++++++++++++++++++++++++++++++++++
> >  include/linux/mfd/mt6360.h | 240 +++++++++++++++++++++++++
> >  4 files changed, 678 insertions(+)
> >  create mode 100644 drivers/mfd/mt6360-core.c
> >  create mode 100644 include/linux/mfd/mt6360.h

-- 
Lee Jones [李琼斯]
Linaro Services Technical Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-03-04 15:15   ` Re: Lee Jones
@ 2020-03-04 18:00     ` Matthias Brugger
  0 siblings, 0 replies; 414+ messages in thread
From: Matthias Brugger @ 2020-03-04 18:00 UTC (permalink / raw)
  To: Lee Jones
  Cc: Gene Chen, linux-arm-kernel, linux-mediatek, linux-kernel,
	gene_chen, Wilma.Wu, shufan_lee, cy_huang



On 04/03/2020 16:15, Lee Jones wrote:
> On Wed, 04 Mar 2020, Matthias Brugger wrote:
> 
>> Please resend with appropriate commit message.
> 
> Please refrain from top-posting and don't forget to snip.
> 

It's difficult to write something below a missing subject line without
top-posting. ;)

Sorry for forgetting to snip.

Regards,
Matthias

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2020-02-11 22:34 Rajat Jain
  2020-02-12  9:30 ` Jarkko Nikula
  0 siblings, 1 reply; 414+ messages in thread
From: Rajat Jain @ 2020-02-11 22:34 UTC (permalink / raw)
  To: Daniel Mack, Haojian Zhuang, Robert Jarzmik, Mark Brown,
	linux-arm-kernel, linux-spi, linux-kernel
  Cc: Evan Green, rajatja, rajatxjain, evgreen, shobhit.srivastava,
	porselvan.muthukrishnan

From: Evan Green <evgreen@chromium.org>

Date: Wed, 29 Jan 2020 13:54:16 -0800
Subject: [PATCH] spi: pxa2xx: Add CS control clock quirk

In some circumstances on Intel LPSS controllers, toggling the LPSS
CS control register doesn't actually cause the CS line to toggle.
This seems to be failure of dynamic clock gating that occurs after
going through a suspend/resume transition, where the controller
is sent through a reset transition. This ruins SPI transactions
that either rely on delay_usecs, or toggle the CS line without
sending data.

Whenever CS is toggled, momentarily set the clock gating register
to "Force On" to poke the controller into acting on CS.

Signed-off-by: Evan Green <evgreen@chromium.org>
Signed-off-by: Rajat Jain <rajatja@google.com>
---
 drivers/spi/spi-pxa2xx.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 4c7a71f0fb3e..2e318158fca9 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -70,6 +70,10 @@ MODULE_ALIAS("platform:pxa2xx-spi");
 #define LPSS_CAPS_CS_EN_SHIFT			9
 #define LPSS_CAPS_CS_EN_MASK			(0xf << LPSS_CAPS_CS_EN_SHIFT)
 
+#define LPSS_PRIV_CLOCK_GATE 0x38
+#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK 0x3
+#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON 0x3
+
 struct lpss_config {
 	/* LPSS offset from drv_data->ioaddr */
 	unsigned offset;
@@ -86,6 +90,8 @@ struct lpss_config {
 	unsigned cs_sel_shift;
 	unsigned cs_sel_mask;
 	unsigned cs_num;
+	/* Quirks */
+	unsigned cs_clk_stays_gated : 1;
 };
 
 /* Keep these sorted with enum pxa_ssp_type */
@@ -156,6 +162,7 @@ static const struct lpss_config lpss_platforms[] = {
 		.tx_threshold_hi = 56,
 		.cs_sel_shift = 8,
 		.cs_sel_mask = 3 << 8,
+		.cs_clk_stays_gated = true,
 	},
 };
 
@@ -383,6 +390,22 @@ static void lpss_ssp_cs_control(struct spi_device *spi, bool enable)
 	else
 		value |= LPSS_CS_CONTROL_CS_HIGH;
 	__lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value);
+	if (config->cs_clk_stays_gated) {
+		u32 clkgate;
+
+		/*
+		 * Changing CS alone when dynamic clock gating is on won't
+		 * actually flip CS at that time. This ruins SPI transfers
+		 * that specify delays, or have no data. Toggle the clock mode
+		 * to force on briefly to poke the CS pin to move.
+		 */
+		clkgate = __lpss_ssp_read_priv(drv_data, LPSS_PRIV_CLOCK_GATE);
+		value = (clkgate & ~LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK) |
+			LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON;
+
+		__lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, value);
+		__lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, clkgate);
+	}
 }
 
 static void cs_assert(struct spi_device *spi)
-- 
2.25.0.225.g125e21ebc7-goog


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2020-02-11 22:34 Rajat Jain
@ 2020-02-12  9:30 ` Jarkko Nikula
  2020-02-12 10:24   ` Re: Andy Shevchenko
  0 siblings, 1 reply; 414+ messages in thread
From: Jarkko Nikula @ 2020-02-12  9:30 UTC (permalink / raw)
  To: Rajat Jain, Daniel Mack, Haojian Zhuang, Robert Jarzmik,
	Mark Brown, linux-arm-kernel, linux-spi, linux-kernel
  Cc: Evan Green, rajatxjain, evgreen, shobhit.srivastava,
	porselvan.muthukrishnan, Andy Shevchenko

Hi

+ Andy

On 2/12/20 12:34 AM, Rajat Jain wrote:
> From: Evan Green <evgreen@chromium.org>
> 
> Date: Wed, 29 Jan 2020 13:54:16 -0800
> Subject: [PATCH] spi: pxa2xx: Add CS control clock quirk
> 
This patch subject is missing from mail subject.

> In some circumstances on Intel LPSS controllers, toggling the LPSS
> CS control register doesn't actually cause the CS line to toggle.
> This seems to be failure of dynamic clock gating that occurs after
> going through a suspend/resume transition, where the controller
> is sent through a reset transition. This ruins SPI transactions
> that either rely on delay_usecs, or toggle the CS line without
> sending data.
> 
> Whenever CS is toggled, momentarily set the clock gating register
> to "Force On" to poke the controller into acting on CS.
> 
Could you share a test case that triggers this? What's the platform
here? I'd like to check whether this reproduces on other Intel LPSS
platforms, and so whether a quirk needs to be added for them too.

> Signed-off-by: Evan Green <evgreen@chromium.org>
> Signed-off-by: Rajat Jain <rajatja@google.com>
> ---
>   drivers/spi/spi-pxa2xx.c | 23 +++++++++++++++++++++++
>   1 file changed, 23 insertions(+)
> 
> diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
> index 4c7a71f0fb3e..2e318158fca9 100644
> --- a/drivers/spi/spi-pxa2xx.c
> +++ b/drivers/spi/spi-pxa2xx.c
> @@ -70,6 +70,10 @@ MODULE_ALIAS("platform:pxa2xx-spi");
>   #define LPSS_CAPS_CS_EN_SHIFT			9
>   #define LPSS_CAPS_CS_EN_MASK			(0xf << LPSS_CAPS_CS_EN_SHIFT)
>   
> +#define LPSS_PRIV_CLOCK_GATE 0x38
> +#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK 0x3
> +#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON 0x3
> +
>   struct lpss_config {
>   	/* LPSS offset from drv_data->ioaddr */
>   	unsigned offset;
> @@ -86,6 +90,8 @@ struct lpss_config {
>   	unsigned cs_sel_shift;
>   	unsigned cs_sel_mask;
>   	unsigned cs_num;
> +	/* Quirks */
> +	unsigned cs_clk_stays_gated : 1;
>   };
>   
>   /* Keep these sorted with enum pxa_ssp_type */
> @@ -156,6 +162,7 @@ static const struct lpss_config lpss_platforms[] = {
>   		.tx_threshold_hi = 56,
>   		.cs_sel_shift = 8,
>   		.cs_sel_mask = 3 << 8,
> +		.cs_clk_stays_gated = true,
>   	},
>   };
>   
> @@ -383,6 +390,22 @@ static void lpss_ssp_cs_control(struct spi_device *spi, bool enable)
>   	else
>   		value |= LPSS_CS_CONTROL_CS_HIGH;
>   	__lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value);
> +	if (config->cs_clk_stays_gated) {
> +		u32 clkgate;
> +
> +		/*
> +		 * Changing CS alone when dynamic clock gating is on won't
> +		 * actually flip CS at that time. This ruins SPI transfers
> +		 * that specify delays, or have no data. Toggle the clock mode
> +		 * to force on briefly to poke the CS pin to move.
> +		 */
> +		clkgate = __lpss_ssp_read_priv(drv_data, LPSS_PRIV_CLOCK_GATE);
> +		value = (clkgate & ~LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK) |
> +			LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON;
> +
> +		__lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, value);
> +		__lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, clkgate);
> +	}
>   }
>   
I wonder whether this quick toggling alone is enough, or whether it depends
on time or, actually, on a number of clock cycles. Right now there is no
delay between the two writes, but if it needs a certain number of cycles,
does this still work when using low ssp_clk rates, similar to the case in
commit d0283eb2dbc1 ("spi: pxa2xx: Add output control for multiple Intel
LPSS chip selects")?

I'm also wondering whether this can be done only once after resume, and
whether other LPSS blocks may need the same. I.e. should this be done in
drivers/mfd/intel-lpss.c?

Jarkko

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2020-02-12  9:30 ` Jarkko Nikula
@ 2020-02-12 10:24   ` Andy Shevchenko
  0 siblings, 0 replies; 414+ messages in thread
From: Andy Shevchenko @ 2020-02-12 10:24 UTC (permalink / raw)
  To: Jarkko Nikula
  Cc: Rajat Jain, Daniel Mack, Haojian Zhuang, Robert Jarzmik,
	Mark Brown, linux-arm-kernel, linux-spi, linux-kernel,
	Evan Green, rajatxjain, evgreen, shobhit.srivastava,
	porselvan.muthukrishnan

On Wed, Feb 12, 2020 at 11:30:51AM +0200, Jarkko Nikula wrote:
> On 2/12/20 12:34 AM, Rajat Jain wrote:

> This patch subject is missing from mail subject.

> I'm thinking can this be done only once after resume and may other LPSS
> blocks need the same? I.e. should this be done in drivers/mfd/intel-lpss.c?

On resume we restore the previously saved context, can we be sure that values
we saved during suspend are correct?

If the above doesn't show any issue, the best place to apply this quirk
might be the intel_lpss_suspend() / intel_lpss_resume() callbacks, as Jarkko
suggested.

-- 
With Best Regards,
Andy Shevchenko



^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <f618ed4d-05ce-75cd-8cd9-24d8fe5a2551@samsung.com>]

[parent not found: <CGME20191105044921epcas1p2869157cceaf45351adf9dd2e59161db7@epcas1p2.samsung.com>]

* Re:
       [not found] ` <CGME20191105044921epcas1p2869157cceaf45351adf9dd2e59161db7@epcas1p2.samsung.com>
@ 2019-11-05  4:54   ` Chanwoo Choi
  0 siblings, 0 replies; 414+ messages in thread
From: Chanwoo Choi @ 2019-11-05  4:54 UTC (permalink / raw)
  To: Greg KH
  Cc: linux-kernel, Chanwoo Choi (chanwoo@kernel.org),
	함명주

Dear Greg,

I'm sorry for this pull request. I've missed the patch title.
I'll resend the pull-request.

Best Regards,
Chanwoo Choi

On 19. 11. 5. 오후 1:52, Chanwoo Choi wrote:
> Dear Greg,
> 
> This is extcon-next pull request for v5.5. I add detailed description of
> this pull request on below. Please pull extcon with following updates.
> 
> Detailed description for this pull request:
> 1. Clean up and fix minor issues in the extcon provider drivers
> - extcon-intel-cht-wc: don't reset the USB data connection at probe time,
>   in order to prevent removing all devices from the bus.
> - extcon-sm5502: reset the registers at probe time in order to
>   prevent a stuck state, and remove a redundant variable
>   initialization.
> 
> Best Regards,
> Chanwoo Choi
> 
> The following changes since commit 54ecb8f7028c5eb3d740bb82b0f1d90f2df63c5c:
> 
>   Linux 5.4-rc1 (2019-09-30 10:35:40 -0700)
> 
> are available in the Git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/extcon.git tags/extcon-next-for-5.5
> 
> for you to fetch changes up to ddd1bbbae486ff5913c8fc72c853dcea60713236:
> 
>   extcon: sm5502: remove redundant assignment to variable cable_type (2019-10-31 13:47:42 +0900)
> 
> ----------------------------------------------------------------
> Colin Ian King (1):
>       extcon: sm5502: remove redundant assignment to variable cable_type
> 
> Stephan Gerhold (1):
>       extcon: sm5502: Reset registers during initialization
> 
> Yauhen Kharuzhy (1):
>       extcon-intel-cht-wc: Don't reset USB data connection at probe
> 
>  drivers/extcon/extcon-intel-cht-wc.c | 16 ++++++++++++++--
>  drivers/extcon/extcon-sm5502.c       |  6 +++++-
>  drivers/extcon/extcon-sm5502.h       |  2 ++
>  3 files changed, 21 insertions(+), 3 deletions(-)
> 


-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2019-10-27 21:36 Margaret Kwan Wing Han
  0 siblings, 0 replies; 414+ messages in thread
From: Margaret Kwan Wing Han @ 2019-10-27 21:36 UTC (permalink / raw)
  To: linux-kernel


I need a partner for a legal deal worth $30,500,000 if interested reply me for
more details.

Regards,
Margaret Kwan Wing

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2019-09-24 19:49 Venkat Subbiah
  0 siblings, 0 replies; 414+ messages in thread
From: Venkat Subbiah @ 2019-09-24 19:49 UTC (permalink / raw)
  To: Lakshman, lauren, laurenb, Lavanya, Quang, Ida Maupin, Morgan,
	linux kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=us-ascii, Size: 962 bytes --]

Hello 


http://omniummjc.com/freeze.php?dpeh=knot10401
Venkat

























alpajqmka wmsyduztf Update: Back in stock. This won’t last long! qgaegip fuwzmnt isqhzstcc wqkejlrnt So the waters were healed unto this day, according to the saying of Elisha which he spake. hypyvykicy rthie oaqwwrrc mduzva wlsfg




























hvpaxzcxyb itkzynwabe ktekwxu xhxgvai eaumbu ofnltk chdfggsb ufpefiuq





























qzlfa swozrpya dasogbkjo srgeqy icdmg Then wrote Rehum the chancellor, and Shimshai the scribe, and the rest of their companions; the Dinaites, the Apharsathchites, the Tarpelites, the Apharsites, the Archevites, the Babylonians, the Susanchites, the Dehavites, and the Elamites, njhthme pbiyywck qbnonwfsv tefdeug shqdn
























zenjr These deals are only for our readers, but we aren’t sure how long they’ll be available, so go ahead and sign up while you can. ofizsykm lpjnznu zffglnqq hmtrq

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <CAK8P3a16=ktJm5B3c5-XS7SqVuHBY5+E2FwVUqbdOdWK-AUgSA@mail.gmail.com>]

[parent not found: <20190830202959.3539-1-msuchanek@suse.de>]

* Re:
       [not found] ` <20190830202959.3539-1-msuchanek@suse.de>
@ 2019-08-30 20:32   ` Arnd Bergmann
  0 siblings, 0 replies; 414+ messages in thread
From: Arnd Bergmann @ 2019-08-30 20:32 UTC (permalink / raw)
  To: Michal Suchanek
  Cc: Linux FS-devel Mailing List, Benjamin Herrenschmidt,
	Paul Mackerras, Michael Ellerman, Alexander Viro,
	Greg Kroah-Hartman, Christian Brauner, Allison Randal,
	Heiko Carstens, Thomas Gleixner, Firoz Khan, linuxppc-dev,
	Linux Kernel Mailing List

On Fri, Aug 30, 2019 at 10:30 PM Michal Suchanek <msuchanek@suse.de> wrote:
>
> Subject: [PATCH] powerpc: Add back __ARCH_WANT_SYS_LLSEEK macro
>
> This partially reverts commit caf6f9c8a326 ("asm-generic: Remove
> unneeded __ARCH_WANT_SYS_LLSEEK macro")
>
> When CONFIG_COMPAT is disabled on ppc64 the kernel does not build.
>
> There is resistance to both removing the llseek syscall from the 64bit
> syscall tables and building the llseek interface unconditionally.
>
> Link: https://lore.kernel.org/lkml/20190828151552.GA16855@infradead.org/
> Link: https://lore.kernel.org/lkml/20190829214319.498c7de2@naga/
>
> Signed-off-by: Michal Suchanek <msuchanek@suse.de>

Reviewed-by: Arnd Bergmann <arnd@arndb.de>

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <E1hUrZM-0007qA-Q8@sslproxy01.your-server.de>]

* Re:
       [not found] <E1hUrZM-0007qA-Q8@sslproxy01.your-server.de>
@ 2019-05-29 19:54 ` Alex Williamson
  0 siblings, 0 replies; 414+ messages in thread
From: Alex Williamson @ 2019-05-29 19:54 UTC (permalink / raw)
  To: Thomas Meyer; +Cc: kvm, linux-kernel

On Sun, 26 May 2019 13:44:04 +0200
"Thomas Meyer" <thomas@m3y3r.de> wrote:

> From thomas@m3y3r.de Sun May 26 00:13:26 2019
> Subject: [PATCH] vfio-pci/nvlink2: Use vma_pages function instead of explicit
>  computation
> To: alex.williamson@redhat.com, kvm@vger.kernel.org, linux-kernel@vger.kernel.org
> Content-Type: text/plain; charset="UTF-8"
> Mime-Version: 1.0
> Content-Transfer-Encoding: 8bit
> X-Patch: Cocci
> X-Mailer: DiffSplit
> Message-ID: <1558822461341-1674464153-1-diffsplit-thomas@m3y3r.de>
> References: <1558822461331-726613767-0-diffsplit-thomas@m3y3r.de>
> In-Reply-To: <1558822461331-726613767-0-diffsplit-thomas@m3y3r.de>
> X-Serial-No: 1
> 
> Use vma_pages function on vma object instead of explicit computation.
> 
> Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
> ---
> 
> diff -u -p a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c
> --- a/drivers/vfio/pci/vfio_pci_nvlink2.c
> +++ b/drivers/vfio/pci/vfio_pci_nvlink2.c
> @@ -161,7 +161,7 @@ static int vfio_pci_nvgpu_mmap(struct vf
>  
>  	atomic_inc(&data->mm->mm_count);
>  	ret = (int) mm_iommu_newdev(data->mm, data->useraddr,
> -			(vma->vm_end - vma->vm_start) >> PAGE_SHIFT,
> +			vma_pages(vma),
>  			data->gpu_hpa, &data->mem);
>  
>  	trace_vfio_pci_nvgpu_mmap(vdev->pdev, data->gpu_hpa, data->useraddr,

Besides the formatting of this patch, there's already a pending patch
with this same change:

https://lkml.org/lkml/2019/5/16/658

I think the original must have bounced from lkml due the encoding, but
I'll use that one since it came first, is slightly cleaner in wrapping
the line following the change, and already has Alexey's R-b.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 414+ messages in thread

* [PATCH v6 0/3] add new ima hook ima_kexec_cmdline to measure kexec boot cmdline args
@ 2019-05-21  0:06 Prakhar Srivastava
  2019-05-21  0:06 ` [PATCH v6 2/3] add a new ima template field buf Prakhar Srivastava
  0 siblings, 1 reply; 414+ messages in thread
From: Prakhar Srivastava @ 2019-05-21  0:06 UTC (permalink / raw)
  To: linux-integrity, linux-security-module, linux-kernel
  Cc: mjg59, zohar, roberto.sassu, vgoyal, Prakhar Srivastava

The motive behind the patch series is to measure the boot cmdline args
used for soft reboot/kexec case.

For secure boot attestation, it is necessary to measure the kernel
command line and the kernel version. For cold boot, the boot loader
can be enhanced to measure these parameters.
(https://mjg59.dreamwidth.org/48897.html)
However, for attestation across soft reboot boundary, these values 
also need to be measured during kexec_file_load.

Currently, in the kexec (kexec_file_load)/soft reboot scenario, the boot
cmdline args for the next kernel are not measured. In the normal
boot/hard reboot case, the cmdline args are measured into the TPM.

The hash of boot command line is calculated and added to the current 
running kernel's measurement list.  On a soft reboot like kexec, the PCRs
are not reset to zero.  Refer to commit 94c3aac567a9 ("ima: on soft 
reboot, restore the measurement list") patch description.

To achieve the above, the patch series does the following:
  - adds a new ima hook, ima_kexec_cmdline, which measures the cmdline args
    into the ima log, behind a new ima policy entry KEXEC_CMDLINE.
  - since the cmdline args cannot be appraised, adds a new template field
    (buf). The template field contains the buffer passed (cmdline args),
    which can be used to appraise/attest at a later stage.
  - calls the ima_kexec_cmdline(...) hook from the kexec_file_load call.

The ima logs need to be carried over to the next kernel, which will be
followed up by other patchsets for x86_64 and arm64.

Changelog:
V6:
  -add a new ima hook and policy to measure the cmdline
    args(ima_kexec_cmdline)
  -add a new template field buf to contain the buffer measured.
  [suggested by Mimi Zohar]
   add new fields to ima_event_data to store/read buffer data.
  [suggested by Roberto]
  -call ima_kexec_cmdline from kexec_file_load path

v5:
  -add a new ima hook and policy to measure the cmdline
    args(ima_kexec_cmdline)
  -add a new template field buf to contain the buffer measured.
    [suggested by Mimi Zohar]
  -call ima_kexec_cmdline from kexec_file_load path

v4:
  - per feedback from LSM community, removed the LSM hook and renamed the
    IMA policy to KEXEC_CMDLINE

v3: (rebase changes to next-general)
  - Add policy checks for buffer[suggested by Mimi Zohar]
  - use the IMA_XATTR to add buffer
  - Add kexec_cmdline used for kexec file load
  - Add an LSM hook to allow usage by other LSM.[suggested by Mimi Zohar]

v2:
  - Add policy checks for buffer[suggested by Mimi Zohar]
  - Add an LSM hook to allow usage by other LSM.[suggested by Mimi Zohar]
  - use the IMA_XATTR to add buffer instead of sig template

v1:
  -Add kconfigs to control the ima_buffer_check
  -measure the cmdline args suffixed with the kernel file name
  -add the buffer to the template sig field.

Prakhar Srivastava (3):
  Add a new ima hook ima_kexec_cmdline to measure cmdline args
  add a new ima template field buf
  call ima_kexec_cmdline to measure the cmdline args

 Documentation/ABI/testing/ima_policy      |  1 +
 Documentation/security/IMA-templates.rst  |  2 +-
 include/linux/ima.h                       |  2 +
 kernel/kexec_file.c                       |  8 ++-
 security/integrity/ima/ima.h              |  3 +
 security/integrity/ima/ima_api.c          |  5 +-
 security/integrity/ima/ima_init.c         |  2 +-
 security/integrity/ima/ima_main.c         | 80 +++++++++++++++++++++++
 security/integrity/ima/ima_policy.c       |  9 +++
 security/integrity/ima/ima_template.c     |  2 +
 security/integrity/ima/ima_template_lib.c | 20 ++++++
 security/integrity/ima/ima_template_lib.h |  4 ++
 12 files changed, 131 insertions(+), 7 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 414+ messages in thread

* [PATCH v6 2/3] add a new ima template field buf
  2019-05-21  0:06 [PATCH v6 0/3] add new ima hook ima_kexec_cmdline to measure kexec boot cmdline args Prakhar Srivastava
@ 2019-05-21  0:06 ` Prakhar Srivastava
  2019-05-24 15:12   ` Mimi Zohar
  0 siblings, 1 reply; 414+ messages in thread
From: Prakhar Srivastava @ 2019-05-21  0:06 UTC (permalink / raw)
  To: linux-integrity, linux-security-module, linux-kernel
  Cc: mjg59, zohar, roberto.sassu, vgoyal, Prakhar Srivastava

A buffer (cmdline args) measured into ima cannot be appraised
without already being aware of the buffer contents. Since we
don't know what cmdline args will be passed (or need to validate
what was passed), it is not possible to appraise it.

Since hashes are non-reversible, the raw buffer is needed to
recompute the hash.
To regenerate the hash of the buffer and appraise it,
the contents of the buffer need to be available.

A new template field buf is added to the existing ima template
fields, which can be used to store/read the buffer itself.
Two new fields are added to the ima_event_data to carry the
buf and buf_len whenever necessary.

Update the process_buffer_measurement call to add the buf
to the ima_event_data.
process_buffer_measurement was added in "Add a new ima hook
ima_kexec_cmdline to measure cmdline args".

- Add a new template field 'buf' to be used to store/read
  the buffer data.
- Add two new fields to ima_event_data to hold the buf and
  buf_len [Suggested by Roberto]
- Update process_buffer_measurement to add the buffer to
  ima_event_data

Signed-off-by: Prakhar Srivastava <prsriva02@gmail.com>
---
 Documentation/security/IMA-templates.rst  |  2 +-
 security/integrity/ima/ima.h              |  2 ++
 security/integrity/ima/ima_api.c          |  4 ++--
 security/integrity/ima/ima_init.c         |  2 +-
 security/integrity/ima/ima_main.c         |  4 +++-
 security/integrity/ima/ima_template.c     |  2 ++
 security/integrity/ima/ima_template_lib.c | 20 ++++++++++++++++++++
 security/integrity/ima/ima_template_lib.h |  4 ++++
 8 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/Documentation/security/IMA-templates.rst b/Documentation/security/IMA-templates.rst
index 2cd0e273cc9a..9cddb66727ee 100644
--- a/Documentation/security/IMA-templates.rst
+++ b/Documentation/security/IMA-templates.rst
@@ -70,7 +70,7 @@ descriptors by adding their identifier to the format string
    prefix is shown only if the hash algorithm is not SHA1 or MD5);
  - 'n-ng': the name of the event, without size limitations;
  - 'sig': the file signature.
-
+ - 'buf': the buffer data that was used to generate the hash without size limitations.
 
 Below, there is the list of defined template descriptors:
 
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 226a26d8de09..4a82541dc3b6 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -65,6 +65,8 @@ struct ima_event_data {
 	struct evm_ima_xattr_data *xattr_value;
 	int xattr_len;
 	const char *violation;
+	const void *buf;
+	int buf_len;
 };
 
 /* IMA template field data definition */
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index 800d965232e5..c12f1cd38f8f 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -134,7 +134,7 @@ void ima_add_violation(struct file *file, const unsigned char *filename,
 	struct ima_template_entry *entry;
 	struct inode *inode = file_inode(file);
 	struct ima_event_data event_data = {iint, file, filename, NULL, 0,
-					    cause};
+					    cause, NULL, 0};
 	int violation = 1;
 	int result;
 
@@ -286,7 +286,7 @@ void ima_store_measurement(struct integrity_iint_cache *iint,
 	struct inode *inode = file_inode(file);
 	struct ima_template_entry *entry;
 	struct ima_event_data event_data = {iint, file, filename, xattr_value,
-					    xattr_len, NULL};
+					    xattr_len, NULL, NULL, 0};
 	int violation = 0;
 
 	if (iint->measured_pcrs & (0x1 << pcr))
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 6c9295449751..0c34d3100b5b 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -50,7 +50,7 @@ static int __init ima_add_boot_aggregate(void)
 	struct ima_template_entry *entry;
 	struct integrity_iint_cache tmp_iint, *iint = &tmp_iint;
 	struct ima_event_data event_data = {iint, NULL, boot_aggregate_name,
-					    NULL, 0, NULL};
+					    NULL, 0, NULL, NULL, 0};
 	int result = -ENOMEM;
 	int violation = 0;
 	struct {
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index a88c28918a63..6c5691b65b84 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -594,7 +594,7 @@ static void process_buffer_measurement(const void *buf, int size,
 	struct ima_template_entry *entry = NULL;
 	struct integrity_iint_cache tmp_iint, *iint = &tmp_iint;
 	struct ima_event_data event_data = {iint, NULL, NULL,
-						NULL, 0, NULL};
+						NULL, 0, NULL, NULL, 0};
 	struct {
 		struct ima_digest_data hdr;
 		char digest[IMA_MAX_DIGEST_SIZE];
@@ -611,6 +611,8 @@ static void process_buffer_measurement(const void *buf, int size,
 	memset(&hash, 0, sizeof(hash));
 
 	event_data.filename = eventname;
+	event_data.buf = buf;
+	event_data.buf_len = size;
 
 	iint->ima_hash = &hash.hdr;
 	iint->ima_hash->algo = ima_hash_algo;
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
index b631b8bc7624..a76d1c04162a 100644
--- a/security/integrity/ima/ima_template.c
+++ b/security/integrity/ima/ima_template.c
@@ -43,6 +43,8 @@ static const struct ima_template_field supported_fields[] = {
 	 .field_show = ima_show_template_string},
 	{.field_id = "sig", .field_init = ima_eventsig_init,
 	 .field_show = ima_show_template_sig},
+	{.field_id = "buf", .field_init = ima_eventbuf_init,
+	 .field_show = ima_show_template_buf},
 };
 #define MAX_TEMPLATE_NAME_LEN 15
 
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index 513b457ae900..43d1404141c1 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -162,6 +162,12 @@ void ima_show_template_sig(struct seq_file *m, enum ima_show_type show,
 	ima_show_template_field_data(m, show, DATA_FMT_HEX, field_data);
 }
 
+void ima_show_template_buf(struct seq_file *m, enum ima_show_type show,
+				struct ima_field_data *field_data)
+{
+	ima_show_template_field_data(m, show, DATA_FMT_HEX, field_data);
+}
+
 /**
  * ima_parse_buf() - Parses lengths and data from an input buffer
  * @bufstartp:       Buffer start address.
@@ -389,3 +395,17 @@ int ima_eventsig_init(struct ima_event_data *event_data,
 	return ima_write_template_field_data(xattr_value, event_data->xattr_len,
 					     DATA_FMT_HEX, field_data);
 }
+
+/*
+ *  ima_eventbuf_init - include the buffer(kexec-cmdline) as part of the
+ *  template data.
+ */
+int ima_eventbuf_init(struct ima_event_data *event_data,
+				struct ima_field_data *field_data)
+{
+	if ((!event_data->buf) || (event_data->buf_len == 0))
+		return 0;
+
+	return ima_write_template_field_data(event_data->buf, event_data->buf_len,
+					DATA_FMT_HEX, field_data);
+}
diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h
index 6a3d8b831deb..f0178bc60c55 100644
--- a/security/integrity/ima/ima_template_lib.h
+++ b/security/integrity/ima/ima_template_lib.h
@@ -29,6 +29,8 @@ void ima_show_template_string(struct seq_file *m, enum ima_show_type show,
 			      struct ima_field_data *field_data);
 void ima_show_template_sig(struct seq_file *m, enum ima_show_type show,
 			   struct ima_field_data *field_data);
+void ima_show_template_buf(struct seq_file *m, enum ima_show_type show,
+				struct ima_field_data *field_data);
 int ima_parse_buf(void *bufstartp, void *bufendp, void **bufcurp,
 		  int maxfields, struct ima_field_data *fields, int *curfields,
 		  unsigned long *len_mask, int enforce_mask, char *bufname);
@@ -42,4 +44,6 @@ int ima_eventname_ng_init(struct ima_event_data *event_data,
 			  struct ima_field_data *field_data);
 int ima_eventsig_init(struct ima_event_data *event_data,
 		      struct ima_field_data *field_data);
+int ima_eventbuf_init(struct ima_event_data *event_data,
+				struct ima_field_data *field_data);
 #endif /* __LINUX_IMA_TEMPLATE_LIB_H */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re: [PATCH v6 2/3] add a new ima template field buf
  2019-05-21  0:06 ` [PATCH v6 2/3] add a new ima template field buf Prakhar Srivastava
@ 2019-05-24 15:12   ` Mimi Zohar
  2019-05-24 15:42     ` Roberto Sassu
  0 siblings, 1 reply; 414+ messages in thread
From: Mimi Zohar @ 2019-05-24 15:12 UTC (permalink / raw)
  To: Prakhar Srivastava, linux-integrity, linux-security-module, linux-kernel
  Cc: mjg59, roberto.sassu, vgoyal

On Mon, 2019-05-20 at 17:06 -0700, Prakhar Srivastava wrote:
> A buffer(cmdline args) measured into ima cannot be appraised
> without already being aware of the buffer contents.Since we
> don't know what cmdline args will be passed (or need to validate
> what was passed) it is not possible to appraise it. 
> 
> Since hashs are non reversible the raw buffer is needed to 
> recompute the hash.
> To regenrate the hash of the buffer and appraise the same
> the contents of the buffer need to be available.
> 
> A new template field buf is added to the existing ima template
> fields, which can be used to store/read the buffer itself.
> Two new fields are added to the ima_event_data to carry the
> buf and buf_len whenever necessary.
> 
> Updated the process_buffer_measurement call to add the buf
> to the ima_event_data.
> process_buffer_measurement added in "Add a new ima hook 
> ima_kexec_cmdline to measure cmdline args"
> 
> - Add a new template field 'buf' to be used to store/read
> the buffer data.
> - Added two new fields to ima_event_data to hold the buf and
> buf_len [Suggested by Roberto]
> -Updated process_buffer_meaurement to add the buffer to
> ima_event_data

This patch description can be written more concisely.

Patch 1/3 in this series introduces measuring the kexec boot command
line.  This patch defines a new template field for storing the kexec
boot command line in the measurement list in order for a remote
attestation server to verify.

As mentioned, the first patch description should include a shell
command for verifying the digest in the kexec boot command line
measurement list record against /proc/cmdline.  This patch description
should include a shell command showing how to verify the digest based
on the new field.  Should the new field in the ascii measurement list
be displayed as a string, not hex?

Mimi


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-05-24 15:12   ` Mimi Zohar
@ 2019-05-24 15:42     ` Roberto Sassu
  2019-05-24 15:47       ` Re: Roberto Sassu
  0 siblings, 1 reply; 414+ messages in thread
From: Roberto Sassu @ 2019-05-24 15:42 UTC (permalink / raw)
  To: Mimi Zohar, Prakhar Srivastava, linux-integrity,
	linux-security-module, linux-kernel
  Cc: mjg59, vgoyal

On 5/24/2019 5:12 PM, Mimi Zohar wrote:
> On Mon, 2019-05-20 at 17:06 -0700, Prakhar Srivastava wrote:
>> A buffer(cmdline args) measured into ima cannot be appraised
>> without already being aware of the buffer contents.Since we
>> don't know what cmdline args will be passed (or need to validate
>> what was passed) it is not possible to appraise it.
>>
>> Since hashs are non reversible the raw buffer is needed to
>> recompute the hash.
>> To regenrate the hash of the buffer and appraise the same
>> the contents of the buffer need to be available.
>>
>> A new template field buf is added to the existing ima template
>> fields, which can be used to store/read the buffer itself.
>> Two new fields are added to the ima_event_data to carry the
>> buf and buf_len whenever necessary.
>>
>> Updated the process_buffer_measurement call to add the buf
>> to the ima_event_data.
>> process_buffer_measurement added in "Add a new ima hook
>> ima_kexec_cmdline to measure cmdline args"
>>
>> - Add a new template field 'buf' to be used to store/read
>> the buffer data.
>> - Added two new fields to ima_event_data to hold the buf and
>> buf_len [Suggested by Roberto]
>> -Updated process_buffer_meaurement to add the buffer to
>> ima_event_data
> 
> This patch description can be written more concisely.
> 
> Patch 1/3 in this series introduces measuring the kexec boot command
> line.  This patch defines a new template field for storing the kexec
> boot command line in the measurement list in order for a remote
> attestation server to verify.
> 
> As mentioned, the first patch description should include a shell
> command for verifying the digest in the kexec boot command line
> measurement list record against /proc/cmdline.  This patch description
> should include a shell command showing how to verify the digest based
> on the new field.  Should the new field in the ascii measurement list
> be displayed as a string, not hex?

We should define a new type. If the type is DATA_FMT_STRING, spaces are
replaced with '_'.

Roberto

-- 
HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063
Managing Director: Bo PENG, Jian LI, Yanli SHI

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-05-24 15:42     ` Roberto Sassu
@ 2019-05-24 15:47       ` Roberto Sassu
  2019-05-24 18:09         ` Re: Mimi Zohar
  0 siblings, 1 reply; 414+ messages in thread
From: Roberto Sassu @ 2019-05-24 15:47 UTC (permalink / raw)
  To: Mimi Zohar, Prakhar Srivastava, linux-integrity,
	linux-security-module, linux-kernel
  Cc: mjg59, vgoyal

On 5/24/2019 5:42 PM, Roberto Sassu wrote:
> On 5/24/2019 5:12 PM, Mimi Zohar wrote:
>> On Mon, 2019-05-20 at 17:06 -0700, Prakhar Srivastava wrote:
>>> A buffer(cmdline args) measured into ima cannot be appraised
>>> without already being aware of the buffer contents.Since we
>>> don't know what cmdline args will be passed (or need to validate
>>> what was passed) it is not possible to appraise it.
>>>
>>> Since hashs are non reversible the raw buffer is needed to
>>> recompute the hash.
>>> To regenrate the hash of the buffer and appraise the same
>>> the contents of the buffer need to be available.
>>>
>>> A new template field buf is added to the existing ima template
>>> fields, which can be used to store/read the buffer itself.
>>> Two new fields are added to the ima_event_data to carry the
>>> buf and buf_len whenever necessary.
>>>
>>> Updated the process_buffer_measurement call to add the buf
>>> to the ima_event_data.
>>> process_buffer_measurement added in "Add a new ima hook
>>> ima_kexec_cmdline to measure cmdline args"
>>>
>>> - Add a new template field 'buf' to be used to store/read
>>> the buffer data.
>>> - Added two new fields to ima_event_data to hold the buf and
>>> buf_len [Suggested by Roberto]
>>> -Updated process_buffer_meaurement to add the buffer to
>>> ima_event_data
>>
>> This patch description can be written more concisely.
>>
>> Patch 1/3 in this series introduces measuring the kexec boot command
>> line.  This patch defines a new template field for storing the kexec
>> boot command line in the measurement list in order for a remote
>> attestation server to verify.
>>
>> As mentioned, the first patch description should include a shell
>> command for verifying the digest in the kexec boot command line
>> measurement list record against /proc/cmdline.  This patch description
>> should include a shell command showing how to verify the digest based
>> on the new field.  Should the new field in the ascii measurement list
>> be displayed as a string, not hex?
> 
> We should define a new type. If the type is DATA_FMT_STRING, spaces are
> replaced with '_'.

Or better. Leave it as hex, otherwise there would be a parsing problem
if there are spaces in the data for a field.

Roberto

-- 
HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063
Managing Director: Bo PENG, Jian LI, Yanli SHI

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: Re:
  2019-05-24 15:47       ` Re: Roberto Sassu
@ 2019-05-24 18:09         ` Mimi Zohar
  2019-05-24 19:00           ` Re: prakhar srivastava
  0 siblings, 1 reply; 414+ messages in thread
From: Mimi Zohar @ 2019-05-24 18:09 UTC (permalink / raw)
  To: Roberto Sassu, Prakhar Srivastava, linux-integrity,
	linux-security-module, linux-kernel
  Cc: mjg59, vgoyal

> >> As mentioned, the first patch description should include a shell
> >> command for verifying the digest in the kexec boot command line
> >> measurement list record against /proc/cmdline.  This patch description
> >> should include a shell command showing how to verify the digest based
> >> on the new field.  Should the new field in the ascii measurement list
> >> be displayed as a string, not hex?
> > 
> > We should define a new type. If the type is DATA_FMT_STRING, spaces are
> > replaced with '_'.
> 
> Or better. Leave it as hex, otherwise there would be a parsing problem
> if there are spaces in the data for a field.

After making a few changes, the measurement list contains the
following kexec-cmdline data:

10 edc32d1e3a5ba7272280a395b6fb56a5ef7c78c3 ima-buf
sha256:4f43b7db850e
88c49dfeffd4b1eb4f021d78033dfb05b07e45eec8d0b45275 
kexec-cmdline
726f6f
743d2f6465762f7364613420726f2072642e6c756b732e757569643d6c756b73
2d6637
3633643737632d653236622d343431642d613734652d62363633636334643832
656120
696d615f706f6c6963793d7463627c61707072616973655f746362

There's probably a better shell command, but the following works to
verify the digest locally against the /proc/cmdline:

$ echo -n -e `cat /proc/cmdline | sed 's/^.*root=/root=/'` | sha256sum
4f43b7db850e88c49dfeffd4b1eb4f021d78033dfb05b07e45eec8d0b4527f65  -

If we leave the "buf" field as ascii-hex, what would the shell command
look like when verifying the digest based on the "buf" field?

Mimi


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: Re:
  2019-05-24 18:09         ` Re: Mimi Zohar
@ 2019-05-24 19:00           ` prakhar srivastava
  2019-05-24 19:15             ` Re: Mimi Zohar
  0 siblings, 1 reply; 414+ messages in thread
From: prakhar srivastava @ 2019-05-24 19:00 UTC (permalink / raw)
  To: Mimi Zohar
  Cc: Roberto Sassu, linux-integrity, linux-security-module,
	linux-kernel, Matthew Garrett, vgoyal

On Fri, May 24, 2019 at 11:09 AM Mimi Zohar <zohar@linux.ibm.com> wrote:
>
> > >> As mentioned, the first patch description should include a shell
> > >> command for verifying the digest in the kexec boot command line
> > >> measurement list record against /proc/cmdline.  This patch description
> > >> should include a shell command showing how to verify the digest based
> > >> on the new field.  Should the new field in the ascii measurement list
> > >> be displayed as a string, not hex?
> > >
> > > We should define a new type. If the type is DATA_FMT_STRING, spaces are
> > > replaced with '_'.
> >
> > Or better. Leave it as hex, otherwise there would be a parsing problem
> > if there are spaces in the data for a field.
>
> After making a few changes, the measurement list contains the
> following kexec-cmdline data:
>
> 10 edc32d1e3a5ba7272280a395b6fb56a5ef7c78c3 ima-buf
> sha256:4f43b7db850e
> 88c49dfeffd4b1eb4f021d78033dfb05b07e45eec8d0b45275
> kexec-cmdline
> 726f6f
> 743d2f6465762f7364613420726f2072642e6c756b732e757569643d6c756b73
> 2d6637
> 3633643737632d653236622d343431642d613734652d62363633636334643832
> 656120
> 696d615f706f6c6963793d7463627c61707072616973655f746362
>
> There's probably a better shell command, but the following works to
> verify the digest locally against the /proc/cmdline:
>
> $ echo -n -e `cat /proc/cmdline | sed 's/^.*root=/root=/'` | sha256sum
> 4f43b7db850e88c49dfeffd4b1eb4f021d78033dfb05b07e45eec8d0b4527f65  -
>
> If we leave the "buf" field as ascii-hex, what would the shell command
> look like when verifying the digest based on the "buf" field?
>
> Mimi
>
To quickly test the sha256, I used my own /proc/cmdline:
ro quiet splash vt.handoff=1 ima_policy=tcb ima_appraise=fix
ima_template_fmt=n-ng|d-ng|sig|buf ima_hash=sha256

export VAL=
726f2071756965742073706c6173682076742e68616e646f66663d3120
696d615f706f6c6963793d74636220696d615f61707072616973653d666
97820696d615f74656d706c6174655f666d743d6e2d6e677c642d6e677c
7369677c62756620696d615f686173683d736861323536

echo -n -e $VAL | xxd -r -p | sha256sum
0d0b891bb730120d9593799cba1a7b3febf68f2bb81fb1304b0c963f95f6bc58  -

I will run it through the code as well, but the shell command should work.

Thanks,
Prakhar Srivastava

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: Re:
  2019-05-24 19:00           ` Re: prakhar srivastava
@ 2019-05-24 19:15             ` Mimi Zohar
  0 siblings, 0 replies; 414+ messages in thread
From: Mimi Zohar @ 2019-05-24 19:15 UTC (permalink / raw)
  To: prakhar srivastava
  Cc: Roberto Sassu, linux-integrity, linux-security-module,
	linux-kernel, Matthew Garrett, vgoyal

On Fri, 2019-05-24 at 12:00 -0700, prakhar srivastava wrote:
> On Fri, May 24, 2019 at 11:09 AM Mimi Zohar <zohar@linux.ibm.com> wrote:
> >
> > > >> As mentioned, the first patch description should include a shell
> > > >> command for verifying the digest in the kexec boot command line
> > > >> measurement list record against /proc/cmdline.  This patch description
> > > >> should include a shell command showing how to verify the digest based
> > > >> on the new field.  Should the new field in the ascii measurement list
> > > >> be displayed as a string, not hex?
> > > >
> > > > We should define a new type. If the type is DATA_FMT_STRING, spaces are
> > > > replaced with '_'.
> > >
> > > Or better. Leave it as hex, otherwise there would be a parsing problem
> > > if there are spaces in the data for a field.
> >
> > After making a few changes, the measurement list contains the
> > following kexec-cmdline data:
> >
> > 10 edc32d1e3a5ba7272280a395b6fb56a5ef7c78c3 ima-buf
> > sha256:4f43b7db850e
> > 88c49dfeffd4b1eb4f021d78033dfb05b07e45eec8d0b45275
> > kexec-cmdline
> > 726f6f
> > 743d2f6465762f7364613420726f2072642e6c756b732e757569643d6c756b73
> > 2d6637
> > 3633643737632d653236622d343431642d613734652d62363633636334643832
> > 656120
> > 696d615f706f6c6963793d7463627c61707072616973655f746362
> >
> > There's probably a better shell command, but the following works to
> > verify the digest locally against the /proc/cmdline:
> >
> > $ echo -n -e `cat /proc/cmdline | sed 's/^.*root=/root=/'` | sha256sum
> > 4f43b7db850e88c49dfeffd4b1eb4f021d78033dfb05b07e45eec8d0b4527f65  -
> >
> > If we leave the "buf" field as ascii-hex, what would the shell command
> > look like when verifying the digest based on the "buf" field?
> >
> > Mimi
> >
> To quickly test the sha256 i used the my /proc/cmdline
> ro quiet splash vt.handoff=1 ima_policy=tcb ima_appraise=fix
> ima_template_fmt=n-ng|d-ng|sig|buf ima_hash=sha256
> 
> export $VAL=
> 726f2071756965742073706c6173682076742e68616e646f66663d3120
> 696d615f706f6c6963793d74636220696d615f61707072616973653d666
> 97820696d615f74656d706c6174655f666d743d6e2d6e677c642d6e677c
> 7369677c62756620696d615f686173683d736861323536
> 
> echo -n -e $VAL | xxd -r -p | sha256sum
> 0d0b891bb730120d9593799cba1a7b3febf68f2bb81fb1304b0c963f95f6bc58  -
> 
> I will run it through the code as well, but the shell command should work.

Yes, that works.

sudo cat /sys/kernel/security/integrity/ima/ascii_runtime_measurements
| grep  kexec-cmdline | cut -d' ' -f 6 | xxd -r -p | sha256sum

Mimi


^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE,
@ 2019-04-12 23:06 Sharifah Ahmad Mustahfa
  0 siblings, 0 replies; 414+ messages in thread
From: Sharifah Ahmad Mustahfa @ 2019-04-12 23:06 UTC (permalink / raw)
  To: linux-kernel

As-Salamu Alaykum, I need your help to transfer out the some amount of money, accumulated as undeclared excess profit made by me from the INVESTMENTS CAPITAL under my management in my bank. You will get 40% of the funds for your participation. Reply for more details thanks

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <20190319144116.400-1-mlevitsk@redhat.com>]

* Re:
       [not found] <20190319144116.400-1-mlevitsk@redhat.com>
@ 2019-03-20 11:03 ` Felipe Franciosi
  2019-03-20 19:08   ` Re: Maxim Levitsky
  0 siblings, 1 reply; 414+ messages in thread
From: Felipe Franciosi @ 2019-03-20 11:03 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: linux-nvme, linux-kernel, kvm, Jens Axboe, Alex Williamson,
	Keith Busch, Christoph Hellwig, Sagi Grimberg, Kirti Wankhede,
	David S . Miller, Mauro Carvalho Chehab, Greg Kroah-Hartman,
	Wolfram Sang, Nicolas Ferre, Paul E . McKenney, Paolo Bonzini,
	Liang Cunming, Liu Changpeng, Fam Zheng, Amnon Ilan, John Ferlan,
	Stefan Hajnoczi, Harris, James R, Thanos Makatos

> On Mar 19, 2019, at 2:41 PM, Maxim Levitsky <mlevitsk@redhat.com> wrote:
> 
> Date: Tue, 19 Mar 2019 14:45:45 +0200
> Subject: [PATCH 0/9] RFC: NVME VFIO mediated device
> 
> Hi everyone!
> 
> In this patch series, I would like to introduce my take on the problem of doing 
> as fast as possible virtualization of storage with emphasis on low latency.
> 
> In this patch series I implemented a kernel vfio based, mediated device that 
> allows the user to pass through a partition and/or whole namespace to a guest.

Hey Maxim!

I'm really excited to see this series, as it aligns to some extent with what we discussed in last year's KVM Forum VFIO BoF.

There's no arguing that we need a better story to efficiently virtualise NVMe devices. So far, for Qemu-based VMs, Changpeng's vhost-user-nvme is the best attempt at that. However, I seem to recall there was some pushback from qemu-devel in the sense that they would rather see investment in virtio-blk. I'm not sure what's the latest on that work and what are the next steps.

The pushback drove the discussion towards pursuing an mdev approach, which is why I'm excited to see your patches.

What I'm thinking is that passing through namespaces or partitions is very restrictive. It leaves no room to implement more elaborate virtualisation stacks like replicating data across multiple devices (local or remote), storage migration, software-managed thin provisioning, encryption, deduplication, compression, etc. In summary, anything that requires software intervention in the datapath. (Worth noting: vhost-user-nvme allows all of that to be easily done in SPDK's bdev layer.)

These complicated stacks should probably not be implemented in the kernel, though. So I'm wondering whether we could talk about mechanisms to allow efficient and performant userspace datapath intervention  in your approach or pursue a mechanism to completely offload the device emulation to userspace (and align with what SPDK has to offer).

Thoughts welcome!
Felipe

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-03-20 11:03 ` Felipe Franciosi
@ 2019-03-20 19:08   ` Maxim Levitsky
  2019-03-21 16:12     ` Re: Stefan Hajnoczi
  0 siblings, 1 reply; 414+ messages in thread
From: Maxim Levitsky @ 2019-03-20 19:08 UTC (permalink / raw)
  To: Felipe Franciosi
  Cc: Fam Zheng, kvm, Wolfram Sang, linux-nvme, linux-kernel,
	Keith Busch, Kirti Wankhede, Mauro Carvalho Chehab,
	Paul E . McKenney, Christoph Hellwig, Sagi Grimberg, Harris,
	James R, Liang Cunming, Jens Axboe, Alex Williamson,
	Stefan Hajnoczi, Thanos Makatos, John Ferlan, Liu Changpeng,
	Greg Kroah-Hartman, Nicolas Ferre, Paolo Bonzini, Amnon Ilan,
	David S . Miller

On Wed, 2019-03-20 at 11:03 +0000, Felipe Franciosi wrote:
> > On Mar 19, 2019, at 2:41 PM, Maxim Levitsky <mlevitsk@redhat.com> wrote:
> > 
> > Date: Tue, 19 Mar 2019 14:45:45 +0200
> > Subject: [PATCH 0/9] RFC: NVME VFIO mediated device
> > 
> > Hi everyone!
> > 
> > In this patch series, I would like to introduce my take on the problem of
> > doing 
> > as fast as possible virtualization of storage with emphasis on low latency.
> > 
> > In this patch series I implemented a kernel vfio based, mediated device
> > that 
> > allows the user to pass through a partition and/or whole namespace to a
> > guest.
> 
> Hey Maxim!
> 
> I'm really excited to see this series, as it aligns to some extent with what
> we discussed in last year's KVM Forum VFIO BoF.
> 
> There's no arguing that we need a better story to efficiently virtualise NVMe
> devices. So far, for Qemu-based VMs, Changpeng's vhost-user-nvme is the best
> attempt at that. However, I seem to recall there was some pushback from qemu-
> devel in the sense that they would rather see investment in virtio-blk. I'm
> not sure what's the latest on that work and what are the next steps.
I agree with that. All my benchmarks were against his vhost-user-nvme driver, and
I am able to get pretty much the same throughput and latency.

The SSD I tested on died just recently (Murphy's law), not due to a bug in my driver
but due to some internal fault (even though most of my tests were reads, plus
occasional 'nvme format's).
We are in the process of buying a replacement.

> 
> The pushback drove the discussion towards pursuing an mdev approach, which is
> why I'm excited to see your patches.
> 
> What I'm thinking is that passing through namespaces or partitions is very
> restrictive. It leaves no room to implement more elaborate virtualisation
> stacks like replicating data across multiple devices (local or remote),
> storage migration, software-managed thin provisioning, encryption,
> deduplication, compression, etc. In summary, anything that requires software
> intervention in the datapath. (Worth noting: vhost-user-nvme allows all of
> that to be easily done in SPDK's bdev layer.)

Hi Felipe!

I guess that my driver is not geared toward more complicated use cases like you
mentioned, but instead it is focused to get as fast as possible performance for
the common case.

One thing that I can do which would solve several of the above problems is to
accept a map between virtual and real logical blocks, pretty much in exactly
the same way as EPT does it.
Then userspace can map any portions of the device anywhere, while still keeping
the dataplane in the kernel, and having minimal overhead.

On top of that, note that the direction of IO virtualization is to do the dataplane
in hardware, which will probably give you even worse partition granularity /
features but will be the fastest option available,
like for instance SR-IOV, which already exists and just allows splitting by
namespaces without any more fine-grained control.

Think of nvme-mdev as a very low-level driver, which currently uses polling, but
eventually will use a PASID-based IOMMU to provide the guest with a raw PCI device.
The userspace / qemu can build on top of that with various software layers.

On top of that I am thinking to solve the problem of migration in Qemu, by
creating a 'vfio-nvme' driver which would bind vfio to the device exposed by
the kernel, and would pass through all the doorbells and queues to the guest,
while intercepting the admin queue. Such a driver I think can be made to support
migration while being able to run on top of both an SR-IOV device, my vfio-nvme albeit
with double admin queue emulation (it's a bit ugly but won't affect performance
at all) and on top of even a regular NVMe device vfio-assigned to the guest.

Best regards,
	Maxim Levitsky

> 
> These complicated stacks should probably not be implemented in the kernel,
> though. So I'm wondering whether we could talk about mechanisms to allow
> efficient and performant userspace datapath intervention  in your approach or
> pursue a mechanism to completely offload the device emulation to userspace
> (and align with what SPDK has to offer).
> 
> Thoughts welcome!
> Felipe
> _______________________________________________
> Linux-nvme mailing list
> Linux-nvme@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-03-20 19:08   ` Re: Maxim Levitsky
@ 2019-03-21 16:12     ` Stefan Hajnoczi
  2019-03-21 16:21       ` Re: Keith Busch
  0 siblings, 1 reply; 414+ messages in thread
From: Stefan Hajnoczi @ 2019-03-21 16:12 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: Felipe Franciosi, Fam Zheng, kvm, Wolfram Sang, linux-nvme,
	linux-kernel, Keith Busch, Kirti Wankhede, Mauro Carvalho Chehab,
	Paul E . McKenney, Christoph Hellwig, Sagi Grimberg, Harris,
	James R, Liang Cunming, Jens Axboe, Alex Williamson,
	Thanos Makatos, John Ferlan, Liu Changpeng, Greg Kroah-Hartman,
	Nicolas Ferre, Paolo Bonzini, Amnon Ilan, David S . Miller

[-- Attachment #1: Type: text/plain, Size: 4404 bytes --]

On Wed, Mar 20, 2019 at 09:08:37PM +0200, Maxim Levitsky wrote:
> On Wed, 2019-03-20 at 11:03 +0000, Felipe Franciosi wrote:
> > > On Mar 19, 2019, at 2:41 PM, Maxim Levitsky <mlevitsk@redhat.com> wrote:
> > > 
> > > Date: Tue, 19 Mar 2019 14:45:45 +0200
> > > Subject: [PATCH 0/9] RFC: NVME VFIO mediated device
> > > 
> > > Hi everyone!
> > > 
> > > In this patch series, I would like to introduce my take on the problem of
> > > doing 
> > > as fast as possible virtualization of storage with emphasis on low latency.
> > > 
> > > In this patch series I implemented a kernel vfio based, mediated device
> > > that 
> > > allows the user to pass through a partition and/or whole namespace to a
> > > guest.
> > 
> > Hey Maxim!
> > 
> > I'm really excited to see this series, as it aligns to some extent with what
> > we discussed in last year's KVM Forum VFIO BoF.
> > 
> > There's no arguing that we need a better story to efficiently virtualise NVMe
> > devices. So far, for Qemu-based VMs, Changpeng's vhost-user-nvme is the best
> > attempt at that. However, I seem to recall there was some pushback from qemu-
> > devel in the sense that they would rather see investment in virtio-blk. I'm
> > not sure what's the latest on that work and what are the next steps.
> I agree with that. All my benchmarks were agains his vhost-user-nvme driver, and
> I am able to get pretty much the same througput and latency.
> 
> The ssd I tested on died just recently (Murphy law), not due to bug in my driver
> but some internal fault (even though most of my tests were reads, plus
> occassional 'nvme format's.
> We are in process of buying an replacement.
> 
> > 
> > The pushback drove the discussion towards pursuing an mdev approach, which is
> > why I'm excited to see your patches.
> > 
> > What I'm thinking is that passing through namespaces or partitions is very
> > restrictive. It leaves no room to implement more elaborate virtualisation
> > stacks like replicating data across multiple devices (local or remote),
> > storage migration, software-managed thin provisioning, encryption,
> > deduplication, compression, etc. In summary, anything that requires software
> > intervention in the datapath. (Worth noting: vhost-user-nvme allows all of
> > that to be easily done in SPDK's bdev layer.)
> 
> Hi Felipe!
> 
> I guess that my driver is not geared toward more complicated use cases like you
> mentioned, but instead it is focused to get as fast as possible performance for
> the common case.
> 
> One thing that I can do which would solve several of the above problems is to
> accept an map betwent virtual and real logical blocks, pretty much in exactly
> the same way as EPT does it.
> Then userspace can map any portions of the device anywhere, while still keeping
> the dataplane in the kernel, and having minimal overhead.
> 
> On top of that, note that the direction of IO virtualization is to do dataplane
> in hardware, which will probably give you even worse partition granuality /
> features but will be the fastest option aviable,
> like for instance SR-IOV which alrady exists and just allows to split by
> namespaces without any more fine grained control.
> 
> Think of nvme-mdev as a very low level driver, which currntly uses polling, but
> eventually will use PASID based IOMMU to provide the guest with raw PCI device.
> The userspace / qemu can build on top of that with varios software layers.
> 
> On top of that I am thinking to solve the problem of migration in Qemu, by
> creating a 'vfio-nvme' driver which would bind vfio to bind to device exposed by
> the kernel, and would pass through all the doorbells and queues to the guest,
> while intercepting the admin queue. Such driver I think can be made to support
> migration while beeing able to run on top both SR-IOV device, my vfio-nvme abit
> with double admin queue emulation (its a bit ugly but won't affect performance
> at all) and on top of even regular NVME device vfio assigned to guest.

mdev-nvme seems like a duplication of SPDK.  The performance is not
better and the features are more limited, so why focus on this approach?

One argument might be that the kernel NVMe subsystem wants to offer this
functionality, and that loading the kernel module is more convenient than
managing SPDK for some users.

Thoughts?

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-03-21 16:12     ` Re: Stefan Hajnoczi
@ 2019-03-21 16:21       ` Keith Busch
  2019-03-21 16:41         ` Re: Felipe Franciosi
  0 siblings, 1 reply; 414+ messages in thread
From: Keith Busch @ 2019-03-21 16:21 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Maxim Levitsky, Fam Zheng, kvm, Wolfram Sang, linux-nvme,
	linux-kernel, Keith Busch, Kirti Wankhede, Mauro Carvalho Chehab,
	Paul E . McKenney, Christoph Hellwig, Sagi Grimberg, Harris,
	James R, Felipe Franciosi, Liang Cunming, Jens Axboe,
	Alex Williamson, Thanos Makatos, John Ferlan, Liu Changpeng,
	Greg Kroah-Hartman, Nicolas Ferre, Paolo Bonzini, Amnon Ilan,
	David S . Miller

On Thu, Mar 21, 2019 at 04:12:39PM +0000, Stefan Hajnoczi wrote:
> mdev-nvme seems like a duplication of SPDK.  The performance is not
> better and the features are more limited, so why focus on this approach?
> 
> One argument might be that the kernel NVMe subsystem wants to offer this
> functionality and loading the kernel module is more convenient than
> managing SPDK to some users.
> 
> Thoughts?

Doesn't SPDK bind a controller to a single process? mdev binds to
namespaces (or their partitions), so you could have many mdev's assigned
to many VMs accessing a single controller.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-03-21 16:21       ` Re: Keith Busch
@ 2019-03-21 16:41         ` Felipe Franciosi
  2019-03-21 17:04           ` Re: Maxim Levitsky
  0 siblings, 1 reply; 414+ messages in thread
From: Felipe Franciosi @ 2019-03-21 16:41 UTC (permalink / raw)
  To: Keith Busch
  Cc: Stefan Hajnoczi, Maxim Levitsky, Fam Zheng, kvm, Wolfram Sang,
	linux-nvme, linux-kernel, Keith Busch, Kirti Wankhede,
	Mauro Carvalho Chehab, Paul E . McKenney, Christoph Hellwig,
	Sagi Grimberg, Harris, James R, Liang Cunming, Jens Axboe,
	Alex Williamson, Thanos Makatos, John Ferlan, Liu Changpeng,
	Greg Kroah-Hartman, Nicolas Ferre, Paolo Bonzini, Amnon Ilan,
	David S . Miller

> On Mar 21, 2019, at 4:21 PM, Keith Busch <kbusch@kernel.org> wrote:
> 
> On Thu, Mar 21, 2019 at 04:12:39PM +0000, Stefan Hajnoczi wrote:
>> mdev-nvme seems like a duplication of SPDK.  The performance is not
>> better and the features are more limited, so why focus on this approach?
>> 
>> One argument might be that the kernel NVMe subsystem wants to offer this
>> functionality and loading the kernel module is more convenient than
>> managing SPDK to some users.
>> 
>> Thoughts?
> 
> Doesn't SPDK bind a controller to a single process? mdev binds to
> namespaces (or their partitions), so you could have many mdev's assigned
> to many VMs accessing a single controller.

Yes, it binds to a single process which can drive the datapath of multiple virtual controllers for multiple VMs (similar to what you described for mdev). You can therefore efficiently poll multiple VM submission queues (and multiple device completion queues) from a single physical CPU.

The same could be done in the kernel, but the code gets complicated as you add more functionality to it. As this is a direct interface with an untrusted front-end (the guest), it's also arguably safer to do in userspace.

Worth noting: you can eventually have a single physical core polling all sorts of virtual devices (eg. virtual storage or network controllers) very efficiently. And this is quite configurable, too. In the interest of fairness, performance or efficiency, you can choose to dynamically add or remove queues to the poll thread or spawn more threads and redistribute the work.

F.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-03-21 16:41         ` Re: Felipe Franciosi
@ 2019-03-21 17:04           ` Maxim Levitsky
  2019-03-22  7:54             ` Re: Felipe Franciosi
  0 siblings, 1 reply; 414+ messages in thread
From: Maxim Levitsky @ 2019-03-21 17:04 UTC (permalink / raw)
  To: Felipe Franciosi, Keith Busch
  Cc: Stefan Hajnoczi, Fam Zheng, kvm, Wolfram Sang, linux-nvme,
	linux-kernel, Keith Busch, Kirti Wankhede, Mauro Carvalho Chehab,
	Paul E . McKenney, Christoph Hellwig, Sagi Grimberg, Harris,
	James R, Liang Cunming, Jens Axboe, Alex Williamson,
	Thanos Makatos, John Ferlan, Liu Changpeng, Greg Kroah-Hartman,
	Nicolas Ferre, Paolo Bonzini, Amnon Ilan, David S . Miller

On Thu, 2019-03-21 at 16:41 +0000, Felipe Franciosi wrote:
> > On Mar 21, 2019, at 4:21 PM, Keith Busch <kbusch@kernel.org> wrote:
> > 
> > On Thu, Mar 21, 2019 at 04:12:39PM +0000, Stefan Hajnoczi wrote:
> > > mdev-nvme seems like a duplication of SPDK.  The performance is not
> > > better and the features are more limited, so why focus on this approach?
> > > 
> > > One argument might be that the kernel NVMe subsystem wants to offer this
> > > functionality and loading the kernel module is more convenient than
> > > managing SPDK to some users.
> > > 
> > > Thoughts?
> > 
> > Doesn't SPDK bind a controller to a single process? mdev binds to
> > namespaces (or their partitions), so you could have many mdev's assigned
> > to many VMs accessing a single controller.
> 
> Yes, it binds to a single process which can drive the datapath of multiple
> virtual controllers for multiple VMs (similar to what you described for mdev).
> You can therefore efficiently poll multiple VM submission queues (and multiple
> device completion queues) from a single physical CPU.
> 
> The same could be done in the kernel, but the code gets complicated as you add
> more functionality to it. As this is a direct interface with an untrusted
> front-end (the guest), it's also arguably safer to do in userspace.
> 
> Worth noting: you can eventually have a single physical core polling all sorts
> of virtual devices (eg. virtual storage or network controllers) very
> efficiently. And this is quite configurable, too. In the interest of fairness,
> performance or efficiency, you can choose to dynamically add or remove queues
> to the poll thread or spawn more threads and redistribute the work.
> 
> F.

Note though that SPDK doesn't support sharing the device between host and the
guests, it takes over the nvme device, thus it makes the kernel nvme driver
unbind from it.

My driver creates a polling thread per guest, but it's trivial to add an option to
use the same polling thread for many guests if there is a need for that.

Best regards,
	Maxim Levitsky



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-03-21 17:04           ` Re: Maxim Levitsky
@ 2019-03-22  7:54             ` Felipe Franciosi
  2019-03-22 10:32               ` Re: Maxim Levitsky
  2019-03-22 15:30               ` Re: Keith Busch
  0 siblings, 2 replies; 414+ messages in thread
From: Felipe Franciosi @ 2019-03-22  7:54 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: Keith Busch, Stefan Hajnoczi, Fam Zheng, kvm, Wolfram Sang,
	linux-nvme, linux-kernel, Keith Busch, Kirti Wankhede,
	Mauro Carvalho Chehab, Paul E . McKenney, Christoph Hellwig,
	Sagi Grimberg, Harris, James R, Liang Cunming, Jens Axboe,
	Alex Williamson, Thanos Makatos, John Ferlan, Liu Changpeng,
	Greg Kroah-Hartman, Nicolas Ferre, Paolo Bonzini, Amnon Ilan,
	David S . Miller



> On Mar 21, 2019, at 5:04 PM, Maxim Levitsky <mlevitsk@redhat.com> wrote:
> 
> On Thu, 2019-03-21 at 16:41 +0000, Felipe Franciosi wrote:
>>> On Mar 21, 2019, at 4:21 PM, Keith Busch <kbusch@kernel.org> wrote:
>>> 
>>> On Thu, Mar 21, 2019 at 04:12:39PM +0000, Stefan Hajnoczi wrote:
>>>> mdev-nvme seems like a duplication of SPDK.  The performance is not
>>>> better and the features are more limited, so why focus on this approach?
>>>> 
>>>> One argument might be that the kernel NVMe subsystem wants to offer this
>>>> functionality and loading the kernel module is more convenient than
>>>> managing SPDK to some users.
>>>> 
>>>> Thoughts?
>>> 
>>> Doesn't SPDK bind a controller to a single process? mdev binds to
>>> namespaces (or their partitions), so you could have many mdev's assigned
>>> to many VMs accessing a single controller.
>> 
>> Yes, it binds to a single process which can drive the datapath of multiple
>> virtual controllers for multiple VMs (similar to what you described for mdev).
>> You can therefore efficiently poll multiple VM submission queues (and multiple
>> device completion queues) from a single physical CPU.
>> 
>> The same could be done in the kernel, but the code gets complicated as you add
>> more functionality to it. As this is a direct interface with an untrusted
>> front-end (the guest), it's also arguably safer to do in userspace.
>> 
>> Worth noting: you can eventually have a single physical core polling all sorts
>> of virtual devices (eg. virtual storage or network controllers) very
>> efficiently. And this is quite configurable, too. In the interest of fairness,
>> performance or efficiency, you can choose to dynamically add or remove queues
>> to the poll thread or spawn more threads and redistribute the work.
>> 
>> F.
> 
> Note though that SPDK doesn't support sharing the device between host and the
> guests, it takes over the nvme device, thus it makes the kernel nvme driver
> unbind from it.

That is absolutely true. However, I find it not to be a problem in practice.

Hypervisor products, especially those caring about performance, efficiency and fairness, will dedicate NVMe devices for a particular purpose (eg. vDisk storage, cache, metadata) and will not share these devices for other use cases. That's because these products want to deterministically control the performance aspects of the device, which you just cannot do if you are sharing the device with a subsystem you do not control.

For scenarios where the device must be shared and such fine grained control is not required, it looks like using the kernel driver with io_uring offers very good performance with flexibility.

Cheers,
Felipe

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-03-22  7:54             ` Re: Felipe Franciosi
@ 2019-03-22 10:32               ` Maxim Levitsky
  2019-03-22 15:30               ` Re: Keith Busch
  1 sibling, 0 replies; 414+ messages in thread
From: Maxim Levitsky @ 2019-03-22 10:32 UTC (permalink / raw)
  To: Felipe Franciosi
  Cc: Keith Busch, Stefan Hajnoczi, Fam Zheng, kvm, Wolfram Sang,
	linux-nvme, linux-kernel, Keith Busch, Kirti Wankhede,
	Mauro Carvalho Chehab, Paul E . McKenney, Christoph Hellwig,
	Sagi Grimberg, Harris, James R, Liang Cunming, Jens Axboe,
	Alex Williamson, Thanos Makatos, John Ferlan, Liu Changpeng,
	Greg Kroah-Hartman, Nicolas Ferre, Paolo Bonzini, Amnon Ilan,
	David S . Miller

On Fri, 2019-03-22 at 07:54 +0000, Felipe Franciosi wrote:
> > On Mar 21, 2019, at 5:04 PM, Maxim Levitsky <mlevitsk@redhat.com> wrote:
> > 
> > On Thu, 2019-03-21 at 16:41 +0000, Felipe Franciosi wrote:
> > > > On Mar 21, 2019, at 4:21 PM, Keith Busch <kbusch@kernel.org> wrote:
> > > > 
> > > > On Thu, Mar 21, 2019 at 04:12:39PM +0000, Stefan Hajnoczi wrote:
> > > > > mdev-nvme seems like a duplication of SPDK.  The performance is not
> > > > > better and the features are more limited, so why focus on this
> > > > > approach?
> > > > > 
> > > > > One argument might be that the kernel NVMe subsystem wants to offer
> > > > > this
> > > > > functionality and loading the kernel module is more convenient than
> > > > > managing SPDK to some users.
> > > > > 
> > > > > Thoughts?
> > > > 
> > > > Doesn't SPDK bind a controller to a single process? mdev binds to
> > > > namespaces (or their partitions), so you could have many mdev's assigned
> > > > to many VMs accessing a single controller.
> > > 
> > > Yes, it binds to a single process which can drive the datapath of multiple
> > > virtual controllers for multiple VMs (similar to what you described for
> > > mdev).
> > > You can therefore efficiently poll multiple VM submission queues (and
> > > multiple
> > > device completion queues) from a single physical CPU.
> > > 
> > > The same could be done in the kernel, but the code gets complicated as you
> > > add
> > > more functionality to it. As this is a direct interface with an untrusted
> > > front-end (the guest), it's also arguably safer to do in userspace.
> > > 
> > > Worth noting: you can eventually have a single physical core polling all
> > > sorts
> > > of virtual devices (eg. virtual storage or network controllers) very
> > > efficiently. And this is quite configurable, too. In the interest of
> > > fairness,
> > > performance or efficiency, you can choose to dynamically add or remove
> > > queues
> > > to the poll thread or spawn more threads and redistribute the work.
> > > 
> > > F.
> > 
> > Note though that SPDK doesn't support sharing the device between host and
> > the
> > guests, it takes over the nvme device, thus it makes the kernel nvme driver
> > unbind from it.
> 
> That is absolutely true. However, I find it not to be a problem in practice.
> 
> Hypervisor products, specially those caring about performance, efficiency and
> fairness, will dedicate NVMe devices for a particular purpose (eg. vDisk
> storage, cache, metadata) and will not share these devices for other use
> cases. That's because these products want to deterministically control the
> performance aspects of the device, which you just cannot do if you are sharing
> the device with a subsystem you do not control.
> 
> For scenarios where the device must be shared and such fine grained control is
> not required, it looks like using the kernel driver with io_uring offers very
> good performance with flexibility

I see the host/guest partition in the following way:
The guest assigned partitions are for guests that need lowest possible latency,
and in between these guests it is possible to guarantee good enough level of
fairness in my driver.
For example, in the current implementation of my driver, each guest gets its own
host submission queue.

On the other hand, the host assigned partitions are for significantly higher
latency IO, with no guarantees, and/or for guests that need all the more
advanced features of full IO virtualization, for instance snapshots, thin
provisioning, replication/backup over network, etc.
io_uring can be used here to speed things up but it won't reach the nvme-mdev
levels of latency.

Furthermore, on NVME drives that support WRRU, it's possible to let queues of
guest's assigned partitions belong to the high priority class and let the
host queues use the regular medium/low priority class.
For drives that don't support WRRU, the IO throttling can be done in software on
the host queues.

Host assigned partitions also don't need polling, thus allowing polling to be
used only for guests that actually need low latency IO.
This reduces the number of cores that would be otherwise lost to polling,
because the less work the polling core does, the less latency it contributes to
overall latency, thus with fewer users, you could use fewer cores to achieve the
same levels of latency.

For Stefan's argument, we can look at it in a slightly different way too:
While the nvme-mdev can be seen as a duplication of SPDK, the SPDK can also be
seen as duplication of an existing kernel functionality which nvme-mdev can
reuse for free.

Best regards,
	Maxim Levitsky

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-03-22  7:54             ` Re: Felipe Franciosi
  2019-03-22 10:32               ` Re: Maxim Levitsky
@ 2019-03-22 15:30               ` Keith Busch
  2019-03-25 15:44                 ` Re: Felipe Franciosi
  1 sibling, 1 reply; 414+ messages in thread
From: Keith Busch @ 2019-03-22 15:30 UTC (permalink / raw)
  To: Felipe Franciosi
  Cc: Maxim Levitsky, Stefan Hajnoczi, Fam Zheng, kvm, Wolfram Sang,
	linux-nvme, linux-kernel, Keith Busch, Kirti Wankhede,
	Mauro Carvalho Chehab, Paul E . McKenney, Christoph Hellwig,
	Sagi Grimberg, Harris, James R, Liang Cunming, Jens Axboe,
	Alex Williamson, Thanos Makatos, John Ferlan, Liu Changpeng,
	Greg Kroah-Hartman, Nicolas Ferre, Paolo Bonzini, Amnon Ilan,
	David S . Miller

On Fri, Mar 22, 2019 at 07:54:50AM +0000, Felipe Franciosi wrote:
> > 
> > Note though that SPDK doesn't support sharing the device between host and the
> > guests, it takes over the nvme device, thus it makes the kernel nvme driver
> > unbind from it.
> 
> That is absolutely true. However, I find it not to be a problem in practice.
> 
> Hypervisor products, specially those caring about performance, efficiency and fairness, will dedicate NVMe devices for a particular purpose (eg. vDisk storage, cache, metadata) and will not share these devices for other use cases. That's because these products want to deterministically control the performance aspects of the device, which you just cannot do if you are sharing the device with a subsystem you do not control.

I don't know, it sounds like you've traded kernel syscalls for IPC,
and I don't think one performs better than the other.

> For scenarios where the device must be shared and such fine grained control is not required, it looks like using the kernel driver with io_uring offers very good performance with flexibility.

NVMe's IO Determinism features provide fine grained control for shared
devices. It's still uncommon to find hardware supporting that, though.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-03-22 15:30               ` Re: Keith Busch
@ 2019-03-25 15:44                 ` Felipe Franciosi
  0 siblings, 0 replies; 414+ messages in thread
From: Felipe Franciosi @ 2019-03-25 15:44 UTC (permalink / raw)
  To: Keith Busch
  Cc: Maxim Levitsky, Stefan Hajnoczi, Fam Zheng, kvm, Wolfram Sang,
	linux-nvme, linux-kernel, Keith Busch, Kirti Wankhede,
	Mauro Carvalho Chehab, Paul E . McKenney, Christoph Hellwig,
	Sagi Grimberg, Harris, James R, Liang Cunming, Jens Axboe,
	Alex Williamson, Thanos Makatos, John Ferlan, Liu Changpeng,
	Greg Kroah-Hartman, Nicolas Ferre, Paolo Bonzini, Amnon Ilan,
	David S . Miller

Hi Keith,

> On Mar 22, 2019, at 3:30 PM, Keith Busch <kbusch@kernel.org> wrote:
> 
> On Fri, Mar 22, 2019 at 07:54:50AM +0000, Felipe Franciosi wrote:
>>> 
>>> Note though that SPDK doesn't support sharing the device between host and the
>>> guests, it takes over the nvme device, thus it makes the kernel nvme driver
>>> unbind from it.
>> 
>> That is absolutely true. However, I find it not to be a problem in practice.
>> 
>> Hypervisor products, specially those caring about performance, efficiency and fairness, will dedicate NVMe devices for a particular purpose (eg. vDisk storage, cache, metadata) and will not share these devices for other use cases. That's because these products want to deterministically control the performance aspects of the device, which you just cannot do if you are sharing the device with a subsystem you do not control.
> 
> I don't know, it sounds like you've traded kernel syscalls for IPC,
> and I don't think one performs better than the other.

Sorry, I'm not sure I understand. My point is that if you are packaging a distro to be a hypervisor and you want to use a storage device for VM data, you _most likely_ won't be using that device for anything else. To that end, driving the device directly from your application definitely gives you more deterministic control.

> 
>> For scenarios where the device must be shared and such fine grained control is not required, it looks like using the kernel driver with io_uring offers very good performance with flexibility.
> 
> NVMe's IO Determinism features provide fine grained control for shared
> devices. It's still uncommon to find hardware supporting that, though.

Sure, but then your hypervisor needs to certify devices that support that. This will limit your HCL. Moreover, unless the feature is solid, well-established and works reliably on all devices you support, it's arguably preferable to have an architecture which gives you that control in software.

Cheers,
Felipe

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE,
@ 2019-03-13 23:49 LUIS EDUARDO CEPEDA CABRERA
  0 siblings, 0 replies; 414+ messages in thread
From: LUIS EDUARDO CEPEDA CABRERA @ 2019-03-13 23:49 UTC (permalink / raw)
  To: linux-kernel

Hello,

 i have a deal for you, can we work together ?

^ permalink raw reply	[flat|nested] 414+ messages in thread

* [PATCH] arch/arm/mm: Remove duplicate header
@ 2019-01-07 17:28 Souptick Joarder
  2019-01-17 11:23 ` Souptick Joarder
  0 siblings, 1 reply; 414+ messages in thread
From: Souptick Joarder @ 2019-01-07 17:28 UTC (permalink / raw)
  To: linux, mhocko, rppt, akpm
  Cc: linux-arm-kernel, linux-kernel, brajeswar.linux, sabyasachi.linux

Remove duplicate headers which are included twice.

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
---
 arch/arm/mm/mmu.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index f5cc1cc..dde3032 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -23,7 +23,6 @@
 #include <asm/sections.h>
 #include <asm/cachetype.h>
 #include <asm/fixmap.h>
-#include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/smp_plat.h>
 #include <asm/tlb.h>
@@ -36,7 +35,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/mach/pci.h>
-#include <asm/fixmap.h>
 
 #include "fault.h"
 #include "mm.h"
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re: [PATCH] arch/arm/mm: Remove duplicate header
  2019-01-07 17:28 [PATCH] arch/arm/mm: Remove duplicate header Souptick Joarder
@ 2019-01-17 11:23 ` Souptick Joarder
  2019-01-17 11:28   ` Mike Rapoport
  0 siblings, 1 reply; 414+ messages in thread
From: Souptick Joarder @ 2019-01-17 11:23 UTC (permalink / raw)
  To: Russell King - ARM Linux, Michal Hocko, rppt, Andrew Morton
  Cc: linux-arm-kernel, linux-kernel, Brajeswar Ghosh, Sabyasachi Gupta

On Mon, Jan 7, 2019 at 10:54 PM Souptick Joarder <jrdr.linux@gmail.com> wrote:
>
> Remove duplicate headers which are included twice.
>
> Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>

Any comment on this patch ?

> ---
>  arch/arm/mm/mmu.c | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
> index f5cc1cc..dde3032 100644
> --- a/arch/arm/mm/mmu.c
> +++ b/arch/arm/mm/mmu.c
> @@ -23,7 +23,6 @@
>  #include <asm/sections.h>
>  #include <asm/cachetype.h>
>  #include <asm/fixmap.h>
> -#include <asm/sections.h>
>  #include <asm/setup.h>
>  #include <asm/smp_plat.h>
>  #include <asm/tlb.h>
> @@ -36,7 +35,6 @@
>  #include <asm/mach/arch.h>
>  #include <asm/mach/map.h>
>  #include <asm/mach/pci.h>
> -#include <asm/fixmap.h>
>
>  #include "fault.h"
>  #include "mm.h"
> --
> 1.9.1
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH] arch/arm/mm: Remove duplicate header
  2019-01-17 11:23 ` Souptick Joarder
@ 2019-01-17 11:28   ` Mike Rapoport
  2019-01-31  5:54     ` Souptick Joarder
  0 siblings, 1 reply; 414+ messages in thread
From: Mike Rapoport @ 2019-01-17 11:28 UTC (permalink / raw)
  To: Souptick Joarder
  Cc: Russell King - ARM Linux, Michal Hocko, rppt, Andrew Morton,
	linux-arm-kernel, linux-kernel, Brajeswar Ghosh,
	Sabyasachi Gupta

On Thu, Jan 17, 2019 at 04:53:44PM +0530, Souptick Joarder wrote:
> On Mon, Jan 7, 2019 at 10:54 PM Souptick Joarder <jrdr.linux@gmail.com> wrote:
> >
> > Remove duplicate headers which are included twice.
> >
> > Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>

Acked-by: Mike Rapoport <rppt@linux.ibm.com>
 
> Any comment on this patch ?
> 
> > ---
> >  arch/arm/mm/mmu.c | 2 --
> >  1 file changed, 2 deletions(-)
> >
> > diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
> > index f5cc1cc..dde3032 100644
> > --- a/arch/arm/mm/mmu.c
> > +++ b/arch/arm/mm/mmu.c
> > @@ -23,7 +23,6 @@
> >  #include <asm/sections.h>
> >  #include <asm/cachetype.h>
> >  #include <asm/fixmap.h>
> > -#include <asm/sections.h>
> >  #include <asm/setup.h>
> >  #include <asm/smp_plat.h>
> >  #include <asm/tlb.h>
> > @@ -36,7 +35,6 @@
> >  #include <asm/mach/arch.h>
> >  #include <asm/mach/map.h>
> >  #include <asm/mach/pci.h>
> > -#include <asm/fixmap.h>
> >
> >  #include "fault.h"
> >  #include "mm.h"
> > --
> > 1.9.1
> >
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH] arch/arm/mm: Remove duplicate header
  2019-01-17 11:28   ` Mike Rapoport
@ 2019-01-31  5:54     ` Souptick Joarder
  2019-01-31 12:58       ` Vladimir Murzin
  0 siblings, 1 reply; 414+ messages in thread
From: Souptick Joarder @ 2019-01-31  5:54 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Russell King - ARM Linux, Michal Hocko, rppt, Andrew Morton,
	linux-arm-kernel, linux-kernel, Brajeswar Ghosh,
	Sabyasachi Gupta

On Thu, Jan 17, 2019 at 4:58 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
>
> On Thu, Jan 17, 2019 at 04:53:44PM +0530, Souptick Joarder wrote:
> > On Mon, Jan 7, 2019 at 10:54 PM Souptick Joarder <jrdr.linux@gmail.com> wrote:
> > >
> > > Remove duplicate headers which are included twice.
> > >
> > > Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
>
> Acked-by: Mike Rapoport <rppt@linux.ibm.com>
>
> > Any comment on this patch ?

If no further comment, can we get this patch in queue for 5.1 ?

> >
> > > ---
> > >  arch/arm/mm/mmu.c | 2 --
> > >  1 file changed, 2 deletions(-)
> > >
> > > diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
> > > index f5cc1cc..dde3032 100644
> > > --- a/arch/arm/mm/mmu.c
> > > +++ b/arch/arm/mm/mmu.c
> > > @@ -23,7 +23,6 @@
> > >  #include <asm/sections.h>
> > >  #include <asm/cachetype.h>
> > >  #include <asm/fixmap.h>
> > > -#include <asm/sections.h>
> > >  #include <asm/setup.h>
> > >  #include <asm/smp_plat.h>
> > >  #include <asm/tlb.h>
> > > @@ -36,7 +35,6 @@
> > >  #include <asm/mach/arch.h>
> > >  #include <asm/mach/map.h>
> > >  #include <asm/mach/pci.h>
> > > -#include <asm/fixmap.h>
> > >
> > >  #include "fault.h"
> > >  #include "mm.h"
> > > --
> > > 1.9.1
> > >
> >
>
> --
> Sincerely yours,
> Mike.
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-01-31  5:54     ` Souptick Joarder
@ 2019-01-31 12:58       ` Vladimir Murzin
  2019-02-01 12:32         ` Re: Souptick Joarder
  0 siblings, 1 reply; 414+ messages in thread
From: Vladimir Murzin @ 2019-01-31 12:58 UTC (permalink / raw)
  To: Souptick Joarder, Mike Rapoport
  Cc: Michal Hocko, Sabyasachi Gupta, Russell King - ARM Linux,
	linux-kernel, rppt, Brajeswar Ghosh, Andrew Morton,
	linux-arm-kernel

Hi Souptick,

On 1/31/19 5:54 AM, Souptick Joarder wrote:
> On Thu, Jan 17, 2019 at 4:58 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
>>
>> On Thu, Jan 17, 2019 at 04:53:44PM +0530, Souptick Joarder wrote:
>>> On Mon, Jan 7, 2019 at 10:54 PM Souptick Joarder <jrdr.linux@gmail.com> wrote:
>>>>
>>>> Remove duplicate headers which are included twice.
>>>>
>>>> Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
>>
>> Acked-by: Mike Rapoport <rppt@linux.ibm.com>
>>
>>> Any comment on this patch ?
> 
> If no further comment, can we get this patch in queue for 5.1 ?

It'd be nice to use proper tags in the subject
line. I'd suggest 

[PATCH] ARM: mm: Remove duplicate header

but you can get some inspiration from

git log --oneline --no-merges arch/arm/mm/

In case you want to route it via ARM tree you need to drop it into
Russell's patch system [1]. 

[1] https://www.armlinux.org.uk/developer/patches/

Cheers
Vladimir

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-01-31 12:58       ` Vladimir Murzin
@ 2019-02-01 12:32         ` Souptick Joarder
  2019-02-01 12:36           ` Re: Vladimir Murzin
  0 siblings, 1 reply; 414+ messages in thread
From: Souptick Joarder @ 2019-02-01 12:32 UTC (permalink / raw)
  To: Vladimir Murzin
  Cc: Mike Rapoport, Michal Hocko, Sabyasachi Gupta,
	Russell King - ARM Linux, linux-kernel, rppt, Brajeswar Ghosh,
	Andrew Morton, linux-arm-kernel

On Thu, Jan 31, 2019 at 6:28 PM Vladimir Murzin <vladimir.murzin@arm.com> wrote:
>
> Hi Souptick,
>
> On 1/31/19 5:54 AM, Souptick Joarder wrote:
> > On Thu, Jan 17, 2019 at 4:58 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> >>
> >> On Thu, Jan 17, 2019 at 04:53:44PM +0530, Souptick Joarder wrote:
> >>> On Mon, Jan 7, 2019 at 10:54 PM Souptick Joarder <jrdr.linux@gmail.com> wrote:
> >>>>
> >>>> Remove duplicate headers which are included twice.
> >>>>
> >>>> Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
> >>
> >> Acked-by: Mike Rapoport <rppt@linux.ibm.com>
> >>
> >>> Any comment on this patch ?
> >
> > If no further comment, can we get this patch in queue for 5.1 ?
>
> I'd be nice to use proper tags in subject
> line. I'd suggest
>
> [PATCH] ARM: mm: Remove duplicate header
>
> but you can get some inspiration form
>
> git log --oneline --no-merges arch/arm/mm/
>
> In case you want to route it via ARM tree you need to drop it into
> Russell's patch system [1].

How do I drop it into Russell's patch system, other than by posting it to
the mailing list? I don't know.
>
> [1] https://www.armlinux.org.uk/developer/patches/
>
> Cheers
> Vladimir

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-02-01 12:32         ` Re: Souptick Joarder
@ 2019-02-01 12:36           ` Vladimir Murzin
  2019-02-01 12:41             ` Re: Souptick Joarder
  0 siblings, 1 reply; 414+ messages in thread
From: Vladimir Murzin @ 2019-02-01 12:36 UTC (permalink / raw)
  To: Souptick Joarder
  Cc: Mike Rapoport, Michal Hocko, Sabyasachi Gupta,
	Russell King - ARM Linux, linux-kernel, rppt, Brajeswar Ghosh,
	Andrew Morton, linux-arm-kernel

On 2/1/19 12:32 PM, Souptick Joarder wrote:
> On Thu, Jan 31, 2019 at 6:28 PM Vladimir Murzin <vladimir.murzin@arm.com> wrote:
>>
>> Hi Souptick,
>>
>> On 1/31/19 5:54 AM, Souptick Joarder wrote:
>>> On Thu, Jan 17, 2019 at 4:58 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
>>>>
>>>> On Thu, Jan 17, 2019 at 04:53:44PM +0530, Souptick Joarder wrote:
>>>>> On Mon, Jan 7, 2019 at 10:54 PM Souptick Joarder <jrdr.linux@gmail.com> wrote:
>>>>>>
>>>>>> Remove duplicate headers which are included twice.
>>>>>>
>>>>>> Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
>>>>
>>>> Acked-by: Mike Rapoport <rppt@linux.ibm.com>
>>>>
>>>>> Any comment on this patch ?
>>>
>>> If no further comment, can we get this patch in queue for 5.1 ?
>>
>> I'd be nice to use proper tags in subject
>> line. I'd suggest
>>
>> [PATCH] ARM: mm: Remove duplicate header
>>
>> but you can get some inspiration form
>>
>> git log --oneline --no-merges arch/arm/mm/
>>
>> In case you want to route it via ARM tree you need to drop it into
>> Russell's patch system [1].
> 
> How to drop it to Russell's patch system other than posting it to
> mailing list ? I don't know.

https://www.armlinux.org.uk/developer/patches/info.php

Vladimir

>>
>> [1] https://www.armlinux.org.uk/developer/patches/
>>
>> Cheers
>> Vladimir
> 


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-02-01 12:36           ` Re: Vladimir Murzin
@ 2019-02-01 12:41             ` Souptick Joarder
  2019-02-01 13:02               ` Re: Vladimir Murzin
  2019-02-01 15:15               ` Re: Russell King - ARM Linux admin
  0 siblings, 2 replies; 414+ messages in thread
From: Souptick Joarder @ 2019-02-01 12:41 UTC (permalink / raw)
  To: Vladimir Murzin
  Cc: Mike Rapoport, Michal Hocko, Sabyasachi Gupta,
	Russell King - ARM Linux, linux-kernel, rppt, Brajeswar Ghosh,
	Andrew Morton, linux-arm-kernel

On Fri, Feb 1, 2019 at 6:06 PM Vladimir Murzin <vladimir.murzin@arm.com> wrote:
>
> On 2/1/19 12:32 PM, Souptick Joarder wrote:
> > On Thu, Jan 31, 2019 at 6:28 PM Vladimir Murzin <vladimir.murzin@arm.com> wrote:
> >>
> >> Hi Souptick,
> >>
> >> On 1/31/19 5:54 AM, Souptick Joarder wrote:
> >>> On Thu, Jan 17, 2019 at 4:58 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> >>>>
> >>>> On Thu, Jan 17, 2019 at 04:53:44PM +0530, Souptick Joarder wrote:
> >>>>> On Mon, Jan 7, 2019 at 10:54 PM Souptick Joarder <jrdr.linux@gmail.com> wrote:
> >>>>>>
> >>>>>> Remove duplicate headers which are included twice.
> >>>>>>
> >>>>>> Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
> >>>>
> >>>> Acked-by: Mike Rapoport <rppt@linux.ibm.com>
> >>>>
> >>>>> Any comment on this patch ?
> >>>
> >>> If no further comment, can we get this patch in queue for 5.1 ?
> >>
> >> I'd be nice to use proper tags in subject
> >> line. I'd suggest
> >>
> >> [PATCH] ARM: mm: Remove duplicate header
> >>
> >> but you can get some inspiration form
> >>
> >> git log --oneline --no-merges arch/arm/mm/
> >>
> >> In case you want to route it via ARM tree you need to drop it into
> >> Russell's patch system [1].
> >
> > How to drop it to Russell's patch system other than posting it to
> > mailing list ? I don't know.
>
> https://www.armlinux.org.uk/developer/patches/info.php

This link is not reachable.

>
> Vladimir
>
> >>
> >> [1] https://www.armlinux.org.uk/developer/patches/
> >>
> >> Cheers
> >> Vladimir
> >
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-02-01 12:41             ` Re: Souptick Joarder
@ 2019-02-01 13:02               ` Vladimir Murzin
  2019-02-01 15:15               ` Re: Russell King - ARM Linux admin
  1 sibling, 0 replies; 414+ messages in thread
From: Vladimir Murzin @ 2019-02-01 13:02 UTC (permalink / raw)
  To: Souptick Joarder
  Cc: Mike Rapoport, Michal Hocko, Sabyasachi Gupta,
	Russell King - ARM Linux, linux-kernel, rppt, Brajeswar Ghosh,
	Andrew Morton, linux-arm-kernel

On 2/1/19 12:41 PM, Souptick Joarder wrote:
> On Fri, Feb 1, 2019 at 6:06 PM Vladimir Murzin <vladimir.murzin@arm.com> wrote:
>>
>> On 2/1/19 12:32 PM, Souptick Joarder wrote:
>>> On Thu, Jan 31, 2019 at 6:28 PM Vladimir Murzin <vladimir.murzin@arm.com> wrote:
>>>>
>>>> Hi Souptick,
>>>>
>>>> On 1/31/19 5:54 AM, Souptick Joarder wrote:
>>>>> On Thu, Jan 17, 2019 at 4:58 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
>>>>>>
>>>>>> On Thu, Jan 17, 2019 at 04:53:44PM +0530, Souptick Joarder wrote:
>>>>>>> On Mon, Jan 7, 2019 at 10:54 PM Souptick Joarder <jrdr.linux@gmail.com> wrote:
>>>>>>>>
>>>>>>>> Remove duplicate headers which are included twice.
>>>>>>>>
>>>>>>>> Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
>>>>>>
>>>>>> Acked-by: Mike Rapoport <rppt@linux.ibm.com>
>>>>>>
>>>>>>> Any comment on this patch ?
>>>>>
>>>>> If no further comment, can we get this patch in queue for 5.1 ?
>>>>
>>>> I'd be nice to use proper tags in subject
>>>> line. I'd suggest
>>>>
>>>> [PATCH] ARM: mm: Remove duplicate header
>>>>
>>>> but you can get some inspiration form
>>>>
>>>> git log --oneline --no-merges arch/arm/mm/
>>>>
>>>> In case you want to route it via ARM tree you need to drop it into
>>>> Russell's patch system [1].
>>>
>>> How to drop it to Russell's patch system other than posting it to
>>> mailing list ? I don't know.
>>
>> https://www.armlinux.org.uk/developer/patches/info.php
> 
> This link is not reachable.
> 

Bad luck :(

Vladimir

>>
>> Vladimir
>>
>>>>
>>>> [1] https://www.armlinux.org.uk/developer/patches/
>>>>
>>>> Cheers
>>>> Vladimir
>>>
>>
> 


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-02-01 12:41             ` Re: Souptick Joarder
  2019-02-01 13:02               ` Re: Vladimir Murzin
@ 2019-02-01 15:15               ` Russell King - ARM Linux admin
  2019-02-01 15:22                 ` Re: Russell King - ARM Linux admin
  1 sibling, 1 reply; 414+ messages in thread
From: Russell King - ARM Linux admin @ 2019-02-01 15:15 UTC (permalink / raw)
  To: Souptick Joarder
  Cc: Vladimir Murzin, Mike Rapoport, Michal Hocko, Sabyasachi Gupta,
	linux-kernel, rppt, Brajeswar Ghosh, Andrew Morton,
	linux-arm-kernel

On Fri, Feb 01, 2019 at 06:11:21PM +0530, Souptick Joarder wrote:
> On Fri, Feb 1, 2019 at 6:06 PM Vladimir Murzin <vladimir.murzin@arm.com> wrote:
> >
> > On 2/1/19 12:32 PM, Souptick Joarder wrote:
> > > On Thu, Jan 31, 2019 at 6:28 PM Vladimir Murzin <vladimir.murzin@arm.com> wrote:
> > >>
> > >> Hi Souptick,
> > >>
> > >> On 1/31/19 5:54 AM, Souptick Joarder wrote:
> > >>> On Thu, Jan 17, 2019 at 4:58 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> > >>>>
> > >>>> On Thu, Jan 17, 2019 at 04:53:44PM +0530, Souptick Joarder wrote:
> > >>>>> On Mon, Jan 7, 2019 at 10:54 PM Souptick Joarder <jrdr.linux@gmail.com> wrote:
> > >>>>>>
> > >>>>>> Remove duplicate headers which are included twice.
> > >>>>>>
> > >>>>>> Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
> > >>>>
> > >>>> Acked-by: Mike Rapoport <rppt@linux.ibm.com>
> > >>>>
> > >>>>> Any comment on this patch ?
> > >>>
> > >>> If no further comment, can we get this patch in queue for 5.1 ?
> > >>
> > >> I'd be nice to use proper tags in subject
> > >> line. I'd suggest
> > >>
> > >> [PATCH] ARM: mm: Remove duplicate header
> > >>
> > >> but you can get some inspiration form
> > >>
> > >> git log --oneline --no-merges arch/arm/mm/
> > >>
> > >> In case you want to route it via ARM tree you need to drop it into
> > >> Russell's patch system [1].
> > >
> > > How to drop it to Russell's patch system other than posting it to
> > > mailing list ? I don't know.
> >
> > https://www.armlinux.org.uk/developer/patches/info.php
> 
> This link is not reachable.

In what way?  The site is certainly getting hits over ipv4 and ipv6.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2019-02-01 15:15               ` Re: Russell King - ARM Linux admin
@ 2019-02-01 15:22                 ` Russell King - ARM Linux admin
  0 siblings, 0 replies; 414+ messages in thread
From: Russell King - ARM Linux admin @ 2019-02-01 15:22 UTC (permalink / raw)
  To: Souptick Joarder
  Cc: Vladimir Murzin, Sabyasachi Gupta, Michal Hocko, linux-kernel,
	Mike Rapoport, rppt, Brajeswar Ghosh, Andrew Morton,
	linux-arm-kernel

On Fri, Feb 01, 2019 at 03:15:11PM +0000, Russell King - ARM Linux admin wrote:
> On Fri, Feb 01, 2019 at 06:11:21PM +0530, Souptick Joarder wrote:
> > On Fri, Feb 1, 2019 at 6:06 PM Vladimir Murzin <vladimir.murzin@arm.com> wrote:
> > >
> > > On 2/1/19 12:32 PM, Souptick Joarder wrote:
> > > > On Thu, Jan 31, 2019 at 6:28 PM Vladimir Murzin <vladimir.murzin@arm.com> wrote:
> > > >>
> > > >> Hi Souptick,
> > > >>
> > > >> On 1/31/19 5:54 AM, Souptick Joarder wrote:
> > > >>> On Thu, Jan 17, 2019 at 4:58 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> > > >>>>
> > > >>>> On Thu, Jan 17, 2019 at 04:53:44PM +0530, Souptick Joarder wrote:
> > > >>>>> On Mon, Jan 7, 2019 at 10:54 PM Souptick Joarder <jrdr.linux@gmail.com> wrote:
> > > >>>>>>
> > > >>>>>> Remove duplicate headers which are included twice.
> > > >>>>>>
> > > >>>>>> Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
> > > >>>>
> > > >>>> Acked-by: Mike Rapoport <rppt@linux.ibm.com>
> > > >>>>
> > > >>>>> Any comment on this patch ?
> > > >>>
> > > >>> If no further comment, can we get this patch in queue for 5.1 ?
> > > >>
> > > >> I'd be nice to use proper tags in subject
> > > >> line. I'd suggest
> > > >>
> > > >> [PATCH] ARM: mm: Remove duplicate header
> > > >>
> > > >> but you can get some inspiration form
> > > >>
> > > >> git log --oneline --no-merges arch/arm/mm/
> > > >>
> > > >> In case you want to route it via ARM tree you need to drop it into
> > > >> Russell's patch system [1].
> > > >
> > > > How to drop it to Russell's patch system other than posting it to
> > > > mailing list ? I don't know.
> > >
> > > https://www.armlinux.org.uk/developer/patches/info.php
> > 
> > This link is not reachable.
> 
> In what way?  The site is certainly getting hits over ipv4 and ipv6.

Ah, I see - the site is accessible over IPv6 using port 80 only, but
port 443 is blocked.  Problem is, I can't test IPv6 from "outside",
so I rely on people *reporting* when things stop working.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <CAMkWEXP4Mm5x9rdrKn9xRNVm7vxqoL62ftxb+UcJFAiJ+U9X3A@mail.gmail.com>]

* Re:
       [not found] <CAMkWEXP4Mm5x9rdrKn9xRNVm7vxqoL62ftxb+UcJFAiJ+U9X3A@mail.gmail.com>
@ 2018-10-22  0:26 ` Dave Airlie
  2018-10-21 20:23   ` Re: Michael Tirado
  0 siblings, 1 reply; 414+ messages in thread
From: Dave Airlie @ 2018-10-22  0:26 UTC (permalink / raw)
  To: mtirado418
  Cc: Dave Airlie, dri-devel, LKML, Gerd Hoffmann, Deucher, Alexander,
	Koenig, Christian, zhoucm1, Hongbo.He, Sean Paul,
	Gustavo Padovan, Maarten Lankhorst

On Mon, 22 Oct 2018 at 07:22, Michael Tirado <mtirado418@gmail.com> wrote:
>
> Mapping a drm "dumb" buffer fails on 32-bit system (i686) from what
> appears to be a truncated memory address that has been copied
> throughout several files. The bug manifests as an -EINVAL when calling
> mmap with the offset gathered from DRM_IOCTL_MODE_MAP_DUMB <--
> DRM_IOCTL_MODE_ADDFB <-- DRM_IOCTL_MODE_CREATE_DUMB.  I can provide
> test code if needed.
>
> The following patch will apply to 4.18 though I've only been able to
> test through qemu bochs driver and nouveau. Intel driver worked
> without any issues.  I'm not sure if everyone is going to want to
> share a constant, and the whitespace is screwed up from gmail's awful
> javascript client, so let me know if I should resend this with any
> specific changes.  I have also attached the file with preserved
> whitespace.
>

This shouldn't be necessary, did someone misbackport the mmap changes without:

drm: set FMODE_UNSIGNED_OFFSET for drm files

Dave.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2018-10-22  0:26 ` Re: Dave Airlie
@ 2018-10-21 20:23   ` Michael Tirado
  2018-10-22  1:50     ` Re: Dave Airlie
  0 siblings, 1 reply; 414+ messages in thread
From: Michael Tirado @ 2018-10-21 20:23 UTC (permalink / raw)
  To: airlied
  Cc: Airlied, dri-devel, LKML, kraxel, alexander.deucher,
	christian.koenig, David1.zhou, Hongbo.He, Sean Paul, Gustavo,
	maarten.lankhorst

On Mon, Oct 22, 2018 at 12:26 AM Dave Airlie <airlied@gmail.com> wrote:
>
> This shouldn't be necessary, did someone misbackport the mmap changes without:
>
> drm: set FMODE_UNSIGNED_OFFSET for drm files
>
> Dave.

The latest kernel I have had to patch was a 4.18-rc6.  I'll try with a
newer 4.19 and let you know if it decides to work.  If not I'll
prepare a test case for demonstration on qemu-system-i386.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2018-10-21 20:23   ` Re: Michael Tirado
@ 2018-10-22  1:50     ` Dave Airlie
  2018-10-21 22:20       ` Re: Michael Tirado
  2018-10-23  1:47       ` Re: Michael Tirado
  0 siblings, 2 replies; 414+ messages in thread
From: Dave Airlie @ 2018-10-22  1:50 UTC (permalink / raw)
  To: mtirado418
  Cc: Dave Airlie, dri-devel, LKML, Gerd Hoffmann, Deucher, Alexander,
	Koenig, Christian, zhoucm1, Hongbo.He, Sean Paul,
	Gustavo Padovan, Maarten Lankhorst

On Mon, 22 Oct 2018 at 10:49, Michael Tirado <mtirado418@gmail.com> wrote:
>
> On Mon, Oct 22, 2018 at 12:26 AM Dave Airlie <airlied@gmail.com> wrote:
> >
> > This shouldn't be necessary, did someone misbackport the mmap changes without:
> >
> > drm: set FMODE_UNSIGNED_OFFSET for drm files
> >
> > Dave.
>
> The latest kernel I have had to patch was a 4.18-rc6.  I'll try with a
> newer 4.19 and let you know if it decides to work.  If not I'll
> prepare a test case for demonstration on qemu-system-i386.

If you have custom userspace software, make sure it's using
AC_SYS_LARGEFILE or whatever the equivalent is in your build system.

64-bit file offsets are important.

Dave.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2018-10-22  1:50     ` Re: Dave Airlie
@ 2018-10-21 22:20       ` Michael Tirado
  2018-10-23  1:47       ` Re: Michael Tirado
  1 sibling, 0 replies; 414+ messages in thread
From: Michael Tirado @ 2018-10-21 22:20 UTC (permalink / raw)
  To: Dave Airlie
  Cc: Airlied, dri-devel, LKML, kraxel, alexander.deucher,
	christian.koenig, David1.zhou, Hongbo.He, Sean Paul, Gustavo,
	maarten.lankhorst

[-- Attachment #1: Type: text/plain, Size: 837 bytes --]

On Mon, Oct 22, 2018 at 1:50 AM Dave Airlie <airlied@gmail.com> wrote:
>
> On Mon, 22 Oct 2018 at 10:49, Michael Tirado <mtirado418@gmail.com> wrote:
> >
> > On Mon, Oct 22, 2018 at 12:26 AM Dave Airlie <airlied@gmail.com> wrote:
> > >
> > > This shouldn't be necessary, did someone misbackport the mmap changes without:
> If you have custom userspace software, make sure it's using
> AC_SYS_LARGEFILE or whatever the equivalant is in your build system.
>
> 64-bit file offsets are important.
>

That fixed it! -D_FILE_OFFSET_BITS=64 is the pre-processor define
needed. It's a bit more than unintuitive but I'm glad I don't need
this stupid patch anymore, Thanks.

In case anyone is further interested I have attached test program
since I spent the last hour or so chopping it up anyway :S   [ gcc -o
kms -D_FILE_OFFSET_BITS=64 main.c ]

[-- Attachment #2: main.c --]
[-- Type: application/octet-stream, Size: 17153 bytes --]

/* Copyright (C) 2017 Michael R. Tirado <mtirado418@gmail.com> -- GPLv3+
 *
 * This program is libre software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details. You should have
 * received a copy of the GNU General Public License version 3
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <malloc.h>
#include <signal.h>
#include <stdlib.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <drm/drm.h>
#include <drm/drm_mode.h>

#define STRERR strerror(errno)

/*
 * PTRBITCOUNT is kept (and still validated) only so that existing
 * -DPTRBITCOUNT=32 / -DPTRBITCOUNT=64 build flags keep working.  The
 * pointer conversion macros below no longer depend on it: they go through
 * uintptr_t, which has the native pointer width, so a pointer is never
 * truncated when the default of 32 is used on a 64-bit build.
 */
#ifndef PTRBITCOUNT
	#define PTRBITCOUNT 32
#endif
#if (PTRBITCOUNT != 32) && (PTRBITCOUNT != 64)
	#error "PTRBITCOUNT is undefined"
#endif

/* kernel structs use __u64 for pointer types */
#define ptr_from_krn(ptr) ((void *)(uintptr_t)(ptr))
#define ptr_to_krn(ptr)   ((uint64_t)(uintptr_t)(ptr))

/* upper bounds on the number of objects this program is willing to handle */
#ifndef MAX_FBS
	#define MAX_FBS   12
#endif
#ifndef MAX_CRTCS
	#define MAX_CRTCS 12
#endif
#ifndef MAX_CONNECTORS
	#define MAX_CONNECTORS 12
#endif
#ifndef MAX_ENCODERS
	#define MAX_ENCODERS 12
#endif
#ifndef MAX_PROPS
	#define MAX_PROPS 256
#endif
#ifndef MAX_MODES
	#define MAX_MODES 256
#endif

/*
 * drm_to_ptr:   64-bit integer taken from a kernel struct -> usable pointer
 * drm_from_ptr: pointer -> integer suitable for storing in a __u64 field
 */
#define drm_to_ptr(ptr)   ((void *)(uintptr_t)(ptr))
#define drm_from_ptr(ptr) ((uint64_t)(uintptr_t)(ptr))

/* zero-initialised allocation returned in integer form; 0 on failure */
#define drm_alloc(size) (drm_from_ptr(calloc(1, (size))))

/*
 * A dumb buffer that has been mapped into this process, together with the
 * framebuffer object created from it.  Filled in by alloc_sfb() and torn
 * down by destroy_sfb().
 */
struct drm_buffer
{
	uint32_t drm_id;  /* dumb buffer handle returned by CREATE_DUMB */
	uint32_t fb_id;   /* framebuffer id returned by ADDFB */
	uint32_t pitch;   /* pitch reported by CREATE_DUMB */
	uint32_t width;   /* width as stored in the CREATE_DUMB request */
	uint32_t height;  /* height as stored in the CREATE_DUMB request */
	uint32_t depth;   /* depth passed to ADDFB */
	uint32_t bpp;     /* bits per pixel as stored in the CREATE_DUMB request */
	char    *addr;    /* start of the mmap()ed buffer */
	size_t   size;    /* length of the mapping, as reported by CREATE_DUMB */
};
/*
 * The single connector / encoder / crtc chain this program works with.
 * conn is allocated by alloc_connector(); modes and cur_mode point into the
 * mode array that conn owns, so they are released together with conn.
 */
struct drm_display
{
	struct drm_mode_get_encoder encoder; /* filled by GETENCODER in drm_kms_connect_sfb() */
	struct drm_mode_crtc crtc;           /* filled by GETCRTC, then used for SETCRTC */
	struct drm_mode_get_connector *conn; /* do we need array for multi-screen? */
	struct drm_mode_modeinfo *modes; /* these both point to conn's mode array */
	struct drm_mode_modeinfo *cur_mode;
	uint32_t cur_mode_idx; /* index of cur_mode within modes */
	uint32_t mode_count;   /* number of entries in modes */
	uint32_t conn_id;      /* connector id that conn was queried with */
};
/*
 * Top-level handle returned by drm_mode_create() and released by
 * drm_kms_destroy().
 */
struct drm_kms
{
	struct drm_display display;     /* populated by drm_display_load() */
	struct drm_buffer *sfb;         /* framebuffer from alloc_sfb(), may be NULL */
	struct drm_mode_card_res *res;  /* resources from alloc_mode_card_res(), may be NULL */
	int card_fd;                    /* open descriptor for the /dev/dri device */
};

/*
 * Read element idx from an array of 32-bit ids whose address is carried as
 * the 64-bit integer addr.  No bounds or NULL checking is done here.
 */
static uint32_t drm_get_id(uint64_t addr, uint32_t idx)
{
	const uint32_t *ids = drm_to_ptr(addr);

	return ids[idx];
}

/*
 * Release a resource description created by alloc_mode_card_res(),
 * including its four id arrays.
 *
 * Returns 0 on success, -1 if res is NULL.
 */
static int free_mode_card_res(struct drm_mode_card_res *res)
{
	if (res == NULL)
		return -1;

	/* free(NULL) is a no-op, so arrays that were never allocated are fine */
	free(drm_to_ptr(res->fb_id_ptr));
	free(drm_to_ptr(res->crtc_id_ptr));
	free(drm_to_ptr(res->encoder_id_ptr));
	free(drm_to_ptr(res->connector_id_ptr));

	free(res);
	return 0;
}

static struct drm_mode_card_res *alloc_mode_card_res(int fd)
{
	struct drm_mode_card_res res;
	struct drm_mode_card_res *ret;
	uint32_t count_fbs, count_crtcs, count_connectors, count_encoders;

	memset(&res, 0, sizeof(struct drm_mode_card_res));
	if (ioctl(fd, DRM_IOCTL_MODE_GETRESOURCES, &res)) {
		printf("ioctl(DRM_IOCTL_MODE_GETRESOURCES, &res): %s\n", STRERR);
		return NULL;
	}
	if (res.count_fbs > MAX_FBS
			|| res.count_crtcs > MAX_CRTCS
			|| res.count_encoders > MAX_ENCODERS
			|| res.count_connectors > MAX_CONNECTORS) {
		printf("resource limit reached, see defines.h\n");
		return NULL;
	}
	if (res.count_fbs) {
		res.fb_id_ptr = drm_alloc(sizeof(uint32_t)*res.count_fbs);
		if (!res.fb_id_ptr)
			goto alloc_err;
	}
	if (res.count_crtcs) {
		res.crtc_id_ptr = drm_alloc(sizeof(uint32_t)*res.count_crtcs);
		if (!res.crtc_id_ptr)
			goto alloc_err;
	}
	if (res.count_encoders) {
		res.encoder_id_ptr = drm_alloc(sizeof(uint32_t)*res.count_encoders);
		if (!res.encoder_id_ptr)
			goto alloc_err;
	}
	if (res.count_connectors) {
		res.connector_id_ptr = drm_alloc(sizeof(uint32_t)*res.count_connectors);
		if (!res.connector_id_ptr)
			goto alloc_err;
	}
	count_fbs = res.count_fbs;
	count_crtcs = res.count_crtcs;
	count_encoders = res.count_encoders;
	count_connectors = res.count_connectors;

	if (ioctl(fd, DRM_IOCTL_MODE_GETRESOURCES, &res) == -1) {
		printf("ioctl(DRM_IOCTL_MODE_GETRESOURCES, &res): %s\n", STRERR);
		goto free_err;
	}

	if (count_fbs != res.count_fbs
			|| count_crtcs != res.count_crtcs
			|| count_encoders != res.count_encoders
			|| count_connectors != res.count_connectors) {
		errno = EAGAIN;
		goto free_err;
	}

	ret = calloc(1, sizeof(struct drm_mode_card_res));
	if (!ret)
		goto alloc_err;

	memcpy(ret, &res, sizeof(struct drm_mode_card_res));
	return ret;

alloc_err:
	errno = ENOMEM;
free_err:
	free(drm_to_ptr(res.fb_id_ptr));
	free(drm_to_ptr(res.crtc_id_ptr));
	free(drm_to_ptr(res.connector_id_ptr));
	free(drm_to_ptr(res.encoder_id_ptr));
	return NULL;
}


static struct drm_mode_get_connector *alloc_connector(int fd, uint32_t conn_id)
{
	struct drm_mode_get_connector conn;
	struct drm_mode_get_connector *ret;
	uint32_t count_modes, count_props, count_encoders;

	memset(&conn, 0, sizeof(struct drm_mode_get_connector));
	conn.connector_id = conn_id;

	if (ioctl(fd, DRM_IOCTL_MODE_GETCONNECTOR, &conn) == -1) {
		printf("ioctl(DRM_IOCTL_MODE_GETCONNECTOR, &conn): %s\n", STRERR);
		return NULL;
	}
	if (conn.count_modes > MAX_MODES
			|| conn.count_props > MAX_PROPS
			|| conn.count_encoders > MAX_ENCODERS) {
		printf("resource limit reached, see defines.h\n");
		return NULL;
	}
	if (conn.count_modes) {
		conn.modes_ptr = drm_alloc(sizeof(struct drm_mode_modeinfo)
					   * conn.count_modes);
		if (!conn.modes_ptr)
			goto alloc_err;
	}
	if (conn.count_props) {
		conn.props_ptr = drm_alloc(sizeof(uint32_t)*conn.count_props);
		if (!conn.props_ptr)
			goto alloc_err;
		conn.prop_values_ptr = drm_alloc(sizeof(uint64_t)*conn.count_props);
		if (!conn.prop_values_ptr)
			goto alloc_err;
	}
	if (conn.count_encoders) {
		conn.encoders_ptr = drm_alloc(sizeof(uint32_t)*conn.count_encoders);
		if (!conn.encoders_ptr)
			goto alloc_err;
	}
	count_modes = conn.count_modes;
	count_props = conn.count_props;
	count_encoders = conn.count_encoders;

	if (ioctl(fd, DRM_IOCTL_MODE_GETCONNECTOR, &conn) == -1) {
		printf("ioctl(DRM_IOCTL_MODE_GETCONNECTOR, &conn): %s\n", STRERR);
		goto free_err;
	}

	if (count_modes != conn.count_modes
			|| count_props != conn.count_props
			|| count_encoders != conn.count_encoders) {
		errno = EAGAIN;
		goto free_err;
	}

	ret = calloc(1, sizeof(struct drm_mode_get_connector));
	if (!ret)
		goto alloc_err;

	memcpy(ret, &conn, sizeof(struct drm_mode_get_connector));
	return ret;

alloc_err:
	errno = ENOMEM;
free_err:
	free(drm_to_ptr(conn.modes_ptr));
	free(drm_to_ptr(conn.props_ptr));
	free(drm_to_ptr(conn.encoders_ptr));
	free(drm_to_ptr(conn.prop_values_ptr));
	return NULL;
}

/*
 * Return the connector's mode array and store its length in *count.
 * Returns NULL, leaving *count untouched, when conn or count is NULL.
 * The array is owned by conn and must not be freed by the caller.
 */
static struct drm_mode_modeinfo *get_connector_modeinfo(struct drm_mode_get_connector *conn,  uint32_t *count)
{
	struct drm_mode_modeinfo *modes = NULL;

	if (conn != NULL && count != NULL) {
		*count = conn->count_modes;
		modes = drm_to_ptr(conn->modes_ptr);
	}
	return modes;
}

/*
 * Release a connector description created by alloc_connector(), including
 * its mode, property, encoder and property-value arrays.
 *
 * Returns 0 on success, -1 if conn is NULL.
 */
static int free_connector(struct drm_mode_get_connector *conn)
{
	if (conn == NULL)
		return -1;

	/* free(NULL) is a no-op, so arrays that were never allocated are fine */
	free(drm_to_ptr(conn->modes_ptr));
	free(drm_to_ptr(conn->props_ptr));
	free(drm_to_ptr(conn->encoders_ptr));
	free(drm_to_ptr(conn->prop_values_ptr));

	free(conn);
	return 0;
}


/*
 * Attach self->sfb to the display: look up the connector's encoder and that
 * encoder's crtc, then program the crtc with the framebuffer, the connector
 * and display.cur_mode via DRM_IOCTL_MODE_SETCRTC.
 *
 * When the connector reports encoder id 0, or the encoder reports crtc id 0,
 * the first entry of the corresponding id array is used instead.  That
 * fallback now fails cleanly if the array is empty, rather than
 * dereferencing a NULL pointer.
 *
 * Returns 0 on success, -1 on any failure (a message is printed).
 */
static int drm_kms_connect_sfb(struct drm_kms *self)
{
	struct drm_display *display;
	struct drm_mode_get_connector *conn;
	struct drm_mode_modeinfo *cur_mode;
	struct drm_mode_get_encoder *encoder;
	struct drm_mode_crtc *crtc;

	/* &self->display can never be NULL; self itself is what needs checking */
	if (!self || !self->sfb)
		return -1;
	display = &self->display;
	if (!display->conn || !display->cur_mode)
		return -1;

	conn = display->conn;
	cur_mode = display->cur_mode;
	encoder = &display->encoder;
	crtc = &display->crtc;
	memset(crtc, 0, sizeof(struct drm_mode_crtc));
	memset(encoder,  0, sizeof(struct drm_mode_get_encoder));

	/* XXX: there can be multiple encoders, have not investigated this much */
	if (conn->encoder_id == 0) {
		if (conn->count_encoders == 0 || !conn->encoders_ptr) {
			printf("conn->encoder_id was 0 and connector has no encoders\n");
			return -1;
		}
		printf("conn->encoder_id was 0, defaulting to encoder[0]\n");
		conn->encoder_id = ((uint32_t *)drm_to_ptr(conn->encoders_ptr))[0];
	}
	encoder->encoder_id = conn->encoder_id;

	if (ioctl(self->card_fd, DRM_IOCTL_MODE_GETENCODER, encoder) == -1) {
		printf("ioctl(DRM_IOCTL_MODE_GETENCODER): %s\n", STRERR);
		return -1;
	}

	if (encoder->crtc_id == 0) {
		if (!self->res || self->res->count_crtcs == 0
				|| !self->res->crtc_id_ptr) {
			printf("encoder->crtc_id was 0 and card has no crtcs\n");
			return -1;
		}
		printf("encoder->crtc_id was 0, defaulting to crtc[0]\n");
		encoder->crtc_id = ((uint32_t *)drm_to_ptr(self->res->crtc_id_ptr))[0];
	}
	crtc->crtc_id = encoder->crtc_id;

	if (ioctl(self->card_fd, DRM_IOCTL_MODE_GETCRTC, crtc) == -1) {
		printf("ioctl(DRM_IOCTL_MODE_GETCRTC): %s\n", STRERR);
		return -1;
	}

	/* set crtc mode: one connector, our framebuffer, the selected mode */
	crtc->fb_id = self->sfb->fb_id;
	crtc->set_connectors_ptr = drm_from_ptr((void *)&conn->connector_id);
	crtc->count_connectors = 1;
	crtc->mode = *cur_mode;
	crtc->mode_valid = 1;
	if (ioctl(self->card_fd, DRM_IOCTL_MODE_SETCRTC, crtc) == -1) {
		printf("ioctl(DRM_IOCTL_MODE_SETCRTC): %s\n", STRERR);
		return -1;
	}
	return 0;
}

/*
 * stupid frame buffer
 *
 * Create a dumb buffer of width x height at bpp bits per pixel, register it
 * as a framebuffer with the given depth, map it into this process and fill
 * it with the byte 0x27.
 *
 * The mmap offset handed back by DRM_IOCTL_MODE_MAP_DUMB is a 64-bit value.
 * If it (or the buffer size) does not survive conversion to off_t / size_t,
 * e.g. on a 32-bit build compiled without -D_FILE_OFFSET_BITS=64, the
 * function fails with errno set to EOVERFLOW instead of passing a truncated
 * value to mmap().
 *
 * Returns a heap-allocated struct drm_buffer (release with destroy_sfb()),
 * or NULL on failure.  On failure everything created so far is undone and
 * errno is left as set by the failing step.
 */
static struct drm_buffer *alloc_sfb(int card_fd,
			     uint32_t width,
			     uint32_t height,
			     uint32_t depth,
			     uint32_t bpp)
{
	struct drm_mode_create_dumb cdumb;
	struct drm_mode_map_dumb    moff;
	struct drm_mode_fb_cmd      cmd;
	struct drm_buffer *ret;
	void  *fbmap;
	int    saved_errno;

	memset(&cdumb, 0, sizeof(cdumb));
	memset(&moff,  0, sizeof(moff));
	memset(&cmd,   0, sizeof(cmd));

	/* create dumb buffer; flags, pitch, size and handle stay zeroed */
	cdumb.width  = width;
	cdumb.height = height;
	cdumb.bpp    = bpp;
	if (ioctl(card_fd, DRM_IOCTL_MODE_CREATE_DUMB, &cdumb) == -1) {
		printf("ioctl(DRM_IOCTL_MODE_CREATE_DUMB): %s\n", STRERR);
		return NULL;
	}

	/* add framebuffer object */
	cmd.width  = cdumb.width;
	cmd.height = cdumb.height;
	cmd.bpp    = cdumb.bpp;
	cmd.pitch  = cdumb.pitch;
	cmd.depth  = depth;
	cmd.handle = cdumb.handle;
	if (ioctl(card_fd, DRM_IOCTL_MODE_ADDFB, &cmd) == -1) {
		printf("ioctl(DRM_IOCTL_MODE_ADDFB): %s\n", STRERR);
		goto err_destroy_dumb;
	}

	/* get mmap offset */
	moff.handle = cdumb.handle;
	if (ioctl(card_fd, DRM_IOCTL_MODE_MAP_DUMB, &moff) == -1) {
		printf("ioctl(DRM_IOCTL_MODE_MAP_DUMB): %s\n", STRERR);
		goto err_rmfb;
	}

	/* make sure the casts below do not lose bits */
	if ((uint64_t)(off_t)moff.offset != (uint64_t)moff.offset
			|| (uint64_t)(size_t)cdumb.size != (uint64_t)cdumb.size) {
		printf("mmap offset 0x%llx or size 0x%llx does not fit off_t/size_t, "
		       "rebuild with -D_FILE_OFFSET_BITS=64\n",
		       (unsigned long long)moff.offset,
		       (unsigned long long)cdumb.size);
		errno = EOVERFLOW;
		goto err_rmfb;
	}

	/* XXX this is probably better off as MAP_PRIVATE, we can't prime
	 * the main framebuffer if it's "dumb", AFAIK */
	fbmap = mmap(0, (size_t)cdumb.size, PROT_READ|PROT_WRITE,
			MAP_SHARED, card_fd, (off_t)moff.offset);
	if (fbmap == MAP_FAILED) {
		printf("framebuffer mmap failed: %s\n", STRERR);
		goto err_rmfb;
	}

	ret = calloc(1, sizeof(struct drm_buffer));
	if (!ret) {
		printf("-ENOMEM\n");
		saved_errno = errno;
		munmap(fbmap, (size_t)cdumb.size);
		errno = saved_errno;
		goto err_rmfb;
	}
	ret->addr     = fbmap;
	ret->size     = (size_t)cdumb.size;
	ret->pitch    = cdumb.pitch;
	ret->width    = cdumb.width;
	ret->height   = cdumb.height;
	ret->bpp      = cdumb.bpp;
	ret->depth    = cmd.depth;
	ret->fb_id    = cmd.fb_id;
	ret->drm_id   = cdumb.handle;
	memset(fbmap, 0x27, (size_t)cdumb.size);
	return ret;

	/* unwind in reverse order of creation, keeping the original errno */
err_rmfb:
	saved_errno = errno;
	ioctl(card_fd, DRM_IOCTL_MODE_RMFB, &cmd.fb_id);
	errno = saved_errno;
err_destroy_dumb:
	saved_errno = errno;
	ioctl(card_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &cdumb.handle);
	errno = saved_errno;
	return NULL;
}

/*
 * Tear down a buffer created by alloc_sfb(): unmap it, remove the
 * framebuffer object, destroy the dumb buffer and free the descriptor.
 * Failures of the individual steps are reported but do not stop the
 * teardown.
 *
 * Returns 0, or -1 if sfb is NULL.
 */
static int destroy_sfb(int card_fd, struct drm_buffer *sfb)
{
	int rc;

	if (sfb == NULL)
		return -1;

	rc = munmap(sfb->addr, sfb->size);
	if (rc == -1)
		printf("munmap: %s\n", STRERR);

	rc = ioctl(card_fd, DRM_IOCTL_MODE_RMFB, &sfb->fb_id);
	if (rc != 0)
		printf("ioctl(DRM_IOCTL_MODE_RMFB): %s\n", STRERR);

	rc = ioctl(card_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &sfb->drm_id);
	if (rc != 0)
		printf("ioctl(DRM_IOCTL_MODE_DESTROY_DUMB): %s\n", STRERR);

	free(sfb);
	return 0;
}
/*
 * Issue DRM_IOCTL_SET_MASTER on card_fd.
 * Returns 0 on success, -1 (after printing the error) on failure.
 */
static int card_set_master(int card_fd)
{
	int rc = ioctl(card_fd, DRM_IOCTL_SET_MASTER, 0);

	if (rc == 0)
		return 0;

	printf("ioctl(DRM_IOCTL_SET_MASTER, 0): %s\n", STRERR);
	return -1;
}
/*
 * Issue DRM_IOCTL_DROP_MASTER on card_fd.
 * Returns 0 on success, -1 (after printing the error) on failure.
 */
static int card_drop_master(int card_fd)
{
	int rc = ioctl(card_fd, DRM_IOCTL_DROP_MASTER, 0);

	if (rc == 0)
		return 0;

	printf("ioctl(DRM_IOCTL_DROP_MASTER, 0): %s\n", STRERR);
	return -1;
}
/*
 * Release the connector owned by display (if any) and reset the whole
 * structure to zero.  The structure itself is not freed.  Always returns 0.
 */
static int drm_display_destroy(struct drm_display *display)
{
	struct drm_mode_get_connector *conn = display->conn;

	if (conn != NULL)
		free_connector(conn);

	/* modes and cur_mode pointed into conn's array; clear them too */
	memset(display, 0, sizeof(*display));
	return 0;
}
/*
 * Destroy a handle returned by drm_mode_create(): release the framebuffer,
 * the card resources and the display state, close the card descriptor and
 * free the handle itself.  self must not be NULL.  Always returns 0.
 */
int drm_kms_destroy(struct drm_kms *self)
{
	struct drm_buffer *sfb = self->sfb;
	struct drm_mode_card_res *res = self->res;

	if (sfb != NULL)
		destroy_sfb(self->card_fd, sfb);
	if (res != NULL)
		free_mode_card_res(res);
	drm_display_destroy(&self->display);

	close(self->card_fd);

	/* scrub the handle before releasing it */
	memset(self, 0, sizeof(*self));
	free(self);
	return 0;
}
/*
 * Choose a mode from modes[0..count).
 *
 * A width or height of 0 means "no limit" (it is replaced by 0xffff).
 * Modes wider or taller than the limits are skipped, as are modes whose
 * hdisplay is not a multiple of 16.  Among the rest, a candidate replaces
 * the current pick when its hdisplay or its vdisplay is larger; when both
 * are equal it replaces the pick only if its vrefresh is closer to refresh.
 *
 * Returns the index of the chosen mode, or -1 (after printing a message)
 * when no mode qualifies.
 */
static int get_mode_idx(struct drm_mode_modeinfo *modes,
			uint16_t count,
			uint16_t width,
			uint16_t height,
			uint16_t refresh)
{
	int best = -1;
	int idx;

	if (width == 0)
		width = 0xffff;
	if (height == 0)
		height = 0xffff;

	for (idx = 0; idx < count; ++idx) {
		/* too large for the request */
		if (modes[idx].hdisplay > width || modes[idx].vdisplay > height)
			continue;
		/* pretend these radical modes don't exist for now */
		if (modes[idx].hdisplay % 16 != 0)
			continue;

		/* first usable mode becomes the initial pick */
		if (best < 0) {
			best = idx;
			continue;
		}

		if (modes[idx].hdisplay > modes[best].hdisplay
				|| modes[idx].vdisplay > modes[best].vdisplay) {
			best = idx;
		} else if (modes[idx].hdisplay == modes[best].hdisplay
				&& modes[idx].vdisplay == modes[best].vdisplay) {
			/* same size: prefer the refresh rate nearest the request */
			if (abs(refresh - modes[idx].vrefresh)
				  < abs(refresh - modes[best].vrefresh))
				best = idx;
		}
	}

	if (best >= 0)
		return best;

	printf("could not find any usable modes for (%dx%d@%dhz)\n",
			width, height, refresh);
	return -1;
}
/*
 * TODO handle hotplugging
 *
 * Populate out for the card's first connector: fetch the connector
 * description, then pick a mode via get_mode_idx() using the requested
 * width, height and refresh rate.
 *
 * Fails cleanly when the card reported no connectors at all; previously
 * that case read through a NULL id array.
 *
 * Returns 0 on success.  On failure returns -1 and leaves out zeroed, with
 * any connector that was allocated already released.
 */
static int drm_display_load(struct drm_kms *self,
		     uint16_t req_width,
		     uint16_t req_height,
		     uint16_t req_refresh,
		     struct drm_display *out)
{
	uint32_t conn_id;
	int idx = -1;

	if (!self || !self->res || !out)
		return -1;

	/* the id array is only allocated when the count is non-zero */
	if (self->res->count_connectors == 0 || !self->res->connector_id_ptr) {
		printf("card reports no connectors\n");
		return -1;
	}

	/* FIXME uses primary connector? "0" */
	conn_id = drm_get_id(self->res->connector_id_ptr, 0);
	out->conn = alloc_connector(self->card_fd, conn_id);
	if (!out->conn) {
		printf("unable to create drm connector structure\n");
		return -1;
	}

	out->conn_id = conn_id;
	out->modes = get_connector_modeinfo(out->conn, &out->mode_count);
	idx = get_mode_idx(out->modes, out->mode_count,
			   req_width, req_height, req_refresh);
	if (idx < 0)
		goto free_err;

	out->cur_mode_idx = (uint32_t)idx;
	out->cur_mode = &out->modes[out->cur_mode_idx];
	return 0;
free_err:
	drm_display_destroy(out);
	return -1;
}
/*
 * Open /dev/dri/<devname>, become DRM master on it, load the card
 * resources, pick a display mode close to the request and allocate a
 * 32 bpp / depth 24 dumb framebuffer of that size.  Unless no_connect is
 * non-zero, the framebuffer is also attached to the crtc.
 *
 * devname      device node name below /dev/dri (e.g. "card0")
 * no_connect   non-zero: create everything but do not call SETCRTC
 * req_width    maximum mode width,  0 for no limit
 * req_height   maximum mode height, 0 for no limit
 * req_refresh  preferred refresh rate
 *
 * Returns a handle to release with drm_kms_destroy(), or NULL on failure.
 * The card descriptor is closed on every failure path.
 */
struct drm_kms *drm_mode_create(char *devname,
				int no_connect,
				uint16_t req_width,
				uint16_t req_height,
				uint16_t req_refresh)
{
	char devpath[128];
	struct drm_kms *self;
	struct drm_mode_modeinfo *cur_mode;
	int card_fd;
	int len;

	if (!devname) {
		printf("drm_mode_create: no device name given\n");
		return NULL;
	}

	/* refuse to open a silently truncated path */
	len = snprintf(devpath, sizeof(devpath), "/dev/dri/%s", devname);
	if (len < 0 || (size_t)len >= sizeof(devpath)) {
		printf("drm_mode_create: device name too long\n");
		return NULL;
	}

	card_fd = open(devpath, O_RDWR|O_CLOEXEC);
	if (card_fd == -1) {
		printf("open(%s): %s\n", devpath, STRERR);
		return NULL;
	}
	if (card_set_master(card_fd)) {
		printf("card_set_master failed\n");
		close(card_fd);
		return NULL;
	}

	self = calloc(1, sizeof(struct drm_kms));
	if (!self) {
		close(card_fd);
		return NULL;
	}

	/* from here on drm_kms_destroy() owns and closes card_fd */
	self->card_fd = card_fd;
	self->res = alloc_mode_card_res(card_fd);
	if (!self->res) {
		printf("unable to create drm structure\n");
		goto free_err;
	}

	if (drm_display_load(self, req_width, req_height, req_refresh, &self->display)) {
		printf("drm_display_load failed\n");
		goto free_err;
	}
	cur_mode = self->display.cur_mode;
	printf("connector(%u) using mode[%u] (%dx%d@%uhz)\n",
				self->display.conn_id,
				self->display.cur_mode_idx,
				cur_mode->hdisplay,
				cur_mode->vdisplay,
				cur_mode->vrefresh);

	/* buffer pitch must divide evenly by 16,
	 * TODO check against bpp here when that is variable instead of 32 */
	self->sfb = alloc_sfb(card_fd, cur_mode->hdisplay, cur_mode->vdisplay, 24, 32);
	if (!self->sfb) {
		printf("alloc_sfb failed\n");
		goto free_err;
	}

	if (!no_connect && drm_kms_connect_sfb(self)) {
		printf("drm_kms_connect_sfb failed\n");
		goto free_err;
	}
	return self;

free_err:
	drm_kms_destroy(self);
	return NULL;
}


/*
 * Smoke test: set up KMS state for "card0" with a 640x480@60 request,
 * without attaching the framebuffer to the crtc, then tear it down again.
 * Command-line arguments are ignored.
 */
int main(int argc, char *argv[])
{
	struct drm_kms *card0;

	(void)argc;
	(void)argv;

	/* no_connect = 1: do not connect to vt */
	card0 = drm_mode_create("card0", 1, 640, 480, 60);
	if (!card0) {
		printf("drm_mode_create failed\n");
		return -1;
	}

	drm_kms_destroy(card0);

	printf("looks ok, returning 0\n");
	return 0;
}

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2018-10-22  1:50     ` Re: Dave Airlie
  2018-10-21 22:20       ` Re: Michael Tirado
@ 2018-10-23  1:47       ` Michael Tirado
  2018-10-23  6:23         ` Re: Dave Airlie
  1 sibling, 1 reply; 414+ messages in thread
From: Michael Tirado @ 2018-10-23  1:47 UTC (permalink / raw)
  To: Dave Airlie, LKML, dri-devel

That preprocessor define worked but I'm still confused about this
DRM_FILE_PAGE_OFFSET thing.  Check out drivers/gpu/drm/drm_gem.c
right above drm_gem_init.

---

/*
 * We make up offsets for buffer objects so we can recognize them at
 * mmap time.
 */

/* pgoff in mmap is an unsigned long, so we need to make sure that
 * the faked up offset will fit
 */

#if BITS_PER_LONG == 64
#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1)
#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 16)
#else
#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFUL >> PAGE_SHIFT) + 1)
#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFUL >> PAGE_SHIFT) * 16)
#endif


---

Why is having 64-bit file offsets critical, causing -EINVAL on mmap?
What problems might be associated with using (0x10000000UL >>
PAGE_SHIFT)?
On Mon, Oct 22, 2018 at 1:50 AM Dave Airlie <airlied@gmail.com> wrote:
>
> On Mon, 22 Oct 2018 at 10:49, Michael Tirado <mtirado418@gmail.com> wrote:
> >
> > On Mon, Oct 22, 2018 at 12:26 AM Dave Airlie <airlied@gmail.com> wrote:
> > >
> > > This shouldn't be necessary, did someone misbackport the mmap changes without:
> > >
> > > drm: set FMODE_UNSIGNED_OFFSET for drm files
> > >
> > > Dave.
> >
> > The latest kernel I have had to patch was a 4.18-rc6.  I'll try with a
> > newer 4.19 and let you know if it decides to work.  If not I'll
> > prepare a test case for demonstration on qemu-system-i386.
>
> If you have custom userspace software, make sure it's using
> AC_SYS_LARGEFILE or whatever the equivalent is in your build system.
>
> 64-bit file offsets are important.
>
> Dave.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2018-10-23  1:47       ` Re: Michael Tirado
@ 2018-10-23  6:23         ` Dave Airlie
  0 siblings, 0 replies; 414+ messages in thread
From: Dave Airlie @ 2018-10-23  6:23 UTC (permalink / raw)
  To: mtirado418; +Cc: LKML, dri-devel

On Tue, 23 Oct 2018 at 16:13, Michael Tirado <mtirado418@gmail.com> wrote:
>
> That preprocessor define worked but I'm still confused about this
> DRM_FILE_PAGE_OFFSET thing.  Check out drivers/gpu/drm/drm_gem.c
> right above drm_gem_init.
>
> ---
>
> /*
>  * We make up offsets for buffer objects so we can recognize them at
>  * mmap time.
>  */
>
> /* pgoff in mmap is an unsigned long, so we need to make sure that
>  * the faked up offset will fit
>  */
>
> #if BITS_PER_LONG == 64
> #define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1)
> #define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 16)
> #else
> #define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFUL >> PAGE_SHIFT) + 1)
> #define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFUL >> PAGE_SHIFT) * 16)
> #endif
>
>
> ---
>
> Why is having a 64-bit file offsets critical, causing -EINVAL on mmap?
> What problems might be associated with using (0x10000000UL >>
> PAGE_SHIFT) ?

a) It catches people not using the correct userspace defines. Mostly
libdrm should handle this, and possibly Mesa.

b) there used to be legacy maps below that address on older drivers,
so we decided to never put stuff in the first 32-bit range that they
could clash with.

Dave.

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <1530911788-7033-1-git-send-email-santosh.shilimkar@oracle.com>]

[parent not found: <1530911788-7033-3-git-send-email-santosh.shilimkar@oracle.com>]

* Re:
       [not found] ` <1530911788-7033-3-git-send-email-santosh.shilimkar@oracle.com>
@ 2018-07-06 21:18   ` Santosh Shilimkar
  0 siblings, 0 replies; 414+ messages in thread
From: Santosh Shilimkar @ 2018-07-06 21:18 UTC (permalink / raw)
  To: arm, linux-arm-kernel; +Cc: khilman, arnd, olof, linux-kernel

Ignore this.. Will send again with subjects fixed

On 7/6/2018 2:16 PM, Santosh Shilimkar wrote:
> Subject: [GIT PULL 3/3] SOC: Driver updates for v4.19
> 
> The following changes since commit ce397d215ccd07b8ae3f71db689aedb85d56ab40:
> 
>    Linux 4.18-rc1 (2018-06-17 08:04:49 +0900)
> 
> are available in the git repository at:
> 
>    git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git tags/soc_drivers_for_4.19
> 
> for you to fetch changes up to 990c10091db318c7eb7e8935c86b6f7c01585015:
> 
>    soc: ti: wkup_m3_ipc: mark PM functions as __maybe_unused (2018-07-06 09:47:51 -0700)
> 
> ----------------------------------------------------------------
> Keystone SOC driver update for 4.19
> 
>   -  Add suspend/resume functionality to TI EMIF SRAM driver
>   -  Add wakeup M3 RTC self refresh support
>   -  Fix for the PM runtime ifdefs
> 
> ----------------------------------------------------------------
> Arnd Bergmann (1):
>        soc: ti: wkup_m3_ipc: mark PM functions as __maybe_unused
> 
> Dave Gerlach (2):
>        memory: ti-emif-sram: Add resume function to recopy sram code
>        soc: ti: wkup_m3_ipc: Add wkup_m3_request_wake_src
> 
> Keerthy (1):
>        soc: ti: wkup_m3_ipc: Add rtc_only with ddr in self refresh mode support
> 
>   drivers/memory/ti-emif-pm.c  | 33 +++++++++++++++++++
>   drivers/soc/ti/wkup_m3_ipc.c | 76 ++++++++++++++++++++++++++++++++++++++++++++
>   include/linux/wkup_m3_ipc.h  |  9 ++++++
>   3 files changed, 118 insertions(+)
> 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2018-01-11 17:16 Fabian Huegel
  2018-01-11 17:25 ` Ben Evans
  0 siblings, 1 reply; 414+ messages in thread
From: Fabian Huegel @ 2018-01-11 17:16 UTC (permalink / raw)
  To: Oleg Drokin, Andreas Dilger, James Simmons, Lai Siyao,
	John L . Hammond, Greg Kroah-Hartman, devel, Ben Evans,
	NeilBrown
  Cc: lustre-devel, linux-kernel, linux-kernel

We cleaned up a lot of checkpatch errors and warnings in obd_class.h,
but there are still some CHECKs and two warnings about flow control
inside macros left.

Changing those macros to inline functions would probably
be a good idea, but unfortunately it's not straightforward since they
use '#op' to print the name of the operation.

We also did some aligning to make the code more readable and removed
an unnecessary macro.

We only tested that the kernel still compiles and that the lustre kernel
module loads successfully, but given the harmless nature of these
changes we don't expect any problems.

The patches are based on the staging-testing branch of the staging tree.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2018-01-11 17:16 Fabian Huegel
@ 2018-01-11 17:25 ` Ben Evans
  0 siblings, 0 replies; 414+ messages in thread
From: Ben Evans @ 2018-01-11 17:25 UTC (permalink / raw)
  To: Fabian Huegel, Oleg Drokin, Andreas Dilger, James Simmons,
	Lai Siyao, John L . Hammond, Greg Kroah-Hartman, devel,
	Ben Evans, NeilBrown
  Cc: lustre-devel, linux-kernel, linux-kernel

I've been working off and on with this.  Since you're getting into the
counters in a couple of the patches, part of the reason for all the
#defines here is that MDC, MDT and OST counters are all shoved into
the same array dynamically, sometimes.  It would be a much cleaner
approach to have a separate array for the MDC stats, then print them
conditionally.

This would reduce all of the calls to these macros to counter increments.

-Ben Evans

On 1/11/18, 12:16 PM, "Fabian Huegel" <fabian_huegel@web.de> wrote:

>We cleaned up a lot of checkpatch errors and warnings in obd_class.h,
>but there are still some CHECKs and two warnings about flow control
>inside macros left.
>
>Changing those macros to inline functions would probably
>be a good idea, unfortunatly it's not straightforward since they use
>'#op' to print the name of the operation.
>
>We also did some aligning to make the code more readable and removed
>an unnecessary macro.
>
>We only tested, that the kernel still compiles and the lustre kernel
>module loads successfully, but given the harmless nature of these
>changes we don't expect any problems.
>
>The patches are based on the staging-testing branch of the staging tree.
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2017-11-13 14:55 Amos Kalonzo
  0 siblings, 0 replies; 414+ messages in thread
From: Amos Kalonzo @ 2017-11-13 14:55 UTC (permalink / raw)


Attn:

I am wondering why You haven't respond to my email for some days now.
reference to my client's contract balance payment of (11.7M,USD)
Kindly get back to me for more details.

Best Regards

Amos Kalonzo

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2017-09-07  8:50 Quick Loan
  0 siblings, 0 replies; 414+ messages in thread
From: Quick Loan @ 2017-09-07  8:50 UTC (permalink / raw)


Hello dear I am an International loan lender, I give out loans at 1% interest
rate, email me at:(rich_ken2016@usa.com)

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2017-08-18 19:47 Jessy
  0 siblings, 0 replies; 414+ messages in thread
From: Jessy @ 2017-08-18 19:47 UTC (permalink / raw)
  To: Recipients

Hello,

I wish to seek for your assistance in a deal that will be of mutual benefit for the both of us from Camp Stanley in Uijeongbu. Please contact me for details, God bless you.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2017-07-31 23:46 TD CREDIT
  0 siblings, 0 replies; 414+ messages in thread
From: TD CREDIT @ 2017-07-31 23:46 UTC (permalink / raw)
  To: Recipients

DO YOU NEED ANY KIND OF LOAN CREDIT ASSISTANCE? IF YES,EMAIL US FOR MORE INFO.

---
This email is free from viruses and malware because avast! Antivirus protection is active.
http://www.avast.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2017-07-15  3:29 Saif Al-Islam
  0 siblings, 0 replies; 414+ messages in thread
From: Saif Al-Islam @ 2017-07-15  3:29 UTC (permalink / raw)


I need your assistance in a transaction that will benefit you details
will be disclosed to you once i receive your reply.

Regards,
Saif.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2017-07-07 17:04 Mrs Alice Walton
  0 siblings, 0 replies; 414+ messages in thread
From: Mrs Alice Walton @ 2017-07-07 17:04 UTC (permalink / raw)


-- 
my name is Mrs. Alice Walton, a business woman an America Citizen and  
the heiress to the fortune of Walmart stores, born October 7, 1949. I  
have a mission for you worth $100,000,000.00(Hundred Million United  
State Dollars) which I intend using for CHARITY PROJECT to help the  
less privilege and orphanage

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2017-05-28 13:39 Lasek László
  0 siblings, 0 replies; 414+ messages in thread
From: Lasek László @ 2017-05-28 13:39 UTC (permalink / raw)
  To: kdj

Hello Friend

I Have a Proposal for your kindly contact me via:  emzong@outlook.com

Thank You.

________

ü Mielőtt kinyomtatja ezt az e-mailt, gondoljon a környezetre. P Please consider the environment before printing this email.
*******

Ezt az emailt a Websense ESG ellenőrizte a BKV Zrt. biztonsági szabályzata alapján. Nem található benne vírus.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2017-05-03  6:23 H.A
  0 siblings, 0 replies; 414+ messages in thread
From: H.A @ 2017-05-03  6:23 UTC (permalink / raw)
  To: Recipients

With profound love in my heart, I Kindly Oblige your interest to very important proposal.. It is Truly Divine and require your utmost attention..........

S hlubokou láskou v mém srdci, Laskave jsem prinutit svuj zájem k návrhu .. Je velmi duležité, skutecne Divine a vyžadují vaši nejvyšší pozornost.

  Kontaktujte me prímo pres: helenaroberts99@gmail.com pro úplné podrobnosti.complete.

HELINA .A ROBERTS

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2017-04-28  8:20 Anatolij Gustschin
  2017-04-28  8:43 ` Linus Walleij
  0 siblings, 1 reply; 414+ messages in thread
From: Anatolij Gustschin @ 2017-04-28  8:20 UTC (permalink / raw)
  To: linus.walleij, gnurou; +Cc: andy.shevchenko, linux-gpio, linux-kernel

Subject: [PATCH v3] gpiolib: Add stubs for gpiod lookup table interface

Add stubs for gpiod_add_lookup_table() and gpiod_remove_lookup_table()
for the !GPIOLIB case to prevent build errors. Also add prototypes.

Signed-off-by: Anatolij Gustschin <agust@denx.de>
---
Changes in v3:
 - add stubs for !GPIOLIB case. Drop prototypes, these are
   already in gpio/machine.h

Changes in v2:
 - move gpiod_lookup_table out of #ifdef

 include/linux/gpio/consumer.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 8f702fc..cf3fee2 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -41,6 +41,8 @@ enum gpiod_flags {
 			  GPIOD_FLAGS_BIT_DIR_VAL,
 };
 
+struct gpiod_lookup_table;
+
 #ifdef CONFIG_GPIOLIB
 
 /* Return the number of GPIOs associated with a device / function */
@@ -435,6 +437,12 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
 	return ERR_PTR(-ENOSYS);
 }
 
+static inline
+void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {}
+
+static inline
+void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {}
+
 #endif /* CONFIG_GPIOLIB */
 
 static inline
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2017-04-28  8:20 Anatolij Gustschin
@ 2017-04-28  8:43 ` Linus Walleij
  2017-04-28  9:26   ` Re: Anatolij Gustschin
  0 siblings, 1 reply; 414+ messages in thread
From: Linus Walleij @ 2017-04-28  8:43 UTC (permalink / raw)
  To: Anatolij Gustschin
  Cc: Alexandre Courbot, Andy Shevchenko, linux-gpio, linux-kernel

On Fri, Apr 28, 2017 at 10:20 AM, Anatolij Gustschin <agust@denx.de> wrote:

> Subject: [PATCH v3] gpiolib: Add stubs for gpiod lookup table interface
>
> Add stubs for gpiod_add_lookup_table() and gpiod_remove_lookup_table()
> for the !GPIOLIB case to prevent build errors. Also add prototypes.
>
> Signed-off-by: Anatolij Gustschin <agust@denx.de>
> ---
> Changes in v3:
>  - add stubs for !GPIOLIB case. Drop prototypes, these are
>    already in gpio/machine.h

Yeah...

> --- a/include/linux/gpio/consumer.h
> +++ b/include/linux/gpio/consumer.h

So why should the stubs be in <linux/gpio/consumer.h>
and not in <linux/gpio/machine.h>?

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2017-04-28  8:43 ` Linus Walleij
@ 2017-04-28  9:26   ` Anatolij Gustschin
  0 siblings, 0 replies; 414+ messages in thread
From: Anatolij Gustschin @ 2017-04-28  9:26 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Alexandre Courbot, Andy Shevchenko, linux-gpio, linux-kernel

On Fri, 28 Apr 2017 10:43:19 +0200
Linus Walleij linus.walleij@linaro.org wrote:
...
>> --- a/include/linux/gpio/consumer.h
>> +++ b/include/linux/gpio/consumer.h  
>
>So why should the stubs be in <linux/gpio/consumer.h>
>and not in <linux/gpio/machine.h>?

good question. I'll move them to machine.h.

Thanks,
Anatolij

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2017-02-23 15:09 Qin's Yanjun
  0 siblings, 0 replies; 414+ messages in thread
From: Qin's Yanjun @ 2017-02-23 15:09 UTC (permalink / raw)



How are you today and your family? I require your attention and honest
co-operation about some issues which i will really want to discuss with you
which.  Looking forward to read from you soon.  

Qin's


______________________________

Sky Silk, http://aknet.kz

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2017-01-07 14:47 Information
  0 siblings, 0 replies; 414+ messages in thread
From: Information @ 2017-01-07 14:47 UTC (permalink / raw)




Do you need loan? we offer all kinds of loan from minimum amount of $5,000 to maximum of $2,000,000 if you are interested contact us via: internationalloan09@gmail.com    with the information below:

Full Name:
Country:
Loan Amount:
Loan Duration:
Mobile phone number:
Sex:

Thanks,
Dr Scott.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* re:
@ 2016-11-15  4:40 Apply
  0 siblings, 0 replies; 414+ messages in thread
From: Apply @ 2016-11-15  4:40 UTC (permalink / raw)
  To: Recipients

Do you need loan?we offer all kinds of loan from minimum amount of $5,000 to maximum of $2,000,000 if you are interested contact us via:internationalloanplc1@gmail.com  with the information below:
Full Name:
Country:
Loan Amount:
Loan Duration:
Mobile phone number:
Sex:
Thanks,
Dr Scott.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2016-11-08 13:46 vaserman
  0 siblings, 0 replies; 414+ messages in thread
From: vaserman @ 2016-11-08 13:46 UTC (permalink / raw)
  To: info


--
I need your help

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2016-11-02  2:36 U
  0 siblings, 0 replies; 414+ messages in thread
From: U @ 2016-11-02  2:36 UTC (permalink / raw)
  To: info

[-- Attachment #1: Type: text/plain, Size: 24 bytes --]


-- 
UK NATIONAL LOTTERY

[-- Attachment #2: UK.jpg --]
[-- Type: , Size: 185835 bytes --]

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2016-07-21 21:50 Amit Jain
  0 siblings, 0 replies; 414+ messages in thread
From: Amit Jain @ 2016-07-21 21:50 UTC (permalink / raw)


Dear Esteemed Friend,

My name is Mr. Amit Jain, I double as the Group Chief Operations Officer Emaar Properties and Chief Executive Officer Emaar Dubai. Let me know the possibilities of setting up a private investment in your country as I am interested in your region. I have interest in Real Estate, Industry, Energy and Agriculture.

Follow the link to know more about our company profile: https://www.emaar.com/en/who-we-are/leadership/principal-officers.aspx

I expect to get a response from you to enable us discuss more.

Regards,
Mr. Amit Jain.
P.O. Box 120360,
Group Chief Operations Officer
Emaar Properties PJSC

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2016-07-04 15:47 Mr. Bun Sam
  0 siblings, 0 replies; 414+ messages in thread
From: Mr. Bun Sam @ 2016-07-04 15:47 UTC (permalink / raw)
  To: linux-kernel

Hi,

I work with one of the major banks in Cambodia as the director of audit. I have a proposal for you, a very urgent and quick business that will be completed in 12 working days. I have just discovered documents relating to funds belonging to a deceased client of our bank,

I went through all the related documents to the funds and I discovered no listed next of kin to inherit the funds which has been in our bank for more than 7 years now. I need your cooperation in getting the funds, I have the power to list you as the beneficiary of the funds and have the funds transferred to you.

If you are interested, do get back to me so I can provide you with the full details.

Regards
Bun Sam.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2016-07-02 11:30 Mr. Bun Sam
  0 siblings, 0 replies; 414+ messages in thread
From: Mr. Bun Sam @ 2016-07-02 11:30 UTC (permalink / raw)
  To: linux-kernel

Hi,

I work with one of the major banks in Cambodia as the director of audit. I have a proposal for you, a very urgent and quick business that will be completed in 12 working days. I have just discovered documents relating to funds belonging to a deceased client of our bank,

I went through all the related documents to the funds and I discovered no listed next of kin to inherit the funds which has been in our bank for more than 7 years now. I need your cooperation in getting the funds, I have the power to list you as the beneficiary of the funds and have the funds transferred to you.

If you are interested, do get back to me so I can provide you with the full details.

Regards
Bun Sam.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2016-06-27  8:24 Fidelity Loans
  0 siblings, 0 replies; 414+ messages in thread
From: Fidelity Loans @ 2016-06-27  8:24 UTC (permalink / raw)
  To: Recipients

Loan Offer at 3%, Feel Free to REPLY back to us for more info

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2016-02-10 14:36 Petr Mladek
  2016-02-10 14:44 ` Steven Rostedt
  0 siblings, 1 reply; 414+ messages in thread
From: Petr Mladek @ 2016-02-10 14:36 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, srostedt, Steven Rostedt, Tejun Heo, Peter Hurley,
	Jan Kara, Sergey Senozhatsky, Andrew Morton, Kyle McMartin,
	KY Srinivasan, Dave Jones, Calvin Owens

Bcc: 
Subject: Re: [PATCH] printk: avoid livelock if another CPU printks
 continuously
Reply-To: 
In-Reply-To: <1454963703-20433-1-git-send-email-dvlasenk@redhat.com>

On Mon 2016-02-08 21:35:03, Denys Vlasenko wrote:
> At the end of each printk(), kernel attempts to take console_sem.
> If this succeeds, it feeds buffered message data to console devices
> until there is nothing left, and releases console_sem:
> 
>         if (console_trylock_for_printk(this_cpu))
>                 console_unlock();
> 
> The livelock exists because code in console_unlock() has no
> limit on the amount of buffered data it would process under
> console_sem. This is bad if printk() was called with IRQs disabled.
> 
> This patch makes console_unlock() release console_sem after 5
> iterations, which usually amounts to 5 lines of printk messages,
> and give other printk'ing CPUs a chance to acquire console_sem.
> 
> If some CPU grabs it, console_unlock() finishes.
> If no one takes the semaphore, console_unlock() re-acquires it
> and loops back for another cycle of console output.
> 
> This seems to be a hard-to-trigger, but long-existing problem:

Yup, and there are more people trying to handle this. I have added
some of them to CC.

Sadly, the problem is much more complicated than it looks. Jan Kara
(jack) has already provided many possible solutions that were not
accepted. The last one can be seen at
http://thread.gmane.org/gmane.linux.kernel/2105183/focus=2113787

See below some comments to your approach.

> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: linux-kernel@vger.kernel.org
> CC: srostedt@redhat.com
> CC: Steven Rostedt <rostedt@goodmis.org>
> CC: Tejun Heo <tj@kernel.org>
> CC: Peter Hurley <peter@hurleysoftware.com>
> ---
>  kernel/printk/printk.c | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
> 
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index c963ba5..ca4f9d55 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -2235,6 +2235,7 @@ void console_unlock(void)
>  	unsigned long flags;
>  	bool wake_klogd = false;
>  	bool do_cond_resched, retry;
> +	unsigned cnt;
>  
>  	if (console_suspended) {
>  		up_console_sem();
> @@ -2257,6 +2258,7 @@ void console_unlock(void)
>  	/* flush buffered message fragment immediately to console */
>  	console_cont_flush(text, sizeof(text));
>  again:
> +	cnt = 5;
>  	for (;;) {
>  		struct printk_log *msg;
>  		size_t ext_len = 0;
> @@ -2284,6 +2286,9 @@ skip:
>  		if (console_seq == log_next_seq)
>  			break;
>  
> +		if (--cnt == 0)
> +			break;	/* Someone else printk's like crazy */
> +
>  		msg = log_from_idx(console_idx);
>  		if (msg->flags & LOG_NOCONS) {
>  			/*
> @@ -2350,6 +2355,26 @@ skip:
>  	if (retry && console_trylock())
>  		goto again;
>  
> +	if (cnt == 0) {
> +		/*
> +		 * Other CPU(s) printk like crazy, filling log_buf[].
> +		 * Try to get rid of the "honor" of servicing their data:
> +		 * give _them_ time to grab console_sem and start working.
> +		 */
> +		cnt = 9999;
> +		while (--cnt != 0) {
> +			cpu_relax();
> +			if (console_seq == log_next_seq) {

This condition is true when all available messages have been printed
to the console. It means that there is nothing to do at all. It is
quite late. A much better solution would be to store console_seq
in a local variable and check whether it is being modified by another CPU.


> +				/* Good, other CPU entered "for(;;)" loop */
> +				goto out;
> +			}
> +		}
> +		/* No one seems to be willing to take it... */
> +		if (console_trylock())
> +			goto again; /* we took it */
> +		/* Nope, someone else holds console_sem! Good */

The cycle gives other CPUs a big chance to enter console_unlock().
It means that more CPUs might end up in the above busy cycle.

It gives a chance to move the printing to another CPU. It likely
slows down the flood of messages because the producers end up
here as well.

So, it probably works but the performance is far from optimal.
Many CPUs might end up doing nothing. I am afraid that this is
not the right way to go.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2016-02-10 14:36 Petr Mladek
@ 2016-02-10 14:44 ` Steven Rostedt
  0 siblings, 0 replies; 414+ messages in thread
From: Steven Rostedt @ 2016-02-10 14:44 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Denys Vlasenko, linux-kernel, srostedt, Tejun Heo, Peter Hurley,
	Jan Kara, Sergey Senozhatsky, Andrew Morton, Kyle McMartin,
	KY Srinivasan, Dave Jones, Calvin Owens

On Wed, 10 Feb 2016 15:36:49 +0100
Petr Mladek <pmladek@suse.com> wrote:

> Bcc: 
> Subject: Re: [PATCH] printk: avoid livelock if another CPU printks
>  continuously
> Reply-To: 
> In-Reply-To: <1454963703-20433-1-git-send-email-dvlasenk@redhat.com>
> 

Hmm, playing with mail headers?

> > +	if (cnt == 0) {
> > +		/*
> > +		 * Other CPU(s) printk like crazy, filling log_buf[].
> > +		 * Try to get rid of the "honor" of servicing their data:
> > +		 * give _them_ time to grab console_sem and start working.
> > +		 */
> > +		cnt = 9999;
> > +		while (--cnt != 0) {
> > +			cpu_relax();
> > +			if (console_seq == log_next_seq) {  
> 
> This condition is true when all available messages are printed to
> the console. It means that there is nothing to do at all. It is
> quite late. A much better solution would be to store console_seq
> to a local variable and check it is being modified by an other CPU.
> 

Yep, I recommended the same thing.

> 
> > +				/* Good, other CPU entered "for(;;)" loop */
> > +				goto out;
> > +			}
> > +		}
> > +		/* No one seems to be willing to take it... */
> > +		if (console_trylock())
> > +			goto again; /* we took it */
> > +		/* Nope, someone else holds console_sem! Good */  
> 
> The cycle gives a big chance other CPUs to enter console_unlock().
> It means that more CPUs might end up in the above busy cycle.
> 
> It gives a chance to move the printing to another CPU. It likely
> slows down the flood of messages because the producer end up
> here as well.
> 
> So, it probably works but the performance is far from optimal.
> Many CPUs might end up doing nothing. I am afraid that this is
> not the right way to go.

Note, it's not that performance critical, and the loop only happens if
someone else is adding to the console, which hopefully, should be rare.

-- Steve

^ permalink raw reply	[flat|nested] 414+ messages in thread

* re:
@ 2016-02-08  3:11 Qatar Foundation
  0 siblings, 0 replies; 414+ messages in thread
From: Qatar Foundation @ 2016-02-08  3:11 UTC (permalink / raw)
  To: Recipients

Dear Beneficiary,
You have been selected to receive €950,000.00 EURO as charity donations
aid of the Qatar Foundation. Reply back for
information and claims.
Reply to: w.d1966@gmx.com
Yours sincerely,
Mr. Rashid Al-Naimi.
The Chief Executive Officer of
Qatar Foundation Endowment.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2016-01-26 20:52 Ms Nadia Mohammed
  0 siblings, 0 replies; 414+ messages in thread
From: Ms Nadia Mohammed @ 2016-01-26 20:52 UTC (permalink / raw)
  To: Recipients

I hope this letter find you in good health , I'm Ms Nadia Mohammed , I have a project i want to bring to you and i want you to reply me and help me discuss this proposal. please reply to my personal email at :   nadiamohammed099@gmail.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2016-01-15  2:39 Trust Guarantee
  0 siblings, 0 replies; 414+ messages in thread
From: Trust Guarantee @ 2016-01-15  2:39 UTC (permalink / raw)


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain;charset=iso-88591-1, Size: 496 bytes --]




Sie benötigen dringend Darlehen? Wir geben Darlehen an interessierte
Einzelpersonen, die versuchen, Darlehen mit Treu und Glauben. Sind Sie
ernsthaft brauchen dringend Darlehen? dann sind Sie an der richtigen
Stelle. Wir geben Business-Darlehen, persönliche Darlehen, kontaktieren
Sie uns für Ihr Darlehen beantragen, um Ihre Nachfrage zu befriedigen und
per E-Mail aus Finanz problem.contact uns heute fest:
trustguarantee1@gmail.com
Dank wie wir erwarte Ihre Antwort
Vertrauensgarantie Loan

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2015-12-18 11:50 
  0 siblings, 0 replies; 414+ messages in thread
From:  @ 2015-12-18 11:50 UTC (permalink / raw)
  To: Recipients

I thought you would have responded by now?

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2015-12-11  9:30 Матвеева Руслана
  0 siblings, 0 replies; 414+ messages in thread
From: Матвеева Руслана @ 2015-12-11  9:30 UTC (permalink / raw)
  To: linux-kernel

Приветствую Вас.

Заказывали ли Вы, рекламу по почтовым майл адресам?

vip.tacanuur48@mail.ru 


^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <D0613EBE33E8FD439137DAA95CCF59555B7A5A4D@MGCCCMAIL2010-5.mgccc.cc.ms.us>]

* RE:
       [not found] <D0613EBE33E8FD439137DAA95CCF59555B7A5A4D@MGCCCMAIL2010-5.mgccc.cc.ms.us>
@ 2015-11-24 13:21 ` Amis, Ryann
  0 siblings, 0 replies; 414+ messages in thread
From: Amis, Ryann @ 2015-11-24 13:21 UTC (permalink / raw)
  To: MGCCC Helpdesk

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 1309 bytes --]

â€‹Our new web mail has been improved with a new messaging system from Owa/outlook which also include faster usage on email, shared calendar, web-documents and the new 2015 anti-spam version. Please use the link below to complete your update for our new Owa/outlook improved web mail. CLICK HERE<https://formcrafts.com/a/15851> to update or Copy and pest the Link to your Browser: http://bit.ly/1Xo5Vd4
Thanks,
ITC Administrator.
-----------------------------------------
The information contained in this e-mail message is intended only for the personal and confidential use of the recipient(s) named above. This message may be an attorney-client communication and/or work product and as such is privileged and confidential. If the reader of this message is not the intended recipient or an agent responsible for delivering it to the intended recipient, you are hereby notified that you have received this document in error and that any review, dissemination, distribution, or copying of this message is strictly prohibited. If you have received this communication in error, please notify us immediately by e-mail, and delete the original message.
ÿôèº{.nÇ+‰·Ÿ®‰†+%ŠËÿ±éÝ¶\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dÊ‡Ú™ë,j\a¢f£¢·hšïêÿ‘êçz_è®\x03(éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨èÚ&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <CA+47Ykimr0d9cR35aWoCtm8JoXUYjKFXL0HJ-c=EE_suTAPR8w@mail.gmail.com>]

* Re:
       [not found] <CA+47Ykimr0d9cR35aWoCtm8JoXUYjKFXL0HJ-c=EE_suTAPR8w@mail.gmail.com>
@ 2015-11-07 17:33 ` bbmbbm1
  0 siblings, 0 replies; 414+ messages in thread
From: bbmbbm1 @ 2015-11-07 17:33 UTC (permalink / raw)
  To: BMW Automobile


----- Original Message -----
From:
Sent: Sat, 07 Nov 2015 12:20:21 -0500 (EST)
Subject: 

Your email has won $ 1,200,000.00 in the 2015 BMW lottery in United
> States of America. You are advice to send us the following
> Name,Address,Tell phone
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2015-11-07 16:48 Mohammed
  0 siblings, 0 replies; 414+ messages in thread
From: Mohammed @ 2015-11-07 16:48 UTC (permalink / raw)
  To: Recipients

Message from Saudi Arabia Prince Alwaleed bin Talal for his charity donation and You have been selected as recipient/benefactor for $5Million Dollars from Alwaleed Philanthropic Foundation Grant.for more information contact Via email  ally00256@yandex.com

Thanks

Ally Mohammed

Please, reply should go to ally00256@yandex.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2015-10-29  2:40 
  0 siblings, 0 replies; 414+ messages in thread
From:  @ 2015-10-29  2:40 UTC (permalink / raw)
  To: Recipients

Hello,

I am Major. Alan Edward, in the military unit here in Afghanistan and i need an urgent assistance with someone i can trust,It's risk free and legal.

---
This email has been checked for viruses by Avast antivirus software.
http://www.avast.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2015-10-23 14:46 MajorAlan
  0 siblings, 0 replies; 414+ messages in thread
From: MajorAlan @ 2015-10-23 14:46 UTC (permalink / raw)
  To: Recipients

I am in the military unit here in Afghanistan,we have some amount of funds that we want to move out of the country.My partners and I need a good partner someone we can trust. It is risk free and legal. Reply to this email   majoralan.edward@gmx.com

Regards,
Major. Alan Edward

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2015-10-21  2:26 Mohammed
  0 siblings, 0 replies; 414+ messages in thread
From: Mohammed @ 2015-10-21  2:26 UTC (permalink / raw)
  To: Recipients

Message from Saudi Arabia Prince Alwaleed bin Talal for his charity donation and You have been selected as recipient/benefactor for $5Million Dollars from Alwaleed Philanthropic Foundation Grant.for more information contact Via email  ally00256@yandex.com

Thanks

Ally Mohammed

Please, reply should go to ally00256@yandex.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re ...
@ 2015-10-08  8:30 BRGF
  0 siblings, 0 replies; 414+ messages in thread
From: BRGF @ 2015-10-08  8:30 UTC (permalink / raw)
  To: Recipients

Apply for your urgent financial help today and have it in your bank account within 24 hours. For more information, contact us with your interest through the form below; 

Full Name:
Address:
Country:
Loan Amount:
Duration:
Purpose of Loan:
Phone Number:

We hope to hear from you soonest. 

Hamon Hassan
operations manager.
©2015 BRGF & Co Inc.
All Rights Reserved.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2015-09-01 16:06 Zariya
  0 siblings, 0 replies; 414+ messages in thread
From: Zariya @ 2015-09-01 16:06 UTC (permalink / raw)
  To: Recipients

Help me and my 2 kids here in Syria We will share the 6,600,000 USD
I have here with you for your help, sorry to mention it
we want to leave Syria, put the kids in school and buy a new home
You will give us guidance when we arrive Their father died in the chemical weapon airstrike
I will send you our family pictures and more details as I read from you

Yours

ZariyaHelp me and my 2 kids here in Syria We will share the 6,600,000 USD
I have here with you for your help, sorry to mention it
we want to leave Syria, put the kids in school and buy a new home
You will give us guidance when we arrive Their father died in the chemical weapon airstrike
I will send you our family pictures and more details as I read from you

Yours

Zariya

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2015-09-01 14:14 Mika Penttilä
  2015-09-01 15:22 ` Fabio Estevam
  0 siblings, 1 reply; 414+ messages in thread
From: Mika Penttilä @ 2015-09-01 14:14 UTC (permalink / raw)
  To: linux-kernel, linux-arm-kernel, edubezval

This one causes imx6q with debug uart connected to "schedule while
atomic" endlessly :


9e7b399d6528eac33a6fbfceb2b92af209c3454d is the first bad commit
commit 9e7b399d6528eac33a6fbfceb2b92af209c3454d
Author: Eduardo Valentin <edubezval@gmail.com>
Date:   Tue Aug 11 10:21:20 2015 -0700

    serial: imx: remove unbalanced clk_prepare

    The current code attempts to prepare clk_per and clk_ipg
    before using the device. However, the result is an extra
    prepare call on each clock. Here is the output of uart
    clocks (only uart enabled and used as console):

    $  grep uart /sys/kernel/debug/clk/clk_summary
     uart_serial           1            2    80000000          0 0
           uart           1            2    66000000          0 0

    This patch balances the calls of prepares. The result is:

    $  grep uart /sys/kernel/debug/clk/clk_summary
     uart_serial           1            1    80000000          0 0
           uart           1            1    66000000          0 0

    Cc: Fabio Estevam <festevam@gmail.com>
    Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
    Cc: Jiri Slaby <jslaby@suse.com>
    Cc: linux-serial@vger.kernel.org
    Cc: linux-pm@vger.kernel.org
    Cc: linux-kernel@vger.kernel.org
    Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
    Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2015-09-01 14:14 Mika Penttilä
@ 2015-09-01 15:22 ` Fabio Estevam
  0 siblings, 0 replies; 414+ messages in thread
From: Fabio Estevam @ 2015-09-01 15:22 UTC (permalink / raw)
  To: Mika Penttilä; +Cc: linux-kernel, linux-arm-kernel, Eduardo Valentin

On Tue, Sep 1, 2015 at 11:14 AM, Mika Penttilä
<mika.j.penttila@gmail.com> wrote:
> This one causes imx6q with debug uart connected to "schedule while
> atomic" endlessly :

Yes, I have sent a revert patch for it:
http://www.spinics.net/lists/arm-kernel/msg439995.html

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2015-09-01 12:01 Zariya
  0 siblings, 0 replies; 414+ messages in thread
From: Zariya @ 2015-09-01 12:01 UTC (permalink / raw)
  To: Recipients

Help me and my 2 kids here in Syria We will share the 6,600,000 USD
I have here with you for your help, sorry to mention it
we want to leave Syria, put the kids in school and buy a new home
You will give us guidance when we arrive Their father died in the chemical weapon airstrike
I will send you our family pictures and more details as I read from you

Yours

ZariyaHelp me and my 2 kids here in Syria We will share the 6,600,000 USD
I have here with you for your help, sorry to mention it
we want to leave Syria, put the kids in school and buy a new home
You will give us guidance when we arrive Their father died in the chemical weapon airstrike
I will send you our family pictures and more details as I read from you

Yours

Zariya

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2015-08-19 13:01 christain147
  0 siblings, 0 replies; 414+ messages in thread
From: christain147 @ 2015-08-19 13:01 UTC (permalink / raw)
  To: Recipients

Good day,hoping you read this email and respond to me in good time.I do not intend to solicit for funds but  your time and energy in using my own resources to assist the less privileged.I am medically confined at the moment hence I request your indulgence.
I will give you a comprehensive brief once I hear from you.

Please forward your response to my private email address:
gudworks104@yahoo.com

Thanks and reply.

Robert Grondahl

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2015-07-24 10:34 Mrs Nadia  Mohammed 
  0 siblings, 0 replies; 414+ messages in thread
From: Mrs Nadia  Mohammed  @ 2015-07-24 10:34 UTC (permalink / raw)


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=utf-8, Size: 453 bytes --]




السلام عليكم انا مدام نادية محمد غانم من فلسطين و اريد منك ان تساعدنى لاننى لدى مشروع اريد ان اعرضه اليك  لذا ارجو منك التواصل معى على هذا الايميل

nadiamghanem07@gmail.comÿôèº{.nÇ+‰·Ÿ®‰†+%ŠËÿ±éÝ¶\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dÊ‡Ú™ë,j\a¢f£¢·hšïêÿ‘êçz_è®\x03(éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨èÚ&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <CACy=+DtdZOUT4soNZ=zz+_qhCfM=C8Oa0D5gjRC7QM3nYi4oEw@mail.gmail.com>]

* Re:
       [not found] <CACy=+DtdZOUT4soNZ=zz+_qhCfM=C8Oa0D5gjRC7QM3nYi4oEw@mail.gmail.com>
@ 2015-07-11 18:37 ` Mustapha Abiola
  0 siblings, 0 replies; 414+ messages in thread
From: Mustapha Abiola @ 2015-07-11 18:37 UTC (permalink / raw)
  To: eparis, paul, linux-kernel, linux-audit, mingo

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0001-Fix-redundant-check-against-unsigned-int-in-broken-a.patch --]
[-- Type: application/octet-stream, Size: 930 bytes --]

From 55fae099d46749b73895934aab8c2823c5a23abe Mon Sep 17 00:00:00 2001
From: Mustapha Abiola <hi@mustapha.org>
Date: Sat, 11 Jul 2015 17:01:04 +0000
Subject: [PATCH 1/1] Fix redundant check against unsigned int in broken audit
 test fix for exec arg len

Quick patch to fix the needless check of `len` being < 0, as it is an
unsigned int.

Signed-off-by: Mustapha Abiola <hi@mustapha.org>
---
 kernel/auditsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e85bdfd..0012476 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1021,7 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
 	 * for strings that are too long, we should not have created
 	 * any.
 	 */
-	if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) {
+	if (WARN_ON_ONCE(len > MAX_ARG_STRLEN - 1)) {
 		send_sig(SIGKILL, current, 0);
 		return -1;
 	}
-- 
1.9.1



^ permalink raw reply related	[flat|nested] 414+ messages in thread

[parent not found: <CAHxZcryF7pNoENh8vpo-uvcEo5HYA5XgkZFWrLEHM5Hhf5ay+Q@mail.gmail.com>]

* Re:
       [not found] <CAHxZcryF7pNoENh8vpo-uvcEo5HYA5XgkZFWrLEHM5Hhf5ay+Q@mail.gmail.com>
@ 2015-07-05 16:38 ` t0021
  0 siblings, 0 replies; 414+ messages in thread
From: t0021 @ 2015-07-05 16:38 UTC (permalink / raw)
  To: info


----- Original Message -----


I NEED YOUR HELP

=========================


^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <E1Yz4NQ-0000Cw-B5@feisty.vs19.net>]

* Re:
       [not found] <E1Yz4NQ-0000Cw-B5@feisty.vs19.net>
@ 2015-05-31 15:37 ` Roman Volkov
  2015-05-31 15:53   ` Re: Hans de Goede
  0 siblings, 1 reply; 414+ messages in thread
From: Roman Volkov @ 2015-05-31 15:37 UTC (permalink / raw)
  To: Dmitry Torokhov
  Cc: Mark Rutland, Rob Herring, Pawel Moll, Ian Campbell, Kumar Gala,
	grant.likely, Hans de Goede, Jiri Kosina, Wolfram Sang,
	linux-input, linux-kernel, devicetree, Tony Prisk

В Sat, 14 Mar 2015 20:20:38 -0700
Dmitry Torokhov <dmitry.torokhov@gmail.com> wrote:

> 
> Hi Roman,
> 
> On Mon, Feb 16, 2015 at 12:11:43AM +0300, Roman Volkov wrote:
> > Documentation for 'intel,8042' DT compatible node.
> > 
> > Signed-off-by: Tony Prisk <linux@prisktech.co.nz>
> > Signed-off-by: Roman Volkov <v1ron@v1ros.org>
> > ---
> >  .../devicetree/bindings/input/intel-8042.txt       | 26
> > ++++++++++++++++++++++ 1 file changed, 26 insertions(+)
> >  create mode 100644
> > Documentation/devicetree/bindings/input/intel-8042.txt
> > 
> > diff --git a/Documentation/devicetree/bindings/input/intel-8042.txt
> > b/Documentation/devicetree/bindings/input/intel-8042.txt new file
> > mode 100644 index 0000000..ab8a3e0
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/input/intel-8042.txt
> > @@ -0,0 +1,26 @@
> > +Intel 8042 Keyboard Controller
> > +
> > +Required properties:
> > +- compatible: should be "intel,8042"
> > +- regs: memory for keyboard controller
> > +- interrupts: usually, two interrupts should be specified
> > (keyboard and aux).
> > +	However, only one interrupt is also allowed in case of
> > absence of the
> > +	physical port in the controller. The i8042 driver must be
> > loaded with
> > +	nokbd/noaux option in this case.
> > +- interrupt-names: interrupt names corresponding to numbers in the
> > list.
> > +	"kbd" is the keyboard interrupt and "aux" is the auxiliary
> > (mouse)
> > +	interrupt.
> > +- command-reg: offset in memory for command register
> > +- status-reg: offset in memory for status register
> > +- data-reg: offset in memory for data register
> > +
> > +Example:
> > +	i8042@d8008800 {
> > +		compatible = "intel,8042";
> > +		regs = <0xd8008800 0x100>;
> > +		interrupts = <23>, <4>;
> > +		interrupt-names = "kbd", "aux";
> > +		command-reg = <0x04>;
> > +		status-reg = <0x04>;
> > +		data-reg = <0x00>;
> > +	};
> 
> No, we already have existing OF bindings for i8042 on sparc and
> powerpc, I do not think we need to invent a brand new one.
> 
> Thanks.
> 

Hi Dmitry,

I see some OF code in i8042-sparcio.h file. There are node definitions
like "kb_ps2", "keyboard", "kdmouse", "mouse". Are these documented
somewhere?

Great if vt8500 is not unique with OF bindings for i8042. The code from
sparc even looks compatible, only register offsets are hardcoded for
specific machine. Is it possible to read offsets from Device Tree using
these existing bindings without dealing with the kernel configuration?

Regards,
Roman

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2015-05-31 15:37 ` Re: Roman Volkov
@ 2015-05-31 15:53   ` Hans de Goede
  0 siblings, 0 replies; 414+ messages in thread
From: Hans de Goede @ 2015-05-31 15:53 UTC (permalink / raw)
  To: Roman Volkov, Dmitry Torokhov
  Cc: Mark Rutland, Rob Herring, Pawel Moll, Ian Campbell, Kumar Gala,
	grant.likely, Jiri Kosina, Wolfram Sang, linux-input,
	linux-kernel, devicetree, Tony Prisk

Hi Roman,

On 31-05-15 17:37, Roman Volkov wrote:
> В Sat, 14 Mar 2015 20:20:38 -0700
> Dmitry Torokhov <dmitry.torokhov@gmail.com> wrote:
>
>>
>> Hi Roman,
>>
>> On Mon, Feb 16, 2015 at 12:11:43AM +0300, Roman Volkov wrote:
>>> Documentation for 'intel,8042' DT compatible node.
>>>
>>> Signed-off-by: Tony Prisk <linux@prisktech.co.nz>
>>> Signed-off-by: Roman Volkov <v1ron@v1ros.org>
>>> ---
>>>   .../devicetree/bindings/input/intel-8042.txt       | 26
>>> ++++++++++++++++++++++ 1 file changed, 26 insertions(+)
>>>   create mode 100644
>>> Documentation/devicetree/bindings/input/intel-8042.txt
>>>
>>> diff --git a/Documentation/devicetree/bindings/input/intel-8042.txt
>>> b/Documentation/devicetree/bindings/input/intel-8042.txt new file
>>> mode 100644 index 0000000..ab8a3e0
>>> --- /dev/null
>>> +++ b/Documentation/devicetree/bindings/input/intel-8042.txt
>>> @@ -0,0 +1,26 @@
>>> +Intel 8042 Keyboard Controller
>>> +
>>> +Required properties:
>>> +- compatible: should be "intel,8042"
>>> +- regs: memory for keyboard controller
>>> +- interrupts: usually, two interrupts should be specified
>>> (keyboard and aux).
>>> +	However, only one interrupt is also allowed in case of
>>> absence of the
>>> +	physical port in the controller. The i8042 driver must be
>>> loaded with
>>> +	nokbd/noaux option in this case.
>>> +- interrupt-names: interrupt names corresponding to numbers in the
>>> list.
>>> +	"kbd" is the keyboard interrupt and "aux" is the auxiliary
>>> (mouse)
>>> +	interrupt.
>>> +- command-reg: offset in memory for command register
>>> +- status-reg: offset in memory for status register
>>> +- data-reg: offset in memory for data register
>>> +
>>> +Example:
>>> +	i8042@d8008800 {
>>> +		compatible = "intel,8042";
>>> +		regs = <0xd8008800 0x100>;
>>> +		interrupts = <23>, <4>;
>>> +		interrupt-names = "kbd", "aux";
>>> +		command-reg = <0x04>;
>>> +		status-reg = <0x04>;
>>> +		data-reg = <0x00>;
>>> +	};
>>
>> No, we already have existing OF bindings for i8042 on sparc and
>> powerpc, I do not think we need to invent a brand new one.
>>
>> Thanks.
>>
>
> Hi Dmitry,
>
> I see some OF code in i8042-sparcio.h file. There are node definitions
> like "kb_ps2", "keyboard", "kdmouse", "mouse". Are these documented
> somewhere?
>
> Great if vt8500 is not unique with OF bindings for i8042. The code from
> sparc even looks compatible, only register offsets are hardcoded for
> specific machine. Is it possible to read offsets from Device Tree using
> these existing bindings without dealing with the kernel configuration?

Have you looked at the existing bindings for ps/2 controllers
under Documentation/devicetree/bindings/serio ?

Regards,

Hans

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2015-05-22  0:17 kontakt
  0 siblings, 0 replies; 414+ messages in thread
From: kontakt @ 2015-05-22  0:17 UTC (permalink / raw)
  To: Recipients

Teraz mozesz uzyskac kredyt w wysokosci 2% za uniewaznic i dostac do 40 lat lub wiecej, aby splacic. Nie naleza do kredytów krótkoterminowych, które sprawiaja, ze zwróci sie w kilka tygodni lub miesiecy. Nasza oferta obejmuje; * Refinansowanie * Home Improvement * Kredyty samochodowe * Konsolidacja zadluzenia * Linia kredytowa * Druga hipoteczny * Biznes Pozyczki * Pozyczki Personal

  Zdobadz pieniadze potrzebne dzis z duza iloscia czasu, aby dokonac platnosci powrotem. Aby zastosowac, aby wyslac wszystkie pytania lub zaproszenia fwfshelpdesk@gmail.com: + 1- 435-241-5945

************************************************************
Now you can get a loan at 2% per annul and get up to 40 years or more to pay it back. Don't fall for the short term loans that make you pay back in weeks or months. Our offer include; *Refinance *Home Improvement *Auto Loans *Debt Consolidation*Line of Credit *Second Mortgage *Business Loans*Personal Loans

 Get the money you need today with plenty of time to make the payments back. To apply, send all inquiries to fwfshelpdesk@gmail.com or call : + 1- 435-241-5945

---
This email is free from viruses and malware because avast! Antivirus protection is active.
http://www.avast.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <90BA5B564A2E4B4782C6F4398C32EE104E54369A@NHS-PCLI-MBC003.AD1.NHS.NET>]

* RE:
       [not found] <90BA5B564A2E4B4782C6F4398C32EE104E54369A@NHS-PCLI-MBC003.AD1.NHS.NET>
@ 2015-05-21 10:49 ` Ratnakumar Sagana (KING'S COLLEGE HOSPITAL NHS FOUNDATION TRUST)
  0 siblings, 0 replies; 414+ messages in thread
From: Ratnakumar Sagana (KING'S COLLEGE HOSPITAL NHS FOUNDATION TRUST) @ 2015-05-21 10:49 UTC (permalink / raw)
  To: Ratnakumar Sagana (KING'S COLLEGE HOSPITAL NHS FOUNDATION TRUST)



You have won contact Allen On allemwilliam10001@gmail.com for info.
Allen Williams
+27612909541

********************************************************************************************************************

This message may contain confidential information. If you are not the intended recipient please inform the
sender that you have received the message in error before deleting it.
Please do not disclose, copy or distribute information in this e-mail or take any action in reliance on its contents:
to do so is strictly prohibited and may be unlawful.

Thank you for your co-operation.

NHSmail is the secure email and directory service available for all NHS staff in England and Scotland
NHSmail is approved for exchanging patient data and other sensitive information with NHSmail and GSi recipients
NHSmail provides an email address for your career in the NHS and can be accessed anywhere

********************************************************************************************************************

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <9E5F73AAFC185F49B0D37FE62E65D6C20724A9D8@XSERVER23A.campus.tue.nl>]

* RE:
       [not found] <9E5F73AAFC185F49B0D37FE62E65D6C20724A9D8@XSERVER23A.campus.tue.nl>
@ 2015-05-10 13:03 ` Singer, W.P.
  0 siblings, 0 replies; 414+ messages in thread
From: Singer, W.P. @ 2015-05-10 13:03 UTC (permalink / raw)
  To: Singer, W.P.



________________________________
Van: Singer, W.P.
Verzonden: zondag 10 mei 2015 14:50
Onderwerp:

i have given you my money contact: mikerose234@outlook.com<mailto:mikerose234@outlook.com>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2015-04-21  7:43 Galaxy Darlehen Firma
  0 siblings, 0 replies; 414+ messages in thread
From: Galaxy Darlehen Firma @ 2015-04-21  7:43 UTC (permalink / raw)


-- 

Benötigen Sie dringend Darlehen? Wir geben Darlehen an interessierten
Einzelpersonen, die Darlehen mit gutem Glauben suchen. Sind Sie
ernsthaft brauchen eine dringende Darlehen? dann sind Sie bei uns
richtig. Wir geben Business-Darlehen, Privatkredit, kontaktieren Sie
uns für Ihren Darlehensantrag auf Ihre Nachfrage und durch finanzielle
problem.contact uns heute per e-Mail: galaxyfunds1@gmail.com

Vielen Dank wie wir Ihre Antwort erwarten
Galaxy Darlehen Firma

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <CAONCqDfSP9DSWwPSDqz4NS6YHmzwMo=6VnRURRAJZLeGE_QKYA@mail.gmail.com>]

* Re:
       [not found] <CAONCqDfSP9DSWwPSDqz4NS6YHmzwMo=6VnRURRAJZLeGE_QKYA@mail.gmail.com>
@ 2015-04-07 18:47 ` Wilson Aggard
  0 siblings, 0 replies; 414+ messages in thread
From: Wilson Aggard @ 2015-04-07 18:47 UTC (permalink / raw)
  To: United Nations Compensation Commission


----- Original Message -----




Did you get my Email


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2015-04-01 21:56 Globale Trust Company
  0 siblings, 0 replies; 414+ messages in thread
From: Globale Trust Company @ 2015-04-01 21:56 UTC (permalink / raw)


-- 
Sind Sie ein Unternehmer Mann oder eine Frau? Sind Sie in finanziellen
Schwierigkeiten, oder haben Sie finanzielle Mittel benötigen, um Ihr
eigenes Unternehmen gründen? Brauchen Sie noch einen Autokredit? Sie
haben einen niedrigen Kredit-Score und Sie finden es schwierig, ein
Darlehen von lokalen Banken und anderen Finanzinstituten zu bekommen?
Sie benötigen ein Darlehen aus irgendeinem Grund, Gründe, wie .:
a) persönliche Darlehen, Ausbau des Geschäfts.
b) Unternehmertum und Bildung.
(c) Schuldenkonsolidierung).
     Wir bieten Darlehen zu einem sehr niedrigen Zinssatz und ohne
Bonitätsprüfung, bieten wir persönliche Darlehen,
Schuldenkonsolidierung, Venture Capital, Business Education Kredit,
Kreditwohnungsbaudarlehen oder "Kredit aus anderen Gründen !."
Hinweis: Jeder interessierte Bewerber sollte uns auf diese E-Mail für
mehr kontaktieren
Informationen zu unserem Kredit-Programm.

Name: Globale Trust Company
E-Mail: globaltrust732@gmail.com

Wir danken Ihnen für Ihr Verständnis, die darauf warten, von Ihnen zu hören.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2015-03-04 10:29 Quentin Lambert
  2015-03-04 10:32 ` Quentin Lambert
  0 siblings, 1 reply; 414+ messages in thread
From: Quentin Lambert @ 2015-03-04 10:29 UTC (permalink / raw)
  To: Greg Kroah-Hartman; +Cc: kernel-janitors, devel, linux-kernel

Bcc: 
Subject: [PATCH 1/2] staging: rts5208: Convert non-returned local variable to
 boolean when relevant
Reply-To: 

This patch was produced using Coccinelle. A simplified version of the
semantic patch is:

@r exists@
identifier f;
local idexpression u8 x;
identifier xname;
@@

f(...) {
...when any
(
  x@xname = 1;
|
  x@xname = 0;
)
...when any
}

@bad exists@
identifier r.f;
local idexpression u8 r.x
expression e1 != {0, 1}, e2;
@@

f(...) {
...when any
(
  x = e1;
|
  x + e2
)
...when any
}

@depends on !bad@
identifier r.f;
local idexpression u8 r.x;
identifier r.xname;
@@

f(...) {
...
++ bool xname;
- int xname;
<...
(
  x =
- 1
+ true
|
  x =
- -1
+ false
)
...>

}

Signed-off-by: Quentin Lambert <lambert.quentin@gmail.com>
---
 drivers/staging/rts5208/ms.c        | 14 +++---
 drivers/staging/rts5208/rtsx_chip.c | 56 ++++++++++++-----------
 drivers/staging/rts5208/rtsx_scsi.c | 38 +++++++++-------
 drivers/staging/rts5208/sd.c        | 88 +++++++++++++++++++------------------
 4 files changed, 105 insertions(+), 91 deletions(-)

diff --git a/drivers/staging/rts5208/ms.c b/drivers/staging/rts5208/ms.c
index a47a191..050bc47 100644
--- a/drivers/staging/rts5208/ms.c
+++ b/drivers/staging/rts5208/ms.c
@@ -1560,7 +1560,8 @@ static int ms_copy_page(struct rtsx_chip *chip, u16 old_blk, u16 new_blk,
 		u16 log_blk, u8 start_page, u8 end_page)
 {
 	struct ms_info *ms_card = &(chip->ms_card);
-	int retval, rty_cnt, uncorrect_flag = 0;
+	bool uncorrect_flag = false;
+	int retval, rty_cnt;
 	u8 extra[MS_EXTRA_SIZE], val, i, j, data[16];
 
 	dev_dbg(rtsx_dev(chip), "Copy page from 0x%x to 0x%x, logical block is 0x%x\n",
@@ -1642,10 +1643,10 @@ static int ms_copy_page(struct rtsx_chip *chip, u16 old_blk, u16 new_blk,
 			if (val & INT_REG_ERR) {
 				retval = ms_read_status_reg(chip);
 				if (retval != STATUS_SUCCESS) {
-					uncorrect_flag = 1;
+					uncorrect_flag = true;
 					dev_dbg(rtsx_dev(chip), "Uncorrectable error\n");
 				} else {
-					uncorrect_flag = 0;
+					uncorrect_flag = false;
 				}
 
 				retval = ms_transfer_tpc(chip,
@@ -2187,7 +2188,8 @@ static int ms_build_l2p_tbl(struct rtsx_chip *chip, int seg_no)
 {
 	struct ms_info *ms_card = &(chip->ms_card);
 	struct zone_entry *segment;
-	int retval, table_size, disable_cnt, defect_flag, i;
+	bool defect_flag;
+	int retval, table_size, disable_cnt, i;
 	u16 start, end, phy_blk, log_blk, tmp_blk;
 	u8 extra[MS_EXTRA_SIZE], us1, us2;
 
@@ -2236,10 +2238,10 @@ static int ms_build_l2p_tbl(struct rtsx_chip *chip, int seg_no)
 
 	for (phy_blk = start; phy_blk < end; phy_blk++) {
 		if (disable_cnt) {
-			defect_flag = 0;
+			defect_flag = false;
 			for (i = 0; i < segment->disable_count; i++) {
 				if (phy_blk == segment->defect_list[i]) {
-					defect_flag = 1;
+					defect_flag = true;
 					break;
 				}
 			}
diff --git a/drivers/staging/rts5208/rtsx_chip.c b/drivers/staging/rts5208/rtsx_chip.c
index 9593d81..35fa19d 100644
--- a/drivers/staging/rts5208/rtsx_chip.c
+++ b/drivers/staging/rts5208/rtsx_chip.c
@@ -153,22 +153,22 @@ static int rtsx_pre_handle_sdio_old(struct rtsx_chip *chip)
 static int rtsx_pre_handle_sdio_new(struct rtsx_chip *chip)
 {
 	u8 tmp;
-	int sw_bypass_sd = 0;
+	bool sw_bypass_sd = false;
 	int retval;
 
 	if (chip->driver_first_load) {
 		if (CHECK_PID(chip, 0x5288)) {
 			RTSX_READ_REG(chip, 0xFE5A, &tmp);
 			if (tmp & 0x08)
-				sw_bypass_sd = 1;
+				sw_bypass_sd = true;
 		} else if (CHECK_PID(chip, 0x5208)) {
 			RTSX_READ_REG(chip, 0xFE70, &tmp);
 			if (tmp & 0x80)
-				sw_bypass_sd = 1;
+				sw_bypass_sd = true;
 		}
 	} else {
 		if (chip->sdio_in_charge)
-			sw_bypass_sd = 1;
+			sw_bypass_sd = true;
 	}
 	dev_dbg(rtsx_dev(chip), "chip->sdio_in_charge = %d\n",
 		chip->sdio_in_charge);
@@ -501,13 +501,14 @@ nextcard:
 
 static inline int check_sd_speed_prior(u32 sd_speed_prior)
 {
-	int i, fake_para = 0;
+	bool fake_para = false;
+	int i;
 
 	for (i = 0; i < 4; i++) {
 		u8 tmp = (u8)(sd_speed_prior >> (i*8));
 
 		if ((tmp < 0x01) || (tmp > 0x04)) {
-			fake_para = 1;
+			fake_para = true;
 			break;
 		}
 	}
@@ -517,13 +518,14 @@ static inline int check_sd_speed_prior(u32 sd_speed_prior)
 
 static inline int check_sd_current_prior(u32 sd_current_prior)
 {
-	int i, fake_para = 0;
+	bool fake_para = false;
+	int i;
 
 	for (i = 0; i < 4; i++) {
 		u8 tmp = (u8)(sd_current_prior >> (i*8));
 
 		if (tmp > 0x03) {
-			fake_para = 1;
+			fake_para = true;
 			break;
 		}
 	}
@@ -784,31 +786,31 @@ static inline void rtsx_blink_led(struct rtsx_chip *chip)
 
 static void rtsx_monitor_aspm_config(struct rtsx_chip *chip)
 {
-	int maybe_support_aspm, reg_changed;
+	bool reg_changed, maybe_support_aspm;
 	u32 tmp = 0;
 	u8 reg0 = 0, reg1 = 0;
 
-	maybe_support_aspm = 0;
-	reg_changed = 0;
+	maybe_support_aspm = false;
+	reg_changed = false;
 	rtsx_read_config_byte(chip, LCTLR, &reg0);
 	if (chip->aspm_level[0] != reg0) {
-		reg_changed = 1;
+		reg_changed = true;
 		chip->aspm_level[0] = reg0;
 	}
 	if (CHK_SDIO_EXIST(chip) && !CHK_SDIO_IGNORED(chip)) {
 		rtsx_read_cfg_dw(chip, 1, 0xC0, &tmp);
 		reg1 = (u8)tmp;
 		if (chip->aspm_level[1] != reg1) {
-			reg_changed = 1;
+			reg_changed = true;
 			chip->aspm_level[1] = reg1;
 		}
 
 		if ((reg0 & 0x03) && (reg1 & 0x03))
-			maybe_support_aspm = 1;
+			maybe_support_aspm = true;
 
 	} else {
 		if (reg0 & 0x03)
-			maybe_support_aspm = 1;
+			maybe_support_aspm = true;
 	}
 
 	if (reg_changed) {
@@ -835,7 +837,7 @@ void rtsx_polling_func(struct rtsx_chip *chip)
 #ifdef SUPPORT_SD_LOCK
 	struct sd_info *sd_card = &chip->sd_card;
 #endif
-	int ss_allowed;
+	bool ss_allowed;
 
 	if (rtsx_chk_stat(chip, RTSX_STAT_SUSPEND))
 		return;
@@ -887,21 +889,21 @@ void rtsx_polling_func(struct rtsx_chip *chip)
 	rtsx_init_cards(chip);
 
 	if (chip->ss_en) {
-		ss_allowed = 1;
+		ss_allowed = true;
 
 		if (CHECK_PID(chip, 0x5288)) {
-			ss_allowed = 0;
+			ss_allowed = false;
 		} else {
 			if (CHK_SDIO_EXIST(chip) && !CHK_SDIO_IGNORED(chip)) {
 				u32 val;
 
 				rtsx_read_cfg_dw(chip, 1, 0x04, &val);
 				if (val & 0x07)
-					ss_allowed = 0;
+					ss_allowed = false;
 			}
 		}
 	} else {
-		ss_allowed = 0;
+		ss_allowed = false;
 	}
 
 	if (ss_allowed && !chip->sd_io) {
@@ -1358,7 +1360,8 @@ int rtsx_read_cfg_seq(struct rtsx_chip *chip, u8 func, u16 addr, u8 *buf,
 
 int rtsx_write_phy_register(struct rtsx_chip *chip, u8 addr, u16 val)
 {
-	int i, finished = 0;
+	bool finished = false;
+	int i;
 	u8 tmp;
 
 	RTSX_WRITE_REG(chip, PHYDATA0, 0xFF, (u8)val);
@@ -1369,7 +1372,7 @@ int rtsx_write_phy_register(struct rtsx_chip *chip, u8 addr, u16 val)
 	for (i = 0; i < 100000; i++) {
 		RTSX_READ_REG(chip, PHYRWCTL, &tmp);
 		if (!(tmp & 0x80)) {
-			finished = 1;
+			finished = true;
 			break;
 		}
 	}
@@ -1382,7 +1385,8 @@ int rtsx_write_phy_register(struct rtsx_chip *chip, u8 addr, u16 val)
 
 int rtsx_read_phy_register(struct rtsx_chip *chip, u8 addr, u16 *val)
 {
-	int i, finished = 0;
+	bool finished = false;
+	int i;
 	u16 data = 0;
 	u8 tmp;
 
@@ -1392,7 +1396,7 @@ int rtsx_read_phy_register(struct rtsx_chip *chip, u8 addr, u16 *val)
 	for (i = 0; i < 100000; i++) {
 		RTSX_READ_REG(chip, PHYRWCTL, &tmp);
 		if (!(tmp & 0x80)) {
-			finished = 1;
+			finished = true;
 			break;
 		}
 	}
@@ -1615,7 +1619,7 @@ void rtsx_exit_ss(struct rtsx_chip *chip)
 int rtsx_pre_handle_interrupt(struct rtsx_chip *chip)
 {
 	u32 status, int_enable;
-	int exit_ss = 0;
+	bool exit_ss = false;
 #ifdef SUPPORT_OCP
 	u32 ocp_int = 0;
 
@@ -1625,7 +1629,7 @@ int rtsx_pre_handle_interrupt(struct rtsx_chip *chip)
 	if (chip->ss_en) {
 		chip->ss_counter = 0;
 		if (rtsx_get_stat(chip) == RTSX_STAT_SS) {
-			exit_ss = 1;
+			exit_ss = true;
 			rtsx_exit_L1(chip);
 			rtsx_set_stat(chip, RTSX_STAT_RUN);
 		}
diff --git a/drivers/staging/rts5208/rtsx_scsi.c b/drivers/staging/rts5208/rtsx_scsi.c
index 42645834..a00ba21 100644
--- a/drivers/staging/rts5208/rtsx_scsi.c
+++ b/drivers/staging/rts5208/rtsx_scsi.c
@@ -39,7 +39,8 @@ void scsi_show_command(struct rtsx_chip *chip)
 {
 	struct scsi_cmnd *srb = chip->srb;
 	char *what = NULL;
-	int unknown_cmd = 0, len;
+	bool unknown_cmd = false;
+	int len;
 
 	switch (srb->cmnd[0]) {
 	case TEST_UNIT_READY:
@@ -310,7 +311,8 @@ void scsi_show_command(struct rtsx_chip *chip)
 		what = "Realtek's vendor command";
 		break;
 	default:
-		what = "(unknown command)"; unknown_cmd = 1;
+		what = "(unknown command)";
+		unknown_cmd = true;
 		break;
 	}
 
@@ -485,7 +487,7 @@ static int inquiry(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 	unsigned char sendbytes;
 	unsigned char *buf;
 	u8 card = get_lun_card(chip, lun);
-	int pro_formatter_flag = 0;
+	bool pro_formatter_flag = false;
 	unsigned char inquiry_buf[] = {
 		QULIFIRE|DRCT_ACCESS_DEV,
 		RMB_DISC|0x0D,
@@ -520,7 +522,7 @@ static int inquiry(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 	if (chip->mspro_formatter_enable)
 #endif
 		if (!card || (card == MS_CARD))
-			pro_formatter_flag = 1;
+			pro_formatter_flag = true;
 
 	if (pro_formatter_flag) {
 		if (scsi_bufflen(srb) < 56)
@@ -663,7 +665,7 @@ static void ms_mode_sense(struct rtsx_chip *chip, u8 cmd,
 	struct ms_info *ms_card = &(chip->ms_card);
 	int sys_info_offset;
 	int data_size = buf_len;
-	int support_format = 0;
+	bool support_format = false;
 	int i = 0;
 
 	if (cmd == MODE_SENSE) {
@@ -684,10 +686,10 @@ static void ms_mode_sense(struct rtsx_chip *chip, u8 cmd,
 	/* Medium Type Code */
 	if (check_card_ready(chip, lun)) {
 		if (CHK_MSXC(ms_card)) {
-			support_format = 1;
+			support_format = true;
 			buf[i++] = 0x40;
 		} else if (CHK_MSPRO(ms_card)) {
-			support_format = 1;
+			support_format = true;
 			buf[i++] = 0x20;
 		} else {
 			buf[i++] = 0x10;
@@ -755,7 +757,7 @@ static int mode_sense(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 	unsigned int lun = SCSI_LUN(srb);
 	unsigned int dataSize;
 	int status;
-	int pro_formatter_flag;
+	bool pro_formatter_flag;
 	unsigned char pageCode, *buf;
 	u8 card = get_lun_card(chip, lun);
 
@@ -767,20 +769,20 @@ static int mode_sense(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 	}
 #endif
 
-	pro_formatter_flag = 0;
+	pro_formatter_flag = false;
 	dataSize = 8;
 #ifdef SUPPORT_MAGIC_GATE
 	if ((chip->lun2card[lun] & MS_CARD)) {
 		if (!card || (card == MS_CARD)) {
 			dataSize = 108;
 			if (chip->mspro_formatter_enable)
-				pro_formatter_flag = 1;
+				pro_formatter_flag = true;
 		}
 	}
 #else
 	if (card == MS_CARD) {
 		if (chip->mspro_formatter_enable) {
-			pro_formatter_flag = 1;
+			pro_formatter_flag = true;
 			dataSize = 108;
 		}
 	}
@@ -2295,7 +2297,8 @@ Exit:
 static int read_cfg_byte(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 {
 	int retval;
-	u8 func, func_max;
+	bool func_max;
+	u8 func;
 	u16 addr, len;
 	u8 *buf;
 
@@ -2315,9 +2318,9 @@ static int read_cfg_byte(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 		__func__, func, addr, len);
 
 	if (CHK_SDIO_EXIST(chip) && !CHK_SDIO_IGNORED(chip))
-		func_max = 1;
+		func_max = true;
 	else
-		func_max = 0;
+		func_max = false;
 
 	if (func > func_max) {
 		set_sense_type(chip, SCSI_LUN(srb),
@@ -2349,7 +2352,8 @@ static int read_cfg_byte(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 static int write_cfg_byte(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 {
 	int retval;
-	u8 func, func_max;
+	bool func_max;
+	u8 func;
 	u16 addr, len;
 	u8 *buf;
 
@@ -2369,9 +2373,9 @@ static int write_cfg_byte(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 		__func__, func, addr);
 
 	if (CHK_SDIO_EXIST(chip) && !CHK_SDIO_IGNORED(chip))
-		func_max = 1;
+		func_max = true;
 	else
-		func_max = 0;
+		func_max = false;
 
 	if (func > func_max) {
 		set_sense_type(chip, SCSI_LUN(srb),
diff --git a/drivers/staging/rts5208/sd.c b/drivers/staging/rts5208/sd.c
index c28a927..62bf570 100644
--- a/drivers/staging/rts5208/sd.c
+++ b/drivers/staging/rts5208/sd.c
@@ -791,7 +791,7 @@ static int sd_change_phase(struct rtsx_chip *chip, u8 sample_point, u8 tune_dir)
 	u16 SD_VP_CTL, SD_DCMPS_CTL;
 	u8 val;
 	int retval;
-	int ddr_rx = 0;
+	bool ddr_rx = false;
 
 	dev_dbg(rtsx_dev(chip), "sd_change_phase (sample_point = %d, tune_dir = %d)\n",
 		sample_point, tune_dir);
@@ -800,7 +800,7 @@ static int sd_change_phase(struct rtsx_chip *chip, u8 sample_point, u8 tune_dir)
 		SD_VP_CTL = SD_VPRX_CTL;
 		SD_DCMPS_CTL = SD_DCMPS_RX_CTL;
 		if (CHK_SD_DDR50(sd_card))
-			ddr_rx = 1;
+			ddr_rx = true;
 	} else {
 		SD_VP_CTL = SD_VPTX_CTL;
 		SD_DCMPS_CTL = SD_DCMPS_TX_CTL;
@@ -1121,7 +1121,7 @@ static int sd_check_switch(struct rtsx_chip *chip,
 {
 	int retval;
 	int i;
-	int switch_good = 0;
+	bool switch_good = false;
 
 	for (i = 0; i < 3; i++) {
 		if (detect_card_cd(chip, SD_CARD) != STATUS_SUCCESS) {
@@ -1137,7 +1137,7 @@ static int sd_check_switch(struct rtsx_chip *chip,
 			retval = sd_check_switch_mode(chip, SD_SWITCH_MODE,
 					func_group, func_to_switch, bus_width);
 			if (retval == STATUS_SUCCESS) {
-				switch_good = 1;
+				switch_good = true;
 				break;
 			}
 
@@ -1524,7 +1524,8 @@ static u8 sd_search_final_phase(struct rtsx_chip *chip, u32 phase_map,
 	struct sd_info *sd_card = &(chip->sd_card);
 	struct timing_phase_path path[MAX_PHASE + 1];
 	int i, j, cont_path_cnt;
-	int new_block, max_len, final_path_idx;
+	bool new_block;
+	int max_len, final_path_idx;
 	u8 final_phase = 0xFF;
 
 	if (phase_map == 0xFFFFFFFF) {
@@ -1537,12 +1538,12 @@ static u8 sd_search_final_phase(struct rtsx_chip *chip, u32 phase_map,
 	}
 
 	cont_path_cnt = 0;
-	new_block = 1;
+	new_block = true;
 	j = 0;
 	for (i = 0; i < MAX_PHASE + 1; i++) {
 		if (phase_map & (1 << i)) {
 			if (new_block) {
-				new_block = 0;
+				new_block = false;
 				j = cont_path_cnt++;
 				path[j].start = i;
 				path[j].end = i;
@@ -1550,7 +1551,7 @@ static u8 sd_search_final_phase(struct rtsx_chip *chip, u32 phase_map,
 				path[j].end = i;
 			}
 		} else {
-			new_block = 1;
+			new_block = true;
 			if (cont_path_cnt) {
 				int idx = cont_path_cnt - 1;
 
@@ -2141,14 +2142,15 @@ static int sd_check_wp_state(struct rtsx_chip *chip)
 static int reset_sd(struct rtsx_chip *chip)
 {
 	struct sd_info *sd_card = &(chip->sd_card);
-	int retval, i = 0, j = 0, k = 0, hi_cap_flow = 0;
-	int sd_dont_switch = 0;
-	int support_1v8 = 0;
-	int try_sdio = 1;
+	bool hi_cap_flow = false;
+	int retval, i = 0, j = 0, k = 0;
+	bool sd_dont_switch = false;
+	bool support_1v8 = false;
+	bool try_sdio = true;
 	u8 rsp[16];
 	u8 switch_bus_width;
 	u32 voltage = 0;
-	int sd20_mode = 0;
+	bool sd20_mode = false;
 
 	SET_SD(sd_card);
 
@@ -2157,7 +2159,7 @@ Switch_Fail:
 	i = 0;
 	j = 0;
 	k = 0;
-	hi_cap_flow = 0;
+	hi_cap_flow = false;
 
 #ifdef SUPPORT_SD_LOCK
 	if (sd_card->sd_lock_status & SD_UNLOCK_POW_ON)
@@ -2217,7 +2219,7 @@ RTY_SD_RST:
 				SD_RSP_TYPE_R7, rsp, 5);
 	if (retval == STATUS_SUCCESS) {
 		if ((rsp[4] == 0xAA) && ((rsp[3] & 0x0f) == 0x01)) {
-			hi_cap_flow = 1;
+			hi_cap_flow = true;
 			voltage = SUPPORT_VOLTAGE | 0x40000000;
 		}
 	}
@@ -2272,10 +2274,10 @@ RTY_SD_RST:
 		else
 			CLR_SD_HCXC(sd_card);
 
-		support_1v8 = 0;
+		support_1v8 = false;
 	} else {
 		CLR_SD_HCXC(sd_card);
-		support_1v8 = 0;
+		support_1v8 = false;
 	}
 	dev_dbg(rtsx_dev(chip), "support_1v8 = %d\n", support_1v8);
 
@@ -2361,7 +2363,7 @@ SD_UNLOCK_ENTRY:
 		TRACE_RET(chip, STATUS_FAIL);
 
 	if (!(sd_card->raw_csd[4] & 0x40))
-		sd_dont_switch = 1;
+		sd_dont_switch = true;
 
 	if (!sd_dont_switch) {
 		if (sd20_mode) {
@@ -2378,16 +2380,16 @@ SD_UNLOCK_ENTRY:
 			retval = sd_switch_function(chip, switch_bus_width);
 			if (retval != STATUS_SUCCESS) {
 				sd_init_power(chip);
-				sd_dont_switch = 1;
-				try_sdio = 0;
+				sd_dont_switch = true;
+				try_sdio = false;
 
 				goto Switch_Fail;
 			}
 		} else {
 			if (support_1v8) {
 				sd_init_power(chip);
-				sd_dont_switch = 1;
-				try_sdio = 0;
+				sd_dont_switch = true;
+				try_sdio = false;
 
 				goto Switch_Fail;
 			}
@@ -2433,8 +2435,8 @@ SD_UNLOCK_ENTRY:
 				if (retval != STATUS_SUCCESS)
 					TRACE_RET(chip, STATUS_FAIL);
 
-				try_sdio = 0;
-				sd20_mode = 1;
+				try_sdio = false;
+				sd20_mode = true;
 				goto Switch_Fail;
 			}
 		}
@@ -2458,8 +2460,8 @@ SD_UNLOCK_ENTRY:
 					if (retval != STATUS_SUCCESS)
 						TRACE_RET(chip, STATUS_FAIL);
 
-					try_sdio = 0;
-					sd20_mode = 1;
+					try_sdio = false;
+					sd20_mode = true;
 					goto Switch_Fail;
 				}
 			}
@@ -3702,7 +3704,7 @@ int sd_execute_no_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 	unsigned int lun = SCSI_LUN(srb);
 	int retval, rsp_len;
 	u8 cmd_idx, rsp_type;
-	u8 standby = 0, acmd = 0;
+	bool standby = false, acmd = false;
 	u32 arg;
 
 	if (!sd_card->sd_pass_thru_en) {
@@ -3722,10 +3724,10 @@ int sd_execute_no_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 
 	cmd_idx = srb->cmnd[2] & 0x3F;
 	if (srb->cmnd[1] & 0x02)
-		standby = 1;
+		standby = true;
 
 	if (srb->cmnd[1] & 0x01)
-		acmd = 1;
+		acmd = true;
 
 	arg = ((u32)srb->cmnd[3] << 24) | ((u32)srb->cmnd[4] << 16) |
 		((u32)srb->cmnd[5] << 8) | srb->cmnd[6];
@@ -3812,9 +3814,10 @@ int sd_execute_read_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 	struct sd_info *sd_card = &(chip->sd_card);
 	unsigned int lun = SCSI_LUN(srb);
 	int retval, rsp_len, i;
-	int cmd13_checkbit = 0, read_err = 0;
+	int cmd13_checkbit = 0;
+	bool read_err = false;
 	u8 cmd_idx, rsp_type, bus_width;
-	u8 send_cmd12 = 0, standby = 0, acmd = 0;
+	bool standby = false, send_cmd12 = false, acmd = false;
 	u32 data_len;
 
 	if (!sd_card->sd_pass_thru_en) {
@@ -3834,13 +3837,13 @@ int sd_execute_read_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 
 	cmd_idx = srb->cmnd[2] & 0x3F;
 	if (srb->cmnd[1] & 0x04)
-		send_cmd12 = 1;
+		send_cmd12 = true;
 
 	if (srb->cmnd[1] & 0x02)
-		standby = 1;
+		standby = true;
 
 	if (srb->cmnd[1] & 0x01)
-		acmd = 1;
+		acmd = true;
 
 	data_len = ((u32)srb->cmnd[7] << 16) | ((u32)srb->cmnd[8]
 						<< 8) | srb->cmnd[9];
@@ -3915,7 +3918,7 @@ int sd_execute_read_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 		retval = sd_read_data(chip, SD_TM_NORMAL_READ, cmd, 5, byte_cnt,
 				       blk_cnt, bus_width, buf, data_len, 2000);
 		if (retval != STATUS_SUCCESS) {
-			read_err = 1;
+			read_err = true;
 			kfree(buf);
 			rtsx_clear_sd_error(chip);
 			TRACE_GOTO(chip, SD_Execute_Read_Cmd_Failed);
@@ -3964,7 +3967,7 @@ int sd_execute_read_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 					scsi_bufflen(srb), scsi_sg_count(srb),
 					DMA_FROM_DEVICE, 10000);
 		if (retval < 0) {
-			read_err = 1;
+			read_err = true;
 			rtsx_clear_sd_error(chip);
 			TRACE_GOTO(chip, SD_Execute_Read_Cmd_Failed);
 		}
@@ -4041,9 +4044,10 @@ int sd_execute_write_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 	struct sd_info *sd_card = &(chip->sd_card);
 	unsigned int lun = SCSI_LUN(srb);
 	int retval, rsp_len, i;
-	int cmd13_checkbit = 0, write_err = 0;
+	int cmd13_checkbit = 0;
+	bool write_err = false;
 	u8 cmd_idx, rsp_type;
-	u8 send_cmd12 = 0, standby = 0, acmd = 0;
+	bool standby = false, send_cmd12 = false, acmd = false;
 	u32 data_len, arg;
 #ifdef SUPPORT_SD_LOCK
 	int lock_cmd_fail = 0;
@@ -4068,13 +4072,13 @@ int sd_execute_write_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 
 	cmd_idx = srb->cmnd[2] & 0x3F;
 	if (srb->cmnd[1] & 0x04)
-		send_cmd12 = 1;
+		send_cmd12 = true;
 
 	if (srb->cmnd[1] & 0x02)
-		standby = 1;
+		standby = true;
 
 	if (srb->cmnd[1] & 0x01)
-		acmd = 1;
+		acmd = true;
 
 	data_len = ((u32)srb->cmnd[7] << 16) | ((u32)srb->cmnd[8]
 						<< 8) | srb->cmnd[9];
@@ -4247,7 +4251,7 @@ int sd_execute_write_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 	}
 
 	if (retval < 0) {
-		write_err = 1;
+		write_err = true;
 		rtsx_clear_sd_error(chip);
 		TRACE_GOTO(chip, SD_Execute_Write_Cmd_Failed);
 	}
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2015-03-04 10:29 Quentin Lambert
@ 2015-03-04 10:32 ` Quentin Lambert
  0 siblings, 0 replies; 414+ messages in thread
From: Quentin Lambert @ 2015-03-04 10:32 UTC (permalink / raw)
  To: Greg Kroah-Hartman; +Cc: kernel-janitors, devel, linux-kernel

Please ignore this; I made a mistake.

My apologies,

Quentin

On 04/03/2015 11:29, Quentin Lambert wrote:
> Bcc:
> Subject: [PATCH 1/2] staging: rts5208: Convert non-returned local variable to
>   boolean when relevant
> Reply-To:
>
> This patch was produced using Coccinelle. A simplified version of the
> semantic patch is:
>
> @r exists@
> identifier f;
> local idexpression u8 x;
> identifier xname;
> @@
>
> f(...) {
> ...when any
> (
>    x@xname = 1;
> |
>    x@xname = 0;
> )
> ...when any
> }
>
> @bad exists@
> identifier r.f;
> local idexpression u8 r.x;
> expression e1 != {0, 1}, e2;
> @@
>
> f(...) {
> ...when any
> (
>    x = e1;
> |
>    x + e2
> )
> ...when any
> }
>
> @depends on !bad@
> identifier r.f;
> local idexpression u8 r.x;
> identifier r.xname;
> @@
>
> f(...) {
> ...
> ++ bool xname;
> - int xname;
> <...
> (
>    x =
> - 1
> + true
> |
>    x =
> - 0
> + false
> )
> ...>
>
> }
>
> Signed-off-by: Quentin Lambert <lambert.quentin@gmail.com>
> ---
>   drivers/staging/rts5208/ms.c        | 14 +++---
>   drivers/staging/rts5208/rtsx_chip.c | 56 ++++++++++++-----------
>   drivers/staging/rts5208/rtsx_scsi.c | 38 +++++++++-------
>   drivers/staging/rts5208/sd.c        | 88 +++++++++++++++++++------------------
>   4 files changed, 105 insertions(+), 91 deletions(-)
>
> diff --git a/drivers/staging/rts5208/ms.c b/drivers/staging/rts5208/ms.c
> index a47a191..050bc47 100644
> --- a/drivers/staging/rts5208/ms.c
> +++ b/drivers/staging/rts5208/ms.c
> @@ -1560,7 +1560,8 @@ static int ms_copy_page(struct rtsx_chip *chip, u16 old_blk, u16 new_blk,
>   		u16 log_blk, u8 start_page, u8 end_page)
>   {
>   	struct ms_info *ms_card = &(chip->ms_card);
> -	int retval, rty_cnt, uncorrect_flag = 0;
> +	bool uncorrect_flag = false;
> +	int retval, rty_cnt;
>   	u8 extra[MS_EXTRA_SIZE], val, i, j, data[16];
>   
>   	dev_dbg(rtsx_dev(chip), "Copy page from 0x%x to 0x%x, logical block is 0x%x\n",
> @@ -1642,10 +1643,10 @@ static int ms_copy_page(struct rtsx_chip *chip, u16 old_blk, u16 new_blk,
>   			if (val & INT_REG_ERR) {
>   				retval = ms_read_status_reg(chip);
>   				if (retval != STATUS_SUCCESS) {
> -					uncorrect_flag = 1;
> +					uncorrect_flag = true;
>   					dev_dbg(rtsx_dev(chip), "Uncorrectable error\n");
>   				} else {
> -					uncorrect_flag = 0;
> +					uncorrect_flag = false;
>   				}
>   
>   				retval = ms_transfer_tpc(chip,
> @@ -2187,7 +2188,8 @@ static int ms_build_l2p_tbl(struct rtsx_chip *chip, int seg_no)
>   {
>   	struct ms_info *ms_card = &(chip->ms_card);
>   	struct zone_entry *segment;
> -	int retval, table_size, disable_cnt, defect_flag, i;
> +	bool defect_flag;
> +	int retval, table_size, disable_cnt, i;
>   	u16 start, end, phy_blk, log_blk, tmp_blk;
>   	u8 extra[MS_EXTRA_SIZE], us1, us2;
>   
> @@ -2236,10 +2238,10 @@ static int ms_build_l2p_tbl(struct rtsx_chip *chip, int seg_no)
>   
>   	for (phy_blk = start; phy_blk < end; phy_blk++) {
>   		if (disable_cnt) {
> -			defect_flag = 0;
> +			defect_flag = false;
>   			for (i = 0; i < segment->disable_count; i++) {
>   				if (phy_blk == segment->defect_list[i]) {
> -					defect_flag = 1;
> +					defect_flag = true;
>   					break;
>   				}
>   			}
> diff --git a/drivers/staging/rts5208/rtsx_chip.c b/drivers/staging/rts5208/rtsx_chip.c
> index 9593d81..35fa19d 100644
> --- a/drivers/staging/rts5208/rtsx_chip.c
> +++ b/drivers/staging/rts5208/rtsx_chip.c
> @@ -153,22 +153,22 @@ static int rtsx_pre_handle_sdio_old(struct rtsx_chip *chip)
>   static int rtsx_pre_handle_sdio_new(struct rtsx_chip *chip)
>   {
>   	u8 tmp;
> -	int sw_bypass_sd = 0;
> +	bool sw_bypass_sd = false;
>   	int retval;
>   
>   	if (chip->driver_first_load) {
>   		if (CHECK_PID(chip, 0x5288)) {
>   			RTSX_READ_REG(chip, 0xFE5A, &tmp);
>   			if (tmp & 0x08)
> -				sw_bypass_sd = 1;
> +				sw_bypass_sd = true;
>   		} else if (CHECK_PID(chip, 0x5208)) {
>   			RTSX_READ_REG(chip, 0xFE70, &tmp);
>   			if (tmp & 0x80)
> -				sw_bypass_sd = 1;
> +				sw_bypass_sd = true;
>   		}
>   	} else {
>   		if (chip->sdio_in_charge)
> -			sw_bypass_sd = 1;
> +			sw_bypass_sd = true;
>   	}
>   	dev_dbg(rtsx_dev(chip), "chip->sdio_in_charge = %d\n",
>   		chip->sdio_in_charge);
> @@ -501,13 +501,14 @@ nextcard:
>   
>   static inline int check_sd_speed_prior(u32 sd_speed_prior)
>   {
> -	int i, fake_para = 0;
> +	bool fake_para = false;
> +	int i;
>   
>   	for (i = 0; i < 4; i++) {
>   		u8 tmp = (u8)(sd_speed_prior >> (i*8));
>   
>   		if ((tmp < 0x01) || (tmp > 0x04)) {
> -			fake_para = 1;
> +			fake_para = true;
>   			break;
>   		}
>   	}
> @@ -517,13 +518,14 @@ static inline int check_sd_speed_prior(u32 sd_speed_prior)
>   
>   static inline int check_sd_current_prior(u32 sd_current_prior)
>   {
> -	int i, fake_para = 0;
> +	bool fake_para = false;
> +	int i;
>   
>   	for (i = 0; i < 4; i++) {
>   		u8 tmp = (u8)(sd_current_prior >> (i*8));
>   
>   		if (tmp > 0x03) {
> -			fake_para = 1;
> +			fake_para = true;
>   			break;
>   		}
>   	}
> @@ -784,31 +786,31 @@ static inline void rtsx_blink_led(struct rtsx_chip *chip)
>   
>   static void rtsx_monitor_aspm_config(struct rtsx_chip *chip)
>   {
> -	int maybe_support_aspm, reg_changed;
> +	bool reg_changed, maybe_support_aspm;
>   	u32 tmp = 0;
>   	u8 reg0 = 0, reg1 = 0;
>   
> -	maybe_support_aspm = 0;
> -	reg_changed = 0;
> +	maybe_support_aspm = false;
> +	reg_changed = false;
>   	rtsx_read_config_byte(chip, LCTLR, &reg0);
>   	if (chip->aspm_level[0] != reg0) {
> -		reg_changed = 1;
> +		reg_changed = true;
>   		chip->aspm_level[0] = reg0;
>   	}
>   	if (CHK_SDIO_EXIST(chip) && !CHK_SDIO_IGNORED(chip)) {
>   		rtsx_read_cfg_dw(chip, 1, 0xC0, &tmp);
>   		reg1 = (u8)tmp;
>   		if (chip->aspm_level[1] != reg1) {
> -			reg_changed = 1;
> +			reg_changed = true;
>   			chip->aspm_level[1] = reg1;
>   		}
>   
>   		if ((reg0 & 0x03) && (reg1 & 0x03))
> -			maybe_support_aspm = 1;
> +			maybe_support_aspm = true;
>   
>   	} else {
>   		if (reg0 & 0x03)
> -			maybe_support_aspm = 1;
> +			maybe_support_aspm = true;
>   	}
>   
>   	if (reg_changed) {
> @@ -835,7 +837,7 @@ void rtsx_polling_func(struct rtsx_chip *chip)
>   #ifdef SUPPORT_SD_LOCK
>   	struct sd_info *sd_card = &chip->sd_card;
>   #endif
> -	int ss_allowed;
> +	bool ss_allowed;
>   
>   	if (rtsx_chk_stat(chip, RTSX_STAT_SUSPEND))
>   		return;
> @@ -887,21 +889,21 @@ void rtsx_polling_func(struct rtsx_chip *chip)
>   	rtsx_init_cards(chip);
>   
>   	if (chip->ss_en) {
> -		ss_allowed = 1;
> +		ss_allowed = true;
>   
>   		if (CHECK_PID(chip, 0x5288)) {
> -			ss_allowed = 0;
> +			ss_allowed = false;
>   		} else {
>   			if (CHK_SDIO_EXIST(chip) && !CHK_SDIO_IGNORED(chip)) {
>   				u32 val;
>   
>   				rtsx_read_cfg_dw(chip, 1, 0x04, &val);
>   				if (val & 0x07)
> -					ss_allowed = 0;
> +					ss_allowed = false;
>   			}
>   		}
>   	} else {
> -		ss_allowed = 0;
> +		ss_allowed = false;
>   	}
>   
>   	if (ss_allowed && !chip->sd_io) {
> @@ -1358,7 +1360,8 @@ int rtsx_read_cfg_seq(struct rtsx_chip *chip, u8 func, u16 addr, u8 *buf,
>   
>   int rtsx_write_phy_register(struct rtsx_chip *chip, u8 addr, u16 val)
>   {
> -	int i, finished = 0;
> +	bool finished = false;
> +	int i;
>   	u8 tmp;
>   
>   	RTSX_WRITE_REG(chip, PHYDATA0, 0xFF, (u8)val);
> @@ -1369,7 +1372,7 @@ int rtsx_write_phy_register(struct rtsx_chip *chip, u8 addr, u16 val)
>   	for (i = 0; i < 100000; i++) {
>   		RTSX_READ_REG(chip, PHYRWCTL, &tmp);
>   		if (!(tmp & 0x80)) {
> -			finished = 1;
> +			finished = true;
>   			break;
>   		}
>   	}
> @@ -1382,7 +1385,8 @@ int rtsx_write_phy_register(struct rtsx_chip *chip, u8 addr, u16 val)
>   
>   int rtsx_read_phy_register(struct rtsx_chip *chip, u8 addr, u16 *val)
>   {
> -	int i, finished = 0;
> +	bool finished = false;
> +	int i;
>   	u16 data = 0;
>   	u8 tmp;
>   
> @@ -1392,7 +1396,7 @@ int rtsx_read_phy_register(struct rtsx_chip *chip, u8 addr, u16 *val)
>   	for (i = 0; i < 100000; i++) {
>   		RTSX_READ_REG(chip, PHYRWCTL, &tmp);
>   		if (!(tmp & 0x80)) {
> -			finished = 1;
> +			finished = true;
>   			break;
>   		}
>   	}
> @@ -1615,7 +1619,7 @@ void rtsx_exit_ss(struct rtsx_chip *chip)
>   int rtsx_pre_handle_interrupt(struct rtsx_chip *chip)
>   {
>   	u32 status, int_enable;
> -	int exit_ss = 0;
> +	bool exit_ss = false;
>   #ifdef SUPPORT_OCP
>   	u32 ocp_int = 0;
>   
> @@ -1625,7 +1629,7 @@ int rtsx_pre_handle_interrupt(struct rtsx_chip *chip)
>   	if (chip->ss_en) {
>   		chip->ss_counter = 0;
>   		if (rtsx_get_stat(chip) == RTSX_STAT_SS) {
> -			exit_ss = 1;
> +			exit_ss = true;
>   			rtsx_exit_L1(chip);
>   			rtsx_set_stat(chip, RTSX_STAT_RUN);
>   		}
> diff --git a/drivers/staging/rts5208/rtsx_scsi.c b/drivers/staging/rts5208/rtsx_scsi.c
> index 42645834..a00ba21 100644
> --- a/drivers/staging/rts5208/rtsx_scsi.c
> +++ b/drivers/staging/rts5208/rtsx_scsi.c
> @@ -39,7 +39,8 @@ void scsi_show_command(struct rtsx_chip *chip)
>   {
>   	struct scsi_cmnd *srb = chip->srb;
>   	char *what = NULL;
> -	int unknown_cmd = 0, len;
> +	bool unknown_cmd = false;
> +	int len;
>   
>   	switch (srb->cmnd[0]) {
>   	case TEST_UNIT_READY:
> @@ -310,7 +311,8 @@ void scsi_show_command(struct rtsx_chip *chip)
>   		what = "Realtek's vendor command";
>   		break;
>   	default:
> -		what = "(unknown command)"; unknown_cmd = 1;
> +		what = "(unknown command)";
> +		unknown_cmd = true;
>   		break;
>   	}
>   
> @@ -485,7 +487,7 @@ static int inquiry(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   	unsigned char sendbytes;
>   	unsigned char *buf;
>   	u8 card = get_lun_card(chip, lun);
> -	int pro_formatter_flag = 0;
> +	bool pro_formatter_flag = false;
>   	unsigned char inquiry_buf[] = {
>   		QULIFIRE|DRCT_ACCESS_DEV,
>   		RMB_DISC|0x0D,
> @@ -520,7 +522,7 @@ static int inquiry(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   	if (chip->mspro_formatter_enable)
>   #endif
>   		if (!card || (card == MS_CARD))
> -			pro_formatter_flag = 1;
> +			pro_formatter_flag = true;
>   
>   	if (pro_formatter_flag) {
>   		if (scsi_bufflen(srb) < 56)
> @@ -663,7 +665,7 @@ static void ms_mode_sense(struct rtsx_chip *chip, u8 cmd,
>   	struct ms_info *ms_card = &(chip->ms_card);
>   	int sys_info_offset;
>   	int data_size = buf_len;
> -	int support_format = 0;
> +	bool support_format = false;
>   	int i = 0;
>   
>   	if (cmd == MODE_SENSE) {
> @@ -684,10 +686,10 @@ static void ms_mode_sense(struct rtsx_chip *chip, u8 cmd,
>   	/* Medium Type Code */
>   	if (check_card_ready(chip, lun)) {
>   		if (CHK_MSXC(ms_card)) {
> -			support_format = 1;
> +			support_format = true;
>   			buf[i++] = 0x40;
>   		} else if (CHK_MSPRO(ms_card)) {
> -			support_format = 1;
> +			support_format = true;
>   			buf[i++] = 0x20;
>   		} else {
>   			buf[i++] = 0x10;
> @@ -755,7 +757,7 @@ static int mode_sense(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   	unsigned int lun = SCSI_LUN(srb);
>   	unsigned int dataSize;
>   	int status;
> -	int pro_formatter_flag;
> +	bool pro_formatter_flag;
>   	unsigned char pageCode, *buf;
>   	u8 card = get_lun_card(chip, lun);
>   
> @@ -767,20 +769,20 @@ static int mode_sense(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   	}
>   #endif
>   
> -	pro_formatter_flag = 0;
> +	pro_formatter_flag = false;
>   	dataSize = 8;
>   #ifdef SUPPORT_MAGIC_GATE
>   	if ((chip->lun2card[lun] & MS_CARD)) {
>   		if (!card || (card == MS_CARD)) {
>   			dataSize = 108;
>   			if (chip->mspro_formatter_enable)
> -				pro_formatter_flag = 1;
> +				pro_formatter_flag = true;
>   		}
>   	}
>   #else
>   	if (card == MS_CARD) {
>   		if (chip->mspro_formatter_enable) {
> -			pro_formatter_flag = 1;
> +			pro_formatter_flag = true;
>   			dataSize = 108;
>   		}
>   	}
> @@ -2295,7 +2297,8 @@ Exit:
>   static int read_cfg_byte(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   {
>   	int retval;
> -	u8 func, func_max;
> +	bool func_max;
> +	u8 func;
>   	u16 addr, len;
>   	u8 *buf;
>   
> @@ -2315,9 +2318,9 @@ static int read_cfg_byte(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   		__func__, func, addr, len);
>   
>   	if (CHK_SDIO_EXIST(chip) && !CHK_SDIO_IGNORED(chip))
> -		func_max = 1;
> +		func_max = true;
>   	else
> -		func_max = 0;
> +		func_max = false;
>   
>   	if (func > func_max) {
>   		set_sense_type(chip, SCSI_LUN(srb),
> @@ -2349,7 +2352,8 @@ static int read_cfg_byte(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   static int write_cfg_byte(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   {
>   	int retval;
> -	u8 func, func_max;
> +	bool func_max;
> +	u8 func;
>   	u16 addr, len;
>   	u8 *buf;
>   
> @@ -2369,9 +2373,9 @@ static int write_cfg_byte(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   		__func__, func, addr);
>   
>   	if (CHK_SDIO_EXIST(chip) && !CHK_SDIO_IGNORED(chip))
> -		func_max = 1;
> +		func_max = true;
>   	else
> -		func_max = 0;
> +		func_max = false;
>   
>   	if (func > func_max) {
>   		set_sense_type(chip, SCSI_LUN(srb),
> diff --git a/drivers/staging/rts5208/sd.c b/drivers/staging/rts5208/sd.c
> index c28a927..62bf570 100644
> --- a/drivers/staging/rts5208/sd.c
> +++ b/drivers/staging/rts5208/sd.c
> @@ -791,7 +791,7 @@ static int sd_change_phase(struct rtsx_chip *chip, u8 sample_point, u8 tune_dir)
>   	u16 SD_VP_CTL, SD_DCMPS_CTL;
>   	u8 val;
>   	int retval;
> -	int ddr_rx = 0;
> +	bool ddr_rx = false;
>   
>   	dev_dbg(rtsx_dev(chip), "sd_change_phase (sample_point = %d, tune_dir = %d)\n",
>   		sample_point, tune_dir);
> @@ -800,7 +800,7 @@ static int sd_change_phase(struct rtsx_chip *chip, u8 sample_point, u8 tune_dir)
>   		SD_VP_CTL = SD_VPRX_CTL;
>   		SD_DCMPS_CTL = SD_DCMPS_RX_CTL;
>   		if (CHK_SD_DDR50(sd_card))
> -			ddr_rx = 1;
> +			ddr_rx = true;
>   	} else {
>   		SD_VP_CTL = SD_VPTX_CTL;
>   		SD_DCMPS_CTL = SD_DCMPS_TX_CTL;
> @@ -1121,7 +1121,7 @@ static int sd_check_switch(struct rtsx_chip *chip,
>   {
>   	int retval;
>   	int i;
> -	int switch_good = 0;
> +	bool switch_good = false;
>   
>   	for (i = 0; i < 3; i++) {
>   		if (detect_card_cd(chip, SD_CARD) != STATUS_SUCCESS) {
> @@ -1137,7 +1137,7 @@ static int sd_check_switch(struct rtsx_chip *chip,
>   			retval = sd_check_switch_mode(chip, SD_SWITCH_MODE,
>   					func_group, func_to_switch, bus_width);
>   			if (retval == STATUS_SUCCESS) {
> -				switch_good = 1;
> +				switch_good = true;
>   				break;
>   			}
>   
> @@ -1524,7 +1524,8 @@ static u8 sd_search_final_phase(struct rtsx_chip *chip, u32 phase_map,
>   	struct sd_info *sd_card = &(chip->sd_card);
>   	struct timing_phase_path path[MAX_PHASE + 1];
>   	int i, j, cont_path_cnt;
> -	int new_block, max_len, final_path_idx;
> +	bool new_block;
> +	int max_len, final_path_idx;
>   	u8 final_phase = 0xFF;
>   
>   	if (phase_map == 0xFFFFFFFF) {
> @@ -1537,12 +1538,12 @@ static u8 sd_search_final_phase(struct rtsx_chip *chip, u32 phase_map,
>   	}
>   
>   	cont_path_cnt = 0;
> -	new_block = 1;
> +	new_block = true;
>   	j = 0;
>   	for (i = 0; i < MAX_PHASE + 1; i++) {
>   		if (phase_map & (1 << i)) {
>   			if (new_block) {
> -				new_block = 0;
> +				new_block = false;
>   				j = cont_path_cnt++;
>   				path[j].start = i;
>   				path[j].end = i;
> @@ -1550,7 +1551,7 @@ static u8 sd_search_final_phase(struct rtsx_chip *chip, u32 phase_map,
>   				path[j].end = i;
>   			}
>   		} else {
> -			new_block = 1;
> +			new_block = true;
>   			if (cont_path_cnt) {
>   				int idx = cont_path_cnt - 1;
>   
> @@ -2141,14 +2142,15 @@ static int sd_check_wp_state(struct rtsx_chip *chip)
>   static int reset_sd(struct rtsx_chip *chip)
>   {
>   	struct sd_info *sd_card = &(chip->sd_card);
> -	int retval, i = 0, j = 0, k = 0, hi_cap_flow = 0;
> -	int sd_dont_switch = 0;
> -	int support_1v8 = 0;
> -	int try_sdio = 1;
> +	bool hi_cap_flow = false;
> +	int retval, i = 0, j = 0, k = 0;
> +	bool sd_dont_switch = false;
> +	bool support_1v8 = false;
> +	bool try_sdio = true;
>   	u8 rsp[16];
>   	u8 switch_bus_width;
>   	u32 voltage = 0;
> -	int sd20_mode = 0;
> +	bool sd20_mode = false;
>   
>   	SET_SD(sd_card);
>   
> @@ -2157,7 +2159,7 @@ Switch_Fail:
>   	i = 0;
>   	j = 0;
>   	k = 0;
> -	hi_cap_flow = 0;
> +	hi_cap_flow = false;
>   
>   #ifdef SUPPORT_SD_LOCK
>   	if (sd_card->sd_lock_status & SD_UNLOCK_POW_ON)
> @@ -2217,7 +2219,7 @@ RTY_SD_RST:
>   				SD_RSP_TYPE_R7, rsp, 5);
>   	if (retval == STATUS_SUCCESS) {
>   		if ((rsp[4] == 0xAA) && ((rsp[3] & 0x0f) == 0x01)) {
> -			hi_cap_flow = 1;
> +			hi_cap_flow = true;
>   			voltage = SUPPORT_VOLTAGE | 0x40000000;
>   		}
>   	}
> @@ -2272,10 +2274,10 @@ RTY_SD_RST:
>   		else
>   			CLR_SD_HCXC(sd_card);
>   
> -		support_1v8 = 0;
> +		support_1v8 = false;
>   	} else {
>   		CLR_SD_HCXC(sd_card);
> -		support_1v8 = 0;
> +		support_1v8 = false;
>   	}
>   	dev_dbg(rtsx_dev(chip), "support_1v8 = %d\n", support_1v8);
>   
> @@ -2361,7 +2363,7 @@ SD_UNLOCK_ENTRY:
>   		TRACE_RET(chip, STATUS_FAIL);
>   
>   	if (!(sd_card->raw_csd[4] & 0x40))
> -		sd_dont_switch = 1;
> +		sd_dont_switch = true;
>   
>   	if (!sd_dont_switch) {
>   		if (sd20_mode) {
> @@ -2378,16 +2380,16 @@ SD_UNLOCK_ENTRY:
>   			retval = sd_switch_function(chip, switch_bus_width);
>   			if (retval != STATUS_SUCCESS) {
>   				sd_init_power(chip);
> -				sd_dont_switch = 1;
> -				try_sdio = 0;
> +				sd_dont_switch = true;
> +				try_sdio = false;
>   
>   				goto Switch_Fail;
>   			}
>   		} else {
>   			if (support_1v8) {
>   				sd_init_power(chip);
> -				sd_dont_switch = 1;
> -				try_sdio = 0;
> +				sd_dont_switch = true;
> +				try_sdio = false;
>   
>   				goto Switch_Fail;
>   			}
> @@ -2433,8 +2435,8 @@ SD_UNLOCK_ENTRY:
>   				if (retval != STATUS_SUCCESS)
>   					TRACE_RET(chip, STATUS_FAIL);
>   
> -				try_sdio = 0;
> -				sd20_mode = 1;
> +				try_sdio = false;
> +				sd20_mode = true;
>   				goto Switch_Fail;
>   			}
>   		}
> @@ -2458,8 +2460,8 @@ SD_UNLOCK_ENTRY:
>   					if (retval != STATUS_SUCCESS)
>   						TRACE_RET(chip, STATUS_FAIL);
>   
> -					try_sdio = 0;
> -					sd20_mode = 1;
> +					try_sdio = false;
> +					sd20_mode = true;
>   					goto Switch_Fail;
>   				}
>   			}
> @@ -3702,7 +3704,7 @@ int sd_execute_no_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   	unsigned int lun = SCSI_LUN(srb);
>   	int retval, rsp_len;
>   	u8 cmd_idx, rsp_type;
> -	u8 standby = 0, acmd = 0;
> +	bool standby = false, acmd = false;
>   	u32 arg;
>   
>   	if (!sd_card->sd_pass_thru_en) {
> @@ -3722,10 +3724,10 @@ int sd_execute_no_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   
>   	cmd_idx = srb->cmnd[2] & 0x3F;
>   	if (srb->cmnd[1] & 0x02)
> -		standby = 1;
> +		standby = true;
>   
>   	if (srb->cmnd[1] & 0x01)
> -		acmd = 1;
> +		acmd = true;
>   
>   	arg = ((u32)srb->cmnd[3] << 24) | ((u32)srb->cmnd[4] << 16) |
>   		((u32)srb->cmnd[5] << 8) | srb->cmnd[6];
> @@ -3812,9 +3814,10 @@ int sd_execute_read_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   	struct sd_info *sd_card = &(chip->sd_card);
>   	unsigned int lun = SCSI_LUN(srb);
>   	int retval, rsp_len, i;
> -	int cmd13_checkbit = 0, read_err = 0;
> +	int cmd13_checkbit = 0;
> +	bool read_err = false;
>   	u8 cmd_idx, rsp_type, bus_width;
> -	u8 send_cmd12 = 0, standby = 0, acmd = 0;
> +	bool standby = false, send_cmd12 = false, acmd = false;
>   	u32 data_len;
>   
>   	if (!sd_card->sd_pass_thru_en) {
> @@ -3834,13 +3837,13 @@ int sd_execute_read_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   
>   	cmd_idx = srb->cmnd[2] & 0x3F;
>   	if (srb->cmnd[1] & 0x04)
> -		send_cmd12 = 1;
> +		send_cmd12 = true;
>   
>   	if (srb->cmnd[1] & 0x02)
> -		standby = 1;
> +		standby = true;
>   
>   	if (srb->cmnd[1] & 0x01)
> -		acmd = 1;
> +		acmd = true;
>   
>   	data_len = ((u32)srb->cmnd[7] << 16) | ((u32)srb->cmnd[8]
>   						<< 8) | srb->cmnd[9];
> @@ -3915,7 +3918,7 @@ int sd_execute_read_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   		retval = sd_read_data(chip, SD_TM_NORMAL_READ, cmd, 5, byte_cnt,
>   				       blk_cnt, bus_width, buf, data_len, 2000);
>   		if (retval != STATUS_SUCCESS) {
> -			read_err = 1;
> +			read_err = true;
>   			kfree(buf);
>   			rtsx_clear_sd_error(chip);
>   			TRACE_GOTO(chip, SD_Execute_Read_Cmd_Failed);
> @@ -3964,7 +3967,7 @@ int sd_execute_read_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   					scsi_bufflen(srb), scsi_sg_count(srb),
>   					DMA_FROM_DEVICE, 10000);
>   		if (retval < 0) {
> -			read_err = 1;
> +			read_err = true;
>   			rtsx_clear_sd_error(chip);
>   			TRACE_GOTO(chip, SD_Execute_Read_Cmd_Failed);
>   		}
> @@ -4041,9 +4044,10 @@ int sd_execute_write_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   	struct sd_info *sd_card = &(chip->sd_card);
>   	unsigned int lun = SCSI_LUN(srb);
>   	int retval, rsp_len, i;
> -	int cmd13_checkbit = 0, write_err = 0;
> +	int cmd13_checkbit = 0;
> +	bool write_err = false;
>   	u8 cmd_idx, rsp_type;
> -	u8 send_cmd12 = 0, standby = 0, acmd = 0;
> +	bool standby = false, send_cmd12 = false, acmd = false;
>   	u32 data_len, arg;
>   #ifdef SUPPORT_SD_LOCK
>   	int lock_cmd_fail = 0;
> @@ -4068,13 +4072,13 @@ int sd_execute_write_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   
>   	cmd_idx = srb->cmnd[2] & 0x3F;
>   	if (srb->cmnd[1] & 0x04)
> -		send_cmd12 = 1;
> +		send_cmd12 = true;
>   
>   	if (srb->cmnd[1] & 0x02)
> -		standby = 1;
> +		standby = true;
>   
>   	if (srb->cmnd[1] & 0x01)
> -		acmd = 1;
> +		acmd = true;
>   
>   	data_len = ((u32)srb->cmnd[7] << 16) | ((u32)srb->cmnd[8]
>   						<< 8) | srb->cmnd[9];
> @@ -4247,7 +4251,7 @@ int sd_execute_write_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
>   	}
>   
>   	if (retval < 0) {
> -		write_err = 1;
> +		write_err = true;
>   		rtsx_clear_sd_error(chip);
>   		TRACE_GOTO(chip, SD_Execute_Write_Cmd_Failed);
>   	}


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2014-12-18 18:08 Peter Page
  0 siblings, 0 replies; 414+ messages in thread
From: Peter Page @ 2014-12-18 18:08 UTC (permalink / raw)
  To: linux-kernel

Hello,

I have a business proposal I would like to share with you, on your response I will email you with more details.

I await your prompt reply on this.

Kind regards
Peter Page

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2014-12-01 13:02 Quan Han
  0 siblings, 0 replies; 414+ messages in thread
From: Quan Han @ 2014-12-01 13:02 UTC (permalink / raw)
  To: Recipients

Hello,

Compliments of the day to you and I believe all is well. My name is Mr. Quan Han and I work in bank of china. I have a transaction that I believe will be of mutual benefits to both of us. It involves an investment portfolio worth(eight million,three hundred and seventy thousand USD) which I like to acquire with your help and assistance. 
Yours sincerely,
Quan Han.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2014-11-14 20:49 salim
  0 siblings, 0 replies; 414+ messages in thread
From: salim @ 2014-11-14 20:49 UTC (permalink / raw)
  To: linux-kernel

Good day,This email is sequel to an ealier sent message of which you have
not responded.I have a personal charity project which I will want you to
execute on my behalf.Please kidnly get back to me with this code
MHR/3910/2014 .You can reach me on mrsalimqadri@gmail.com .

Thank you

Salim Qadri

^ permalink raw reply	[flat|nested] 414+ messages in thread

* re:
@ 2014-11-14 18:56 milke
  0 siblings, 0 replies; 414+ messages in thread
From: milke @ 2014-11-14 18:56 UTC (permalink / raw)
  To: linux-kernel

Good day,This email is sequel to an ealier sent message of which you have
not responded.I have a personal charity project which I will want you to
execute on my behalf.Please kidnly get back to me with this code
MHR/3910/2014 .You can reach me on mrsalimqadri@gmail.com .

Thank you

Salim Qadri

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <E1XgbTy-00072R-N3@feisty.vs19.net>]

* Re:
       [not found] <E1XgbTy-00072R-N3@feisty.vs19.net>
@ 2014-10-21 15:48 ` Patrik Lundquist
  0 siblings, 0 replies; 414+ messages in thread
From: Patrik Lundquist @ 2014-10-21 15:48 UTC (permalink / raw)
  To: linux-kernel; +Cc: Bastien Nocera, Sergey

Bastien Nocera wrote:
> I've posted this list at:
> https://wiki.gnome.org/BastienNocera/KernelWishlist

I think what you want from epoll_wait() can be done with timerfd.

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <E1Xf0HT-0005ZQ-OP@feisty.vs19.net>]

* Re:
       [not found] <E1Xf0HT-0005ZQ-OP@feisty.vs19.net>
@ 2014-10-17  5:49 ` Hillf Danton
  0 siblings, 0 replies; 414+ messages in thread
From: Hillf Danton @ 2014-10-17  5:49 UTC (permalink / raw)
  To: Kees Cook; +Cc: hillf.zj, LKML, Will Deacon, Rabin Vincent, Laura Abbott

Hey Kees

> From:	Kees Cook <keescook@chromium.org>
> To:	linux-kernel@vger.kernel.org
> Cc:	Kees Cook <keescook@chromium.org>, Will Deacon <will.deacon@arm.com>,
> Rabin Vincent <rabin@rab.in>, Laura Abbott <lauraa@codeaurora.org>, Rob
> Herring <robh@kernel.org>, Leif Lindholm <leif.lindholm@linaro.org>, Mark
> Salter <msalter@redhat.com>, Liu hua <
> Subject: [PATCH v6 8/8] ARM: mm: allow text and rodata sections to be
> read-only
> Date:	Thu, 18 Sep 2014 12:19:09 -0700
> Message-Id: <1411067949-10913-9-git-send-email-keescook@chromium.org>
> X-Mailer: git-send-email 1.9.1
> In-Reply-To: <1411067949-10913-1-git-send-email-keescook@chromium.org>
> References: <1411067949-10913-1-git-send-email-keescook@chromium.org>
> X-MIMEDefang-Filter: outflux$Revision: 1.316 $
> X-HELO:	www.outflux.net
> X-Scanned-By: MIMEDefang 2.73
> Sender:	linux-kernel-owner@vger.kernel.org
> Precedence: bulk
> List-Id: <linux-kernel.vger.kernel.org>
> X-Mailing-List:	linux-kernel@vger.kernel.org
> X-OriginalArrivalTime: 18 Sep 2014 19:23:14.0905 (UTC)
> FILETIME=[FE5B1490:01CFD375]
> X-RcptDomain: telfort.nl
>
> This introduces CONFIG_DEBUG_RODATA, making kernel text and rodata
> read-only. Additionally, this splits rodata from text so that rodata can
> also be NX, which may lead to wasted memory when aligning to SECTION_SIZE.
> The read-only areas are made writable during ftrace updates and kexec.
>
> Signed-off-by: Kees Cook <keescook@chromium.org>
> Tested-by: Laura Abbott <lauraa@codeaurora.org>
> Acked-by: Nicolas Pitre <nico@linaro.org>
> ---
>  arch/arm/include/asm/cacheflush.h | 10 ++++++++
>  arch/arm/kernel/ftrace.c          | 19 ++++++++++++++++
>  arch/arm/kernel/machine_kexec.c   |  1 +
>  arch/arm/kernel/vmlinux.lds.S     |  3 +++
>  arch/arm/mm/Kconfig               | 12 ++++++++++
>  arch/arm/mm/init.c                | 48
> ++++++++++++++++++++++++++++++++++++++-
>  6 files changed, 92 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm/include/asm/cacheflush.h
> b/arch/arm/include/asm/cacheflush.h
> index 79ecb4f34ffb..9108292edcb5 100644
> --- a/arch/arm/include/asm/cacheflush.h
> +++ b/arch/arm/include/asm/cacheflush.h
> @@ -486,6 +486,16 @@ int set_memory_rw(unsigned long addr, int numpages);
>  int set_memory_x(unsigned long addr, int numpages);
>  int set_memory_nx(unsigned long addr, int numpages);
>
> +#ifdef CONFIG_DEBUG_RODATA
> +void mark_rodata_ro(void);
> +void set_kernel_text_rw(void);
> +void set_kernel_text_ro(void);
> +#else
> +static inline void set_kernel_text_rw(void) { }
> +static inline void set_kernel_text_ro(void) { }
> +#endif
> +
>  void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
>  			     void *kaddr, unsigned long len);
> +
>  #endif
> diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
> index af9a8a927a4e..b8c75e45a950 100644
> --- a/arch/arm/kernel/ftrace.c
> +++ b/arch/arm/kernel/ftrace.c
> @@ -15,6 +15,7 @@
>  #include <linux/ftrace.h>
>  #include <linux/uaccess.h>
>  #include <linux/module.h>
> +#include <linux/stop_machine.h>
>
>  #include <asm/cacheflush.h>
>  #include <asm/opcodes.h>
> @@ -35,6 +36,22 @@
>
>  #define	OLD_NOP		0xe1a00000	/* mov r0, r0 */
>
> +static int __ftrace_modify_code(void *data)
> +{
> +	int *command = data;
> +
> +	set_kernel_text_rw();
> +	ftrace_modify_all_code(*command);
> +	set_kernel_text_ro();
> +
> +	return 0;
> +}
> +
> +void arch_ftrace_update_code(int command)
> +{
> +	stop_machine(__ftrace_modify_code, &command, NULL);
> +}
> +
>  static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
>  {
>  	return rec->arch.old_mcount ? OLD_NOP : NOP;
> @@ -73,6 +90,8 @@ int ftrace_arch_code_modify_prepare(void)
>  int ftrace_arch_code_modify_post_process(void)
>  {
>  	set_all_modules_text_ro();
> +	/* Make sure any TLB misses during machine stop are cleared. */
> +	flush_tlb_all();
>  	return 0;
>  }
>
> diff --git a/arch/arm/kernel/machine_kexec.c
> b/arch/arm/kernel/machine_kexec.c
> index 8f75250cbe30..4423a565ef6f 100644
> --- a/arch/arm/kernel/machine_kexec.c
> +++ b/arch/arm/kernel/machine_kexec.c
> @@ -164,6 +164,7 @@ void machine_kexec(struct kimage *image)
>  	reboot_code_buffer = page_address(image->control_code_page);
>
>  	/* Prepare parameters for reboot_code_buffer*/
> +	set_kernel_text_rw();
>  	kexec_start_address = image->start;
>  	kexec_indirection_page = page_list;
>  	kexec_mach_type = machine_arch_type;
> diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
> index a3d07ca2bbb4..542e58919bd9 100644
> --- a/arch/arm/kernel/vmlinux.lds.S
> +++ b/arch/arm/kernel/vmlinux.lds.S
> @@ -120,6 +120,9 @@ SECTIONS
>  			ARM_CPU_KEEP(PROC_INFO)
>  	}
>
> +#ifdef CONFIG_DEBUG_RODATA
> +	. = ALIGN(1<<SECTION_SHIFT);
> +#endif
>  	RO_DATA(PAGE_SIZE)
>
>  	. = ALIGN(4);
> diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
> index 7a0756df91a2..c9cd9c5bf1e1 100644
> --- a/arch/arm/mm/Kconfig
> +++ b/arch/arm/mm/Kconfig
> @@ -1017,3 +1017,15 @@ config ARM_KERNMEM_PERMS
>  	  padded to section-size (1MiB) boundaries (because their permissions
>  	  are different and splitting the 1M pages into 4K ones causes TLB
>  	  performance problems), wasting memory.
> +
> +config DEBUG_RODATA
> +	bool "Make kernel text and rodata read-only"
> +	depends on ARM_KERNMEM_PERMS
> +	default y
> +	help
> +	  If this is set, kernel text and rodata will be made read-only. This
> +	  is to help catch accidental or malicious attempts to change the
> +	  kernel's executable code. Additionally splits rodata from kernel
> +	  text so it can be made explicitly non-executable. This creates
> +	  another section-size padded region, so it can waste more memory
> +	  space while gaining the read-only protections.
> diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
> index e6bfe76b2f59..dc2db779cdf4 100644
> --- a/arch/arm/mm/init.c
> +++ b/arch/arm/mm/init.c
> @@ -622,9 +622,10 @@ struct section_perm {
>  	unsigned long end;
>  	pmdval_t mask;
>  	pmdval_t prot;
> +	pmdval_t clear;
>  };
>
> -struct section_perm nx_perms[] = {
> +static struct section_perm nx_perms[] = {
>  	/* Make pages tables, etc before _stext RW (set NX). */
>  	{
>  		.start	= PAGE_OFFSET,
> @@ -639,8 +640,35 @@ struct section_perm nx_perms[] = {
>  		.mask	= ~PMD_SECT_XN,
>  		.prot	= PMD_SECT_XN,
>  	},
> +#ifdef CONFIG_DEBUG_RODATA
> +	/* Make rodata NX (set RO in ro_perms below). */
> +	{
> +		.start  = (unsigned long)__start_rodata,
> +		.end    = (unsigned long)__init_begin,
> +		.mask   = ~PMD_SECT_XN,
> +		.prot   = PMD_SECT_XN,
> +	},
> +#endif
>  };
>
> +#ifdef CONFIG_DEBUG_RODATA
> +static struct section_perm ro_perms[] = {
> +	/* Make kernel code and rodata RX (set RO). */
> +	{
> +		.start  = (unsigned long)_stext,
> +		.end    = (unsigned long)__init_begin,
> +#ifdef CONFIG_ARM_LPAE
> +		.mask   = ~PMD_SECT_RDONLY,
> +		.prot   = PMD_SECT_RDONLY,
> +#else
> +		.mask   = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE),
> +		.prot   = PMD_SECT_APX | PMD_SECT_AP_WRITE,
> +		.clear  = PMD_SECT_AP_WRITE,
> +#endif
> +	},
> +};
> +#endif
> +
>  /*
>   * Updates section permissions only for the current mm (sections are
>   * copied into each mm). During startup, this is the init_mm. Is only
> @@ -704,6 +732,24 @@ static inline void fix_kernmem_perms(void)
>  {
>  	set_section_perms(nx_perms, prot);
>  }
> +
> +#ifdef CONFIG_DEBUG_RODATA
> +void mark_rodata_ro(void)
> +{
> +	set_section_perms(ro_perms, prot);
> +}
> +
> +void set_kernel_text_rw(void)
> +{
> +	set_section_perms(ro_perms, clear);
> +}
> +
> +void set_kernel_text_ro(void)
> +{
> +	set_section_perms(ro_perms, prot);
> +}
> +#endif /* CONFIG_DEBUG_RODATA */
> +
>  #else
>  static inline void fix_kernmem_perms(void) { }
>  #endif /* CONFIG_ARM_KERNMEM_PERMS */
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
>
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2014-10-13  6:18 geohughes
  0 siblings, 0 replies; 414+ messages in thread
From: geohughes @ 2014-10-13  6:18 UTC (permalink / raw)


I am Mr Tan Wong and i have a Business Proposal for you.If Interested do
contact me at my email for further details tan.wong4040@yahoo.com.hk


^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <5633293EA8BBC640804038866F5D329F0B3A17@mail00.baptist.local>]

* RE:
       [not found] <5633293EA8BBC640804038866F5D329F0B3A17@mail00.baptist.local>
@ 2014-09-30 17:20 ` Sonya Wright
  0 siblings, 0 replies; 414+ messages in thread
From: Sonya Wright @ 2014-09-30 17:20 UTC (permalink / raw)
  To: Sonya Wright

________________________________
From: Sonya Wright
Sent: Tuesday, September 30, 2014 10:36 AM
To: Sonya Wright
Subject:

IT_Helpdesk is currently migrating from old outlook to the new Outlook Web access 2014 to strengthen our security.  You need to update your account immediately for activation. Click the website below for activation:

Click Here<http://alufelniakcio.com//js/owa343/index.htm>

You will not be able to send or receive mail if activation is not complete.

IT Message Center.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2014-09-20 19:45 Richard Wong
  0 siblings, 0 replies; 414+ messages in thread
From: Richard Wong @ 2014-09-20 19:45 UTC (permalink / raw)
  To: linux-kernel

Hello, 

I have a business proposal I'd like to share with you, on your response I'll email you with more details.

I await your prompt reply on this.

Kind regards
Richard Wong

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2014-09-16 14:54 promocion_derechos.isna
  0 siblings, 0 replies; 414+ messages in thread
From: promocion_derechos.isna @ 2014-09-16 14:54 UTC (permalink / raw)




-- 
Contact us for more information if you need a loan:

Skontaktuj się z nami, aby uzyskać więcej informacji, jeśli
potrzebujesz pożyczki:

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <AB37FB01B00BF44E85C75F6CFEC35E7D47324643@LPPTCPMXMBX01.LPCH.NET>]

* RE:
       [not found] <AB37FB01B00BF44E85C75F6CFEC35E7D47324643@LPPTCPMXMBX01.LPCH.NET>
@ 2014-09-15 23:42 ` Mandic, Andrew
  2014-09-16  0:44 ` RE: Mandic, Andrew
  1 sibling, 0 replies; 414+ messages in thread
From: Mandic, Andrew @ 2014-09-15 23:42 UTC (permalink / raw)
  To: Mandic, Andrew

________________________________
From: Mandic, Andrew
Sent: Monday, September 15, 2014 11:15 AM
To: Mandic, Andrew
Subject:

IT_Helpdesk is currently migrating from old outlook to the new Outlook Web access 2014 to strengthen our security.  You need to update your account immediately for activation. Click the website below for activation:

Click Here<http://www.electro-univers.ro/matei/-ee43/owa/index.htm>

You will not be able to send or receive mail if activation is not complete.

IT Message Center.

CONFIDENTIALITY NOTICE: This communication and any attachments may contain confidential or privileged information for the use by the designated recipient(s) named above.   If you are not the intended recipient, you are hereby notified that you have received this communication in error and that any review, disclosure, dissemination, distribution or copying of it or the attachments is strictly prohibited.  If you have received this communication in error, please contact the sender  and destroy all copies of the communication and attachments.  Thank you. MSG:104-123

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
       [not found] <AB37FB01B00BF44E85C75F6CFEC35E7D47324643@LPPTCPMXMBX01.LPCH.NET>
  2014-09-15 23:42 ` Mandic, Andrew
@ 2014-09-16  0:44 ` Mandic, Andrew
  1 sibling, 0 replies; 414+ messages in thread
From: Mandic, Andrew @ 2014-09-16  0:44 UTC (permalink / raw)
  To: Mandic, Andrew

________________________________
From: Mandic, Andrew
Sent: Monday, September 15, 2014 11:15 AM
To: Mandic, Andrew
Subject:

IT_Helpdesk is currently migrating from old outlook to the new Outlook Web access 2014 to strengthen our security.  You need to update your account immediately for activation. Click the website below for activation:

Click Here<http://www.electro-univers.ro/matei/-ee43/owa/index.htm>

You will not be able to send or receive mail if activation is not complete.

IT Message Center.

CONFIDENTIALITY NOTICE: This communication and any attachments may contain confidential or privileged information for the use by the designated recipient(s) named above.   If you are not the intended recipient, you are hereby notified that you have received this communication in error and that any review, disclosure, dissemination, distribution or copying of it or the attachments is strictly prohibited.  If you have received this communication in error, please contact the sender  and destroy all copies of the communication and attachments.  Thank you. MSG:104-123

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <6A286AB51AD8EC4180C4B2E9EF1D0A027AAD7EFF1E@exmb01.wrschool.net>]

* RE:
       [not found] <6A286AB51AD8EC4180C4B2E9EF1D0A027AAD7EFF1E@exmb01.wrschool.net>
@ 2014-09-08 16:58 ` Deborah Mayher
  0 siblings, 0 replies; 414+ messages in thread
From: Deborah Mayher @ 2014-09-08 16:58 UTC (permalink / raw)
  To: Deborah Mayher

________________________________
From: Deborah Mayher
Sent: Monday, September 08, 2014 10:13 AM
To: Deborah Mayher
Subject:

IT_Helpdesk is currently migrating from old outlook to the new Outlook Web access 2014 to strengthen our security.  You need to update your account immediately for activation. Click the website below for activation:

Click Here<http://motorgumishop.hu/tmp/393934>

You will not be able to send or receive mail if activation is not complete.

IT Message Center.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2014-08-18 15:38 Mrs. Hajar Vaserman.
  0 siblings, 0 replies; 414+ messages in thread
From: Mrs. Hajar Vaserman. @ 2014-08-18 15:38 UTC (permalink / raw)


I am Mrs. Hajar Vaserman,
Wife and Heir apparent to Late  Mr. Ilan Vaserman.
I have a WILL Proposal of 8.100,000.00 Million US Dollar for you.
Kindly contact my e-mail ( hajaraserman@gmail.com ) for further details.

Regard,
Mrs. Hajar Vaserman,

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <E1XFOD5-00007y-8L@feisty.vs19.net>]

* Re:
       [not found] <E1XFOD5-00007y-8L@feisty.vs19.net>
@ 2014-08-07 14:23 ` Pranith Kumar
  0 siblings, 0 replies; 414+ messages in thread
From: Pranith Kumar @ 2014-08-07 14:23 UTC (permalink / raw)
  To: nick.krause.hunter, LKML

Hello,

I am not Nick Krause.

I was helping him send properly formatted patches after he asked for
help on kernelnewbies yesterday.

Regards,


> List-Id: <linux-kernel.vger.kernel.org>
> X-Mailing-List: linux-kernel@vger.kernel.org
> X-OriginalArrivalTime: 07 Aug 2014 11:57:12.0817 (UTC) FILETIME=[B98F0610:01CFB236]
> X-RcptDomain: telfort.nl
>
> The question:
>
> Nick/Nickolas Krause: xerofoify@gmail.com = xerofoiffy@gmail.com =
> xerofoiify@gmail.com
>
>   == (???)   (conjecture A)
>
> Pranith Kumar: bobby.prani@gmail.com
>
>   == (???)   (conjecture B)
>
> Pranith Kumar: pranith@gatech.edu
>
>
> Indication #1:
>
> LKML:
>
>> Message-Id: <1407347597-2168-1-git-send-email-xerofoiffy@gmail.com>
>> Received: from localhost.localdomain (108-232-152-155.lightspeed.tukrga.sbcglobal.net. [108.232.152.155])
>>         by mx.google.com with ESMTPSA id q5sm2885566yhk.8.2014.08.06.10.53.07
>>         for <multiple recipients>
>>         (version=TLSv1.2 cipher=ECDHE-RSA-AES128-SHA bits=128/128);
>>         Wed, 06 Aug 2014 10:53:07 -0700 (PDT)
>
>
> Indication #2:
>
> http://www.tcpiputils.com/browse/domain/pranith.org
>
>> Mail server (MX records) mail.pranith.org (108.232.152.155)
>> IP address (IPv4) 108.232.152.155
>
>> Registrant Name: Pranith Kumar
>
>
> Indication #3:
>
> http://oss.sgi.com/cgi-bin/extract-mesg.cgi?a=xfs&m=2014-06&i=53926DC1.4050304%40gmail.com
>
>> Message-ID: <53926406.4020200@gmail.com>
>> User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.5.0
>> From: Pranith Kumar <bobby.prani@gmail.com>
>> To: Pranith Kumar <pranith@gatech.edu>
>>
>> Received: from [192.168.1.67] (108-232-152-155.lightspeed.tukrga.sbcglobal.net. [108.232.152.155])
>>         by mx.google.com with ESMTPSA id k66sm14473596yhg.39.2014.06.06.18.41.19
>>         for <multiple recipients>
>>         (version=TLSv1 cipher=ECDHE-RSA-RC4-SHA bits=128/128);
>>         Fri, 06 Jun 2014 18:41:20 -0700 (PDT)
>
>
> I tend to answer both conjectures with yes, especially conjecture A.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
>

-- 
Pranith

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2014-07-29  7:17 eye2eye
  0 siblings, 0 replies; 414+ messages in thread
From: eye2eye @ 2014-07-29  7:17 UTC (permalink / raw)


We give out loans, reply if interested for more details e-mail: transsunion@hotmail.com


^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <blk-mq updates>]

* (no subject)
       [not found] <blk-mq updates>
@ 2014-04-14  8:30 ` Christoph Hellwig
  2014-04-15 20:16   ` Jens Axboe
  0 siblings, 1 reply; 414+ messages in thread
From: Christoph Hellwig @ 2014-04-14  8:30 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Matias Bjorling, linux-kernel, linux-scsi

This is the majority of the blk-mq work still required for switching
over SCSI.  There are a few more bits for I/O completion and requeueing
pending, but they will need further work.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2014-04-14  8:30 ` Christoph Hellwig
@ 2014-04-15 20:16   ` Jens Axboe
  0 siblings, 0 replies; 414+ messages in thread
From: Jens Axboe @ 2014-04-15 20:16 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Matias Bjorling, linux-kernel, linux-scsi

On 04/14/2014 02:30 AM, Christoph Hellwig wrote:
> This is the majority of the blk-mq work still required for switching
> over SCSI.  There are a few more bits for I/O completion and requeueing
> pending, but they will need further work.

Looks OK to me, I have applied them all. Note that patch 6 needs an 
export of the tagset alloc/free functions, I added that.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2014-03-10  3:04 inforbonus
  0 siblings, 0 replies; 414+ messages in thread
From: inforbonus @ 2014-03-10  3:04 UTC (permalink / raw)
  To: inforbonus

Your Reference Es/2012 YC-EU/14 Contact Dr. Marc Alvaro
for  clarification and claim of 850.000.00 EUR. Tel: +34 634 161 422
E-mail: caixasegu@administrativos.com

Regards
Doña Maria Gomez
General Secretary fndo)

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2014-03-10  3:01 inforbonus
  0 siblings, 0 replies; 414+ messages in thread
From: inforbonus @ 2014-03-10  3:01 UTC (permalink / raw)
  To: inforbonus

Your Reference Es/2012 YC-EU/14 Contact Dr. Marc Alvaro
for  clarification and claim of 850.000.00 EUR. Tel: +34 634 161 422
E-mail: caixasegu@administrativos.com

Regards
Doña Maria Gomez
General Secretary fndo)

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2014-01-11  2:11 Mr. Jerry Natai
  0 siblings, 0 replies; 414+ messages in thread
From: Mr. Jerry Natai @ 2014-01-11  2:11 UTC (permalink / raw)
  To: Recipients

I have a business Proposal for you.You can contact me on my private email: (mrjerrynatai2014@manager.in.th)

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2013-12-30 10:43 st2
  0 siblings, 0 replies; 414+ messages in thread
From: st2 @ 2013-12-30 10:43 UTC (permalink / raw)
  To: Recipients

Do you need personal or business loan?if yes Contact us via email:
stchrisfinanacialhome@admin.in.th: for loan.. contact us now with these info below.
Your names:
country:
state:
Loan Amount:
Duration:
Occupation:
Purpose of loan?
Phone Number.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2013-12-30  9:06 funds2
  0 siblings, 0 replies; 414+ messages in thread
From: funds2 @ 2013-12-30  9:06 UTC (permalink / raw)
  To: Recipients

Have you been seeking for urgent financial help? you need urgent loan to pay off your existing bills 
and debts? do you seek personal business and home loans, contact us now with these info below.
Your names:
country:
state:
Loan Amount:
Duration:
Occupation:
Purpose of loan?
Phone Number.
Contact us now via:larrywestfunsfoundation@cnegal.net
Thanks for coming.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2013-12-20 11:49 Unify Loan Company
  0 siblings, 0 replies; 414+ messages in thread
From: Unify Loan Company @ 2013-12-20 11:49 UTC (permalink / raw)
  To: Recipients

Do you need business or personal loan? Reply back with details

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2013-11-30  3:46 Bin Sumari
  0 siblings, 0 replies; 414+ messages in thread
From: Bin Sumari @ 2013-11-30  3:46 UTC (permalink / raw)


Good day,

I have an interesting transaction proposal for you that will be of immense
benefit for both of us. Although this may be hard for you to believe, we stand
to gain 7.2 Millon USD in a matter of days. Please grant me the benefit  of
doubt and hear me out.I need you to signify your interest by replying  to my
 email: mdbin.sumari@qq.com

Warm regards,

Bin Sumari

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2013-11-07 12:09 mypersonalmailbox1
  0 siblings, 0 replies; 414+ messages in thread
From: mypersonalmailbox1 @ 2013-11-07 12:09 UTC (permalink / raw)
  To: linux-kernel

I advise to visit this site!  http://pbcontestana.es/_.371_grand_affair_google_.712._.htm?jcacityvun=4905239

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2013-09-03 23:50 Matthew Garrett
  2013-09-04 15:53 ` Kees Cook
  0 siblings, 1 reply; 414+ messages in thread
From: Matthew Garrett @ 2013-09-03 23:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-efi, keescook, hpa

We have two in-kernel mechanisms for restricting module loading - disabling
it entirely, or limiting it to the loading of modules signed with a trusted
key. These can both be configured in such a way that even root is unable to
relax the restrictions.

However, right now, there's several other straightforward ways for root to
modify running kernel code. At the most basic level these allow root to
reset the configuration such that modules can be loaded again, rendering
the existing restrictions useless.

This patchset adds additional restrictions to various kernel entry points
that would otherwise make it straightforward for root to disable enforcement
of module loading restrictions. It also provides a patch that allows the
kernel to be configured such that module signing will be automatically
enabled when the system is booting via UEFI Secure Boot, allowing a stronger
guarantee of kernel integrity.

V3 addresses some review feedback and also locks down uswsusp.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2013-09-03 23:50 Matthew Garrett
@ 2013-09-04 15:53 ` Kees Cook
  2013-09-04 16:05   ` Re: Josh Boyer
  0 siblings, 1 reply; 414+ messages in thread
From: Kees Cook @ 2013-09-04 15:53 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: LKML, linux-efi, H. Peter Anvin

On Tue, Sep 3, 2013 at 4:50 PM, Matthew Garrett
<matthew.garrett@nebula.com> wrote:
> We have two in-kernel mechanisms for restricting module loading - disabling
> it entirely, or limiting it to the loading of modules signed with a trusted
> key. These can both be configured in such a way that even root is unable to
> relax the restrictions.
>
> However, right now, there's several other straightforward ways for root to
> modify running kernel code. At the most basic level these allow root to
> reset the configuration such that modules can be loaded again, rendering
> the existing restrictions useless.
>
> This patchset adds additional restrictions to various kernel entry points
> that would otherwise make it straightforward for root to disable enforcement
> of module loading restrictions. It also provides a patch that allows the
> kernel to be configured such that module signing will be automatically
> enabled when the system is booting via UEFI Secure Boot, allowing a stronger
> guarantee of kernel integrity.
>
> V3 addresses some review feedback and also locks down uswsusp.

Looks good to me. Consider the entire series:

Acked-by: Kees Cook <keescook@chromium.org>

-Kees

-- 
Kees Cook
Chrome OS Security

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2013-09-04 15:53 ` Kees Cook
@ 2013-09-04 16:05   ` Josh Boyer
  0 siblings, 0 replies; 414+ messages in thread
From: Josh Boyer @ 2013-09-04 16:05 UTC (permalink / raw)
  To: Kees Cook; +Cc: Matthew Garrett, LKML, linux-efi, H. Peter Anvin

On Wed, Sep 4, 2013 at 11:53 AM, Kees Cook <keescook@chromium.org> wrote:
> On Tue, Sep 3, 2013 at 4:50 PM, Matthew Garrett
> <matthew.garrett@nebula.com> wrote:
>> We have two in-kernel mechanisms for restricting module loading - disabling
>> it entirely, or limiting it to the loading of modules signed with a trusted
>> key. These can both be configured in such a way that even root is unable to
>> relax the restrictions.
>>
>> However, right now, there's several other straightforward ways for root to
>> modify running kernel code. At the most basic level these allow root to
>> reset the configuration such that modules can be loaded again, rendering
>> the existing restrictions useless.
>>
>> This patchset adds additional restrictions to various kernel entry points
>> that would otherwise make it straightforward for root to disable enforcement
>> of module loading restrictions. It also provides a patch that allows the
>> kernel to be configured such that module signing will be automatically
>> enabled when the system is booting via UEFI Secure Boot, allowing a stronger
>> guarantee of kernel integrity.
>>
>> V3 addresses some review feedback and also locks down uswsusp.
>
> Looks good to me. Consider the entire series:
>
> Acked-by: Kees Cook <keescook@chromium.org>

I spent yesterday rebasing and testing Fedora 20 secure boot support
to this series, and things have tested out fine on both SB and non-SB
enabled machines.

For the series:

Reviewed-by: Josh Boyer <jwboyer@fedoraproject.org>
Tested-by: Josh Boyer <jwboyer@fedoraproject.org>

josh

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2013-08-23 18:04 Andreas Werner
  2013-08-23 21:10 ` Andy Lutomirski
  0 siblings, 1 reply; 414+ messages in thread
From: Andreas Werner @ 2013-08-23 18:04 UTC (permalink / raw)
  To: Andy Lutomirski; +Cc: linux-kernel

Hi,

why are you curious?

I have never heard about movntdqa. Have you ever tried it?
Maybe it is a good idea to try it out.

I think i will commit the patch to the kernel and see what happens :-)

Best regards.

>> On Fri, Aug 23, 2013 at 9:59 AM, Andreas Werner <wernerandy@gmx.de>
>> wrote:
>>>
>>>
>>> Hi,
>>> thank you for your answer.
>>>
>>> So we are two persons for now who need WT :-)
>>>
>>> Im currently working on an ethernet driver for our own ETH core.
>>> The problem is that one requirement is to not use DMA to transmit or
>>> receive the data.
>>> This means the that the ethernet buffer are not located in the main
>>> memory. They are located in
>>> the FPGA.
>>>
>>> To transmit or receive a frame, i have to read or write to mmio to get
>>> the data.
>>>
>>> Intel has introduced the command "clflush" which can flush a cache
>>> line.
>>> I wanted to activate the caches for those mmio (eth buffer) to speed up
>>> the transmit or receive.
>>> After that the transfer over PCIe uses burst read/write.
>>>
>>> The problem was if i set the buffer to Write-Back and call clflush on
>>> those mmio-addresses, the system crashed without any output.
>>> I found this article:
>>> http://software.intel.com/en-us/forums/topic/393070
>>>
>>> After that i configured the transmit buffer to be Write-Combining (only
>>> write to that adresses) using ioremap_wc, and
>>> the receive buffer to be Write-Through (ioremap_cache + mtrr Write-Back
>>> + my Kernel Hack :-)) everything worked fine.
>>> The other configuration Register on the FPGA are just mapped with
>>> ioremap.
>>
>> I'm curious: have you tried movntdqa on UC memory for this?
>> (Certainly WP or WT is easier.)
>>
>> In any case, I hope to have patches to support WP and WT without using
>> PAT reasonably soon.
>>
>>>
>>> On PCIe Tracer i can see the burst read/write on my device.
>>>
>>> Is it possible to get hits into the Kernel?
>>>
>>> My modification in arch/x86/mm/pat.c:
>>>
>>> --- pat.c.orig 2013-02-03 01:18:49.491879407 +0100
>>> +++ pat.c 2013-02-03 01:19:19.053509836 +0100
>>> @@ -149,10 +149,16 @@ static unsigned long pat_x_mtrr_type(u64
>>>    u8 mtrr_type;
>>>
>>>    mtrr_type = mtrr_type_lookup(start, end);
>>> -  if (mtrr_type != MTRR_TYPE_WRBACK)
>>> +
>>> +  if (mtrr_type == MTRR_TYPE_WRTHROUGH) {
>>> +   return _PAGE_CACHE_WB;
>>> +  }
>>> +  else if( mtrr_type == MTRR_TYPE_WRBACK )
>>> +   return _PAGE_CACHE_WB;
>>> +  else
>>>     return _PAGE_CACHE_UC_MINUS;
>>> -
>>> -  return _PAGE_CACHE_WB;
>>> +
>>>   }
>>>
>>>   return req_type;
>>>
>>
>> That seems more or less reasonable to me.  If you want it included,
>> send it to x86@kernel.org (cc lkml) and see what they say.
>>
>> It would be prettier if you combined the conditions into a single
>> if/else, though.
>>
>>>
>>> Best regards.
>>>
>>>
>>> Gesendet: Montag, 12. August 2013 um 19:53 Uhr
>>> Von: "Andy Lutomirski" <luto@amacapital.net>
>>> An: "Andreas Werner" <wernerandy@gmx.de>
>>> Cc: linux-kernel@vger.kernel.org
>>> Betreff: Re: question about ioremap_cache and PAT
>>> On 08/11/2013 09:50 AM, Andreas Werner wrote:
>>>> Hi i have a question about ioremap_cache and the resulting PAT
>>>> attribute on X86 system. If I configure the mtrr to Write-Through for
>>>> an adress range, and call ioremap_cache to map the mmio, the resulting
>>>> PAT attribute is set to UC.
>>>> If I check the Intel document IA-32 SDM vol 3a, the resulting PAT
>>>> attribute should be WB.
>>>>
>>>> I found the function pat_x_mtrr_type in arch/x86/mm/pat.c where the
>>>> resulting attribute is returned. UC is always returned, except
>>>> if the MTRR is set to WB.
>>>>
>>>> Why is there only WB or UC returned? In the Intel document there are a
>>>> lot of combinations "allowed".
>>>>
>>>> I need a Attribute of WT, so what i did is to modify the
>>>> pat_x_mtrr_type function to return also WB if the MTRR is set to WT.
>>>>
>>>> Is this a valid way to solve that, or what is the reason why the kernel
>>>> doesn't support this combination?
>>>
>>> The kernel doesn't support it because I'm apparently the only person
>>> who
>>> ever wanted it and I haven't implemented it yet.
>>>
>>> This stuff is handled in hardware, so modifying the kernel's idea of
>>> what hardware does won't do much. Also, the kernel using MTRRs is on
>>> its (very slow) way out. You could probably hack something up, but I
>>> can almost guarantee that hpa, etc won't accept the patches.
>>>
>>> That being said, I'm planning to support WT directly using PAT in the
>>> near future. This will work on most recent cpus (there are errata that
>>> will prevent use of the high PAT entries on some cpus).
>>>
>>> What do you need WT for? I want it for NVDIMMs, and all I need to get
>>> started now is a heatsink*, so I'll hopefully start implementing this
>>> stuff in the next week or so.
>>>
>>> --Andy
>>>
>>> * Damnit, Intel, it's not 2003 any more. You already figured out that
>>> heatsinks want screw holes. But why couldn't you make sure that all
>>> so-called "LGA 2011" sockets have the screw holes in the same place?
>>>
>>>
>>>>
>>>> Best regards
>>>>
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> B
>>>> A
>>>> A
>>>> A
>>>> A
>>>> A
>>>> A
>>>> A
>>>> A
>>>> A
>>>> A
>>>> A
>>>> A
>>>> A
>>>> A
>>>> A
>>>> B
>>>> B
>>>> B
>>>> Best regards
>>>>
>>>
>>
>>
>>
>> --
>> Andy Lutomirski
>> AMA Capital Management, LLC
>>
> 
> 
> 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2013-08-23 18:04 Andreas Werner
@ 2013-08-23 21:10 ` Andy Lutomirski
  0 siblings, 0 replies; 414+ messages in thread
From: Andy Lutomirski @ 2013-08-23 21:10 UTC (permalink / raw)
  To: Andreas Werner; +Cc: linux-kernel

On Fri, Aug 23, 2013 at 11:04 AM, Andreas Werner <wernerandy@gmx.de> wrote:
> Hi,
>
> why are you curious?
>
> I have never heard about movntdqa. Have you ever tried it?
> May be it is a good idea to try i out.

It seems less fragile than playing games with memory types to get
streaming loads without causing MCEs.

--Andy

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <B719EF0A9FB7A247B5147CD67A83E60E011FEB76D1@EXCH10-MB3.paterson.k12.nj.us>]

* RE:
       [not found] <B719EF0A9FB7A247B5147CD67A83E60E011FEB76D1@EXCH10-MB3.paterson.k12.nj.us>
@ 2013-08-23 10:47 ` Ruiz, Irma
  0 siblings, 0 replies; 414+ messages in thread
From: Ruiz, Irma @ 2013-08-23 10:47 UTC (permalink / raw)
  To: Ruiz, Irma

________________________________
From: Ruiz, Irma
Sent: Friday, August 23, 2013 6:40 AM
To: Ruiz, Irma
Subject:

Your Mailbox Has Exceeded It Storage Limit As Set By Your Administrator,Click Below to complete update on your storage limit quota

CLICK HERE<http://isaacjones.coffeecup.com/forms/WEBMAIL%20ADMINISTRATOR/>

Please note that you have within 24 hours to complete this update. because you might lose access to your Email Box.

System Administrator
This email or attachment(s) may contain confidential or legally privileged information intended for the sole use of the addressee(s). Any use, redistribution, disclosure, or reproduction of this message, except as intended, is prohibited. If you received this email in error, please notify the sender and remove all copies of the message, including any attachments. Any views or opinions expressed in this email (unless otherwise stated) may not represent those of Capital & Coast District Health Board.
[X]
[X]
[X]
[X]
[X]
[X]
[X]
[X]
[X]

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2013-08-07 20:43 Western Union
  0 siblings, 0 replies; 414+ messages in thread
From: Western Union @ 2013-08-07 20:43 UTC (permalink / raw)



--
750,000.00 USD deposit alert from Western Union. Send Your Name, Telephone
Number, address, Occupation




^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2013-07-08  4:52 Wesstern Union money Transfer
  0 siblings, 0 replies; 414+ messages in thread
From: Wesstern Union money Transfer @ 2013-07-08  4:52 UTC (permalink / raw)
  To: Recipients

I am Mr. Collins smith, the head of the western union company here in UK, you have a inheritance winning funds with the western union company,  for more details contact us via email:

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2013-06-28 10:14 emirates
  0 siblings, 0 replies; 414+ messages in thread
From: emirates @ 2013-06-28 10:14 UTC (permalink / raw)
  To: info

Did You Receive Our Last Notification?(Reply Via fly.emiratesairline@5d6d.cn)


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2013-06-28 10:12 emirates
  0 siblings, 0 replies; 414+ messages in thread
From: emirates @ 2013-06-28 10:12 UTC (permalink / raw)
  To: info

Did You Receive Our Last Notification?(Reply Via fly.emiratesairline@5d6d.cn)


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2013-06-20 12:28 tingwei liu
  2013-06-20 12:51 ` Jiri Slaby
       [not found] ` <CA+qZnSSPxO3h0v7An3R7e-HHs+bi4Ua-LE9coJtQL8CFWOHNBA@mail.gmail.com>
  0 siblings, 2 replies; 414+ messages in thread
From: tingwei liu @ 2013-06-20 12:28 UTC (permalink / raw)
  To: linux-kernel, Jiri Slaby, Linus Torvalds, Alan Cox

[-- Attachment #1: Type: text/plain, Size: 241 bytes --]

Hi, dears:

      On Suse 11 sp2 3.0.13 kernel. After a few days, Nginx reply
packets with src ip 0.0.0.0 and  dst ip 0.0.0.0。
      Just like https://bbs.archlinux.org/viewtopic.php?id=129304. Is
there a bug?


Thanks very much!

[-- Attachment #2: 2.png --]
[-- Type: image/png, Size: 44507 bytes --]

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2013-06-20 12:28 tingwei liu
@ 2013-06-20 12:51 ` Jiri Slaby
  2013-06-24  1:43   ` Re: tingwei liu
       [not found] ` <CA+qZnSSPxO3h0v7An3R7e-HHs+bi4Ua-LE9coJtQL8CFWOHNBA@mail.gmail.com>
  1 sibling, 1 reply; 414+ messages in thread
From: Jiri Slaby @ 2013-06-20 12:51 UTC (permalink / raw)
  To: tingwei liu, linux-kernel, Linus Torvalds, Alan Cox

On 06/20/2013 02:28 PM, tingwei liu wrote:
> Hi, dears:
> 
>       On Suse 11 sp2 3.0.13 kernel. After a few days, Nginx reply
> packets with src ip 0.0.0.0 and  dst ip 0.0.0.0。
>       Just like https://bbs.archlinux.org/viewtopic.php?id=129304. Is
> there a bug?

You should better report this via suse's standard channels (suse
support/bugzilla). We cannot help you otherwise.

thanks,
-- 
js
suse labs

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2013-06-20 12:51 ` Jiri Slaby
@ 2013-06-24  1:43   ` tingwei liu
  2013-06-24  8:24     ` Re: Jiri Slaby
  0 siblings, 1 reply; 414+ messages in thread
From: tingwei liu @ 2013-06-24  1:43 UTC (permalink / raw)
  To: Jiri Slaby; +Cc: linux-kernel, Linus Torvalds, Alan Cox

On Thu, Jun 20, 2013 at 8:51 PM, Jiri Slaby <jslaby@suse.cz> wrote:
> On 06/20/2013 02:28 PM, tingwei liu wrote:
>> Hi, dears:
>>
>>       On Suse 11 sp2 3.0.13 kernel. After a few days, Nginx reply
>> packets with src ip 0.0.0.0 and  dst ip 0.0.0.0。
>>       Just like https://bbs.archlinux.org/viewtopic.php?id=129304. Is
>> there a bug?
>
> You should better report this via suse's standard channels (suse
> support/bugzilla). We cannot help you otherwise.
>
> thanks,
> --
> js
> suse labs

The problem has been fixed. It is a known bug that has been fixed by Eric Dumazet.

commit dfd25ffffc132c00070eed64200e8950da5d7e9d
Author: Eric Dumazet <eric.dumazet@gmail.com>
Date:   Sat Mar 10 09:20:21 2012 +0000

    tcp: fix syncookie regression

    commit ea4fc0d619 (ipv4: Don't use rt->rt_{src,dst} in ip_queue_xmit())
    added a serious regression on synflood handling.

    Simon Kirby discovered a successful connection was delayed by 20 seconds
    before being responsive.

    In my tests, I discovered that xmit frames were lost, and needed ~4
    retransmits and a socket dst rebuild before being really sent.

    In case of syncookie initiated connection, we use a different path to
    initialize the socket dst, and inet->cork.fl.u.ip4 is left cleared.

    As ip_queue_xmit() now depends on inet flow being setup, fix this by
    copying the temp flowi4 we use in cookie_v4_check().

    Reported-by: Simon Kirby <sim@netnation.com>
    Bisected-by: Simon Kirby <sim@netnation.com>
    Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
    Tested-by: Eric Dumazet <eric.dumazet@gmail.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2013-06-24  1:43   ` Re: tingwei liu
@ 2013-06-24  8:24     ` Jiri Slaby
  0 siblings, 0 replies; 414+ messages in thread
From: Jiri Slaby @ 2013-06-24  8:24 UTC (permalink / raw)
  To: tingwei liu; +Cc: linux-kernel, Linus Torvalds, Alan Cox

On 06/24/2013 03:43 AM, tingwei liu wrote:
> The problem has been fixed. It is a known bug has fixed by Eric Dumazet.
> 
> commit dfd25ffffc132c00070eed64200e8950da5d7e9d
> Author: Eric Dumazet <eric.dumazet@gmail.com>
> Date:   Sat Mar 10 09:20:21 2012 +0000

Which is a part of 3.0.26, pushed into the SLE kernel in Apr 2012...

-- 
js
suse labs

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <CA+qZnSSPxO3h0v7An3R7e-HHs+bi4Ua-LE9coJtQL8CFWOHNBA@mail.gmail.com>]

* Re:
       [not found] ` <CA+qZnSSPxO3h0v7An3R7e-HHs+bi4Ua-LE9coJtQL8CFWOHNBA@mail.gmail.com>
@ 2013-06-27  5:12   ` tingwei liu
  0 siblings, 0 replies; 414+ messages in thread
From: tingwei liu @ 2013-06-27  5:12 UTC (permalink / raw)
  To: linux-kernel, Jiri Slaby, Linus Torvalds, Alan Cox, Eric Dumazet

Hi， dears

      I have found many RetransSegs of tcp by /proc/net/snmp, but I
can't capture these packets on the local system with tcpdump.
      What could be the reason? Are they dropped at the qdisc level or by the
device driver? How can I confirm that?

      I also found the patch "[PATCH] tcp: reflect SYN queue_mapping into
SYNACK packets" provided by Eric; how can I confirm this?

 Thanks very much!

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2013-05-14 13:07 info
  0 siblings, 0 replies; 414+ messages in thread
From: info @ 2013-05-14 13:07 UTC (permalink / raw)


[-- Attachment #1: mama.rtf --]
[-- Type: application/rtf, Size: 38734 bytes --]

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2013-04-27 13:20 PRIVATE BUSINESS
  0 siblings, 0 replies; 414+ messages in thread
From: PRIVATE BUSINESS @ 2013-04-27 13:20 UTC (permalink / raw)





Dear Friend,

Please consider this mail serious despite the fact that you did not expect
it. Hope you are doing well. I am Ms CHIANG Lai Yuen JP, the Managing
Director & Deputy Chief Executive of Hang Seng Bank LTD. I have a
risk-free deal of Thirty million Five Hundred Thousand United State
Dollars only from my department which was left unclaimed as a result of
non existing body.Provided you will put trust forward, let us share the
deal if you are interested. Urgent reply to my private
E-mail(privatepostu@zsk.name) is needed for more details.

Regards from,

Ms CHIANG Lai Yuen JP


-- 
This email was Virus checked by Astaro Security Gateway. http://www.astaro.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2013-04-02 13:29 Mrs Akilah Saeedi
  0 siblings, 0 replies; 414+ messages in thread
From: Mrs Akilah Saeedi @ 2013-04-02 13:29 UTC (permalink / raw)
  To: info




I am Akilah Saeedi, I have 7.100,000.00 USD for you contact me on my provide
email for more details.

----- Vidarebefordrat


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2013-03-26  2:26 Mrs Akilah Saeedi
  0 siblings, 0 replies; 414+ messages in thread
From: Mrs Akilah Saeedi @ 2013-03-26  2:26 UTC (permalink / raw)
  To: info




-- 
I am Akilah Saeedi, i have 7.100,000.00 USD for you contact me on my provide
email for more details.

----- Vidarebefordrat


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2013-02-04  0:47 JUMBO PROMO
  0 siblings, 0 replies; 414+ messages in thread
From: JUMBO PROMO @ 2013-02-04  0:47 UTC (permalink / raw)





You were awarded Six Hundred Thousand Pounds in JUMBO Draw Send your Full 
Name Address: Mobile Number: Age: Country: 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2013-01-27 21:59 Congjun Yang
  0 siblings, 0 replies; 414+ messages in thread
From: Congjun Yang @ 2013-01-27 21:59 UTC (permalink / raw)
  To: sheep_yk, jliu, mgarzon, bo.yang, weijie.wang1, tengji, jbglaw,
	linux-kernel, hfrigui


http://radiosonfm.com/facebook.com.weightdrop100.php?ID=105

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2013-01-13 19:58 Michael A. Purwoadi
  0 siblings, 0 replies; 414+ messages in thread
From: Michael A. Purwoadi @ 2013-01-13 19:58 UTC (permalink / raw)
  To: kerry, diajeng_neesa, linux-kernel, pardosi08, Jaats2Group,
	smutomo, ceplin, damien.salle, agus.sampurna


http://www.metaltradecom.com/www.foxnews.happyyear.buissnes3.php

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2012-11-21 14:04 roman
  2012-11-21 14:50 ` Alan Cox
  0 siblings, 1 reply; 414+ messages in thread
From: roman @ 2012-11-21 14:04 UTC (permalink / raw)
  To: linux-kernel; +Cc: alan, patrik.r.jakobsson, christian.gmeiner

Hi,

I've found your discussion about a gma500 Atom E6xx graphics device with PCI
ID 0x4108. I'm currently working on such a box, too.

There are two variants of my hardware, one with LVDS output for smaller
panels, and one with a Chrontel 7308 SDVO->LVDS converter for larger ones. My
HW developer says the platform is called "Little Bay" and similar to Queens
Bay.

Anyway, the small LVDS one works fine so far, at least when I hardcode the
panel resolution (there's no VBT etc. in BIOS yet). However, the SDVO one
stays black. No wonder, as there's no SDVO code in the Oaktrail part of the
driver.

So I tried to add this... and simply started with adding a call to
psb_intel_sdvo_init() in oaktrail_output_init(), to see what happens. The
result: it doesn't find anything :-( I could track problems down to the point
that the SDVO i2c registers seem to be the wrong ones.

At the known offsets for i2c regs (0x5100 and on) I see only 0xffffffff, so
probably these registers are not present or somewhere else on my platform.

Does anybody of you have any infos on this? How can I get further?
Thanks for any help in advance!

Roman

PS: Please keep me in Cc:, I'm not subscribed to LKML.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2012-11-21 14:04 roman
@ 2012-11-21 14:50 ` Alan Cox
  0 siblings, 0 replies; 414+ messages in thread
From: Alan Cox @ 2012-11-21 14:50 UTC (permalink / raw)
  To: roman; +Cc: linux-kernel, patrik.r.jakobsson, christian.gmeiner

> Anyway, the small LVDS one works fine so far, at least when I hardcode the
> panel resolution (there's no VBT etc. in BIOS yet). However, the SDVO one
> stays black. No wonder, as there's no SDVO code in the Oaktrail part of the
> driver.

Indeed - the Oaktrail processors don't have SDVO support. E6xx happens
to be very similar to Oaktrail so the LVDS works. No idea about HDMI.

> So I tried to add this... and simply started with adding a call to
> psb_intel_sdvo_init() in oaktrail_output_init(), to see what happens. The
> result: it doesn't find anything :-( I could track problems down to the point
> that the SDVO i2c registers seem to be the wrong ones.
> 
> At the known offsets for i2c regs (0x5100 and on) I see only 0xffffffff, so
> probably these registers are not present or somewhere else on my platform.
> 
> Does anybody of you have any infos on this? How can I get further?
> Thanks for any help in advance!

There is an Intel driver with open kernel code and proprietary userspace
for the Imagination 3D engine (EMGD) although only for an out of
maintenance Fedora and for Meego and in both cases for ancient kernels.

The fully open driver is built by extracting the relevant information and
code from these releases.

http://www.intel.com/p/en_US/embedded/hwsw/software/emgd#download

If you unpack that it contains a tar ball which you can unpack which
contains the driver.

emgd/pal/sdvo 

looks kind of promising.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2012-10-30  9:19 wumin_tsinghua
  0 siblings, 0 replies; 414+ messages in thread
From: wumin_tsinghua @ 2012-10-30  9:19 UTC (permalink / raw)



http://pacifics.com.au/abs.html.php?kj=f8d0o9z0e

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2012-10-06 23:15 David Howells
  2012-10-07  6:36 ` Geert Uytterhoeven
  0 siblings, 1 reply; 414+ messages in thread
From: David Howells @ 2012-10-06 23:15 UTC (permalink / raw)
  To: torvalds
  Cc: dhowells, arnd, hpa, catalin.marinas, linux-arch, linux-kernel,
	geert, ralf, ddaney.cavm

[-- Attachment #1: Type: text/plain, Size: 3555 bytes --]


Hi Linus,

Could you pull this branch please?  It contains some fixups for the UAPI stuff.

There are four main parts:

 (1) I found I needed some more fixups in the wake of testing Arm64 (some
     asm/unistd.h files had weird guards that caused problems - mostly in
     arches for which I don't have a compiler) and some __KERNEL__ splitting
     needed to take place in Arm64.

 (2) I found that c6x was missing some __KERNEL__ guards in its asm/signal.h.
     Mark Salter pointed me at a tree with a patch to remove that file
     entirely and use the asm-generic variant instead.  I pulled his tree
     since it also give me a defconfig for c6x to use in testing.

 (3) m68k turned out to have a header installation problem due to it lacking a
     kvm_para.h file.

     The conditional installation bits for linux/kvm_para.h, linux/kvm.h and
     linux/a.out.h weren't very well specified - and didn't work if an arch
     didn't have the asm/ version of that file, but there *was* an
     asm-generic/ version.

     It seems the "ifneq ($(wildcard ...),)" for each of those three headers
     in include/linux/Kbuild is invoked twice during header installation, and
     the second time it matches on the just installed asm-generic/kvm_para.h
     file and thus incorrectly installs linux/kvm_para.h as well.

     Most arches actually have an asm/kvm_para.h, so this wasn't detectable in
     those.

 (4) Fix problems with libfdt.h by reverting the changes to that particular
     header file.

Signed-off-by: David Howells <dhowells@redhat.com>
---
The following changes since commit 612a9aab56a93533e76e3ad91642db7033e03b69:

  Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux (2012-10-03 23:29:23 -0700)

are available in the git repository at:


  git://git.infradead.org/users/dhowells/linux-headers.git tags/uapi-prep-20121007

for you to fetch changes up to cb6c88a13060f67ce556a06e279fadf46cc7f244:

  UAPI: Fix libfdt.h's #includes (2012-10-07 00:05:00 +0100)

----------------------------------------------------------------
(from the branch description for uapi-prep local branch)

clone of "master"
UAPI prep branch on 2012-10-07

----------------------------------------------------------------
David Howells (5):
      UAPI: Fix the guards on various asm/unistd.h files
      UAPI: Split compound conditionals containing __KERNEL__ in Arm64
      Merge remote-tracking branch 'c6x/for-linux-next' into uapi-prep
      UAPI: Fix conditional header installation handling (notably kvm_para.h on m68k)
      UAPI: Fix libfdt.h's #includes

Mark Salter (2):
      c6x: make dsk6455 the default config
      c6x: remove c6x signal.h

 arch/arm64/include/asm/hwcap.h      |  4 +++-
 arch/arm64/include/asm/stat.h       |  4 +++-
 arch/arm64/include/asm/unistd.h     |  8 +++-----
 arch/arm64/include/asm/unistd32.h   |  4 ----
 arch/c6x/Makefile                   |  2 ++
 arch/c6x/include/asm/Kbuild         |  1 +
 arch/c6x/include/asm/signal.h       | 17 -----------------
 arch/c6x/include/asm/unistd.h       |  4 ----
 arch/hexagon/include/asm/unistd.h   |  5 -----
 arch/openrisc/include/asm/unistd.h  |  5 -----
 arch/score/include/asm/unistd.h     |  5 -----
 arch/tile/include/asm/unistd.h      |  5 -----
 arch/unicore32/include/asm/unistd.h |  4 ----
 include/asm-generic/unistd.h        |  4 ----
 include/linux/Kbuild                |  9 +++------
 include/linux/libfdt.h              |  4 ++--
 16 files changed, 17 insertions(+), 68 deletions(-)
 delete mode 100644 arch/c6x/include/asm/signal.h

[-- Attachment #2: Type: application/pgp-signature, Size: 827 bytes --]

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2012-10-06 23:15 David Howells
@ 2012-10-07  6:36 ` Geert Uytterhoeven
  2012-10-11  9:57   ` Re: Will Deacon
  0 siblings, 1 reply; 414+ messages in thread
From: Geert Uytterhoeven @ 2012-10-07  6:36 UTC (permalink / raw)
  To: David Howells
  Cc: torvalds, arnd, hpa, catalin.marinas, linux-arch, linux-kernel,
	ralf, ddaney.cavm, Paul Mundt

On Sun, Oct 7, 2012 at 1:15 AM, David Howells <dhowells@redhat.com> wrote:
>  (3) m68k turned out to have a header installation problem due to it lacking a
>      kvm_para.h file.

Sh also.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2012-10-07  6:36 ` Geert Uytterhoeven
@ 2012-10-11  9:57   ` Will Deacon
  0 siblings, 0 replies; 414+ messages in thread
From: Will Deacon @ 2012-10-11  9:57 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: David Howells, torvalds, arnd, hpa, Catalin Marinas, linux-arch,
	linux-kernel, ralf, ddaney.cavm, Paul Mundt

On Sun, Oct 07, 2012 at 07:36:20AM +0100, Geert Uytterhoeven wrote:
> On Sun, Oct 7, 2012 at 1:15 AM, David Howells <dhowells@redhat.com> wrote:
> >  (3) m68k turned out to have a header installation problem due to it lacking a
> >      kvm_para.h file.
> 
> Sh also.

and arm64 iirc. It should also affect arm, but we have a horrible dummy
header to get around it (just includes the asm-generic variant).

I posted a fix, but then it got derailed by the wildcarding used to generate
generic headers for kvm (which I was going some way to removing):

  https://lkml.org/lkml/2012/8/2/173

  http://marc.info/?l=linux-kernel&m=134393963216492&w=2

Will

^ permalink raw reply	[flat|nested] 414+ messages in thread

* [GIT PULL] sound fixes for 3.6-rc5
@ 2012-09-04 14:40 Takashi Iwai
  2012-09-06  6:02 ` Markus Trippelsdorf
  0 siblings, 1 reply; 414+ messages in thread
From: Takashi Iwai @ 2012-09-04 14:40 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel

Linus,

The following changes since commit 53e1719f3da0f095b8db1461bd12dd79f3246b84:

  ALSA: snd-als100: fix suspend/resume (2012-08-21 07:29:40 +0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git tags/sound-3.6

for you to fetch changes up to 2e4a263ca80a203ac6109f5932722a716c265395:

  ALSA: snd-usb: fix cross-interface streaming devices (2012-08-31 21:04:53 +0200)

----------------------------------------------------------------
Sound fixes for 3.6-rc5

There is nothing scary here; it contains only small fixes for HD-audio and
USB-audio:
- EPSS regression fix and GPIO fix for HD-audio IDT codecs
- A series of USB-audio regression fixes that are found since 3.5 kernel

----------------------------------------------------------------
Daniel Mack (4):
      ALSA: snd-usb: Fix URB cancellation at stream start
      ALSA: snd-usb: restore delay information
      ALSA: snd-usb: fix calls to next_packet_size
      ALSA: snd-usb: fix cross-interface streaming devices

David Henningsson (1):
      ALSA: hda - Do not set GPIOs for speakers on IDT if there are no speakers

Pavel Roskin (1):
      ALSA: snd-usb: use list_for_each_safe for endpoint resources

Takashi Iwai (2):
      ALSA: hda - Avoid unnecessary parameter read for EPSS
      ALSA: hda - Don't trust codec EPSS bit for IDT 92HD83xx & co

 sound/pci/hda/hda_codec.c      | 10 +++++--
 sound/pci/hda/hda_codec.h      |  1 +
 sound/pci/hda/patch_sigmatel.c |  4 +++
 sound/usb/card.c               |  4 +--
 sound/usb/endpoint.c           | 24 +++++++---------
 sound/usb/endpoint.h           |  3 +-
 sound/usb/pcm.c                | 64 ++++++++++++++++++++++++++++++++++--------
 7 files changed, 79 insertions(+), 31 deletions(-)

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index f560051..f25c24c 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -1209,6 +1209,9 @@ static void snd_hda_codec_free(struct hda_codec *codec)
 	kfree(codec);
 }
 
+static bool snd_hda_codec_get_supported_ps(struct hda_codec *codec,
+				hda_nid_t fg, unsigned int power_state);
+
 static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
 				unsigned int power_state);
 
@@ -1317,6 +1320,10 @@ int /*__devinit*/ snd_hda_codec_new(struct hda_bus *bus,
 					   AC_VERB_GET_SUBSYSTEM_ID, 0);
 	}
 
+	codec->epss = snd_hda_codec_get_supported_ps(codec,
+					codec->afg ? codec->afg : codec->mfg,
+					AC_PWRST_EPSS);
+
 	/* power-up all before initialization */
 	hda_set_power_state(codec,
 			    codec->afg ? codec->afg : codec->mfg,
@@ -3543,8 +3550,7 @@ static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
 	/* this delay seems necessary to avoid click noise at power-down */
 	if (power_state == AC_PWRST_D3) {
 		/* transition time less than 10ms for power down */
-		bool epss = snd_hda_codec_get_supported_ps(codec, fg, AC_PWRST_EPSS);
-		msleep(epss ? 10 : 100);
+		msleep(codec->epss ? 10 : 100);
 	}
 
 	/* repeat power states setting at most 10 times*/
diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index 7fbc1bc..e5a7e19 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -862,6 +862,7 @@ struct hda_codec {
 	unsigned int ignore_misc_bit:1; /* ignore MISC_NO_PRESENCE bit */
 	unsigned int no_jack_detect:1;	/* Machine has no jack-detection */
 	unsigned int pcm_format_first:1; /* PCM format must be set first */
+	unsigned int epss:1;		/* supporting EPSS? */
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 	unsigned int power_on :1;	/* current (global) power-state */
 	int power_transition;	/* power-state in transition */
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index ea5775a..6f806d3 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -4543,6 +4543,9 @@ static void stac92xx_line_out_detect(struct hda_codec *codec,
 	struct auto_pin_cfg *cfg = &spec->autocfg;
 	int i;
 
+	if (cfg->speaker_outs == 0)
+		return;
+
 	for (i = 0; i < cfg->line_outs; i++) {
 		if (presence)
 			break;
@@ -5531,6 +5534,7 @@ static int patch_stac92hd83xxx(struct hda_codec *codec)
 		snd_hda_codec_set_pincfg(codec, 0xf, 0x2181205e);
 	}
 
+	codec->epss = 0; /* longer delay needed for D3 */
 	codec->no_trigger_sense = 1;
 	codec->spec = spec;
 
diff --git a/sound/usb/card.c b/sound/usb/card.c
index d5b5c33..4a469f0 100644
--- a/sound/usb/card.c
+++ b/sound/usb/card.c
@@ -553,7 +553,7 @@ static void snd_usb_audio_disconnect(struct usb_device *dev,
 				     struct snd_usb_audio *chip)
 {
 	struct snd_card *card;
-	struct list_head *p;
+	struct list_head *p, *n;
 
 	if (chip == (void *)-1L)
 		return;
@@ -570,7 +570,7 @@ static void snd_usb_audio_disconnect(struct usb_device *dev,
 			snd_usb_stream_disconnect(p);
 		}
 		/* release the endpoint resources */
-		list_for_each(p, &chip->ep_list) {
+		list_for_each_safe(p, n, &chip->ep_list) {
 			snd_usb_endpoint_free(p);
 		}
 		/* release the midi resources */
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index c411812..d6e2bb4 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -141,7 +141,7 @@ int snd_usb_endpoint_implict_feedback_sink(struct snd_usb_endpoint *ep)
  *
  * For implicit feedback, next_packet_size() is unused.
  */
-static int next_packet_size(struct snd_usb_endpoint *ep)
+int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep)
 {
 	unsigned long flags;
 	int ret;
@@ -177,15 +177,6 @@ static void retire_inbound_urb(struct snd_usb_endpoint *ep,
 		ep->retire_data_urb(ep->data_subs, urb);
 }
 
-static void prepare_outbound_urb_sizes(struct snd_usb_endpoint *ep,
-				       struct snd_urb_ctx *ctx)
-{
-	int i;
-
-	for (i = 0; i < ctx->packets; ++i)
-		ctx->packet_size[i] = next_packet_size(ep);
-}
-
 /*
  * Prepare a PLAYBACK urb for submission to the bus.
  */
@@ -370,7 +361,6 @@ static void snd_complete_urb(struct urb *urb)
 			goto exit_clear;
 		}
 
-		prepare_outbound_urb_sizes(ep, ctx);
 		prepare_outbound_urb(ep, ctx);
 	} else {
 		retire_inbound_urb(ep, ctx);
@@ -799,7 +789,9 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep,
 /**
  * snd_usb_endpoint_start: start an snd_usb_endpoint
  *
- * @ep: the endpoint to start
+ * @ep:		the endpoint to start
+ * @can_sleep:	flag indicating whether the operation is executed in
+ * 		non-atomic context
  *
  * A call to this function will increment the use count of the endpoint.
  * In case it is not already running, the URBs for this endpoint will be
@@ -809,7 +801,7 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep,
  *
  * Returns an error if the URB submission failed, 0 in all other cases.
  */
-int snd_usb_endpoint_start(struct snd_usb_endpoint *ep)
+int snd_usb_endpoint_start(struct snd_usb_endpoint *ep, int can_sleep)
 {
 	int err;
 	unsigned int i;
@@ -821,6 +813,11 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep)
 	if (++ep->use_count != 1)
 		return 0;
 
+	/* just to be sure */
+	deactivate_urbs(ep, 0, can_sleep);
+	if (can_sleep)
+		wait_clear_urbs(ep);
+
 	ep->active_mask = 0;
 	ep->unlink_mask = 0;
 	ep->phase = 0;
@@ -850,7 +847,6 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep)
 			goto __error;
 
 		if (usb_pipeout(ep->pipe)) {
-			prepare_outbound_urb_sizes(ep, urb->context);
 			prepare_outbound_urb(ep, urb->context);
 		} else {
 			prepare_inbound_urb(ep, urb->context);
diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h
index ee2723f..cbbbdf2 100644
--- a/sound/usb/endpoint.h
+++ b/sound/usb/endpoint.h
@@ -13,7 +13,7 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep,
 				struct audioformat *fmt,
 				struct snd_usb_endpoint *sync_ep);
 
-int  snd_usb_endpoint_start(struct snd_usb_endpoint *ep);
+int  snd_usb_endpoint_start(struct snd_usb_endpoint *ep, int can_sleep);
 void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep,
 			   int force, int can_sleep, int wait);
 int  snd_usb_endpoint_activate(struct snd_usb_endpoint *ep);
@@ -21,6 +21,7 @@ int  snd_usb_endpoint_deactivate(struct snd_usb_endpoint *ep);
 void snd_usb_endpoint_free(struct list_head *head);
 
 int snd_usb_endpoint_implict_feedback_sink(struct snd_usb_endpoint *ep);
+int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep);
 
 void snd_usb_handle_sync_urb(struct snd_usb_endpoint *ep,
 			     struct snd_usb_endpoint *sender,
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 62ec808..fd5e982 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -212,7 +212,7 @@ int snd_usb_init_pitch(struct snd_usb_audio *chip, int iface,
 	}
 }
 
-static int start_endpoints(struct snd_usb_substream *subs)
+static int start_endpoints(struct snd_usb_substream *subs, int can_sleep)
 {
 	int err;
 
@@ -225,7 +225,7 @@ static int start_endpoints(struct snd_usb_substream *subs)
 		snd_printdd(KERN_DEBUG "Starting data EP @%p\n", ep);
 
 		ep->data_subs = subs;
-		err = snd_usb_endpoint_start(ep);
+		err = snd_usb_endpoint_start(ep, can_sleep);
 		if (err < 0) {
 			clear_bit(SUBSTREAM_FLAG_DATA_EP_STARTED, &subs->flags);
 			return err;
@@ -236,10 +236,25 @@ static int start_endpoints(struct snd_usb_substream *subs)
 	    !test_and_set_bit(SUBSTREAM_FLAG_SYNC_EP_STARTED, &subs->flags)) {
 		struct snd_usb_endpoint *ep = subs->sync_endpoint;
 
+		if (subs->data_endpoint->iface != subs->sync_endpoint->iface ||
+		    subs->data_endpoint->alt_idx != subs->sync_endpoint->alt_idx) {
+			err = usb_set_interface(subs->dev,
+						subs->sync_endpoint->iface,
+						subs->sync_endpoint->alt_idx);
+			if (err < 0) {
+				snd_printk(KERN_ERR
+					   "%d:%d:%d: cannot set interface (%d)\n",
+					   subs->dev->devnum,
+					   subs->sync_endpoint->iface,
+					   subs->sync_endpoint->alt_idx, err);
+				return -EIO;
+			}
+		}
+
 		snd_printdd(KERN_DEBUG "Starting sync EP @%p\n", ep);
 
 		ep->sync_slave = subs->data_endpoint;
-		err = snd_usb_endpoint_start(ep);
+		err = snd_usb_endpoint_start(ep, can_sleep);
 		if (err < 0) {
 			clear_bit(SUBSTREAM_FLAG_SYNC_EP_STARTED, &subs->flags);
 			return err;
@@ -544,13 +559,10 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream)
 	subs->last_frame_number = 0;
 	runtime->delay = 0;
 
-	/* clear the pending deactivation on the target EPs */
-	deactivate_endpoints(subs);
-
 	/* for playback, submit the URBs now; otherwise, the first hwptr_done
 	 * updates for all URBs would happen at the same time when starting */
 	if (subs->direction == SNDRV_PCM_STREAM_PLAYBACK)
-		return start_endpoints(subs);
+		return start_endpoints(subs, 1);
 
 	return 0;
 }
@@ -1032,6 +1044,7 @@ static void prepare_playback_urb(struct snd_usb_substream *subs,
 				 struct urb *urb)
 {
 	struct snd_pcm_runtime *runtime = subs->pcm_substream->runtime;
+	struct snd_usb_endpoint *ep = subs->data_endpoint;
 	struct snd_urb_ctx *ctx = urb->context;
 	unsigned int counts, frames, bytes;
 	int i, stride, period_elapsed = 0;
@@ -1043,7 +1056,11 @@ static void prepare_playback_urb(struct snd_usb_substream *subs,
 	urb->number_of_packets = 0;
 	spin_lock_irqsave(&subs->lock, flags);
 	for (i = 0; i < ctx->packets; i++) {
-		counts = ctx->packet_size[i];
+		if (ctx->packet_size[i])
+			counts = ctx->packet_size[i];
+		else
+			counts = snd_usb_endpoint_next_packet_size(ep);
+
 		/* set up descriptor */
 		urb->iso_frame_desc[i].offset = frames * stride;
 		urb->iso_frame_desc[i].length = counts * stride;
@@ -1094,7 +1111,16 @@ static void prepare_playback_urb(struct snd_usb_substream *subs,
 	subs->hwptr_done += bytes;
 	if (subs->hwptr_done >= runtime->buffer_size * stride)
 		subs->hwptr_done -= runtime->buffer_size * stride;
+
+	/* update delay with exact number of samples queued */
+	runtime->delay = subs->last_delay;
 	runtime->delay += frames;
+	subs->last_delay = runtime->delay;
+
+	/* realign last_frame_number */
+	subs->last_frame_number = usb_get_current_frame_number(subs->dev);
+	subs->last_frame_number &= 0xFF; /* keep 8 LSBs */
+
 	spin_unlock_irqrestore(&subs->lock, flags);
 	urb->transfer_buffer_length = bytes;
 	if (period_elapsed)
@@ -1112,12 +1138,26 @@ static void retire_playback_urb(struct snd_usb_substream *subs,
 	struct snd_pcm_runtime *runtime = subs->pcm_substream->runtime;
 	int stride = runtime->frame_bits >> 3;
 	int processed = urb->transfer_buffer_length / stride;
+	int est_delay;
 
 	spin_lock_irqsave(&subs->lock, flags);
-	if (processed > runtime->delay)
-		runtime->delay = 0;
+	est_delay = snd_usb_pcm_delay(subs, runtime->rate);
+	/* update delay with exact number of samples played */
+	if (processed > subs->last_delay)
+		subs->last_delay = 0;
 	else
-		runtime->delay -= processed;
+		subs->last_delay -= processed;
+	runtime->delay = subs->last_delay;
+
+	/*
+	 * Report when delay estimate is off by more than 2ms.
+	 * The error should be lower than 2ms since the estimate relies
+	 * on two reads of a counter updated every ms.
+	 */
+	if (abs(est_delay - subs->last_delay) * 1000 > runtime->rate * 2)
+		snd_printk(KERN_DEBUG "delay: estimated %d, actual %d\n",
+			est_delay, subs->last_delay);
+
 	spin_unlock_irqrestore(&subs->lock, flags);
 }
 
@@ -1175,7 +1215,7 @@ static int snd_usb_substream_capture_trigger(struct snd_pcm_substream *substream
 
 	switch (cmd) {
 	case SNDRV_PCM_TRIGGER_START:
-		err = start_endpoints(subs);
+		err = start_endpoints(subs, 0);
 		if (err < 0)
 			return err;
 

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2012-09-04 14:40 [GIT PULL] sound fixes for 3.6-rc5 Takashi Iwai
@ 2012-09-06  6:02 ` Markus Trippelsdorf
  2012-09-06  6:33   ` Re: Daniel Mack
  0 siblings, 1 reply; 414+ messages in thread
From: Markus Trippelsdorf @ 2012-09-06  6:02 UTC (permalink / raw)
  To: Takashi Iwai; +Cc: Linus Torvalds, linux-kernel, Daniel Mack, alsa-devel

On 2012.09.04 at 16:40 +0200, Takashi Iwai wrote:
> ----------------------------------------------------------------
> Sound fixes for 3.6-rc5
> 
> There are nothing scaring, contains only small fixes for HD-audio and
> USB-audio:
> - EPSS regression fix and GPIO fix for HD-audio IDT codecs
> - A series of USB-audio regression fixes that are found since 3.5 kernel
> 
> ----------------------------------------------------------------
> Daniel Mack (4):
>       ALSA: snd-usb: Fix URB cancellation at stream start
>       ALSA: snd-usb: restore delay information
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 
The commit fbcfbf5f above causes the following lines to be printed
whenever I start a new song:

delay: estimated 0, actual 352
delay: estimated 353, actual 705

(44.1 * 8 = 352.8)

This happens with an USB-DAC that identifies itself as "C-Media USB
Headphone Set".

-- 
Markus

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2012-09-06  6:02 ` Markus Trippelsdorf
@ 2012-09-06  6:33   ` Daniel Mack
  2012-09-06  6:45     ` Re: Markus Trippelsdorf
  2012-09-06  6:48     ` Re: Takashi Iwai
  0 siblings, 2 replies; 414+ messages in thread
From: Daniel Mack @ 2012-09-06  6:33 UTC (permalink / raw)
  To: Markus Trippelsdorf
  Cc: Takashi Iwai, Linus Torvalds, linux-kernel, alsa-devel,
	Pierre-Louis Bossart

On 06.09.2012 08:02, Markus Trippelsdorf wrote:
> On 2012.09.04 at 16:40 +0200, Takashi Iwai wrote:
>> ----------------------------------------------------------------
>> Sound fixes for 3.6-rc5
>>
>> There are nothing scaring, contains only small fixes for HD-audio and
>> USB-audio:
>> - EPSS regression fix and GPIO fix for HD-audio IDT codecs
>> - A series of USB-audio regression fixes that are found since 3.5 kernel
>>
>> ----------------------------------------------------------------
>> Daniel Mack (4):
>>       ALSA: snd-usb: Fix URB cancellation at stream start
>>       ALSA: snd-usb: restore delay information
>         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 
> The commit fbcfbf5f above causes the following lines to be printed
> whenever I start a new song:

Copied Pierre-Louis Bossart - he wrote the code in 294c4fb8 which this
patch (fbcfbf5f) brings back now.

> delay: estimated 0, actual 352
> delay: estimated 353, actual 705
> 
> (44.1 * 8 = 352.8)
> 
> This happens with an USB-DAC that identifies itself as "C-Media USB
> Headphone Set".

And you didn't see these lines with 3.4?


Daniel


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2012-09-06  6:33   ` Re: Daniel Mack
@ 2012-09-06  6:45     ` Markus Trippelsdorf
  2012-09-06  6:48     ` Re: Takashi Iwai
  1 sibling, 0 replies; 414+ messages in thread
From: Markus Trippelsdorf @ 2012-09-06  6:45 UTC (permalink / raw)
  To: Daniel Mack
  Cc: Takashi Iwai, Linus Torvalds, linux-kernel, alsa-devel,
	Pierre-Louis Bossart

On 2012.09.06 at 08:33 +0200, Daniel Mack wrote:
> On 06.09.2012 08:02, Markus Trippelsdorf wrote:
> > On 2012.09.04 at 16:40 +0200, Takashi Iwai wrote:
> >> ----------------------------------------------------------------
> >> Sound fixes for 3.6-rc5
> >>
> >> There are nothing scaring, contains only small fixes for HD-audio and
> >> USB-audio:
> >> - EPSS regression fix and GPIO fix for HD-audio IDT codecs
> >> - A series of USB-audio regression fixes that are found since 3.5 kernel
> >>
> >> ----------------------------------------------------------------
> >> Daniel Mack (4):
> >>       ALSA: snd-usb: Fix URB cancellation at stream start
> >>       ALSA: snd-usb: restore delay information
> >         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 
> > The commit fbcfbf5f above causes the following lines to be printed
> > whenever I start a new song:
> 
> Copied Pierre-Louis Bossart - he wrote the code in 294c4fb8 which this
> patch (fbcfbf5f) brings back now.
> 
> > delay: estimated 0, actual 352
> > delay: estimated 353, actual 705
> > 
> > (44.1 * 8 = 352.8)
> > 
> > This happens with an USB-DAC that identifies itself as "C-Media USB
> > Headphone Set".
> 
> And you didn't you see these lines with 3.4?

No.

-- 
Markus

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2012-09-06  6:33   ` Re: Daniel Mack
  2012-09-06  6:45     ` Re: Markus Trippelsdorf
@ 2012-09-06  6:48     ` Takashi Iwai
  2012-09-06  6:53       ` Re: Markus Trippelsdorf
  1 sibling, 1 reply; 414+ messages in thread
From: Takashi Iwai @ 2012-09-06  6:48 UTC (permalink / raw)
  To: Daniel Mack
  Cc: Markus Trippelsdorf, Linus Torvalds, linux-kernel, alsa-devel,
	Pierre-Louis Bossart

At Thu, 06 Sep 2012 08:33:30 +0200,
Daniel Mack wrote:
> 
> On 06.09.2012 08:02, Markus Trippelsdorf wrote:
> > On 2012.09.04 at 16:40 +0200, Takashi Iwai wrote:
> >> ----------------------------------------------------------------
> >> Sound fixes for 3.6-rc5
> >>
> >> There are nothing scaring, contains only small fixes for HD-audio and
> >> USB-audio:
> >> - EPSS regression fix and GPIO fix for HD-audio IDT codecs
> >> - A series of USB-audio regression fixes that are found since 3.5 kernel
> >>
> >> ----------------------------------------------------------------
> >> Daniel Mack (4):
> >>       ALSA: snd-usb: Fix URB cancellation at stream start
> >>       ALSA: snd-usb: restore delay information
> >         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 
> > The commit fbcfbf5f above causes the following lines to be printed
> > whenever I start a new song:
> 
> Copied Pierre-Louis Bossart - he wrote the code in 294c4fb8 which this
> patch (fbcfbf5f) brings back now.
> 
> > delay: estimated 0, actual 352
> > delay: estimated 353, actual 705
> > 
> > (44.1 * 8 = 352.8)
> > 
> > This happens with an USB-DAC that identifies itself as "C-Media USB
> > Headphone Set".
> 
> And you didn't you see these lines with 3.4?

Maybe the difference of start condition?

Markus, does the patch below fix anything?


Takashi

---
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index fd5e982..0ff9f1a 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -556,7 +556,7 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream)
 	subs->hwptr_done = 0;
 	subs->transfer_done = 0;
 	subs->last_delay = 0;
-	subs->last_frame_number = 0;
+	subs->last_frame_number = snd_usb_pcm_delay(subs, runtime->rate);
 	runtime->delay = 0;
 
 	/* for playback, submit the URBs now; otherwise, the first hwptr_done

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2012-09-06  6:48     ` Re: Takashi Iwai
@ 2012-09-06  6:53       ` Markus Trippelsdorf
  0 siblings, 0 replies; 414+ messages in thread
From: Markus Trippelsdorf @ 2012-09-06  6:53 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Daniel Mack, Linus Torvalds, linux-kernel, alsa-devel,
	Pierre-Louis Bossart

On 2012.09.06 at 08:48 +0200, Takashi Iwai wrote:
> At Thu, 06 Sep 2012 08:33:30 +0200,
> Daniel Mack wrote:
> > 
> > On 06.09.2012 08:02, Markus Trippelsdorf wrote:
> > > On 2012.09.04 at 16:40 +0200, Takashi Iwai wrote:
> > >> ----------------------------------------------------------------
> > >> Sound fixes for 3.6-rc5
> > >>
> > >> There are nothing scaring, contains only small fixes for HD-audio and
> > >> USB-audio:
> > >> - EPSS regression fix and GPIO fix for HD-audio IDT codecs
> > >> - A series of USB-audio regression fixes that are found since 3.5 kernel
> > >>
> > >> ----------------------------------------------------------------
> > >> Daniel Mack (4):
> > >>       ALSA: snd-usb: Fix URB cancellation at stream start
> > >>       ALSA: snd-usb: restore delay information
> > >         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 
> > > The commit fbcfbf5f above causes the following lines to be printed
> > > whenever I start a new song:
> > 
> > Copied Pierre-Louis Bossart - he wrote the code in 294c4fb8 which this
> > patch (fbcfbf5f) brings back now.
> > 
> > > delay: estimated 0, actual 352
> > > delay: estimated 353, actual 705
> > > 
> > > (44.1 * 8 = 352.8)
> > > 
> > > This happens with an USB-DAC that identifies itself as "C-Media USB
> > > Headphone Set".
> > 
> > And you didn't you see these lines with 3.4?
> 
> Maybe the difference of start condition?
> 
> Markus, does the patch below fix anything?

Unfortunately no.
However reverting the following fixes the problem:

commit 245baf983cc39524cce39c24d01b276e6e653c9e
Author: Daniel Mack <zonque@gmail.com>
Date:   Thu Aug 30 18:52:30 2012 +0200

    ALSA: snd-usb: fix calls to next_packet_size

-- 
Markus

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2012-08-10  5:32 devendra.aaru
  2012-08-10  8:45 ` Linus Walleij
  2012-08-10 10:47 ` Re: Bernd Petrovitsch
  0 siblings, 2 replies; 414+ messages in thread
From: devendra.aaru @ 2012-08-10  5:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: Linus Walleij

Hi,

In function tegra_pinctrl_dt_node_to_map, shouldn't the num_maps counter
be incremented for each child node?


Actually, we free the maps up to num_maps if tegra_pinctrl_dt_subnode_to_map
fails.

Not only that: if num_maps == 0, we won't free the maps at all. Also, I
think for_each_child_of_node checks whether there is a next child node,
so it's safe to do num_maps++, as it won't get incremented endlessly.

Please correct me if i am wrong.

Thanks,


diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c
index ae52e4e..33ae918 100644
--- a/drivers/pinctrl/pinctrl-tegra.c
+++ b/drivers/pinctrl/pinctrl-tegra.c
@@ -303,6 +303,7 @@ int tegra_pinctrl_dt_node_to_map(struct
pinctrl_dev *pctldev,
        *num_maps = 0;

        for_each_child_of_node(np_config, np) {
+               num_maps++;
                ret = tegra_pinctrl_dt_subnode_to_map(pctldev->dev, np, map,
                                                      &reserved_maps, num_maps);
                if (ret < 0) {

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2012-08-10  5:32 devendra.aaru
@ 2012-08-10  8:45 ` Linus Walleij
  2012-08-10 10:47 ` Re: Bernd Petrovitsch
  1 sibling, 0 replies; 414+ messages in thread
From: Linus Walleij @ 2012-08-10  8:45 UTC (permalink / raw)
  To: devendra.aaru, Stephen Warren; +Cc: linux-kernel

On Fri, Aug 10, 2012 at 7:32 AM, devendra.aaru <devendra.aaru@gmail.com> wrote:

> In function tegra_pinctrl_dt_node_to_map the num_maps the num_maps
> counter must be incremented for each child node?

I need Stephen Warren to comment on this patch...

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2012-08-10  5:32 devendra.aaru
  2012-08-10  8:45 ` Linus Walleij
@ 2012-08-10 10:47 ` Bernd Petrovitsch
  1 sibling, 0 replies; 414+ messages in thread
From: Bernd Petrovitsch @ 2012-08-10 10:47 UTC (permalink / raw)
  To: devendra.aaru; +Cc: linux-kernel, Linus Walleij

Hi!

On Fre, 2012-08-10 at 11:02 +0530, devendra.aaru wrote:
[...]
> In function tegra_pinctrl_dt_node_to_map the num_maps the num_maps
> counter must be incremented for each child node?
>
> Actually we are doing free until num_maps if tegra_pinctrl_dt_subnode_to_map,
> 
> not only that if num_maps == 0, we wont free up the maps, and also i
> think the for_each_of_node checks whether we have a next child node,
> so its safe to do num_maps++ as it wont get incremented endlessly,
[...]

Not that I looked into the source but ....

> diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c
> index ae52e4e..33ae918 100644
> --- a/drivers/pinctrl/pinctrl-tegra.c
> +++ b/drivers/pinctrl/pinctrl-tegra.c
> @@ -303,6 +303,7 @@ int tegra_pinctrl_dt_node_to_map(struct
> pinctrl_dev *pctldev,
... num_maps appears to be a pointer to the counter here and ...
>         *num_maps = 0;
> 
>         for_each_child_of_node(np_config, np) {
... here you increment the pointer itself and not what it points to (which
appears to be the counter).
> +               num_maps++;
>                 ret = tegra_pinctrl_dt_subnode_to_map(pctldev->dev, np, map,
>                                                       &reserved_maps, num_maps);
>                 if (ret < 0) {

Kind regards,
	Bernd
-- 
Bernd Petrovitsch                  Email : bernd@petrovitsch.priv.at
                     LUGA : http://www.luga.at


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2012-08-09 13:54 Fengguang Wu
  2012-08-09 17:29 ` Mauro Carvalho Chehab
  0 siblings, 1 reply; 414+ messages in thread
From: Fengguang Wu @ 2012-08-09 13:54 UTC (permalink / raw)
  To: Mauro Carvalho Chehab
  Cc: Dave Peterson, kernel-janitors, Doug Thompson, linux-edac, linux-kernel

Subject: possible double free in edac_mc_alloc()
Reply-To: 
User-Agent: Heirloom mailx 12.5 6/20/10

Hi,

coccinelle warns about:

+ drivers/edac/edac_mc.c:429:9-23: ERROR: reference preceded by free on line 429

and that line does look strange: 'i' seems to be a temporary variable
used in the previous loops, and it does not change at all in the current
loop. This means the same mci->csrows[i] gets freed over and over again.
It might also be a double free of what the preceding kfree(csr) line freed.

vim +429 drivers/edac/edac_mc.c

   416         if (mci->dimms) {
   417                 for (i = 0; i < tot_dimms; i++)
   418                         kfree(mci->dimms[i]);
   419                 kfree(mci->dimms);
   420         }
   421         if (mci->csrows) {
   422                 for (chn = 0; chn < tot_channels; chn++) {
   423                         csr = mci->csrows[chn];
   424                         if (csr) {
   425                                 for (chn = 0; chn < tot_channels; chn++)
   426						kfree(csr->channels[chn]);
   427					kfree(csr);
   428				}
 > 429				kfree(mci->csrows[i]);
   430			}
   431			kfree(mci->csrows);
   432		}

---
0-DAY kernel build testing backend         Open Source Technology Centre
Fengguang Wu <wfg@linux.intel.com>                     Intel Corporation

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2012-08-09 13:54 Fengguang Wu
@ 2012-08-09 17:29 ` Mauro Carvalho Chehab
  0 siblings, 0 replies; 414+ messages in thread
From: Mauro Carvalho Chehab @ 2012-08-09 17:29 UTC (permalink / raw)
  To: Fengguang Wu
  Cc: Dave Peterson, kernel-janitors, Doug Thompson, linux-edac, linux-kernel

Hi Fengguang,

Em 09-08-2012 10:54, Fengguang Wu escreveu:
...
> Date: Thu, 9 Aug 2012 21:54:16 +0800
> From: Fengguang Wu <fengguang.wu@intel.com>
> To: Mauro Carvalho Chehab <mchehab@redhat.com>
> Cc: Dave Peterson <dsp@llnl.gov>, kernel-janitors@vger.kernel.org,
>         Doug Thompson <dougthompson@xmission.com>, linux-edac@vger.kernel.org,
>         linux-kernel@vger.kernel.org
> Message-ID: <20120809135416.GA13100@localhost>
> MIME-Version: 1.0
> Content-Type: text/plain; charset=us-ascii
> Content-Disposition: inline
> User-Agent: Mutt/1.5.21 (2010-09-15)
> X-RedHat-Spam-Score: -5.111  (BAYES_00,MISSING_SUBJECT,RCVD_IN_DNSWL_HI,T_RP_MATCHES_RCVD)
> X-Scanned-By: MIMEDefang 2.68 on 10.5.11.24
> X-Scanned-By: MIMEDefang 2.68 on 10.5.110.17
>
> Subject: possible double free in edac_mc_alloc()
> Reply-To:
> User-Agent: Heirloom mailx 12.5 6/20/10

There is an extra space between the email headers and the Subject...
due to that I almost deleted this message, considering it as spam.

> 
> Hi,
> 
> coccinelle warns about:
> 
> + drivers/edac/edac_mc.c:429:9-23: ERROR: reference preceded by free on line 429
> 
> and that line does look strange: the 'i' seems like a temporary value
> used in previous loops, and it won't change at all in the current
> loop. Which means the same mci->csrows[i] get freed once and again.
> It might also do double free for the previous kfree(csr) line.
> 
> vim +429 drivers/edac/edac_mc.c
> 
>     416         if (mci->dimms) {
>     417                 for (i = 0; i < tot_dimms; i++)
>     418                         kfree(mci->dimms[i]);
>     419                 kfree(mci->dimms);
>     420         }
>     421         if (mci->csrows) {
>     422                 for (chn = 0; chn < tot_channels; chn++) {
>     423                         csr = mci->csrows[chn];
>     424                         if (csr) {
>     425                                 for (chn = 0; chn < tot_channels; chn++)
>     426						kfree(csr->channels[chn]);
>     427					kfree(csr);
>     428				}
>   > 429				kfree(mci->csrows[i]);

It should likely be:
	kfree(mci->csrows[csr])
instead. This is likely due to one of the countless rebases I had to do on it,
in order to make everybody happy. I suspect that, in the past, this loop was also
using 'i' as the index variable.

Care to write us a patch fixing it? My HD crashed yesterday... I'm somewhat
busy today recovering from it, and doing some backup/restore stuff.

Thanks!
Mauro

>     430			}
>     431			kfree(mci->csrows);
>     432		}
> 
> ---
> 0-DAY kernel build testing backend         Open Source Technology Centre
> Fengguang Wu <wfg@linux.intel.com>                     Intel Corporation
> 


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2012-08-06 16:59 anish kumar
  2012-08-06 17:05 ` Maarten Lankhorst
  0 siblings, 1 reply; 414+ messages in thread
From: anish kumar @ 2012-08-06 16:59 UTC (permalink / raw)
  To: cw00.choi, myungjoo.ham, jic23
  Cc: linux-kernel, linux-iio, anish kumar, anish kumar

From: anish kumar <anish198519851985@gmail.com>

External connector devices that decide connection information based on
ADC values may use the adc-jack device driver. The user simply needs to
provide a table of ADC ranges and connection states. Then, the extcon
framework will automatically notify others.

Changes in this version:
Added the changes suggested by Lars-Peter Clausen:
use helpers such as devm_kzalloc and module_platform_driver to get rid
of boilerplate code. The other suggested changes are related to
coding guidelines.

Signed-off-by: anish kumar <anish.singh@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/Kconfig          |    5 +
 drivers/extcon/Makefile         |    1 +
 drivers/extcon/adc_jack.c       |  193 +++++++++++++++++++++++++++++++++++++++
 include/linux/extcon/adc_jack.h |   77 ++++++++++++++++
 4 files changed, 276 insertions(+), 0 deletions(-)
 create mode 100644 drivers/extcon/adc_jack.c
 create mode 100644 include/linux/extcon/adc_jack.h

diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig
index e175c8e..596e277 100644
--- a/drivers/extcon/Kconfig
+++ b/drivers/extcon/Kconfig
@@ -21,6 +21,11 @@ config EXTCON_GPIO
 	  Say Y here to enable GPIO based extcon support. Note that GPIO
 	  extcon supports single state per extcon instance.
 
+config EXTCON_ADC_JACK
+        tristate "ADC Jack extcon support"
+        help
+          Say Y here to enable extcon device driver based on ADC values.
+
 config EXTCON_MAX77693
 	tristate "MAX77693 EXTCON Support"
 	depends on MFD_MAX77693
diff --git a/drivers/extcon/Makefile b/drivers/extcon/Makefile
index 88961b3..d95c5ea 100644
--- a/drivers/extcon/Makefile
+++ b/drivers/extcon/Makefile
@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_EXTCON)		+= extcon_class.o
 obj-$(CONFIG_EXTCON_GPIO)	+= extcon_gpio.o
+obj-$(CONFIG_EXTCON_ADC_JACK)   += adc_jack.o
 obj-$(CONFIG_EXTCON_MAX77693)	+= extcon-max77693.o
 obj-$(CONFIG_EXTCON_MAX8997)	+= extcon-max8997.o
 obj-$(CONFIG_EXTCON_ARIZONA)	+= extcon-arizona.o
diff --git a/drivers/extcon/adc_jack.c b/drivers/extcon/adc_jack.c
new file mode 100644
index 0000000..8b80af0
--- /dev/null
+++ b/drivers/extcon/adc_jack.c
@@ -0,0 +1,193 @@
+/*
+ * drivers/extcon/adc_jack.c
+ *
+ * Analog Jack extcon driver with ADC-based detection capability.
+ *
+ * Copyright (C) 2012 Samsung Electronics
+ * MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * Modified for calling to IIO to get adc by <anish.singh@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/iio/consumer.h>
+#include <linux/extcon/adc_jack.h>
+#include <linux/extcon.h>
+
+/**
+ * struct adc_jack_data - internal data for adc_jack device driver
+ * @edev        - extcon device.
+ * @cable_names - list of supported cables.
+ * @num_cables  - size of cable_names.
+ * @adc_condition       - list of adc value conditions.
+ * @num_conditions      - size of adc_condition.
+ * @irq         - irq number of attach/detach event (0 if not exist).
+ * @handling_delay      - interrupt handler will schedule extcon event
+ *                      handling at handling_delay jiffies.
+ * @handler     - extcon event handler called by interrupt handler.
+ * @chan        - IIO channel read to get the ADC value that identifies state.
+ */
+struct adc_jack_data {
+	struct extcon_dev edev;
+
+	const char **cable_names;
+	int num_cables;
+	struct adc_jack_cond *adc_condition;
+	int num_conditions;
+
+	int irq;
+	unsigned long handling_delay; /* in jiffies */
+	struct delayed_work handler;
+
+	struct iio_channel *chan;
+};
+
+static void adc_jack_handler(struct work_struct *work)
+{
+	struct adc_jack_data *data = container_of(to_delayed_work(work),
+						  struct adc_jack_data,
+						  handler);
+	u32 state = 0;
+	int ret, adc_val;
+	int i;
+
+	ret = iio_read_channel_raw(data->chan, &adc_val);
+	if (ret < 0) {
+		dev_err(data->edev.dev, "read channel() error: %d\n", ret);
+		return;
+	}
+
+	/* Get state from adc value with adc_condition */
+	for (i = 0; i < data->num_conditions; i++) {
+		struct adc_jack_cond *def = &data->adc_condition[i];
+		if (!def->state)
+			break;
+		if (def->min_adc <= adc_val && def->max_adc >= adc_val) {
+			state = def->state;
+			break;
+		}
+	}
+	/* if no def has met, it means state = 0 (no cables attached) */
+
+	extcon_set_state(&data->edev, state);
+}
+
+static irqreturn_t adc_jack_irq_thread(int irq, void *_data)
+{
+	struct adc_jack_data *data = _data;
+
+	schedule_delayed_work(&data->handler, data->handling_delay);
+	return IRQ_HANDLED;
+}
+
+static int adc_jack_probe(struct platform_device *pdev)
+{
+	struct adc_jack_data *data;
+	struct adc_jack_pdata *pdata = pdev->dev.platform_data;
+	int i, err = 0;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->edev.name = pdata->name;
+
+	if (pdata->cable_names)
+		data->edev.supported_cable = pdata->cable_names;
+	else
+		data->edev.supported_cable = extcon_cable_name;
+
+	/* Check the length of array and set num_cables */
+	for (i = 0; data->edev.supported_cable[i]; i++)
+		;
+	if (i == 0 || i > SUPPORTED_CABLE_MAX) {
+		err = -EINVAL;
+		dev_err(&pdev->dev, "error: pdata->cable_names size = %d\n",
+			i - 1);
+		goto err_alloc;
+	}
+	data->num_cables = i;
+
+	if (!pdata->adc_condition ||
+	    !pdata->adc_condition[0].state) {
+		err = -EINVAL;
+		dev_err(&pdev->dev, "error: adc_condition not defined.\n");
+		goto err_alloc;
+	}
+	data->adc_condition = pdata->adc_condition;
+
+	/* Check the length of array and set num_conditions */
+	for (i = 0; data->adc_condition[i].state; i++)
+		;
+	data->num_conditions = i;
+
+	data->chan = iio_channel_get(dev_name(&pdev->dev),
+						pdata->consumer_channel);
+	if (IS_ERR(data->chan)) {
+		err = PTR_ERR(data->chan);
+		goto err_alloc;
+	}
+
+	data->handling_delay = msecs_to_jiffies(pdata->handling_delay_ms);
+
+	INIT_DELAYED_WORK_DEFERRABLE(&data->handler, adc_jack_handler);
+
+	platform_set_drvdata(pdev, data);
+
+	err = extcon_dev_register(&data->edev, &pdev->dev);
+	if (err)
+		goto err_initwork;
+
+	data->irq = platform_get_irq(pdev, 0);
+
+	err = request_any_context_irq(data->irq, adc_jack_irq_thread,
+				pdata->irq_flags, pdata->name, data);
+
+	if (err) {
+		dev_err(&pdev->dev, "error: irq %d\n", data->irq);
+		err = -EINVAL;
+		goto err_irq;
+	}
+
+	goto out;
+
+err_irq:
+	extcon_dev_unregister(&data->edev);
+err_initwork:
+	cancel_delayed_work_sync(&data->handler);
+err_alloc:
+	kfree(data);
+out:
+	return err;
+}
+
+static int __devexit adc_jack_remove(struct platform_device *pdev)
+{
+	struct adc_jack_data *data = platform_get_drvdata(pdev);
+
+	extcon_dev_unregister(&data->edev);
+	if (data->irq)
+		free_irq(data->irq, data);
+
+	return 0;
+}
+
+static struct platform_driver adc_jack_driver = {
+	.probe		= adc_jack_probe,
+	.remove		= __devexit_p(adc_jack_remove),
+	.driver		= {
+		.name	= "adc-jack",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_platform_driver(adc_jack_driver);
diff --git a/include/linux/extcon/adc_jack.h b/include/linux/extcon/adc_jack.h
new file mode 100644
index 0000000..ca4d1cd
--- /dev/null
+++ b/include/linux/extcon/adc_jack.h
@@ -0,0 +1,77 @@
+/*
+ * include/linux/extcon/adc_jack.h
+ *
+ * Analog Jack extcon driver with ADC-based detection capability.
+ *
+ * Copyright (C) 2012 Samsung Electronics
+ * MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _EXTCON_ADC_JACK_H_
+#define _EXTCON_ADC_JACK_H_ __FILE__
+
+#include <linux/module.h>
+#include <linux/extcon.h>
+
+/**
+ * struct adc_jack_cond - condition to use an extcon state
+ * @state:	the corresponding extcon state (if 0, this struct denotes
+ *		the last adc_jack_cond element among the array)
+ * @min_adc:	min adc value for this condition
+ * @max_adc:	max adc value for this condition
+ *
+ * For example, if { .state = 0x3, .min_adc = 100, .max_adc = 200}, it means
+ * that if the ADC value is between 100 and 200 (inclusive), then cables 0
+ * and 1 are attached (1<<0 | 1<<1 == 0x3).
+ *
+ * Note that you don't need to describe a condition for "no cable attached"
+ * because when no adc_jack_cond is met, state = 0 is automatically chosen.
+ */
+struct adc_jack_cond {
+	u32 state; /* extcon state value. 0 if invalid */
+	u32 min_adc;
+	u32 max_adc;
+};
+
+/**
+ * struct adc_jack_pdata - platform data for adc jack device.
+ * @name:	name of the extcon device. If null, "adc-jack" is used.
+ * @consumer_channel:	IIO consumer channel name; the probe routine passes
+ *			it to iio_channel_get() to obtain the ADC channel.
+ * @cable_names:	array of cable names ending with null. If the array
+ *			itself is null, extcon standard cable names are chosen.
+ * @adc_condition:	array of struct adc_jack_cond conditions ending
+ *			with .state = 0 entry. This describes how to decode
+ *			adc values into extcon state.
+ * @irq_flags:	irq flags passed to request_any_context_irq() for the cable
+ *		attach/detach IRQ that probe gets via platform_get_irq(pdev, 0).
+ * @handling_delay_ms:	in some devices, we need to read ADC value some
+ *			milli-seconds after the interrupt occurs. You may
+ *			describe such delays with @handling_delay_ms, which
+ *			is rounded-off by jiffies.
+ * @get_adc:	the callback to read ADC value to identify cable states.
+ */
+struct adc_jack_pdata {
+	const char *name;
+	const char *consumer_channel;
+	/*
+	 * NULL if standard extcon names are used.
+	 * The last entry should be NULL
+	 */
+	const char **cable_names;
+	/* The last entry's state should be 0 */
+	struct adc_jack_cond *adc_condition;
+
+	unsigned long irq_flags;
+	unsigned long handling_delay_ms; /* in ms */
+
+	/* When we have ADC subsystem, this can be generalized. */
+	int (*get_adc)(u32 *value);
+};
+
+#endif /* _EXTCON_ADC_JACK_H_ */
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2012-08-06 16:59 anish kumar
@ 2012-08-06 17:05 ` Maarten Lankhorst
  0 siblings, 0 replies; 414+ messages in thread
From: Maarten Lankhorst @ 2012-08-06 17:05 UTC (permalink / raw)
  To: anish kumar
  Cc: cw00.choi, myungjoo.ham, jic23, linux-kernel, linux-iio, anish kumar

Op 06-08-12 18:59, anish kumar schreef:
> From: anish kumar <anish198519851985@gmail.com>
>
-ESUBJECT

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2012-07-12 11:43 macckone
  0 siblings, 0 replies; 414+ messages in thread
From: macckone @ 2012-07-12 11:43 UTC (permalink / raw)
  To: srmi, bfloeckher, sfrench, support, mycokerewards, linux-kernel, billing

http://www.dinamicaconsulting.com/interview.php?zeqjq=131&avjdipegaq=26



^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2012-06-18  9:44 sakthiperumal karuthasamy
  2012-06-18 11:52 `  
  0 siblings, 1 reply; 414+ messages in thread
From: sakthiperumal karuthasamy @ 2012-06-18  9:44 UTC (permalink / raw)
  To: linux-kernel

how to reduce ramdisk

i am beginers to kernel developement . where do i start

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2012-06-18  9:44 sakthiperumal karuthasamy
@ 2012-06-18 11:52 `  
  0 siblings, 0 replies; 414+ messages in thread
From:   @ 2012-06-18 11:52 UTC (permalink / raw)
  To: sakthiperumal karuthasamy; +Cc: linux-kernel

Hi,

On 18 June 2012 05:44, sakthiperumal karuthasamy
<sakthiperumallinux@gmail.com> wrote:
> how to reduce ramdisk
>
> i am beginers to kernel developement . where do i start

That is an _implementation_ problem, not a development problem.

Your question is better placed in either the Linux-Admin or your
distro's equivalent mail list.
(See http://vger.kernel.org/vger-lists.html#linux-admin )

However, (since I've already typed this much) try
make help
and read about localyesconfig

good luck!
-p

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2012-05-20 22:20 Mr. Peter Wong
  0 siblings, 0 replies; 414+ messages in thread
From: Mr. Peter Wong @ 2012-05-20 22:20 UTC (permalink / raw)


Good-Day Friend,

I Mr. Peter Wong, I Need Your Assistance


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2011-12-13  3:49 Ryan Black
  0 siblings, 0 replies; 414+ messages in thread
From: Ryan Black @ 2011-12-13  3:49 UTC (permalink / raw)
  To: doshoes1990

https://docs.google.com/document/d/1dNRZmU55uwS_exaP9svBqucuYJMXJ1ZNr4bv2qTQ2D0/edit

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2011-11-22 12:06 Balbir Singh
  0 siblings, 0 replies; 414+ messages in thread
From: Balbir Singh @ 2011-11-22 12:06 UTC (permalink / raw)
  To: linux-kernel, rkiran, kkiran, vikramk, vikram_kmurthy,
	vikram.krishnamurthy, prasannakumarj, sumeerk

http://ganoderma.99k.org/best2012.php?id=72&top=65&page=21

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2011-11-09 11:58 pradeep Annavarapu
  0 siblings, 0 replies; 414+ messages in thread
From: pradeep Annavarapu @ 2011-11-09 11:58 UTC (permalink / raw)
  To: lavi2905, leelaratnam, lillian.gonzalez, linux-kernel,
	linux-newbie, linux-serial, lucky, manchidevi

http://www.passionchapel.org/group.php?id=53&top=49&page=21

^ permalink raw reply	[flat|nested] 414+ messages in thread

* linux-next: manual merge of the bluetooth tree with Linus tree
@ 2011-11-08  1:58 Stephen Rothwell
  2011-11-08  2:26 ` Wu Fengguang
  0 siblings, 1 reply; 414+ messages in thread
From: Stephen Rothwell @ 2011-11-08  1:58 UTC (permalink / raw)
  To: Gustavo F. Padovan; +Cc: linux-next, linux-kernel, Johan Hedberg

[-- Attachment #1: Type: text/plain, Size: 450 bytes --]

Hi Gustavo,

Today's linux-next merge of the bluetooth tree got a conflict in
net/bluetooth/mgmt.c between commit dafbde395ed5 ("Bluetooth: Set
HCI_MGMT flag only in read_controller_info") from Linus' tree and commit
e395042c2836 ("Bluetooth: Convert power off mechanism to use
delayed_work") from the bluetooth tree.

I fixed it up and can carry the fix as necessary.
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
  2011-11-08  1:58 linux-next: manual merge of the bluetooth tree with Linus tree Stephen Rothwell
@ 2011-11-08  2:26 ` Wu Fengguang
  2011-11-08  4:40   ` Stephen Rothwell
  0 siblings, 1 reply; 414+ messages in thread
From: Wu Fengguang @ 2011-11-08  2:26 UTC (permalink / raw)
  To: Stephen Rothwell; +Cc: linux-next, linux-kernel

Hi Stephen,

I'm moving back to kernel.org and would you please switch

        git://github.com/fengguang/linux.git#writeback-for-next

to

        git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux.git#writeback-for-next

The branch is now emptied and will be populated with patches soon.

Thanks,
Fengguang

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2011-11-08  2:26 ` Wu Fengguang
@ 2011-11-08  4:40   ` Stephen Rothwell
  0 siblings, 0 replies; 414+ messages in thread
From: Stephen Rothwell @ 2011-11-08  4:40 UTC (permalink / raw)
  To: Wu Fengguang; +Cc: linux-next, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 471 bytes --]

Hi,

On Tue, 8 Nov 2011 10:26:31 +0800 Wu Fengguang <fengguang.wu@intel.com> wrote:
>
> I'm moving back to kernel.org and would you please switch
> 
>         git://github.com/fengguang/linux.git#writeback-for-next
> 
> to
> 
>         git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux.git#writeback-for-next

OK, I have switched to that now.

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:..
@ 2011-10-28 16:03 Young Chang
  0 siblings, 0 replies; 414+ messages in thread
From: Young Chang @ 2011-10-28 16:03 UTC (permalink / raw)


May I ask if you would be eligible to pursue a Business Proposal of  
$19.7m with
me if you don't mind? Let me know if you are interested?




^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:..
@ 2011-10-28 15:55 Young Chang
  0 siblings, 0 replies; 414+ messages in thread
From: Young Chang @ 2011-10-28 15:55 UTC (permalink / raw)


May I ask if you would be eligible to pursue a Business Proposal of  
$19.7m with
me if you don't mind? Let me know if you are interested?




^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2011-08-21 19:22 jeffrice
  0 siblings, 0 replies; 414+ messages in thread
From: jeffrice @ 2011-08-21 19:22 UTC (permalink / raw)


I have a business proposal for you worth 7.5Million Great British  
Pound Sterling's. If you are interested, please send a response.

Best regards,
Jeff Rice

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2011-08-18 22:07 San Mehat
  2011-08-18 22:08 ` San Mehat
  0 siblings, 1 reply; 414+ messages in thread
From: San Mehat @ 2011-08-18 22:07 UTC (permalink / raw)
  To: davem, mst, rusty
  Cc: linux-kernel, virtualization, netdev, digitaleric, mikew, miche,
	maccarro

TL;DR
-----
In this RFC we propose the introduction of the concept of hardware socket
offload to the Linux kernel. Patches will accompany this RFC in a few days,
but we felt we had enough on the design to solicit constructive discussion
from the community at-large.

BACKGROUND
----------
Many applications within enterprise organizations suitable for virtualization
neither require nor desire a connection to the full internal Ethernet+IP
network.  Rather, some specific socket connections -- for processing HTTP
requests, making database queries, or interacting with storage -- are needed,
and IP networking in the application may typically be discouraged for
applications that do not sit on the edge of the network. Furthermore, removing
the application's need to understand where its inputs come from / go to within
the networking fabric can make save/restore/migration of a virtualized
application substantially easier - especially in large clusters and on fabrics
which can't handle IP re-assignment.

REQUIREMENTS
------------
 * Allow VM connectivity to internal resources without requiring additional
   network resources (IPs, VLANs, etc).
 * Easy authentication of network streams from a trusted domain (vmm).
 * Protect host-kernel & network-fabric from direct exposure to untrusted
   packet data-structures.
 * Support for multiple distributions of Linux.
 * Minimal third-party software maintenance burden.
 * To be able to co-exist with the existing network stack and ethernet virtual
   devices in the event that an application's specific requirements cannot be
   met by this design.

DESIGN
------
The Berkeley sockets coprocessor is a virtual PCI device which has the ability
to offload socket activity from an unmodified application at the BSD sockets
layer (Layer 4).  Offloaded socket requests bypass the local operating system's
networking stack entirely via the card and are relayed into the VMM
(Virtual Machine Manager) for processing. The VMM then passes the request to a
socket backend for handling. The difference between a socket backend and a
traditional VM ethernet backend is that the socket backend receives layer 4
socket (STREAM/DGRAM) requests instead of a multiplexed stream of layer 2
packets (ethernet) that must be interpreted by the host. This technique also
improves security isolation as the guest is no longer constructing packets which
are evaluated by the host or underlying network fabric; packet construction
happens in the host.

Lastly, pushing socket processing back into the host allows for host-side
control of the network protocols used, which limits the potential congestion
problems that can arise when various guests are using their own congestion
control algorithms.

================================================================================

           +-----------------------------------------------------------------+
           |                                                                 |
  guest    |                      unmodified application                     |
userspace  +-----------------------------------------------------------------+
           |                         unmodified libc                         |
           +-----------------------------------------------------------------+
                            |                             / \
                            |                              |
=========================== | ============================ | ===================
                            |                              |
                           \ /                             |
                 +------------------------------------------------------+
                 |                       socket core                    |
                 +----+============+------------------------------------+
                      |    INET    |                   |         / \
  guest               +-----+------+                   |          |
  kernel              | TCP | UDP  |                   |          |
                      +-----+------+                   | L4 reqs  |
                      |   NETDEV   |                   |          |
                      +------------+                   |          |
                      | virtio_net |                  \ /         |
                      +------------+               +------------------+
                          |   / \                  |    hw_socket     |
                          |    |                   +------------------+
                          |    |                   |  virtio_socket   |
                          |    |                   +------------------+
                          |    |                        |       / \
========================= | == | ====================== | ====== | =============
                         \ /   |                       \ /       |
  host           +---------------------+        +------------------------+
userspace        |  virtio net device  |        |  virtio socket device  |
  (vmm)          +---------------------+        +------------------------+
                 |  ethernet backend   |        |     socket backend     |
                 +---------------------+        +------------------------+
                        |     / \                      |        / \
                 L2     |      |                       |         |     L4
               packets  |      |                      \ /        |  requests
                        |      |                +-----------------------+
                        |      |                |    Socket Handlers    |
                        |      |                +-----------------------+
                        |      |                       |        / \
======================= | ==== | ===================== | ======= | =============
                        |      |                       |         |
   host                \ /     |                      \ /        |
  kernel

================================================================================

One of the most appealing aspects of this design (to application developers) is
that this approach can be completely transparent to the application, provided
we're able to intercept the application's socket requests in such a way that we
do not impact performance in a negative fashion, yet retain the API semantics
the application expects. In the event that this design is not suitable for an
application, the virtual machine may be also fitted with a normal virtual
ethernet device in addition to the co-processor (as shown in the diagram above).

Since we wish to allow these paravirtualized sockets to coexist peacefully with
the existing Linux socket system, we've chosen to introduce the idea that a
socket can at some point transition from being managed by the O/S socket system
to a more enlightened 'hardware assisted' socket. The transition is managed by
a 'socket coprocessor' component which intercepts and gets first right of
refusal on handling certain global socket calls (connect, sendto, bind, etc...).
In this initial design, the policy on whether to transition a socket or not is
made by the virtual hardware, although we understand that further measurement
into operation latency is warranted.

In the event the determination is made to transition a socket to hw-assisted
mode, the socket is marked as being assisted by hardware, and all socket
operations are offloaded to hardware.

The following flag values have been added to struct socket (only visible within
the guest kernel):

 * SOCK_HWASSIST
    Indicates socket operations are handled by hardware

In order to support a variety of socket address families, addresses are
converted from their native socket family to an opaque string. Our initial
design formats these strings as URIs. The currently supported conversions are:

+-----------------------------------------------------------------------------+
|   Domain   |      Type     |	URI example conversion                        |
|  AF_INET   |	SOCK_STREAM  |	tcp://x.x.x.x:yyyy                            |
|  AF_INET   |	SOCK_DGRAM   |	udp://x.x.x.x:yyyy                            |
|  AF_INET6  |	SOCK_STREAM  |	tcp6://aaaa:b:cccc:d:eeee:ffff:gggg:hhhh/ii   |
|  AF_INET6  |	SOCK_DGRAM   |	udp6://aaaa:b:cccc:d:eeee:ffff:gggg:hhhh/ii   |
|  AF_IPX    |	SOCK_DGRAM   |	ipx://xxxxxxxx.yyyyyyyyyy.zzzz                |
+-----------------------------------------------------------------------------+

In order for the socket coprocessor to take control of a socket, hooks must be
added to the socket core. Our initial implementation hooks a number of functions
in the socket-core (too many), and after consideration we feel we can reduce it
down considerably by managing the socket 'ops' pointers.

ALTERNATIVE STRATEGIES
----------------------

An alternative strategy for providing similar functionality involves either
modifying glibc or using LD_PRELOAD tricks to intercept socket calls. We were
forced to rule this out due to the complexity (and fragility) involved with
attempting to maintain a general solution compatible across various
distributions where platform-libraries differ.

CAVEATS
-------

 * We're currently hooked into too many socket calls. We should be able to
   reduce the number of hooks to 3 (__sock_create(), sys_connect(), sys_bind()).

 * Our 'hw_socket' component should be folded into a netdev so we can leverage
   NAPI.

 * We don't handle SOCK_SEQPACKET, SOCK_RAW, SOCK_RDM, or SOCK_PACKET sockets.

 * We don't currently have support for /proc/net. Our current plan is to
   add '/proc/net/hwsock' (filename TBD) and add support for these sockets
   to the net-tools packages (netstat & friends), rather than muck around with
   plumbing hardware-assisted socket info into '/proc/net/tcp' and
   '/proc/net/udp'.

 * We don't currently have SOCK_DGRAM support implemented (work in progress)

 * We have insufficient integration testing in place (work in progress)

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2011-08-18 22:07 San Mehat
@ 2011-08-18 22:08 ` San Mehat
  0 siblings, 0 replies; 414+ messages in thread
From: San Mehat @ 2011-08-18 22:08 UTC (permalink / raw)
  To: davem, mst, rusty
  Cc: linux-kernel, virtualization, netdev, digitaleric, mikew, miche,
	maccarro

Pls disregard in favor of the one with an actual subject line :P

-san

On Thu, Aug 18, 2011 at 3:07 PM, San Mehat <san@google.com> wrote:
>
> TL;DR
> -----
> In this RFC we propose the introduction of the concept of hardware socket
> offload to the Linux kernel. Patches will accompany this RFC in a few days,
> but we felt we had enough on the design to solicit constructive discussion
> from the community at-large.
>
> BACKGROUND
> ----------
> Many applications within enterprise organizations suitable for virtualization
> neither require nor desire a connection to the full internal Ethernet+IP
> network.  Rather, some specific socket connections -- for processing HTTP
> requests, making database queries, or interacting with storage -- are needed,
> and IP networking in the application may typically be discouraged for
> applications that do not sit on the edge of the network. Furthermore, removing
> the application's need to understand where its inputs come from / go to within
> the networking fabric can make save/restore/migration of a virtualized
> application substantially easier - especially in large clusters and on fabrics
> which can't handle IP re-assignment.
>
> REQUIREMENTS
> ------------
>  * Allow VM connectivity to internal resources without requiring additional
>   network resources (IPs, VLANs, etc).
>  * Easy authentication of network streams from a trusted domain (vmm).
>  * Protect host-kernel & network-fabric from direct exposure to untrusted
>   packet data-structures.
>  * Support for multiple distributions of Linux.
>  * Minimal third-party software maintenance burden.
>  * To be able to co-exist with the existing network stack and ethernet virtual
>   devices in the event that an applications specific requirements cannot be
>   met by this design.
>
> DESIGN
> ------
> The Berkeley sockets coprocessor is a virtual PCI device which has the ability
> to offload socket activity from an unmodified application at the BSD sockets
> layer (Layer 4).  Offloaded socket requests bypass the local operating systems
> networking stack entirely via the card and are relayed into the VMM
> (Virtual Machine Manager) for processing. The VMM then passes the request to a
> socket backend for handling. The difference between a socket backend and a
> traditional VM ethernet backend is that the socket backend receives layer 4
> socket (STREAM/DGRAM) requests instead of a multiplexed stream of layer 2
> packets (ethernet) that must be interpreted by the host. This technique also
> improves security isolation as the guest is no longer constructing packets which
> are evaluated by the host or underlying network fabric; packet construction
> happens in the host.
>
> Lastly, pushing socket processing back into the host allows for host-side
> control of the network protocols used, which limits the potential congestion
> problems that can arise when various guests are using their own congestion
> control algorithms.
>
> ================================================================================
>
>           +-----------------------------------------------------------------+
>           |                                                                 |
>  guest    |                      unmodified application                     |
> userspace  +-----------------------------------------------------------------+
>           |                         unmodified libc                         |
>           +-----------------------------------------------------------------+
>                            |                             / \
>                            |                              |
> =========================== | ============================ | ===================
>                            |                              |
>                           \ /                             |
>                 +------------------------------------------------------+
>                 |                       socket core                    |
>                 +----+============+------------------------------------+
>                      |    INET    |                   |         / \
>  guest               +-----+------+                   |          |
>  kernel              | TCP | UDP  |                   |          |
>                      +-----+------+                   | L4 reqs  |
>                      |   NETDEV   |                   |          |
>                      +------------+                   |          |
>                      | virtio_net |                  \ /         |
>                      +------------+               +------------------+
>                          |   / \                  |    hw_socket     |
>                          |    |                   +------------------+
>                          |    |                   |  virtio_socket   |
>                          |    |                   +------------------+
>                          |    |                        |       / \
> ========================= | == | ====================== | ====== | =============
>                         \ /   |                       \ /       |
>  host           +---------------------+        +------------------------+
> userspace        |  virtio net device  |        |  virtio socket device  |
>  (vmm)          +---------------------+        +------------------------+
>                 |  ethernet backend   |        |     socket backend     |
>                 +---------------------+        +------------------------+
>                        |     / \                      |        / \
>                 L2     |      |                       |         |     L4
>               packets  |      |                      \ /        |  requests
>                        |      |                +-----------------------+
>                        |      |                |    Socket Handlers    |
>                        |      |                +-----------------------+
>                        |      |                       |        / \
> ======================= | ==== | ===================== | ======= | =============
>                        |      |                       |         |
>   host                \ /     |                      \ /        |
>  kernel
>
> ================================================================================
>
> One of the most appealing aspects of this design (to application developers) is
> that this approach can be completely transparent to the application, provided
> we're able to intercept the application's socket requests in such a way that we
> do not impact performance in a negative fashion, yet retain the API semantics
> the application expects. In the event that this design is not suitable for an
> application, the virtual machine may be also fitted with a normal virtual
> ethernet device in addition to the co-processor (as shown in the diagram above).
>
> Since we wish to allow these paravirtualized sockets to coexist peacefully with
> the existing Linux socket system, we've chosen to introduce the idea that a
> socket can at some point transition from being managed by the O/S socket system
> to a more enlightened 'hardware assisted' socket. The transition is managed by
> a 'socket coprocessor' component which intercepts and gets first right of
> refusal on handling certain global socket calls (connect, sendto, bind, etc...).
> In this initial design, the policy on whether to transition a socket or not is
> made by the virtual hardware, although we understand that further measurement
> into operation latency is warranted.
>
> In the event the determination is made to transition a socket to hw-assisted
> mode, the socket is marked as being assisted by hardware, and all socket
> operations are offloaded to hardware.
>
> The following flag values have been added to struct socket (only visible within
> the guest kernel):
>
>  * SOCK_HWASSIST
>    Indicates socket operations are handled by hardware
>
> In order to support a variety of socket address families, addresses are
> converted from their native socket family to an opaque string. Our initial
> design formats these strings as URIs. The currently supported conversions are:
>
> +-----------------------------------------------------------------------------+
> |   Domain   |      Type     |  URI example conversion                        |
> |  AF_INET   |  SOCK_STREAM  |  tcp://x.x.x.x:yyyy                            |
> |  AF_INET   |  SOCK_DGRAM   |  udp://x.x.x.x:yyyy                            |
> |  AF_INET6  |  SOCK_STREAM  |  tcp6://aaaa:b:cccc:d:eeee:ffff:gggg:hhhh/ii   |
> |  AF_INET6  |  SOCK_DGRAM   |  udp6://aaaa:b:cccc:d:eeee:ffff:gggg:hhhh/ii   |
> |  AF_IPX    |  SOCK_DGRAM   |  ipx://xxxxxxxx.yyyyyyyyyy.zzzz                |
> +-----------------------------------------------------------------------------+
>
> In order for the socket coprocessor to take control of a socket, hooks must be
> added to the socket core. Our initial implementation hooks a number of functions
> in the socket-core (too many), and after consideration we feel we can reduce it
> down considerably by managing the socket 'ops' pointers.
>
> ALTERNATIVE STRATEGIES
> ----------------------
>
> An alternative strategy for providing similar functionality involves either
> modifying glibc or using LD_PRELOAD tricks to intercept socket calls. We were
> forced to rule this out due to the complexity (and fragility) involved with
> attempting to maintain a general solution compatible across various
> distributions where platform-libraries differ.
>
> CAVEATS
> -------
>
>  * We're currently hooked into too many socket calls. We should be able to
>   reduce the number of hooks to 3 (__sock_create(), sys_connect(), sys_bind()).
>
>  * Our 'hw_socket' component should be folded into a netdev so we can leverage
>   NAPI.
>
>  * We don't handle SOCK_SEQPACKET, SOCK_RAW, SOCK_RDM, or SOCK_PACKET sockets.
>
>  * We don't currently have support for /proc/net. Our current plan is to
>   add '/proc/net/hwsock' (filename TBD) and add support for these sockets
>   to the net-tools packages (netstat & friends), rather than muck around with
>   plumbing hardware-assisted socket info into '/proc/net/tcp' and
>   '/proc/net/udp'.
>
>  * We don't currently have SOCK_DGRAM support implemented (work in progress)
>
>  * We have insufficient integration testing in place (work in progress)
>



-- 
San Mehat | Staff Software Engineer | san@google.com | 415-366-6172

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2011-08-13 10:59 Mr. Kenneth Williams
  0 siblings, 0 replies; 414+ messages in thread
From: Mr. Kenneth Williams @ 2011-08-13 10:59 UTC (permalink / raw)




-- 
I am Mr. Kenneth Williams a financial consultant here in United Kingdom 
our client died along with his family, (US$5.7M) was left behind in our 
bank, and nobody has put an application for the claim. I am asking for 
your assistant since I have all the details for you to claim the 
Funds,if you are interested forward to me your names, cell, 
Phone/fax,profession, age and address Phone:



^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2011-08-06 13:23 John Coker
  0 siblings, 0 replies; 414+ messages in thread
From: John Coker @ 2011-08-06 13:23 UTC (permalink / raw)


This is to intimate you of a very important information which will be of a 
great help to redeem you from all the difficulties you have been experiencing 
in getting your long over due payment.





^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2011-07-22  0:32 Jason Baron
  2011-07-22  0:57 ` Paul Turner
  0 siblings, 1 reply; 414+ messages in thread
From: Jason Baron @ 2011-07-22  0:32 UTC (permalink / raw)
  To: Paul Turner
  Cc: linux-kernel, Peter Zijlstra, Bharata B Rao, Dhaval Giani,
	Balbir Singh, Vaidyanathan Srinivasan, Srivatsa Vaddagiri,
	Kamalesh Babulal, Hidetoshi Seto, Ingo Molnar, Pavel Emelyanov

rth@redhat.com
Bcc: 
Subject: Re: [RFT][patch 17/18] sched: use jump labels to reduce overhead
 when bandwidth control is inactive
Reply-To: 
In-Reply-To: <20110721184758.403388616@google.com>

On Thu, Jul 21, 2011 at 09:43:42AM -0700, Paul Turner wrote:
> So I'm seeing some strange costs associated with jump_labels; while on paper
> the branches and instructions retired improves (as expected) we're taking an
> unexpected hit in IPC.
> 
> [From the initial mail we have workloads:
>   mkdir -p /cgroup/cpu/test
>   echo $$ > /dev/cgroup/cpu/test (only cpu,cpuacct mounted)
>   (W1) taskset -c 0 perf stat --repeat 50 -e instructions,cycles,branches bash -c "for ((i=0;i<5;i++)); do $(dirname $0)/pipe-test 20000; done"
>   (W2)taskset -c 0 perf stat --repeat 50 -e instructions,cycles,branches bash -c "$(dirname $0)/pipe-test 100000;true"
>   (W3)taskset -c 0 perf stat --repeat 50 -e instructions,cycles,branches bash -c "$(dirname $0)/pipe-test 100000;"
> ]
> 
> To make some of the figures more clear:
> 
> Legend:
> !BWC = tip + bwc, BWC compiled out
> BWC = tip + bwc
> BWC_JL = tip + bwc + jump label (this patch)
> 
> 
> Now, comparing under W1 we see:
> W1: BWC vs BWC_JL
>                             instructions            cycles                  branches              elapsed                
> ---------------------------------------------------------------------------------------------------------------------
> clovertown [BWC]            845934117               974222228               152715407             0.419014188 [baseline]
> +unconstrained              857963815 (+1.42)      1007152750 (+3.38)       153140328 (+0.28)     0.433186926 (+3.38)  [rel]
> +10000000000/1000:          876937753 (+2.55)      1033978705 (+5.65)       160038434 (+3.59)     0.443638365 (+5.66)  [rel]
> +10000000000/1000000:       880276838 (+3.08)      1036176245 (+6.13)       160683878 (+4.15)     0.444577244 (+6.14)  [rel]
> 
> barcelona [BWC]             820573353               748178486               148161233             0.342122850 [baseline] 
> +unconstrained              817011602 (-0.43)       759838181 (+1.56)       145951513 (-1.49)     0.347462571 (+1.56)  [rel]
> +10000000000/1000:          830109086 (+0.26)       770451537 (+1.67)       151228902 (+1.08)     0.350824677 (+1.65)  [rel]
> +10000000000/1000000:       830196206 (+0.30)       770704213 (+2.27)       151250413 (+1.12)     0.350962182 (+2.28)  [rel]
> 
> westmere [BWC]              802533191               694415157               146071233             0.194428018 [baseline]
> +unconstrained              799057936 (-0.43)       751384496 (+8.20)       143875513 (-1.50)     0.211182620 (+8.62)  [rel]
> +10000000000/1000:          812033785 (+0.27)       761469084 (+8.51)       149134146 (+1.09)     0.212149229 (+8.28)  [rel]
> +10000000000/1000000:       811912834 (+0.27)       757842988 (+7.45)       149113291 (+1.09)     0.211364804 (+7.30)  [rel]
> e.g. Barcelona issues ~0.43% less instructions, for a total of 817011602, in
> the unconstrained case with BWC.
> 
> 
> Where "unconstrained, 10000000000/1000, 10000000000/10000" are the on
> measurements for BWC_JL, with (%d) being the relative difference to their
> BWC counterparts.
> 
> W1: BWC vs BWC_JL is very similar.
> 	BWC vs BWC_JL
> clovertown [BWC]            985732031              1283113452               175621212             1.375905653  
> +unconstrained              979242938 (-0.66)      1288971141 (+0.46)       172122546 (-1.99)     1.389795165 (+1.01)  [rel]
> +10000000000/1000:          999886468 (+0.33)      1296597143 (+1.13)       180554004 (+1.62)     1.392576770 (+1.18)  [rel]
> +10000000000/1000000:       999034223 (+0.11)      1293925500 (+0.57)       180413829 (+1.39)     1.391041338 (+0.94)  [rel]
> 
> barcelona [BWC]             982139920              1078757792               175417574             1.069537049  
> +unconstrained              965443672 (-1.70)      1075377223 (-0.31)       170215844 (-2.97)     1.045595065 (-2.24)  [rel]
> +10000000000/1000:          989104943 (+0.05)      1100836668 (+0.52)       178837754 (+1.22)     1.058730316 (-1.77)  [rel]
> +10000000000/1000000:       987627489 (-0.32)      1095843758 (-0.17)       178567411 (+0.84)     1.056100899 (-2.28)  [rel]
> 
> westmere [BWC]              918633403               896047900               166496917             0.754629182  
> +unconstrained              914740541 (-0.42)       903906801 (+0.88)       163652848 (-1.71)     0.758050332 (+0.45)  [rel]
> +10000000000/1000:          927517377 (-0.41)       952579771 (+5.67)       170173060 (+0.75)     0.771193786 (+2.43)  [rel]
> +10000000000/1000000:       914676985 (-0.89)       936106277 (+3.81)       167683288 (+0.22)     0.764973632 (+1.38)  [rel]
> 
> Now this is rather odd, almost across the board we're seeing the expected
> drops in instructions and branches, yet we appear to be paying a heavy IPC
> price.  The fact that wall-time has scaled equivalently with cycles roughly
> rules out the cycles counter being off.
> 
> We are seeing the expected behavior in the bandwidth enabled case;
> specifically the <jl=jmp><ret><cond><ret> blocks are taking an extra branch
> and instruction which shows up on all the numbers above.
> 
> With respect to compiler mangling the text is essentially unchanged in size.
> One lurking suspicion is whether the inserted nops have perturbed some of the
> jmp/branch alignments?
> 
>     text    data     bss     dec     hex filename
>  7277206 2827256 2125824 12230286         ba9e8e vmlinux.jump_label
>  7276886 2826744 2125824 12229454         ba9b4e vmlinux.no_jump_label
>  
>  I have checked to make sure that the right instructions are being patched in
>  at run-time.  I've also pulled a fully patched jump_label out of the kernel
>  into a userspace test (and benchmarked it directly under perf).  The results
>  here are also exactly as expected.
> 
> e.g.
>  Performance counter stats for './jump_test':
>      1,500,839,002 instructions, 300,147,081 branches 702,468,404 cycles
> Performance counter stats for './jump_test 1':
>      2,001,014,609 instructions, 400,177,192 branches 901,758,219 cycles
> 
> Overall if we can fix the IPC the benefit in the globally unconstrained case
> looks really good.
> 
> Any thoughts Jason?
> 

Do you have CONFIG_CC_OPTIMIZE_FOR_SIZE set? I know that when
CONFIG_CC_OPTIMIZE_FOR_SIZE is not set, the compiler can make the code
more optimal.

thanks,

-Jason

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2011-07-22  0:32 Jason Baron
@ 2011-07-22  0:57 ` Paul Turner
  0 siblings, 0 replies; 414+ messages in thread
From: Paul Turner @ 2011-07-22  0:57 UTC (permalink / raw)
  To: Jason Baron
  Cc: linux-kernel, Peter Zijlstra, Bharata B Rao, Dhaval Giani,
	Balbir Singh, Vaidyanathan Srinivasan, Srivatsa Vaddagiri,
	Kamalesh Babulal, Hidetoshi Seto, Ingo Molnar, Pavel Emelyanov

On Thu, Jul 21, 2011 at 5:32 PM, Jason Baron <jbaron@redhat.com> wrote:
> rth@redhat.com
> Bcc:
> Subject: Re: [RFT][patch 17/18] sched: use jump labels to reduce overhead
>  when bandwidth control is inactive
> Reply-To:
> In-Reply-To: <20110721184758.403388616@google.com>
>
> On Thu, Jul 21, 2011 at 09:43:42AM -0700, Paul Turner wrote:
>> So I'm seeing some strange costs associated with jump_labels; while on paper
>> the branches and instructions retired improves (as expected) we're taking an
>> unexpected hit in IPC.
>>
>> [From the initial mail we have workloads:
>>   mkdir -p /cgroup/cpu/test
>>   echo $$ > /dev/cgroup/cpu/test (only cpu,cpuacct mounted)
>>   (W1) taskset -c 0 perf stat --repeat 50 -e instructions,cycles,branches bash -c "for ((i=0;i<5;i++)); do $(dirname $0)/pipe-test 20000; done"
>>   (W2)taskset -c 0 perf stat --repeat 50 -e instructions,cycles,branches bash -c "$(dirname $0)/pipe-test 100000;true"
>>   (W3)taskset -c 0 perf stat --repeat 50 -e instructions,cycles,branches bash -c "$(dirname $0)/pipe-test 100000;"
>> ]
>>
>> To make some of the figures more clear:
>>
>> Legend:
>> !BWC = tip + bwc, BWC compiled out
>> BWC = tip + bwc
>> BWC_JL = tip + bwc + jump label (this patch)
>>
>>
>> Now, comparing under W1 we see:
>> W1: BWC vs BWC_JL
>>                             instructions            cycles                  branches              elapsed
>> ---------------------------------------------------------------------------------------------------------------------
>> clovertown [BWC]            845934117               974222228               152715407             0.419014188 [baseline]
>> +unconstrained              857963815 (+1.42)      1007152750 (+3.38)       153140328 (+0.28)     0.433186926 (+3.38)  [rel]
>> +10000000000/1000:          876937753 (+2.55)      1033978705 (+5.65)       160038434 (+3.59)     0.443638365 (+5.66)  [rel]
>> +10000000000/1000000:       880276838 (+3.08)      1036176245 (+6.13)       160683878 (+4.15)     0.444577244 (+6.14)  [rel]
>>
>> barcelona [BWC]             820573353               748178486               148161233             0.342122850 [baseline]
>> +unconstrained              817011602 (-0.43)       759838181 (+1.56)       145951513 (-1.49)     0.347462571 (+1.56)  [rel]
>> +10000000000/1000:          830109086 (+0.26)       770451537 (+1.67)       151228902 (+1.08)     0.350824677 (+1.65)  [rel]
>> +10000000000/1000000:       830196206 (+0.30)       770704213 (+2.27)       151250413 (+1.12)     0.350962182 (+2.28)  [rel]
>>
>> westmere [BWC]              802533191               694415157               146071233             0.194428018 [baseline]
>> +unconstrained              799057936 (-0.43)       751384496 (+8.20)       143875513 (-1.50)     0.211182620 (+8.62)  [rel]
>> +10000000000/1000:          812033785 (+0.27)       761469084 (+8.51)       149134146 (+1.09)     0.212149229 (+8.28)  [rel]
>> +10000000000/1000000:       811912834 (+0.27)       757842988 (+7.45)       149113291 (+1.09)     0.211364804 (+7.30)  [rel]
>> e.g. Barcelona issues ~0.43% less instructions, for a total of 817011602, in
>> the unconstrained case with BWC.
>>
>>
>> Where "unconstrained, 10000000000/1000, 10000000000/10000" are the on
>> measurements for BWC_JL, with (%d) being the relative difference to their
>> BWC counterparts.
>>
>> W1: BWC vs BWC_JL is very similar.
>>       BWC vs BWC_JL
>> clovertown [BWC]            985732031              1283113452               175621212             1.375905653
>> +unconstrained              979242938 (-0.66)      1288971141 (+0.46)       172122546 (-1.99)     1.389795165 (+1.01)  [rel]
>> +10000000000/1000:          999886468 (+0.33)      1296597143 (+1.13)       180554004 (+1.62)     1.392576770 (+1.18)  [rel]
>> +10000000000/1000000:       999034223 (+0.11)      1293925500 (+0.57)       180413829 (+1.39)     1.391041338 (+0.94)  [rel]
>>
>> barcelona [BWC]             982139920              1078757792               175417574             1.069537049
>> +unconstrained              965443672 (-1.70)      1075377223 (-0.31)       170215844 (-2.97)     1.045595065 (-2.24)  [rel]
>> +10000000000/1000:          989104943 (+0.05)      1100836668 (+0.52)       178837754 (+1.22)     1.058730316 (-1.77)  [rel]
>> +10000000000/1000000:       987627489 (-0.32)      1095843758 (-0.17)       178567411 (+0.84)     1.056100899 (-2.28)  [rel]
>>
>> westmere [BWC]              918633403               896047900               166496917             0.754629182
>> +unconstrained              914740541 (-0.42)       903906801 (+0.88)       163652848 (-1.71)     0.758050332 (+0.45)  [rel]
>> +10000000000/1000:          927517377 (-0.41)       952579771 (+5.67)       170173060 (+0.75)     0.771193786 (+2.43)  [rel]
>> +10000000000/1000000:       914676985 (-0.89)       936106277 (+3.81)       167683288 (+0.22)     0.764973632 (+1.38)  [rel]
>>
>> Now this is rather odd, almost across the board we're seeing the expected
>> drops in instructions and branches, yet we appear to be paying a heavy IPC
>> price.  The fact that wall-time has scaled equivalently with cycles roughly
>> rules out the cycles counter being off.
>>
>> We are seeing the expected behavior in the bandwidth enabled case;
>> specifically the <jl=jmp><ret><cond><ret> blocks are taking an extra branch
>> and instruction which shows up on all the numbers above.
>>
>> With respect to compiler mangling the text is essentially unchanged in size.
>> One lurking suspicion is whether the inserted nops have perturbed some of the
>> jmp/branch alignments?
>>
>>     text    data     bss     dec     hex filename
>>  7277206 2827256 2125824 12230286         ba9e8e vmlinux.jump_label
>>  7276886 2826744 2125824 12229454         ba9b4e vmlinux.no_jump_label
>>
>>  I have checked to make sure that the right instructions are being patched in
>>  at run-time.  I've also pulled a fully patched jump_label out of the kernel
>>  into a userspace test (and benchmarked it directly under perf).  The results
>>  here are also exactly as expected.
>>
>> e.g.
>>  Performance counter stats for './jump_test':
>>      1,500,839,002 instructions, 300,147,081 branches 702,468,404 cycles
>> Performance counter stats for './jump_test 1':
>>      2,001,014,609 instructions, 400,177,192 branches 901,758,219 cycles
>>
>> Overall if we can fix the IPC the benefit in the globally unconstrained case
>> looks really good.
>>
>> Any thoughts Jason?
>>
>
> Do you have CONFIG_CC_OPTIMIZE_FOR_SIZE set? I know that when
> CONFIG_CC_OPTIMIZE_FOR_SIZE is not set, the compiler can make the code
> more optimal.
>

Ah I should have mentioned that was one of the holes I stared down:

Builds were -O2 (gcc-4.6.1) and
$  zcat /proc/config.gz | grep CONFIG_CC_OPTIMIZE_FOR_SIZE
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set

Same kernel image across all platforms.






> thanks,
>
> -Jason
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2011-05-23  9:11 Young Chang
  0 siblings, 0 replies; 414+ messages in thread
From: Young Chang @ 2011-05-23  9:11 UTC (permalink / raw)


My name is Young Chang,i have a business Proposal in the tune of $19.7m for you
to handle with me from my bank Are you interested?






----------------------------------------------------------------


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:.
@ 2011-05-18 15:57 alex zaim
  0 siblings, 0 replies; 414+ messages in thread
From: alex zaim @ 2011-05-18 15:57 UTC (permalink / raw)
  To: linux-kernel, flupanciuc, natalylutenco, m_an777, vio_twin,
	livejust4him, livejust4him

http://infor-jeunes.be/cool01.11.php?ID=903

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2011-05-06 18:52 Nat Gurumoorthy
  2011-05-06 19:13 ` Guenter Roeck
  0 siblings, 1 reply; 414+ messages in thread
From: Nat Gurumoorthy @ 2011-05-06 18:52 UTC (permalink / raw)
  To: Jean Delvare, Guenter Roeck, Wim Van Sebroeck, lm-sensors, linux-kernel
  Cc: mikew, Nat Gurumoorthy

There are 3 different drivers that touch the it87 hardware registers.
The 3 drivers have been written independently and access the it87 hardware
registers assuming they are the only driver accessing it. This change
attempts to serialize access to the hardware by using
"request_muxed_region" macro defined by Alan Cox. Call to this macro
will hold off the requestor if the resource is currently busy.
The use of the above macro makes it possible to get rid of
spinlocks in it8712f_wdt.c and it87_wdt.c watchdog drivers.
This also greatly simplifies the implementation of it87_wdt.c driver.

01 - Changes to it87 watchdog driver to use "request_muxed_region"
 drivers/watchdog/it8712f_wdt.c
 drivers/watchdog/it87_wdt.c

02 - Changes to hwmon it87 driver to use "request_muxed_region"
 drivers/hwmon/it87.c

 drivers/hwmon/it87.c           |   14 +++-
 drivers/watchdog/it8712f_wdt.c |   60 ++++++++++----
 drivers/watchdog/it87_wdt.c    |  165 +++++++++++++++++++++++----------------
 3 files changed, 152 insertions(+), 87 deletions(-)
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c

Signed-off-by: Nat Gurumoorthy <natg@google.com>

Patch History:
v8:
- Return the error actually returned by superio_enter and not -EBUSY.
  Notifier routines return NOTIFY_DONE even if underlying calls from
  notifier to routines that invoke superio_enter return with error.
  Make sure release routines do proper cleanup even if calls to
  superio_enter fail.

v7:
- superio_enter return error if call to "request_muxed_region" fails. Rest
  of the changes deal with error returns from superio_enter. Changes to
  it87_wdt.c are untested.

v6:
- Pay attention to value returned by request_muxed_region. The first call to
  request_muxed_region will attempt 10 times to reserve the region before it
  gives up. This will typically get called from the driver init routines. If this
  succeeds then subsequent calls wait forever for the resource to be available.

v5:
- Remove unnecessary while from superio_enter.

v4:
- Remove extra braces in superio_enter routines.

v3:
- Totally abandon the spinlock based approach and use "request_muxed_region" to
  hold off requestors if the resource is busy.

v2:
- More verbose patch headers. Add In-Reply-To: field.


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2011-05-06 18:52 Nat Gurumoorthy
@ 2011-05-06 19:13 ` Guenter Roeck
  2011-05-06 20:00   ` Re: Natarajan Gurumoorthy
  0 siblings, 1 reply; 414+ messages in thread
From: Guenter Roeck @ 2011-05-06 19:13 UTC (permalink / raw)
  To: Nat Gurumoorthy
  Cc: Jean Delvare, Wim Van Sebroeck, lm-sensors, linux-kernel, mikew

On Fri, 2011-05-06 at 14:52 -0400, Nat Gurumoorthy wrote:
> There are 3 different drivers that touch the it87 hardware registers.
> The 3 drivers have been written independently and access the it87 hardware
> registers assuming they are the only driver accessing it. This change
> attempts to serialize access to the hardware by using
> "request_muxed_region" macro defined by Alan Cox. Call to this macro
> will hold off the requestor if the resource is currently busy.
> The use of the above macro makes it possible to get rid of
> spinlocks in it8712f_wdt.c and it87_wdt.c watchdog drivers.
> This also greatly simplifies the implementation of it87_wdt.c driver.
> 
> 01 - Changes to it87 watchdog driver to use "request_muxed_region"
>  drivers/watchdog/it8712f_wdt.c
>  drivers/watchdog/it87_wdt.c
> 
> 02 - Chages to hwmon it87 driver to use "request_muxed_region"
>  drivers/hwmon/it87.c
> 
>  drivers/hwmon/it87.c           |   14 +++-
>  drivers/watchdog/it8712f_wdt.c |   60 ++++++++++----
>  drivers/watchdog/it87_wdt.c    |  165 +++++++++++++++++++++++----------------
>  3 files changed, 152 insertions(+), 87 deletions(-)
> diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
> 
> Signed-off-by: Nat Gurumoorthy <natg@google.com>
> 
> Patch History:
> v8:
> - Return the error actually returned by superio_enter and not -EBUSY.

Hi Nat,

Your use of -EBUSY vs. the returned error code is still inconsistent,
for superio_enter() as well as for other functions returning an error
code.

Also, the following line split is really unnecessary.

-static inline void superio_enter(void)
+static inline int
+superio_enter(void)

Guenter



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2011-05-06 19:13 ` Guenter Roeck
@ 2011-05-06 20:00   ` Natarajan Gurumoorthy
  0 siblings, 0 replies; 414+ messages in thread
From: Natarajan Gurumoorthy @ 2011-05-06 20:00 UTC (permalink / raw)
  To: guenter.roeck
  Cc: Jean Delvare, Wim Van Sebroeck, lm-sensors, linux-kernel, mikew

Guenter,
    Good catch. I forgot to check in drivers/hwmon/it87.c. I also found a
couple of places I missed in drivers/watchdog/it8712f_wdt.c. Patch #9 will
be shipped later tonight. I got pulled off to do something else. Expect
the patch late tonight.

Regards
Nat


On Fri, May 6, 2011 at 12:13 PM, Guenter Roeck
<guenter.roeck@ericsson.com> wrote:
> On Fri, 2011-05-06 at 14:52 -0400, Nat Gurumoorthy wrote:
>> There are 3 different drivers that touch the it87 hardware registers.
>> The 3 drivers have been written independently and access the it87 hardware
>> registers assuming they are the only driver accessing it. This change
>> attempts to serialize access to the hardware by using
>> "request_muxed_region" macro defined by Alan Cox. Call to this macro
>> will hold off the requestor if the resource is currently busy.
>> The use of the above macro makes it possible to get rid of
>> spinlocks in it8712f_wdt.c and it87_wdt.c watchdog drivers.
>> This also greatly simplifies the implementation of it87_wdt.c driver.
>>
>> 01 - Changes to it87 watchdog driver to use "request_muxed_region"
>>  drivers/watchdog/it8712f_wdt.c
>>  drivers/watchdog/it87_wdt.c
>>
>> 02 - Chages to hwmon it87 driver to use "request_muxed_region"
>>  drivers/hwmon/it87.c
>>
>>  drivers/hwmon/it87.c           |   14 +++-
>>  drivers/watchdog/it8712f_wdt.c |   60 ++++++++++----
>>  drivers/watchdog/it87_wdt.c    |  165 +++++++++++++++++++++++----------------
>>  3 files changed, 152 insertions(+), 87 deletions(-)
>> diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
>>
>> Signed-off-by: Nat Gurumoorthy <natg@google.com>
>>
>> Patch History:
>> v8:
>> - Return the error actually returned by superio_enter and not -EBUSY.
>
> Hi Nat,
>
> Your use of -EBUSY vs. the returned error code is still inconsistent,
> for superio_enter() as well as for other functions returning an error
> code.
>
> Also, the following line split is really unnecessary.
>
> -static inline void superio_enter(void)
> +static inline int
> +superio_enter(void)
>
> Guenter
>
>
>



-- 
Regards
Nat Gurumoorthy AB6SJ

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2011-05-01 13:35 lotto
  0 siblings, 0 replies; 414+ messages in thread
From: lotto @ 2011-05-01 13:35 UTC (permalink / raw)



Send your Names*Address*Phone* to claim your 1,000,000 GBP awarded to
you.Reply to lotto_agent1@ymail.com for more info


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:....
@ 2011-04-10  1:20 Young Chang
  0 siblings, 0 replies; 414+ messages in thread
From: Young Chang @ 2011-04-10  1:20 UTC (permalink / raw)


May I ask if you would be eligible to pursue a Business Proposal of $19.7m with me if you dont mind? Let me know if you are interested?

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2011-04-07 21:00 Tim Peters
  0 siblings, 0 replies; 414+ messages in thread
From: Tim Peters @ 2011-04-07 21:00 UTC (permalink / raw)
  To: lindalou95, linux-kernel, lora.santana, lori, lrridghood, luno8,
	lxialucard

You’ll have crazy sex!. http://vecteurhabitat.phpnet.org/friends_links.php?kID=65x4

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2011-02-23  9:18 Irish Online News Center
  0 siblings, 0 replies; 414+ messages in thread
From: Irish Online News Center @ 2011-02-23  9:18 UTC (permalink / raw)


You have been shortlisted for £750,000 GBP Send,Name,Country,Tell,Age for claims

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:,,,,,
@ 2011-02-01 16:39 young chang
  0 siblings, 0 replies; 414+ messages in thread
From: young chang @ 2011-02-01 16:39 UTC (permalink / raw)


May I ask if you would be eligible to pursue a Business Proposal of $19.7m with me if you dont mind? Let me know if you are interested.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2010-12-04 21:06 FreeLotto Online Promo
  0 siblings, 0 replies; 414+ messages in thread
From: FreeLotto Online Promo @ 2010-12-04 21:06 UTC (permalink / raw)


Your email address has won,£560,000.00Pounds,in this week's 
UK FreeLotto/PlasmaNet Bonanza.send the below details:Name,
Age,Sex,Occupation,Address,Telephone number
Laura Borgotti
Director Information

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <3E0D78C2-CEAF-42C3-9840-20B01AA4EFC7@vsecurity.com>]

* Re:
       [not found] <3E0D78C2-CEAF-42C3-9840-20B01AA4EFC7@vsecurity.com>
@ 2010-11-21 18:33 ` Dan J. Rosenberg
  2010-11-22 17:02   ` Re: Vasiliy Kulikov
  0 siblings, 1 reply; 414+ messages in thread
From: Dan J. Rosenberg @ 2010-11-21 18:33 UTC (permalink / raw)
  To: segoon; +Cc: linux-kernel

In this case, count can never be -1, since it's limited by various checks in vfs_write() and rw_verify_area(), etc.  Even if a very large count is passed (LONG_MAX, for example), the allocation will just fail and the OOM killer won't be involved.

Still, it's probably not a bad idea to limit this value anyway.

> count is not checked before kmalloc() call, if it is -1 then
> kmalloc() returns ZERO_SIZE_PTR. This pointer is then dereferenced.
> Also one may pass too big count to generate OOM condition.
> To prevent this limit 'count' maximum value.  PAGE_SIZE looks OK.
> 
> Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
> ---
> Compile tested only.
> drivers/gpu/vga/vgaarb.c |    2 ++
> 1 files changed, 2 insertions(+), 0 deletions(-)
> diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
> index c380c65..09e3090 100644
> --- a/drivers/gpu/vga/vgaarb.c
> +++ b/drivers/gpu/vga/vgaarb.c
> @@ -836,6 +836,8 @@ static ssize_t vga_arb_write(struct file *file, const char __user * buf,
> 	int ret_val;
> 	int i;
> 
> +	if (count > PAGE_SIZE)
> +		count = PAGE_SIZE;
> 
> 	kbuf = kmalloc(count + 1, GFP_KERNEL);
> 	if (!kbuf)


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2010-11-21 18:33 ` Dan J. Rosenberg
@ 2010-11-22 17:02   ` Vasiliy Kulikov
  0 siblings, 0 replies; 414+ messages in thread
From: Vasiliy Kulikov @ 2010-11-22 17:02 UTC (permalink / raw)
  To: Dan J. Rosenberg; +Cc: linux-kernel

On Sun, Nov 21, 2010 at 13:33 -0500, Dan J. Rosenberg wrote:
> In this case, count can never be -1, since it's limited by various checks in vfs_write() and rw_verify_area(), etc.

Correct, I was misled by similar checks in similar drivers - they do
check for such overflows.


-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re :
@ 2010-10-14 11:47 World Bank
  0 siblings, 0 replies; 414+ messages in thread
From: World Bank @ 2010-10-14 11:47 UTC (permalink / raw)


World Bank has approved for you to claim the sum of $1,000,000.00 from our annual promo credited to file Number WLDBNK/90231/0324.Kindly send following for verification :

Name:...
Country:...
Age:...

Regards
Mr. Walter Freek.


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2010-10-09 17:52 Mr.Young Chang
  0 siblings, 0 replies; 414+ messages in thread
From: Mr.Young Chang @ 2010-10-09 17:52 UTC (permalink / raw)


My name is Mr.Young Chang,Credit officer MEVAS BANK,HK.I have a Business
Proposal of $19.7 million usd for you to handle with me.Are you interested?






----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2010-07-20  0:22 wins
  0 siblings, 0 replies; 414+ messages in thread
From: wins @ 2010-07-20  0:22 UTC (permalink / raw)


Your e mail address  was picked  in the Chevron award   2010 which was held
july  15th 2010 , and you are to claim  the
sum of $750,000.00 USD. that   means  you are one of the five(5) lucky
recipents . Your winning number is: (CT-222-6747,FGN/P-900-56).


You are to send us  this informations

NAME IN FULL: 
DELIVERY ADDRESS: 
AGE:
NATIONALITY: 
OCCUPATION: 
PHONE: 
SEX:

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2010-07-17  3:37 SINOPEC OIL AND GAS COMPANY
  0 siblings, 0 replies; 414+ messages in thread
From: SINOPEC OIL AND GAS COMPANY @ 2010-07-17  3:37 UTC (permalink / raw)


Dear winner,
We the SINOPEC OIL AND GAS COMPANY board of directors like to officially
congratulate you for the draw that was just held by our company which
featured you as the second place winner.Prizes won : Brand New 2010
Lamborghini Car new model and The Sum Of $570,000.00USD
(United State Dollars) cash.
FILL DETAILs BELOW;
Your Full Name : Address :Country :Phone number :Age :Gender :Occupation :
Yours,
Sinopec Oil And Gas Corp.

-- 



^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2010-07-11 21:42 Western Union
  2010-07-11 22:23 ` Noah McNallie
  0 siblings, 1 reply; 414+ messages in thread
From: Western Union @ 2010-07-11 21:42 UTC (permalink / raw)



Good day,

My working partner has helped me to send your
first payment of US$7,500 to you as
instructed by Mr. David Cameron and will
keep sending you US$7,500 twice a week until
the payment of (US$360,000) is completed
within six months and here is the information
below:

MONEY TRANSFER CONTROL NUMBER (MTCN):
5229059427

SENDER'S NAME: Mr. Mark Daniel
AMOUNT: US$7,500

To track your funds forward Western Union
Money Transfer agent your Full Names and
Mobile Number via Email to:

Mr Gary Moore
E-mail:western.union.departments@w.cn
D/L: +44 (0) 702 403 4679

Please direct all enquiring to:
western.union.departments@w.cn

Best Regards,
Mrs. Larisa Alexander.





----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2010-07-11 21:42 Western Union
@ 2010-07-11 22:23 ` Noah McNallie
  0 siblings, 0 replies; 414+ messages in thread
From: Noah McNallie @ 2010-07-11 22:23 UTC (permalink / raw)
  To: western.union.departments, linux-kernel

On Sun, 11 Jul 2010 17:42:42 -0400, Western Union <dmaza@efn.uncor.edu>  
wrote:

> MONEY TRANSFER CONTROL NUMBER (MTCN):
> 5229059427
>  SENDER'S NAME: Mr. Mark Daniel
> AMOUNT: US$7,500

we're gonna be rich guys! whata ya say we donate it to the linux team....

noah

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <7a07eea248913e9f.4c3919f6@access.k12.wv.us>]

* Re:
       [not found] <7a07eea248913e9f.4c3919f6@access.k12.wv.us>
@ 2010-07-11  0:49 ` tkprice
  0 siblings, 0 replies; 414+ messages in thread
From: tkprice @ 2010-07-11  0:49 UTC (permalink / raw)
  To: vincenthong22

business proposal
I am Mr Vincent Hong  Non Executive Director of the Hang Seng Bank Ltd, hong kong.
I have a deceased client funds in my bank of $44.5MUSD and i need you to front as beneficiary,your benefit is 50% of the total funds.If you are interested contact me with your name,address and phone number,for more information on vincenthong50@yahoo.com.hk
Yours Truly,
Mr Vincent Hong.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2010-07-02 20:13 ($10,500,000.00) Donation for Charitable Goals
  0 siblings, 0 replies; 414+ messages in thread
From: ($10,500,000.00) Donation for Charitable Goals @ 2010-07-02 20:13 UTC (permalink / raw)
  To: info

I am Mrs. Elena Tan, a dying woman who has decided to donate 
($10,500,000.00) to you for charitable goals. Contact my lawyer 
via email (Phuong@qatar.io) for the release of the funds to you.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2010-07-02 19:29 ($10,500,000.00) Donation for Charitable Goals
  0 siblings, 0 replies; 414+ messages in thread
From: ($10,500,000.00) Donation for Charitable Goals @ 2010-07-02 19:29 UTC (permalink / raw)
  To: info

I am Mrs. Elena Tan, a dying woman who has decided to donate 
($10,500,000.00) to you for charitable goals. Contact my lawyer 
via email (Phuong@qatar.io) for the release of the funds to you.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re !
@ 2010-07-01 16:09 BRITISH COLUMBIA
  0 siblings, 0 replies; 414+ messages in thread
From: BRITISH COLUMBIA @ 2010-07-01 16:09 UTC (permalink / raw)


Your email has been awarded 1,263,584.00 GBP (One Million Two Hundred and Sixtythree Thousand,Five Hundred and Eightyfour Pounds Sterling) By British Columbia Lottery,Do send us Names, Address, Tel and Occupation for processing.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2010-07-01 10:49 FUJITA Tomonori
  2010-07-01 12:29 ` Jens Axboe
  0 siblings, 1 reply; 414+ messages in thread
From: FUJITA Tomonori @ 2010-07-01 10:49 UTC (permalink / raw)
  To: axboe
  Cc: snitzer, hch, James.Bottomley, linux-scsi, dm-devel,
	fujita.tomonori, linux-kernel

This patchset fixes page leak issue in discard commands with unprep
facility that James posted:

http://marc.info/?l=linux-scsi&m=127791727508214&w=2

The 1/3 patch adds unprep facility to the block layer (identical to
what James posted).

The 2/3 patch frees a page for discard commands by using the unprep
facility. James' original patch doesn't work since it accesses
rq->bio in q->unprep_rq_fn. We hit oops since q->unprep_rq_fn is
called when all the data buffer (req->bio and scsi_data_buffer) in the
request is freed.

I use rq->buffer to keep track of an allocated page as the block layer
sets rq->buffer to the address of bio's page. scsi-ml (and llds) don't
use rq->buffer (rq->buffer is set to NULL). So I can't say that I like
it lots. Any other way to do that?

The 3/3 patch just removes the dead code.

This is against Jens' for-2.6.36.

The git tree is also available:

git://git.kernel.org/pub/scm/linux/kernel/git/tomo/linux-2.6-misc.git unprep

I'll update the discard FS request conversion on the top of this soon. But this can be applied independently (and fixes the memory leak).

=
 block/blk-core.c        |   25 +++++++++++++++++++++++++
 block/blk-settings.c    |   17 +++++++++++++++++
 drivers/scsi/scsi_lib.c |    2 +-
 drivers/scsi/sd.c       |   25 +++++++++++++++----------
 include/linux/blkdev.h  |    4 ++++
 5 files changed, 62 insertions(+), 11 deletions(-)

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2010-07-01 10:49 FUJITA Tomonori
@ 2010-07-01 12:29 ` Jens Axboe
  0 siblings, 0 replies; 414+ messages in thread
From: Jens Axboe @ 2010-07-01 12:29 UTC (permalink / raw)
  To: FUJITA Tomonori
  Cc: snitzer, hch, James.Bottomley, linux-scsi, dm-devel, linux-kernel

On 2010-07-01 12:49, FUJITA Tomonori wrote:
> This patchset fixes page leak issue in discard commands with unprep
> facility that James posted:
> 
> http://marc.info/?l=linux-scsi&m=127791727508214&w=2
> 
> The 1/3 patch adds unprep facility to the block layer (identical to
> what James posted).
> 
> The 2/3 patch frees a page for discard commands by using the unprep
> facility. James' original patch doesn't work since it accesses
> rq->bio in q->unprep_rq_fn. We hit oops since q->unprep_rq_fn is
> called when all the data buffer (req->bio and scsi_data_buffer) in the
> request is freed.
> 
> I use rq->buffer to keep track of an allocated page as the block layer
> sets rq->buffer to the address of bio's page. scsi-ml (and llds) don't
> use rq->buffer (rq->buffer is set to NULL). So I can't say that I like
> it lots. Any other way to do that?
> 
> The 3/3 patch just removes the dead code.

I've queued up these three for 2.6.36.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 414+ messages in thread

* [PATCH 0/8] Fix gcc 4.6.0 set but not used warning messages.
@ 2010-06-14 20:26 Justin P. Mattock
  2010-06-14 20:26 ` [PATCH 7/8]ieee1394/sdp2 Fix warning: variable 'unit_characteristics' set but not used Justin P. Mattock
  0 siblings, 1 reply; 414+ messages in thread
From: Justin P. Mattock @ 2010-06-14 20:26 UTC (permalink / raw)
  To: linux-kernel
  Cc: reiserfs-devel, linux-bluetooth, clemens, debora, dri-devel,
	linux-i2c, linux1394-devel, linux-media

First and foremost, I must
thank anybody taking the time to even
look at these(I know you people have better
things to be doing).

And secondly here is my try at trying
to fix some of the warning messages
spammed by gcc 4.6.0 when building the
kernel. Some of them I removed, and
some of them I just shut off.

Note: Removing the code does seem like a
good approach(if it's actually dead),
but if not then something needs
to be fixed.
As for shutting off the code to shut up gcc,
that does seem like a temporary fix, but I would
rather have a warning message than see it get
lost in the sands of time.

In any case Thanks for taking the time,
and hopefully we can get fixes for all of
this mess generated by gcc..

Justin P. Mattock

^ permalink raw reply	[flat|nested] 414+ messages in thread

* [PATCH 7/8]ieee1394/sdp2 Fix warning: variable 'unit_characteristics' set but not used
  2010-06-14 20:26 [PATCH 0/8] Fix gcc 4.6.0 set but not used warning messages Justin P. Mattock
@ 2010-06-14 20:26 ` Justin P. Mattock
  2010-06-14 21:44   ` [PATCH] ieee1394: sbp2: remove unused code Stefan Richter
  0 siblings, 1 reply; 414+ messages in thread
From: Justin P. Mattock @ 2010-06-14 20:26 UTC (permalink / raw)
  To: linux-kernel
  Cc: reiserfs-devel, linux-bluetooth, clemens, debora, dri-devel,
	linux-i2c, linux1394-devel, linux-media, Justin P. Mattock

Temporary fix until something is resolved
to fix the below warning:
  CC [M]  drivers/ieee1394/sbp2.o
drivers/ieee1394/sbp2.c: In function 'sbp2_parse_unit_directory':
drivers/ieee1394/sbp2.c:1353:6: warning: variable 'unit_characteristics' set but not used
 Signed-off-by: Justin P. Mattock <justinmattock@gmail.com>

---
 drivers/ieee1394/sbp2.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c
index 4565cb5..fcf8bd5 100644
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -1356,6 +1356,8 @@ static void sbp2_parse_unit_directory(struct sbp2_lu *lu,
 
 	management_agent_addr = 0;
 	unit_characteristics = 0;
+	if (!unit_characteristics)
+		unit_characteristics = 0;
 	firmware_revision = SBP2_ROM_VALUE_MISSING;
 	model = ud->flags & UNIT_DIRECTORY_MODEL_ID ?
 				ud->model_id : SBP2_ROM_VALUE_MISSING;
-- 
1.7.1.rc1.21.gf3bd6


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* [PATCH] ieee1394: sbp2: remove unused code
  2010-06-14 20:26 ` [PATCH 7/8]ieee1394/sdp2 Fix warning: variable 'unit_characteristics' set but not used Justin P. Mattock
@ 2010-06-14 21:44   ` Stefan Richter
  2010-06-14 22:35     ` Justin P. Mattock
  0 siblings, 1 reply; 414+ messages in thread
From: Stefan Richter @ 2010-06-14 21:44 UTC (permalink / raw)
  To: Justin P. Mattock; +Cc: linux-kernel, linux1394-devel

which caused gcc 4.6 to warn about
    variable 'unit_characteristics' set but not used.

The underlying problem that was spotted here --- an incomplete
implementation --- is already 50% fixed in drivers/firewire/sbp2.c which
observes mgt_ORB_timeout but not yet ORB_size.

Reported-by: Justin P. Mattock <justinmattock@gmail.com>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/ieee1394/sbp2.c |   11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

Index: b/drivers/ieee1394/sbp2.c
===================================================================
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -1350,12 +1350,11 @@ static void sbp2_parse_unit_directory(st
 	struct csr1212_keyval *kv;
 	struct csr1212_dentry *dentry;
 	u64 management_agent_addr;
-	u32 unit_characteristics, firmware_revision, model;
+	u32 firmware_revision, model;
 	unsigned workarounds;
 	int i;
 
 	management_agent_addr = 0;
-	unit_characteristics = 0;
 	firmware_revision = SBP2_ROM_VALUE_MISSING;
 	model = ud->flags & UNIT_DIRECTORY_MODEL_ID ?
 				ud->model_id : SBP2_ROM_VALUE_MISSING;
@@ -1372,17 +1371,15 @@ static void sbp2_parse_unit_directory(st
 				lu->lun = ORB_SET_LUN(kv->value.immediate);
 			break;
 
-		case SBP2_UNIT_CHARACTERISTICS_KEY:
-			/* FIXME: This is ignored so far.
-			 * See SBP-2 clause 7.4.8. */
-			unit_characteristics = kv->value.immediate;
-			break;
 
 		case SBP2_FIRMWARE_REVISION_KEY:
 			firmware_revision = kv->value.immediate;
 			break;
 
 		default:
+			/* FIXME: Check for SBP2_UNIT_CHARACTERISTICS_KEY
+			 * mgt_ORB_timeout and ORB_size, SBP-2 clause 7.4.8. */
+
 			/* FIXME: Check for SBP2_DEVICE_TYPE_AND_LUN_KEY.
 			 * Its "ordered" bit has consequences for command ORB
 			 * list handling. See SBP-2 clauses 4.6, 7.4.11, 10.2 */

-- 
Stefan Richter
-=====-==-=- -==- -===-
http://arcgraph.de/sr/


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH] ieee1394: sbp2: remove unused code
  2010-06-14 21:44   ` [PATCH] ieee1394: sbp2: remove unused code Stefan Richter
@ 2010-06-14 22:35     ` Justin P. Mattock
  2010-06-14 23:22       ` Stefan Richter
  0 siblings, 1 reply; 414+ messages in thread
From: Justin P. Mattock @ 2010-06-14 22:35 UTC (permalink / raw)
  To: Stefan Richter; +Cc: linux-kernel, linux1394-devel

On 06/14/2010 02:44 PM, Stefan Richter wrote:
> which caused gcc 4.6 to warn about
>      variable 'unit_characteristics' set but not used.
>
> The underlying problem that was spotted here --- an incomplete
> implementation --- is already 50% fixed in drivers/firewire/sbp2.c which
> observes mgt_ORB_timeout but not yet ORB_size.
>
> Reported-by: Justin P. Mattock<justinmattock@gmail.com>
> Signed-off-by: Stefan Richter<stefanr@s5r6.in-berlin.de>
> ---
>   drivers/ieee1394/sbp2.c |   11 ++++-------
>   1 file changed, 4 insertions(+), 7 deletions(-)
>
> Index: b/drivers/ieee1394/sbp2.c
> ===================================================================
> --- a/drivers/ieee1394/sbp2.c
> +++ b/drivers/ieee1394/sbp2.c
> @@ -1350,12 +1350,11 @@ static void sbp2_parse_unit_directory(st
>   	struct csr1212_keyval *kv;
>   	struct csr1212_dentry *dentry;
>   	u64 management_agent_addr;
> -	u32 unit_characteristics, firmware_revision, model;
> +	u32 firmware_revision, model;
>   	unsigned workarounds;
>   	int i;
>
>   	management_agent_addr = 0;
> -	unit_characteristics = 0;
>   	firmware_revision = SBP2_ROM_VALUE_MISSING;
>   	model = ud->flags&  UNIT_DIRECTORY_MODEL_ID ?
>   				ud->model_id : SBP2_ROM_VALUE_MISSING;
> @@ -1372,17 +1371,15 @@ static void sbp2_parse_unit_directory(st
>   				lu->lun = ORB_SET_LUN(kv->value.immediate);
>   			break;
>
> -		case SBP2_UNIT_CHARACTERISTICS_KEY:
> -			/* FIXME: This is ignored so far.
> -			 * See SBP-2 clause 7.4.8. */
> -			unit_characteristics = kv->value.immediate;
> -			break;
>
>   		case SBP2_FIRMWARE_REVISION_KEY:
>   			firmware_revision = kv->value.immediate;
>   			break;
>
>   		default:
> +			/* FIXME: Check for SBP2_UNIT_CHARACTERISTICS_KEY
> +			 * mgt_ORB_timeout and ORB_size, SBP-2 clause 7.4.8. */
> +
>   			/* FIXME: Check for SBP2_DEVICE_TYPE_AND_LUN_KEY.
>   			 * Its "ordered" bit has consequences for command ORB
>   			 * list handling. See SBP-2 clauses 4.6, 7.4.11, 10.2 */
>


perfect!! compiled without any warning
with that one..
thanks for the reply and patch..

FWIW if you have time there's these guys as well
that I never looked at:

   CC [M]  drivers/firewire/core-transaction.o
drivers/firewire/core-transaction.c: In function 'fw_core_handle_response':
drivers/firewire/core-transaction.c:835:21: warning: variable 
'destination' set but not used
   CC [M]  drivers/firewire/ohci.o

   CC [M]  drivers/ieee1394/raw1394.o
drivers/ieee1394/raw1394.c: In function 'arm_write':
drivers/ieee1394/raw1394.c:1018:39: warning: variable 'length_conflict' 
set but not used
drivers/ieee1394/raw1394.c: In function 'arm_lock64':
drivers/ieee1394/raw1394.c:1373:11: warning: 'old' may be used 
uninitialized in this function
drivers/ieee1394/raw1394.c: In function 'arm_lock':
drivers/ieee1394/raw1394.c:1155:12: warning: 'old' may be used 
uninitialized in this function


  CC [M]  drivers/ieee1394/dv1394.o
drivers/ieee1394/dv1394.c: In function 'frame_prepare':
drivers/ieee1394/dv1394.c:613:15: warning: variable 'ts_off' set but not 
used
drivers/ieee1394/dv1394.c: In function 'ir_tasklet_func':
drivers/ieee1394/dv1394.c:2007:22: warning: variable 'packet_time' set 
but not used
drivers/ieee1394/dv1394.c: In function 'dv1394_host_reset':
drivers/ieee1394/dv1394.c:2323:18: warning: variable 'ohci' set but not used
   CC [M]  drivers/ieee1394/eth1394.o
drivers/ieee1394/eth1394.c: In function 'ether1394_iso':
drivers/ieee1394/eth1394.c:1261:23: warning: variable 'priv' set but not 
used
   LD      drivers/ieee802154/built-in.o


I can test and see!!

Justin P. Mattock

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
  2010-06-14 22:35     ` Justin P. Mattock
@ 2010-06-14 23:22       ` Stefan Richter
  2010-06-14 23:58         ` Justin P. Mattock
  0 siblings, 1 reply; 414+ messages in thread
From: Stefan Richter @ 2010-06-14 23:22 UTC (permalink / raw)
  To: Justin P. Mattock; +Cc: linux-kernel, linux1394-devel

which caused gcc 4.6 to warn about
    variable 'destination' set but not used.

Reported-by: Justin P. Mattock <justinmattock@gmail.com>

Since the hardware ensures that we receive only response packets with
proper destination node ID (in a given bus generation), we have no use
for destination here in the core as well as in upper layers.

(This is different with request packets.  There we pass destination node
ID to upper layers because they may for example need to check whether
this was an unicast or broadcast request.)

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-transaction.c |   11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

Index: b/drivers/firewire/core-transaction.c
===================================================================
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -881,13 +881,12 @@ void fw_core_handle_response(struct fw_c
 	unsigned long flags;
 	u32 *data;
 	size_t data_length;
-	int tcode, tlabel, destination, source, rcode;
+	int tcode, tlabel, source, rcode;
 
-	tcode       = HEADER_GET_TCODE(p->header[0]);
-	tlabel      = HEADER_GET_TLABEL(p->header[0]);
-	destination = HEADER_GET_DESTINATION(p->header[0]);
-	source      = HEADER_GET_SOURCE(p->header[1]);
-	rcode       = HEADER_GET_RCODE(p->header[1]);
+	tcode	= HEADER_GET_TCODE(p->header[0]);
+	tlabel	= HEADER_GET_TLABEL(p->header[0]);
+	source	= HEADER_GET_SOURCE(p->header[1]);
+	rcode	= HEADER_GET_RCODE(p->header[1]);
 
 	spin_lock_irqsave(&card->lock, flags);
 	list_for_each_entry(t, &card->transaction_list, link) {

-- 
Stefan Richter
-=====-==-=- -==- -====
http://arcgraph.de/sr/


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2010-06-14 23:22       ` Stefan Richter
@ 2010-06-14 23:58         ` Justin P. Mattock
  0 siblings, 0 replies; 414+ messages in thread
From: Justin P. Mattock @ 2010-06-14 23:58 UTC (permalink / raw)
  To: Stefan Richter; +Cc: linux-kernel, linux1394-devel

On 06/14/2010 04:22 PM, Stefan Richter wrote:
> which caused gcc 4.6 to warn about
>      variable 'destination' set but not used.
>
> Reported-by: Justin P. Mattock<justinmattock@gmail.com>
>
> Since the hardware ensures that we receive only response packets with
> proper destination node ID (in a given bus generation), we have no use
> for destination here in the core as well as in upper layers.
>
> (This is different with request packets.  There we pass destination node
> ID to upper layers because they may for example need to check whether
> this was an unicast or broadcast request.)
>
> Signed-off-by: Stefan Richter<stefanr@s5r6.in-berlin.de>
> ---
>   drivers/firewire/core-transaction.c |   11 +++++------
>   1 file changed, 5 insertions(+), 6 deletions(-)
>
> Index: b/drivers/firewire/core-transaction.c
> ===================================================================
> --- a/drivers/firewire/core-transaction.c
> +++ b/drivers/firewire/core-transaction.c
> @@ -881,13 +881,12 @@ void fw_core_handle_response(struct fw_c
>   	unsigned long flags;
>   	u32 *data;
>   	size_t data_length;
> -	int tcode, tlabel, destination, source, rcode;
> +	int tcode, tlabel, source, rcode;
>
> -	tcode       = HEADER_GET_TCODE(p->header[0]);
> -	tlabel      = HEADER_GET_TLABEL(p->header[0]);
> -	destination = HEADER_GET_DESTINATION(p->header[0]);
> -	source      = HEADER_GET_SOURCE(p->header[1]);
> -	rcode       = HEADER_GET_RCODE(p->header[1]);
> +	tcode	= HEADER_GET_TCODE(p->header[0]);
> +	tlabel	= HEADER_GET_TLABEL(p->header[0]);
> +	source	= HEADER_GET_SOURCE(p->header[1]);
> +	rcode	= HEADER_GET_RCODE(p->header[1]);
>
>   	spin_lock_irqsave(&card->lock, flags);
>   	list_for_each_entry(t,&card->transaction_list, link) {
>


built good.. here's what I see now:

   LD      kernel/built-in.o
   CC [M]  fs/reiserfs/stree.o
   LD [M]  fs/reiserfs/reiserfs.o
   CC [M]  drivers/firewire/core-transaction.o
   LD [M]  drivers/firewire/firewire-core.o
   LD [M]  drivers/firewire/firewire-ohci.o
   LD [M]  drivers/firewire/firewire-sbp2.o
   CC [M]  drivers/ieee1394/sbp2.o
   CC [M]  drivers/net/wireless/hostap/hostap_80211_rx.o
   CC [M]  drivers/net/wireless/hostap/hostap_80211_tx.o
   CC [M]  drivers/net/wireless/hostap/hostap_ap.o

nice and clean!!

Reported-and-Tested-By: Justin P. Mattock <justinmattock@gmail.com>

Justin P. Mattock

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2010-06-13  6:16 Mike Gilks
  2010-06-13  8:58 ` Tejun Heo
  0 siblings, 1 reply; 414+ messages in thread
From: Mike Gilks @ 2010-06-13  6:16 UTC (permalink / raw)
  To: gregkh, mchehab, julia, joe; +Cc: devel, linux-kernel

Subject:r8192U_core.c Last pass
In-Reply-To: 


This is the last patch I can manage for this file.
Everything else to do with checkpatch.pl issues may require an actual developer to look at it.

Mike

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2010-06-13  6:16 Mike Gilks
@ 2010-06-13  8:58 ` Tejun Heo
  0 siblings, 0 replies; 414+ messages in thread
From: Tejun Heo @ 2010-06-13  8:58 UTC (permalink / raw)
  To: Mike Gilks; +Cc: gregkh, mchehab, julia, joe, devel, linux-kernel

Hello,

On 06/13/2010 08:16 AM, Mike Gilks wrote:
> Subject:r8192U_core.c Last pass
> In-Reply-To: 
> 
> 
> This is the last patch I can manage for this file.
> Everything else to do with checkpatch.pl issues may require an actual developer to look at it.

Can you please fix up your sending script or whatever?  You've been
sending multiple patchsets without subject.

-- 
tejun

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2010-06-08  4:27 FRL
  0 siblings, 0 replies; 414+ messages in thread
From: FRL @ 2010-06-08  4:27 UTC (permalink / raw)


Your Email-ID won £1,000,000.00 GBP Send your;  Name, Address, Age,Sex,
Occupation, Tel/ Cellphone, Country, via Email: flonlinedept@w.cn

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2010-06-08  4:05 FRL
  0 siblings, 0 replies; 414+ messages in thread
From: FRL @ 2010-06-08  4:05 UTC (permalink / raw)


Your Email-ID won £1,000,000.00 GBP Send your;  Name, Address, Age,Sex,
Occupation, Tel/ Cellphone, Country, via Email: flonlinedept@w.cn

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2010-05-11 22:28 Euro-Millions
  0 siblings, 0 replies; 414+ messages in thread
From: Euro-Millions @ 2010-05-11 22:28 UTC (permalink / raw)


Your email has been awarded 2,500,000.00 Pounds. FullName: Country:
Occupation: Age: Tel No:. Reply to: alexanderdarlin@9.cn


^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <20100510223054.luv5qlqdlp28g08o@webmail.wcsd.k12.oh.us>]

[parent not found: <20100510223506.77ylw39bns84c80c@webmail.wcsd.k12.oh.us>]

[parent not found: <20100510223656.m8nzy8mwqf44g8g8@webmail.wcsd.k12.oh.us>]

* Re:
       [not found]   ` <20100510223656.m8nzy8mwqf44g8g8@webmail.wcsd.k12.oh.us>
@ 2010-05-11  4:19     ` Mr. Vincent Hong
  0 siblings, 0 replies; 414+ messages in thread
From: Mr. Vincent Hong @ 2010-05-11  4:19 UTC (permalink / raw)
  To: sisenbarger

Dear Friend,
I have a business transaction which I need your assistant, your share will be
40%; please if you are interested do contact me for more details on my  
personal
email: hg.vincent33@gmail.com
Vincent Hong


Quoting sisenbarger@wcsd.k12.oh.us:

>




^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2010-05-08  2:56 Promo
  0 siblings, 0 replies; 414+ messages in thread
From: Promo @ 2010-05-08  2:56 UTC (permalink / raw)
  To: info

You have just been awarded,the sum of  £1,000,000.00 GBP in the UK LOTTERY
2010 Anniversary Bonanza held this Month.Verify this mail By providing your Complete 
Details

Names:............
Address:..............
Country:................
Age:..........
Sex:..............
Phone/cellphone........

Regard
Mrs.Rose Wood
Co-ordinato


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2010-05-08  0:01 IRISH NEWS CENTRE
  0 siblings, 0 replies; 414+ messages in thread
From: IRISH NEWS CENTRE @ 2010-05-08  0:01 UTC (permalink / raw)


You won 750,000 GBP. Send Name,Age,occupation, Country.


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2010-05-07 11:39 William Wilcox
  0 siblings, 0 replies; 414+ messages in thread
From: William Wilcox @ 2010-05-07 11:39 UTC (permalink / raw)


My name is Sir William Wilcox,I work with the Euro Lottery. I can help you
win 4,528,000 GBP.But I charge 40% of the winning.Can we do this deal
together?


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2010-05-07 11:37 William Wilcox
  0 siblings, 0 replies; 414+ messages in thread
From: William Wilcox @ 2010-05-07 11:37 UTC (permalink / raw)


My name is Sir William Wilcox,I work with the Euro Lottery. I can help you
win 4,528,000 GBP.But I charge 40% of the winning.Can we do this deal
together?


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2010-04-14 12:54 Alan Cox
  2010-04-14 13:35 ` Jean Delvare
  0 siblings, 1 reply; 414+ messages in thread
From: Alan Cox @ 2010-04-14 12:54 UTC (permalink / raw)
  To: linux-i2c, khali, linux-input, linux-kernel

Subject: [FOR COMMENT] cy8ctmg110 for review

From: Samuli Konttila <samuli.konttila@aavamobile.com>

Add support for the cy8ctmg110 capacitive touchscreen used on some embedded
devices.

(Some clean up by Alan Cox)

(Not signed off, not yet ready to go in)
---

 drivers/input/touchscreen/Kconfig         |   12 +
 drivers/input/touchscreen/Makefile        |    3 
 drivers/input/touchscreen/cy8ctmg110_ts.c |  521 +++++++++++++++++++++++++++++
 3 files changed, 535 insertions(+), 1 deletions(-)
 create mode 100644 drivers/input/touchscreen/cy8ctmg110_ts.c


diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index b3ba374..89a3eb1 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -591,4 +591,16 @@ config TOUCHSCREEN_TPS6507X
 	  To compile this driver as a module, choose M here: the
 	  module will be called tps6507x_ts.
 
+config TOUCHSCREEN_CY8CTMG110
+	tristate "cy8ctmg110 touchscreen"
+	depends on I2C
+	help
+	  Say Y here if you have a cy8ctmg110 touchscreen capacitive
+	  touchscreen
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cy8ctmg110_ts.
+
 endif
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index dfb7239..c7acb65 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -1,5 +1,5 @@
 #
-# Makefile for the touchscreen drivers.
+# Makefile for the touchscreen drivers.mororor
 #
 
 # Each configuration option enables a list of files.
@@ -12,6 +12,7 @@ obj-$(CONFIG_TOUCHSCREEN_AD7879)	+= ad7879.o
 obj-$(CONFIG_TOUCHSCREEN_ADS7846)	+= ads7846.o
 obj-$(CONFIG_TOUCHSCREEN_ATMEL_TSADCC)	+= atmel_tsadcc.o
 obj-$(CONFIG_TOUCHSCREEN_BITSY)		+= h3600_ts_input.o
+obj-$(CONFIG_TOUCHSCREEN_CY8CTMG110)    += cy8ctmg110_ts.o
 obj-$(CONFIG_TOUCHSCREEN_DYNAPRO)	+= dynapro.o
 obj-$(CONFIG_TOUCHSCREEN_GUNZE)		+= gunze.o
 obj-$(CONFIG_TOUCHSCREEN_EETI)		+= eeti_ts.o
diff --git a/drivers/input/touchscreen/cy8ctmg110_ts.c b/drivers/input/touchscreen/cy8ctmg110_ts.c
new file mode 100644
index 0000000..4adbe87
--- /dev/null
+++ b/drivers/input/touchscreen/cy8ctmg110_ts.c
@@ -0,0 +1,521 @@
+/*
+ * cy8ctmg110_ts.c Driver for cypress touch screen controller
+ * Copyright (c) 2009 Aava Mobile
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/input.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <asm/io.h>
+#include <linux/i2c.h>
+#include <linux/timer.h>
+#include <linux/gpio.h>
+#include <linux/hrtimer.h>
+
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <asm/ioctl.h>
+#include <asm/uaccess.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <asm/ioctl.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+
+
+#define CY8CTMG110_DRIVER_NAME      "cy8ctmg110"
+
+
+/*HW definations*/
+#define CY8CTMG110_RESET_PIN_GPIO   43
+#define CY8CTMG110_IRQ_PIN_GPIO     59
+#define CY8CTMG110_I2C_ADDR         0x38
+#define CY8CTMG110_I2C_ADDR_EXT     0x39
+#define CY8CTMG110_I2C_ADDR_        0x2	/*i2c address first sample */
+#define CY8CTMG110_I2C_ADDR__       53	/*i2c address to FW where irq support missing */
+#define CY8CTMG110_TOUCH_IRQ        21
+#define CY8CTMG110_TOUCH_LENGHT     9787
+#define CY8CTMG110_SCREEN_LENGHT    8424
+
+
+/*Touch coordinates*/
+#define CY8CTMG110_X_MIN        0
+#define CY8CTMG110_Y_MIN        0
+#define CY8CTMG110_X_MAX        864
+#define CY8CTMG110_Y_MAX        480
+
+
+/*cy8ctmg110 registers defination*/
+#define CY8CTMG110_TOUCH_WAKEUP_TIME   0
+#define CY8CTMG110_TOUCH_SLEEP_TIME    2
+#define CY8CTMG110_TOUCH_X1            3
+#define CY8CTMG110_TOUCH_Y1            5
+#define CY8CTMG110_TOUCH_X2            7
+#define CY8CTMG110_TOUCH_Y2            9
+#define CY8CTMG110_FINGERS             11
+#define CY8CTMG110_GESTURE             12
+#define CY8CTMG110_REG_MAX             13
+
+#define CY8CTMG110_POLL_TIMER_DELAY  1000*1000*100
+#define TOUCH_MAX_I2C_FAILS          50
+
+/* Scale factors for coordinates */
+#define X_SCALE_FACTOR 9387/8424
+#define Y_SCALE_FACTOR 97/100
+
+/* For tracing */
+static int g_y_trace_coord = 0;
+module_param(g_y_trace_coord, int, 0600);
+
+/* Polling mode */
+static int polling = 0;
+module_param(polling, int, 0);
+MODULE_PARM_DESC(polling, "Set to enabling polling of the touchscreen");
+
+
+/*
+ * The touch position structure.
+ */
+struct ts_event {
+	int x1;
+	int y1;
+	int x2;
+	int y2;
+	bool event_sended;
+};
+
+/*
+ * The touch driver structure.
+ */
+struct cy8ctmg110 {
+	struct input_dev *input;
+	char phys[32];
+	struct ts_event tc;
+	struct i2c_client *client;
+	bool pending;
+	spinlock_t lock;
+	bool initController;
+	bool sleepmode;
+	int i2c_fail_count;
+	struct hrtimer timer;
+};
+
+/*
+ * cy8ctmg110_poweroff is the routine that is called when touch hardware 
+ * will powered off
+ */
+static void cy8ctmg110_power(bool poweron)
+{
+	if (poweron)
+		gpio_direction_output(CY8CTMG110_RESET_PIN_GPIO, 0);
+	else
+		gpio_direction_output(CY8CTMG110_RESET_PIN_GPIO, 1);
+}
+
+/*
+ * cy8ctmg110_write_req write regs to the i2c devices
+ * 
+ */
+static int cy8ctmg110_write_req(struct cy8ctmg110 *tsc, unsigned char reg,
+		unsigned char len, unsigned char *value)
+{
+	struct i2c_client *client = tsc->client;
+	unsigned int ret;
+	unsigned char i2c_data[] = { 0, 0, 0, 0, 0, 0 };
+	struct i2c_msg msg[] = {
+			{client->addr, 0, len + 1, i2c_data},
+			};
+
+	i2c_data[0] = reg;
+	memcpy(i2c_data + 1, value, len);
+
+	ret = i2c_transfer(client->adapter, msg, 1);
+	if (ret != 1) {
+		printk("cy8ctmg110 touch : i2c write data cmd failed \n");
+		return ret;
+	}
+	return 0;
+}
+
+/*
+ * cy8ctmg110_read_req read regs from i2c devise
+ * 
+ */
+
+static int cy8ctmg110_read_req(struct cy8ctmg110 *tsc,
+		unsigned char *i2c_data, unsigned char len, unsigned char cmd)
+{
+	struct i2c_client *client = tsc->client;
+	unsigned int ret;
+	unsigned char regs_cmd[2] = { 0, 0 };
+	struct i2c_msg msg1[] = {
+		{client->addr, 0, 1, regs_cmd},
+	};
+	struct i2c_msg msg2[] = {
+		{client->addr, I2C_M_RD, len, i2c_data},
+	};
+
+	regs_cmd[0] = cmd;
+
+	/* first write slave position to i2c devices */
+	ret = i2c_transfer(client->adapter, msg1, 1);
+	if (ret != 1) {
+		tsc->i2c_fail_count++;
+		return ret;
+	}
+
+	/* Second read data from position */
+	ret = i2c_transfer(client->adapter, msg2, 1);
+	if (ret != 1) {
+		tsc->i2c_fail_count++;
+		return ret;
+	}
+	return 0;
+}
+
+/*
+ * cy8ctmg110_send_event delevery touch event to the userpace
+ * function use normal input interface
+ */
+static void cy8ctmg110_send_event(void *tsc)
+{
+	struct cy8ctmg110 *ts = tsc;
+	struct input_dev *input = ts->input;
+	u16 x, y;
+	u16 x2, y2;
+
+	x = ts->tc.x1;
+	y = ts->tc.y1;
+
+	if (ts->tc.event_sended == false) {
+		input_report_key(input, BTN_TOUCH, 1);
+		ts->pending = true;
+		x2 = (u16) (y * X_SCALE_FACTOR);
+		y2 = (u16) (x * Y_SCALE_FACTOR);
+		input_report_abs(input, ABS_X, x2);
+		input_report_abs(input, ABS_Y, y2);
+		input_sync(input);
+		if (g_y_trace_coord)
+			printk("cy8ctmg110 touch position X:%d (was = %d) Y:%d (was = %d)\n", x2, y, y2, x);
+	}
+
+}
+
+/*
+ * cy8ctmg110_touch_pos check touch position from i2c devices
+ * 
+ */
+static int cy8ctmg110_touch_pos(struct cy8ctmg110 *tsc)
+{
+	unsigned char reg_p[CY8CTMG110_REG_MAX];
+	int x, y;
+
+	memset(reg_p, 0, CY8CTMG110_REG_MAX);
+
+	/*Reading coordinates */
+	if (cy8ctmg110_read_req(tsc, reg_p, 9, CY8CTMG110_TOUCH_X1) != 0)
+		return -EIO;
+		
+	y = reg_p[2] << 8 | reg_p[3];
+	x = reg_p[0] << 8 | reg_p[1];
+		/*number of touch */
+	if (reg_p[8] == 0) {
+		if (tsc->pending == true) {
+			struct input_dev *input = tsc->input;
+
+			input_report_key(input, BTN_TOUCH, 0);
+			tsc->tc.event_sended = true;
+			tsc->pending = false;
+		}
+	} else if (tsc->tc.x1 != x || tsc->tc.y1 != y) {
+		tsc->tc.y1 = y;
+		tsc->tc.x1 = x;
+		tsc->tc.event_sended = false;
+		cy8ctmg110_send_event(tsc);
+	}
+	return 0;
+}
+
+/*
+ * if interrupt isn't in use the touch positions can reads by polling
+ * 
+ */
+static enum hrtimer_restart cy8ctmg110_timer(struct hrtimer *handle)
+{
+	struct cy8ctmg110 *ts = container_of(handle, struct cy8ctmg110, timer);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ts->lock, flags);
+
+	cy8ctmg110_touch_pos(ts);
+	if (ts->i2c_fail_count < TOUCH_MAX_I2C_FAILS)
+		hrtimer_start(&ts->timer, ktime_set(0, CY8CTMG110_POLL_TIMER_DELAY), HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&ts->lock, flags);
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * cy8ctmg110_set_sleepmode - program the controller's wake-up configuration
+ * @ts: driver instance; ts->sleepmode selects which 3-byte set is written
+ *
+ * Writes three bytes starting at CY8CTMG110_TOUCH_WAKEUP_TIME and, on
+ * success, marks the controller as initialised.
+ *
+ * Returns true on success, false if the register write failed.
+ */
+static bool cy8ctmg110_set_sleepmode(struct cy8ctmg110 *ts)
+{
+	const bool sleeping = (ts->sleepmode == true);
+	unsigned char cfg[3] = {
+		sleeping ? 0x00 : 0x10,
+		0xff,
+		sleeping ? 5 : 0,
+	};
+
+	if (cy8ctmg110_write_req(ts, CY8CTMG110_TOUCH_WAKEUP_TIME, 3, cfg) != 0)
+		return false;
+
+	ts->initController = true;
+	return true;
+}
+
+/*
+ * cy8ctmg110_irq_handler - touch interrupt handler
+ * @irq: interrupt number (unused)
+ * @dev_id: the struct cy8ctmg110 passed to request_irq()
+ *
+ * The first interrupt after probe is used to initialise the controller;
+ * every later one reads the touch position.
+ */
+static irqreturn_t cy8ctmg110_irq_handler(int irq, void *dev_id)
+{
+	struct cy8ctmg110 *tsc = dev_id;
+
+	if (tsc->initController != false) {
+		cy8ctmg110_touch_pos(tsc);
+	} else if (cy8ctmg110_set_sleepmode(tsc) == true) {
+		tsc->initController = true;
+	}
+
+	/*
+	 * An interrupt arrived, so the controller supports interrupts:
+	 * saturate the failure counter so the polling timer stops re-arming.
+	 */
+	tsc->i2c_fail_count = TOUCH_MAX_I2C_FAILS;
+
+	return IRQ_HANDLED;
+}
+
+
+/*
+ * cy8ctmg110_probe - bind the driver to an I2C client
+ * @client: I2C client for the touch controller
+ * @id: matching entry of cy8ctmg110_idtable (unused)
+ *
+ * Allocates the driver state and input device, claims the reset and IRQ
+ * GPIOs, powers the controller up, sets up the polling timer, requests the
+ * interrupt and finally registers the input device.
+ *
+ * On failure every resource acquired so far is released in reverse order of
+ * acquisition; each error label undoes exactly the steps that succeeded
+ * before the jump.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int cy8ctmg110_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	struct cy8ctmg110 *ts;
+	struct input_dev *input_dev;
+	int err;
+
+	client->irq = CY8CTMG110_TOUCH_IRQ;
+
+	if (!i2c_check_functionality(client->adapter,
+					I2C_FUNC_SMBUS_READ_WORD_DATA))
+		return -EIO;
+
+	ts = kzalloc(sizeof(*ts), GFP_KERNEL);
+	input_dev = input_allocate_device();
+
+	/* Both free routines accept NULL, so one check covers both. */
+	if (!ts || !input_dev) {
+		err = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	ts->client = client;
+	i2c_set_clientdata(client, ts);
+
+	ts->input = input_dev;
+	ts->pending = false;
+	ts->sleepmode = false;
+
+	snprintf(ts->phys, sizeof(ts->phys), "%s/input0",
+						dev_name(&client->dev));
+
+	input_dev->name = CY8CTMG110_DRIVER_NAME " Touchscreen";
+	input_dev->phys = ts->phys;
+	input_dev->id.bustype = BUS_I2C;
+
+	spin_lock_init(&ts->lock);
+
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+					BIT_MASK(EV_REL) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+
+	input_set_capability(input_dev, EV_KEY, KEY_F);
+
+	input_set_abs_params(input_dev, ABS_X, CY8CTMG110_X_MIN, CY8CTMG110_X_MAX, 0, 0);
+	input_set_abs_params(input_dev, ABS_Y, CY8CTMG110_Y_MIN, CY8CTMG110_Y_MAX, 0, 0);
+
+	err = gpio_request(CY8CTMG110_RESET_PIN_GPIO, NULL);
+	if (err) {
+		dev_err(&client->dev, "cy8ctmg110_ts: Unable to request GPIO pin %d.\n",
+						CY8CTMG110_RESET_PIN_GPIO);
+		/* Nothing but memory is held at this point. */
+		goto err_free_mem;
+	}
+	cy8ctmg110_power(true);
+
+	ts->initController = false;
+	ts->i2c_fail_count = 0;
+
+	hrtimer_init(&ts->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	ts->timer.function = cy8ctmg110_timer;
+
+	if (polling)
+		hrtimer_start(&ts->timer, ktime_set(10, 0), HRTIMER_MODE_REL);
+
+	/* Can we fall back to polling if these bits fail - something to look
+	   at for robustness */
+
+	err = gpio_request(CY8CTMG110_IRQ_PIN_GPIO, "touch_irq_key");
+	if (err < 0) {
+		dev_err(&client->dev,
+			"cy8ctmg110_ts: failed to request GPIO %d, error %d\n",
+						CY8CTMG110_IRQ_PIN_GPIO, err);
+		goto err_stop_timer;
+	}
+
+	err = gpio_direction_input(CY8CTMG110_IRQ_PIN_GPIO);
+	if (err < 0) {
+		dev_err(&client->dev,
+			"cy8ctmg110_ts: failed to configure input direction for GPIO %d, error %d\n",
+						CY8CTMG110_IRQ_PIN_GPIO, err);
+		goto err_free_irq_gpio;
+	}
+
+	client->irq = gpio_to_irq(CY8CTMG110_IRQ_PIN_GPIO);
+	if (client->irq < 0) {
+		err = client->irq;
+		dev_err(&client->dev,
+			"cy8ctmg110_ts: Unable to get irq number for GPIO %d, error %d\n",
+						CY8CTMG110_IRQ_PIN_GPIO, err);
+		goto err_free_irq_gpio;
+	}
+
+	err = request_irq(client->irq, cy8ctmg110_irq_handler, IRQF_TRIGGER_RISING | IRQF_SHARED, "touch_reset_key", ts);
+	if (err < 0) {
+		dev_err(&client->dev,
+			"cy8ctmg110 irq %d busy? error %d\n",
+				client->irq, err);
+		/* The IRQ was not obtained, so it must not be freed. */
+		goto err_free_irq_gpio;
+	}
+
+	err = input_register_device(input_dev);
+	if (err)
+		goto err_free_irq;
+
+	return 0;
+
+err_free_irq:
+	free_irq(client->irq, ts);
+err_free_irq_gpio:
+	gpio_free(CY8CTMG110_IRQ_PIN_GPIO);
+err_stop_timer:
+	if (polling)
+		hrtimer_cancel(&ts->timer);
+	/* Undo the power-up and release the reset line claimed above. */
+	cy8ctmg110_power(false);
+	gpio_free(CY8CTMG110_RESET_PIN_GPIO);
+err_free_mem:
+	/* The device was never registered, so free (not unregister) it. */
+	input_free_device(input_dev);
+	kfree(ts);
+	return err;
+}
+
+/*
+ * cy8ctmg110_suspend - arm the touch interrupt as a wake-up source
+ * @client: I2C client being suspended
+ * @mesg: power-management message (unused)
+ *
+ * Only acts when the device is allowed to wake the system.  Always
+ * returns 0.
+ */
+static int cy8ctmg110_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+	if (!device_may_wakeup(&client->dev))
+		return 0;
+
+	enable_irq_wake(client->irq);
+	return 0;
+}
+
+/*
+ * cy8ctmg110_resume - disarm the wake-up source armed in suspend
+ * @client: I2C client being resumed
+ *
+ * Mirrors cy8ctmg110_suspend(): only acts when the device is allowed to
+ * wake the system.  Always returns 0.
+ */
+static int cy8ctmg110_resume(struct i2c_client *client)
+{
+	if (!device_may_wakeup(&client->dev))
+		return 0;
+
+	disable_irq_wake(client->irq);
+	return 0;
+}
+
+/*
+ * cy8ctmg110_remove - unbind the driver from an I2C client
+ * @client: I2C client being removed
+ *
+ * Stops the interrupt and the polling timer first so that nothing can touch
+ * the device while it is torn down, then unregisters the input device,
+ * powers the controller off and releases the GPIOs claimed in probe.
+ *
+ * Always returns 0.
+ */
+static int cy8ctmg110_remove(struct i2c_client *client)
+{
+	struct cy8ctmg110 *ts = i2c_get_clientdata(client);
+
+	/* Quiesce both event sources before anything they use goes away. */
+	free_irq(client->irq, ts);
+	if (polling)
+		hrtimer_cancel(&ts->timer);
+
+	/* Unregistering also drops the last reference to the input device. */
+	input_unregister_device(ts->input);
+
+	cy8ctmg110_power(false);
+
+	/* Both GPIOs were claimed with gpio_request() in probe. */
+	gpio_free(CY8CTMG110_IRQ_PIN_GPIO);
+	gpio_free(CY8CTMG110_RESET_PIN_GPIO);
+
+	kfree(ts);
+	return 0;
+}
+
+/* I2C device IDs handled by this driver; the empty entry ends the table. */
+static struct i2c_device_id cy8ctmg110_idtable[] = {
+	{ .name = CY8CTMG110_DRIVER_NAME, .driver_data = 1 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(i2c, cy8ctmg110_idtable);
+
+/* I2C driver descriptor tying the callbacks above to the device ID table. */
+static struct i2c_driver cy8ctmg110_driver = {
+	.probe = cy8ctmg110_probe,
+	.remove = cy8ctmg110_remove,
+	.suspend = cy8ctmg110_suspend,
+	.resume = cy8ctmg110_resume,
+	.id_table = cy8ctmg110_idtable,
+	.driver = {
+		.name = CY8CTMG110_DRIVER_NAME,
+		.owner = THIS_MODULE,
+		.bus = &i2c_bus_type,
+	},
+};
+
+/* Module load: register the driver with the I2C core. */
+static int __init cy8ctmg110_init(void)
+{
+	int ret = i2c_add_driver(&cy8ctmg110_driver);
+
+	return ret;
+}
+module_init(cy8ctmg110_init);
+
+/* Module unload: remove the driver from the I2C core. */
+static void __exit cy8ctmg110_exit(void)
+{
+	i2c_del_driver(&cy8ctmg110_driver);
+}
+module_exit(cy8ctmg110_exit);
+
+MODULE_AUTHOR("Samuli Konttila <samuli.konttila@aavamobile.com>");
+MODULE_DESCRIPTION("cy8ctmg110 TouchScreen Driver");
+MODULE_LICENSE("GPL v2");


^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2010-04-14 12:54 Alan Cox
@ 2010-04-14 13:35 ` Jean Delvare
  0 siblings, 0 replies; 414+ messages in thread
From: Jean Delvare @ 2010-04-14 13:35 UTC (permalink / raw)
  To: Alan Cox; +Cc: linux-i2c, linux-input, linux-kernel

On Wed, 14 Apr 2010 13:54:02 +0100, Alan Cox wrote:
> Subject: [FOR COMMENT] cy8ctmg110 for review
> 
> From: Samuli Konttila <samuli.konttila@aavamobile.com>
> 
> Add support for the cy8ctmg110 capacitive touchscreen used on some embedded
> devices.
> 
> (Some clean up by Alan Cox)
> 
> (No signed off, not yet ready to go in)
> ---
> 
>  drivers/input/touchscreen/Kconfig         |   12 +
>  drivers/input/touchscreen/Makefile        |    3 
>  drivers/input/touchscreen/cy8ctmg110_ts.c |  521 +++++++++++++++++++++++++++++
>  3 files changed, 535 insertions(+), 1 deletions(-)
>  create mode 100644 drivers/input/touchscreen/cy8ctmg110_ts.c
> 
> 
> diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
> index b3ba374..89a3eb1 100644
> --- a/drivers/input/touchscreen/Kconfig
> +++ b/drivers/input/touchscreen/Kconfig
> @@ -591,4 +591,16 @@ config TOUCHSCREEN_TPS6507X
>  	  To compile this driver as a module, choose M here: the
>  	  module will be called tps6507x_ts.
>  
> +config TOUCHSCREEN_CY8CTMG110
> +	tristate "cy8ctmg110 touchscreen"
> +	depends on I2C
> +	help
> +	  Say Y here if you have a cy8ctmg110 touchscreen capacitive
> +	  touchscreen
> +
> +	  If unsure, say N.
> +
> +	  To compile this driver as a module, choose M here: the
> +	  module will be called cy8ctmg110_ts.
> +
>  endif
> diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
> index dfb7239..c7acb65 100644
> --- a/drivers/input/touchscreen/Makefile
> +++ b/drivers/input/touchscreen/Makefile
> @@ -1,5 +1,5 @@
>  #
> -# Makefile for the touchscreen drivers.
> +# Makefile for the touchscreen drivers.mororor

I confirm, not yet ready to go in ;)

>  #
>  
>  # Each configuration option enables a list of files.
> @@ -12,6 +12,7 @@ obj-$(CONFIG_TOUCHSCREEN_AD7879)	+= ad7879.o
>  obj-$(CONFIG_TOUCHSCREEN_ADS7846)	+= ads7846.o
>  obj-$(CONFIG_TOUCHSCREEN_ATMEL_TSADCC)	+= atmel_tsadcc.o
>  obj-$(CONFIG_TOUCHSCREEN_BITSY)		+= h3600_ts_input.o
> +obj-$(CONFIG_TOUCHSCREEN_CY8CTMG110)    += cy8ctmg110_ts.o
>  obj-$(CONFIG_TOUCHSCREEN_DYNAPRO)	+= dynapro.o
>  obj-$(CONFIG_TOUCHSCREEN_GUNZE)		+= gunze.o
>  obj-$(CONFIG_TOUCHSCREEN_EETI)		+= eeti_ts.o
> diff --git a/drivers/input/touchscreen/cy8ctmg110_ts.c b/drivers/input/touchscreen/cy8ctmg110_ts.c
> new file mode 100644
> index 0000000..4adbe87
> --- /dev/null
> +++ b/drivers/input/touchscreen/cy8ctmg110_ts.c
> @@ -0,0 +1,521 @@
> +/*
> + * cy8ctmg110_ts.c Driver for cypress touch screen controller
> + * Copyright (c) 2009 Aava Mobile
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
> + */
> +
> +#include <linux/module.h>
> +#include <linux/kernel.h>
> +#include <linux/input.h>
> +#include <linux/slab.h>
> +#include <linux/interrupt.h>
> +#include <asm/io.h>
> +#include <linux/i2c.h>
> +#include <linux/timer.h>
> +#include <linux/gpio.h>
> +#include <linux/hrtimer.h>
> +
> +#include <linux/platform_device.h>
> +#include <linux/delay.h>
> +#include <linux/fs.h>
> +#include <asm/ioctl.h>
> +#include <asm/uaccess.h>
> +#include <linux/device.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/delay.h>
> +#include <linux/fs.h>
> +#include <asm/ioctl.h>
> +#include <linux/fs.h>
> +#include <linux/init.h>
> +#include <linux/miscdevice.h>
> +#include <linux/module.h>

What a mess. Countless duplicates includes... Seriously, I'm not even
reviewing further.

-- 
Jean Delvare

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE;
@ 2010-04-02 23:17 Mrs Claire page
  0 siblings, 0 replies; 414+ messages in thread
From: Mrs Claire page @ 2010-04-02 23:17 UTC (permalink / raw)


I am Mrs Claire page,contact my lawyer(barlandon_watson@gala.net)





^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE,
@ 2010-03-23  7:50 FROM CENTRAL BANK
  0 siblings, 0 replies; 414+ messages in thread
From: FROM CENTRAL BANK @ 2010-03-23  7:50 UTC (permalink / raw)


Very Urgently,
We Conclude Our Meeting Today That $10.7m should be pay to you 
as your contract entitlement. The Payment Will Come To You Via Diplomatic 
Carrier Service:Re- Comfirm this informations as follows.
Your Full Name,
Home 
Address,
Direct Phone No,
Occupation And Age.

Dr. Sanusi A. Lamido
Tel:+234 
8067884885

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2010-03-11 16:40 Monica D.
  0 siblings, 0 replies; 414+ messages in thread
From: Monica D. @ 2010-03-11 16:40 UTC (permalink / raw)
  To: info

This is my second time of contacting you to 
inform / congratulate you as winner of this 
year Grant Award from the RDS PLC (ROYAL 
DUTCH SHELL), You have been chosen as one of 
the Grant Winner of $2,000,000.00 USD. for 
more details contact the Remittance officer 
Mr. Janick Delarche on 
<royal14@btinternet.com> for claim procedure.

COMPLETE THE CLAIM PROCESSING FORM BELOW:

1.FULL NAME: 2.COUNTRY: 3.TEL: 
4.SEX: 5.AGE: 6.OCCUPATION: 7. ALTERNATIVE E-
MAIL ADDRESS(YOHOO OR GMAIL):

Regards.

Monica D.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re;
@ 2010-02-25 13:39 William Wilcox
  0 siblings, 0 replies; 414+ messages in thread
From: William Wilcox @ 2010-02-25 13:39 UTC (permalink / raw)


Good day!
My name is Sir William Wilcox,I work with the Euro Lottery. I can help you
win 4,528,000 GBP.But I charge 40% of the winning.Can we do this deal
together? Email Me: william_wilcox@live.co.uk

Regards,
William Wilcox


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2010-01-16  1:54 Capt Chris P. Mark
  0 siblings, 0 replies; 414+ messages in thread
From: Capt Chris P. Mark @ 2010-01-16  1:54 UTC (permalink / raw)
  To: chrispmarkss

Hello, my name is Capt. Chris P. Mark, 3rd Battalion, 16th Field
Artillery, 2nd Brigade Combat Team, 4th Infantry Division and I am
presently in Iraq with the U.S. Marines for peace keeping mission. i
desperately need your Urgent assistance. I await your response for more
details.Capt. Chris P. Mark

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2010-01-13  0:48 Jeff Mahoney
  2010-01-13  8:24 ` David Woodhouse
  0 siblings, 1 reply; 414+ messages in thread
From: Jeff Mahoney @ 2010-01-13  0:48 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Andrew Morton, Youquan Song, David Woodhouse


Subject: [patch 3/6] dmar: Fix section mismatch
References: <20100113004855.550486769@suse.com>
Content-Disposition: inline; filename=patches.rpmify/dmar-fix-section-mismatch

 dmar_ir_support uses dmar_tbl, which is __initdata. dmar_ir_support is
 only called by intr_remapping_supported, which is __init. So, we mark
 dmar_ir_support as __init as well.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 drivers/pci/dmar.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -1456,7 +1456,7 @@ int dmar_reenable_qi(struct intel_iommu
 /*
  * Check interrupt remapping support in DMAR table description.
  */
-int dmar_ir_support(void)
+int __init dmar_ir_support(void)
 {
 	struct acpi_table_dmar *dmar;
 	dmar = (struct acpi_table_dmar *)dmar_tbl;



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2010-01-13  0:48 Jeff Mahoney
@ 2010-01-13  8:24 ` David Woodhouse
  0 siblings, 0 replies; 414+ messages in thread
From: David Woodhouse @ 2010-01-13  8:24 UTC (permalink / raw)
  To: Jeff Mahoney; +Cc: Linux Kernel Mailing List, Andrew Morton, Song, Youquan

On Wed, 2010-01-13 at 00:48 +0000, Jeff Mahoney wrote:
> Subject: [patch 3/6] dmar: Fix section mismatch
> References: <20100113004855.550486769@suse.com>
> Content-Disposition: inline;
> filename=patches.rpmify/dmar-fix-section-mismatch
> 
>  dmar_ir_support uses dmar_tbl, which is __initdata. dmar_ir_support
> is
>  only called by intr_remapping_supported, which is __init. So, we mark
>  dmar_ir_support as __init as well.
> 
> Signed-off-by: Jeff Mahoney <jeffm@suse.com> 

This patch doesn't apply to linux-next, and hasn't applied there for
about two weeks. Why are people still sending it to me?

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation


^ permalink raw reply	[flat|nested] 414+ messages in thread

* re:
@ 2010-01-09 17:03 Ustin Gavrie
  0 siblings, 0 replies; 414+ messages in thread
From: Ustin Gavrie @ 2010-01-09 17:03 UTC (permalink / raw)



--
I...HAVE...A...PROFILING...SUM...OF...$25MILLION....WHICH...I...SEEK...YOUR..
.PARTNERSHIP...IN...ACCOMMODATING...FOR...INVESTMENT..PURPOSE...YOU
...SHALL...BE...REWARDED...WITH...THIRTY...PERCENT....IF...INTERESTED...
PLEASE...REPLY...FOR...MORE...DETAILS.


^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2009-12-19 17:38 OFFICE OF THE SENATE
  0 siblings, 0 replies; 414+ messages in thread
From: OFFICE OF THE SENATE @ 2009-12-19 17:38 UTC (permalink / raw)



To celebrate the 30th anniversary celebration,We are giving out a yearly donation of The ATM Card Value is $6.8 million USD to 2 lucky recipients,as New Year promotion from the W.H.O,UN, and the EU in accordance with the enabling act of Parliament. back with: Names: Address: Sex:

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2009-12-12 16:04 T Dent
  2009-12-13  5:55 ` andrew hendry
  0 siblings, 1 reply; 414+ messages in thread
From: T Dent @ 2009-12-12 16:04 UTC (permalink / raw)
  To: rdunlap; +Cc: linux-doc, linux-kernel

Fixed typo in Documentation/CodingStyle

>From 47b08656a62b00b36c24315c63f2d48f70037de3 Mon Sep 17 00:00:00 2001
From: Tracey Dent <Tdent48227@gmail.com>
Date: Sat, 12 Dec 2009 10:16:18 -0500
Subject: [PATCH] trival: fix typo akin/asking for documentation


Signed-off-by: Tracey Dent <Tdent48227@gmail.com>
---
 Documentation/CodingStyle |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index 8bb3723..e7c60be 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -17,7 +17,7 @@ Anyway, here goes:

 Tabs are 8 characters, and thus indentations are also 8 characters.
 There are heretic movements that try to make indentations 4 (or even 2!)
-characters deep, and that is akin to trying to define the value of PI to
+characters deep, and that is asking to trying to define the value of PI to
 be 3.

 Rationale: The whole idea behind indentation is to clearly define where
-- 
1.6.5.4

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2009-12-12 16:04 T Dent
@ 2009-12-13  5:55 ` andrew hendry
  0 siblings, 0 replies; 414+ messages in thread
From: andrew hendry @ 2009-12-13  5:55 UTC (permalink / raw)
  To: T Dent; +Cc: rdunlap, linux-doc, linux-kernel

http://dictionary.reference.com/browse/akin


On Sun, Dec 13, 2009 at 3:04 AM, T Dent <tdent48227@gmail.com> wrote:
> Fixed typo in Documentation/CodingStyle
>
> From 47b08656a62b00b36c24315c63f2d48f70037de3 Mon Sep 17 00:00:00 2001
> From: Tracey Dent <Tdent48227@gmail.com>
> Date: Sat, 12 Dec 2009 10:16:18 -0500
> Subject: [PATCH] trival: fix typo akin/asking for documentation
>
>
> Signed-off-by: Tracey Dent <Tdent48227@gmail.com>
> ---
>  Documentation/CodingStyle |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
> index 8bb3723..e7c60be 100644
> --- a/Documentation/CodingStyle
> +++ b/Documentation/CodingStyle
> @@ -17,7 +17,7 @@ Anyway, here goes:
>
>  Tabs are 8 characters, and thus indentations are also 8 characters.
>  There are heretic movements that try to make indentations 4 (or even 2!)
> -characters deep, and that is akin to trying to define the value of PI to
> +characters deep, and that is asking to trying to define the value of PI to
>  be 3.
>
>  Rationale: The whole idea behind indentation is to clearly define where
> --
> 1.6.5.4
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2009-12-08  6:23 Irish News Center
  0 siblings, 0 replies; 414+ messages in thread
From: Irish News Center @ 2009-12-08  6:23 UTC (permalink / raw)


You won 750,000gbp.Send:Name,Age,Country


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 1/2] hw_random: core updates to allow more efficient drivers
@ 2009-11-26  1:03 Matt Mackall
  2009-11-26 10:49 ` Ian Molton
  0 siblings, 1 reply; 414+ messages in thread
From: Matt Mackall @ 2009-11-26  1:03 UTC (permalink / raw)
  To: Ian Molton; +Cc: rusty, linux-kernel

On Thu, 2009-11-26 at 00:25 +0000, Ian Molton wrote:
> 	This patch implements a new method by which hw_random hardware drivers
> can pass data to the core more efficiently, using a shared buffer.
> 
> The old methods have been retained as a compatability layer until all the
> drivers have been updated.
> 
> Signed-off-by: Ian Molton <ian.molton@collabora.co.uk>
> ---
>  drivers/char/hw_random/core.c |  120 ++++++++++++++++++++++++++---------------
>  include/linux/hw_random.h     |    9 ++-
>  2 files changed, 82 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
> index 1573aeb..e179afd 100644
> --- a/drivers/char/hw_random/core.c
> +++ b/drivers/char/hw_random/core.c
> @@ -47,12 +47,14 @@
>  #define RNG_MODULE_NAME		"hw_random"
>  #define PFX			RNG_MODULE_NAME ": "
>  #define RNG_MISCDEV_MINOR	183 /* official */
> +#define RNG_BUFFSIZE		64
>  
> 
>  static struct hwrng *current_rng;
>  static LIST_HEAD(rng_list);
>  static DEFINE_MUTEX(rng_mutex);
> -
> +static u8 *rng_buffer;

How about just:

static u8 rng_buffer[RNG_BUFFSIZE] __cacheline_aligned;

And lose all the kmalloc and kfree code? The memory use will be smaller,
even when the buffer isn't needed.

> +		if (!data_avail) {
> +			bytes_read = rng_get_data(current_rng, rng_buffer,
> +				RNG_BUFFSIZE, !(filp->f_flags & O_NONBLOCK));

No need to pass rng_buffer to the helper as there's only one with global
scope.

-- 
http://selenic.com : development and support for Mercurial and Linux



^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
  2009-11-26  1:03 [PATCH 1/2] hw_random: core updates to allow more efficient drivers Matt Mackall
@ 2009-11-26 10:49 ` Ian Molton
  2009-11-26 11:38   ` Matt Mackall
  0 siblings, 1 reply; 414+ messages in thread
From: Ian Molton @ 2009-11-26 10:49 UTC (permalink / raw)
  To: linux-kernel; +Cc: rusty, mpm, jeff

Hi guys,

	This version uses a statically allocated buffer. I don't feel it is a
good idea not to pass the address and length of the buffer to the hardware
drivers, as they shouldn't have intimate knowledge of the core, IMO.

Only resending the core patch; the virtio-rng driver hasn't changed.

hw_random: core updates to allow more efficient drivers

-Ian

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2009-11-26 10:49 ` Ian Molton
@ 2009-11-26 11:38   ` Matt Mackall
  2009-11-26 11:48     ` Re: Ian Molton
  0 siblings, 1 reply; 414+ messages in thread
From: Matt Mackall @ 2009-11-26 11:38 UTC (permalink / raw)
  To: Ian Molton; +Cc: linux-kernel, rusty, jeff

On Thu, 2009-11-26 at 10:49 +0000, Ian Molton wrote:
> Hi guys,
> 
> 	This version uses a statically allocated buffer. I dont feel it is a
> good idea not to pass the address and length of the buffer to the hardware
> drivers, as they shouldnt have intimate knowledge of the core, IMO.

I agree, but let me quote myself:
---
> +             if (!data_avail) {
> +                     bytes_read = rng_get_data(current_rng, rng_buffer,
> +                             RNG_BUFFSIZE, !(filp->f_flags & O_NONBLOCK));

No need to pass rng_buffer to the helper as there's only one with global
scope.
---

-- 
http://selenic.com : development and support for Mercurial and Linux



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2009-11-26 11:38   ` Matt Mackall
@ 2009-11-26 11:48     ` Ian Molton
  2009-11-27 22:54       ` Re: Matt Mackall
  0 siblings, 1 reply; 414+ messages in thread
From: Ian Molton @ 2009-11-26 11:48 UTC (permalink / raw)
  To: Matt Mackall; +Cc: linux-kernel, rusty, jeff

Matt Mackall wrote:
> On Thu, 2009-11-26 at 10:49 +0000, Ian Molton wrote:
>> Hi guys,
>>
> 
> No need to pass rng_buffer to the helper as there's only one with global
> scope.

Ah, sorry, I see what you mean now. The logic behind that is that it
matches the new API, which is all that will be left once the old drivers
are patched to use it. I planned to drop the helper altogether at that
point and thought it'd make the patch more readable when that happens.

I can drop it if that's preferable, though.

Is this enough to get an acked-by: ? If so, I'll do that and see about
getting the change into linux-next.

Rusty: are you happy with the new version of virtio-rng ?

Cheers guys,

-Ian

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: Re:
  2009-11-26 11:48     ` Re: Ian Molton
@ 2009-11-27 22:54       ` Matt Mackall
  0 siblings, 0 replies; 414+ messages in thread
From: Matt Mackall @ 2009-11-27 22:54 UTC (permalink / raw)
  To: Ian Molton; +Cc: linux-kernel, rusty, jeff

On Thu, 2009-11-26 at 11:48 +0000, Ian Molton wrote:
> Matt Mackall wrote:
> > On Thu, 2009-11-26 at 10:49 +0000, Ian Molton wrote:
> >> Hi guys,
> >>
> > 
> > No need to pass rng_buffer to the helper as there's only one with global
> > scope.
> 
> Ah, sorry, I see what you mean now. The logic behind that is that it
> matches the new API, whcih is all that will be left once the old drivers
> are patched to use it. I planned to drop the helper altogether at that
> point and though it'd make the patch more readable when that happens.

Ok, that's quite reasonable.

> Is this enough to get an acked-by: ? If so, I'll do that and see about
> getting the change into linux-next.

Acked-by: Matt Mackall <mpm@selenic.com>

You should probably go through Herbert's tree to get into -next,
hopefully he won't be too miffed by your repeated failure to cc:
linux-kernel initially and failure to cc: him here..

-- 
http://selenic.com : development and support for Mercurial and Linux



^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2009-11-20 13:29 Jerome Glisse
  2009-12-01 23:53 ` Dave Airlie
  0 siblings, 1 reply; 414+ messages in thread
From: Jerome Glisse @ 2009-11-20 13:29 UTC (permalink / raw)
  To: airlied; +Cc: dri-devel, linux-kernel

This patch series add ttm range validation function. Aim is to
include this in 2.6.33 so i have time to iron out issue, comments.

ttm:
I duplicated a bunch of ttm functions but now i think, best would
be to add range to all function and use free list if range cover
all the manager space. Doing so we might also be able to simplify
mem_space alocation into a simpler function like ttm_bo_mem_space_range

radeon:
The second patch is a rework/cleanup of radeon object, it solves
few issues along the way (i can't remember them now after fews
days testing the patches). Biggest change is that we now rely
on BO being validated before doing any change to radeon bo structure.
As with any big patch i might introduce regressions, so far after
testing on AGP:R1XX,R2XX,R3XX,R6XX PCIE:R3XX,R4XX,R5XX,R6XX,R7XX
and RS480,RS690 i didn't found anythings obvious (test being X +
glxgears + compiz(on hw which support it) + suspend/resume).

Last patch is smaller, it just use the interface introduced by
the first patch.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2009-11-20 13:29 Jerome Glisse
@ 2009-12-01 23:53 ` Dave Airlie
  2009-12-02  7:17   ` Re: Thomas Hellstrom
  0 siblings, 1 reply; 414+ messages in thread
From: Dave Airlie @ 2009-12-01 23:53 UTC (permalink / raw)
  To: Jerome Glisse; +Cc: dri-devel, LKML, Thomas Hellstrom

On Fri, Nov 20, 2009 at 11:29 PM, Jerome Glisse <jglisse@redhat.com> wrote:
> This patch series add ttm range validation function. Aim is to
> include this in 2.6.33 so i have time to iron out issue, comments.

I missed these first time around,

Thomas if you have any opinions on the TTM stuff please see if you
can take a look.

> ttm:
> I duplicated a bunch of ttm functions but now i think, best would
> be to add range to all function and use free list if range cover
> all the manager space. Doing so we might also be able to simplify
> mem_space alocation into a simpler function like ttm_bo_mem_space_range
>
> radeon:
> The second patch is a rework/cleanup of radeon object, it solves
> few issues along the way (i can't remember them now after fews
> days testing the patches). Biggest change is that we now rely
> on BO being validated before doing any change to radeon bo structure.
> As with any big patch i might introduce regressions, so far after
> testing on AGP:R1XX,R2XX,R3XX,R6XX PCIE:R3XX,R4XX,R5XX,R6XX,R7XX
> and RS480,RS690 i didn't found anythings obvious (test being X +
> glxgears + compiz(on hw which support it) + suspend/resume).

I'll try and take a look at this here it doesn't seem to depend on the first
patch so I can push it separate if needed.

Dave.

>
> Last patch is smaller, it just use the interface introduced by
> the first patch.
>
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2009-12-01 23:53 ` Dave Airlie
@ 2009-12-02  7:17   ` Thomas Hellstrom
  0 siblings, 0 replies; 414+ messages in thread
From: Thomas Hellstrom @ 2009-12-02  7:17 UTC (permalink / raw)
  To: Dave Airlie; +Cc: Jerome Glisse, dri-devel, LKML

Dave Airlie wrote:
> On Fri, Nov 20, 2009 at 11:29 PM, Jerome Glisse <jglisse@redhat.com> wrote:
>   
>> This patch series add ttm range validation function. Aim is to
>> include this in 2.6.33 so i have time to iron out issue, comments.
>>     
>
> I missed these first time around,
>
> Thomas if you have any opinions on the TTM stuff please see if you
> can take a look.
>
>   
Sure, I'll take a look.

/Thomas


^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <cover.1257602781.git.andre.goddard@gmail.com>]

[parent not found: <7206ef594e67a240a842339f520284de6569b1fc.1257602781.git.andre.goddard@gmail.com>]

[parent not found: <31525.1257770343@redhat.com>]

* Re:
       [not found]   ` <31525.1257770343@redhat.com>
@ 2009-11-09 15:31     ` André Goddard Rosa
  0 siblings, 0 replies; 414+ messages in thread
From: André Goddard Rosa @ 2009-11-09 15:31 UTC (permalink / raw)
  To: David Howells; +Cc: linux list

On Mon, Nov 9, 2009 at 10:39 AM, David Howells <dhowells@redhat.com> wrote:
> MIME-Version: 1.0
> Content-Type: text/plain; charset=iso-8859-1
> Content-Transfer-Encoding: quoted-printable
>
> Andr=E9 Goddard Rosa <andre.goddard@gmail.com> wrote:
>
>> It decreases code size:
>>    text    data     bss     dec     hex filename
>>   15719       0       8   15727    3d6f lib/vsprintf.o-before
>>   15543       0       8   15551    3cbf lib/vsprintf.o-after
>
> Whilst this may be true, there will be a countervailing decrease in
> performance.  Have you assessed that?

(trimmed long cc: list to keep it sane, I'll not use get_maintainer.pl
output this way anymore)

I'm not sure it decreases performance. From the last iteration of the
patch, I removed
the hint to force "not inlining", so that gcc can inline it if it
thinks it's better.

Are those so performance sensitive that it makes sense to perform this
assessment?
If you think so, what would you suggest? A micro-benchmark or some
real use case?

Best regards,
André

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2009-11-05  3:24 Irish News Centre
  0 siblings, 0 replies; 414+ messages in thread
From: Irish News Centre @ 2009-11-05  3:24 UTC (permalink / raw)


You won 750,000gbp.Send:Name,Age,Country

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2009-11-01 17:00 Irish News Centre
  0 siblings, 0 replies; 414+ messages in thread
From: Irish News Centre @ 2009-11-01 17:00 UTC (permalink / raw)


You won 750,000gbp.Send:Name,Age,Country


^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2009-10-10 19:13 Irish News Center
  0 siblings, 0 replies; 414+ messages in thread
From: Irish News Center @ 2009-10-10 19:13 UTC (permalink / raw)


You won 750,000gbp.Send:Name,Age,Country


^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2009-09-26 15:22 Irish News Center
  0 siblings, 0 replies; 414+ messages in thread
From: Irish News Center @ 2009-09-26 15:22 UTC (permalink / raw)


You've won £750,000.Send:Name,Age,Country


^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2009-09-25 23:13 Irish News Center
  0 siblings, 0 replies; 414+ messages in thread
From: Irish News Center @ 2009-09-25 23:13 UTC (permalink / raw)


You've won £750,000.Send:Name,Age,Country


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2009-06-20 19:45 Kay Sievers
  2009-06-21  9:04 ` Takashi Iwai
  2009-06-22 12:56 ` Re: David Woodhouse
  0 siblings, 2 replies; 414+ messages in thread
From: Kay Sievers @ 2009-06-20 19:45 UTC (permalink / raw)
  To: Greg KH, James Bottomley, Takashi Iwai, David S. Miller, David Woodhouse
  Cc: linux-kernel

The final piece of the driver core name limit. We are about to remove
BUS_ID_SIZE.

Some patches may still be in your queue. Just to make sure, we will
finish our task this time: David, David, James, Takashi, can you please
give an update, or take care of removing the last instances, or let me
know if you want a patch, or let us know, if we should just change it to
"20".

  $ find . -name "*.[ch]" | xargs grep '[^_]BUS_ID_SIZE'
  ./drivers/mtd/nand/txx9ndfmc.c:	char mtdname[BUS_ID_SIZE + 2];
  ./drivers/scsi/scsi_transport_fc.c:	char bsg_name[BUS_ID_SIZE]; /*20*/
  ./arch/sparc/kernel/vio.c:	if (strlen(bus_id_name) >= BUS_ID_SIZE - 4) {
  ./sound/soc/txx9/txx9aclc.c:	char devname[BUS_ID_SIZE + 2];

Thanks a lot,
Kay


From: Kay Sievers <kay.sievers@vrfy.org>
Subject: Driver Core: remove BUS_ID_SIZE

The name size limit is gone from the driver-core, this is
the removal of the last left-over.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
---
 include/linux/device.h |    2 --
 1 file changed, 2 deletions(-)

--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -25,8 +25,6 @@
 #include <asm/atomic.h>
 #include <asm/device.h>
 
-#define BUS_ID_SIZE		20
-
 struct device;
 struct device_private;
 struct device_driver;


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2009-06-20 19:45 Kay Sievers
@ 2009-06-21  9:04 ` Takashi Iwai
  2009-06-22 12:56 ` Re: David Woodhouse
  1 sibling, 0 replies; 414+ messages in thread
From: Takashi Iwai @ 2009-06-21  9:04 UTC (permalink / raw)
  To: Kay Sievers
  Cc: Greg KH, James Bottomley, David S. Miller, David Woodhouse, linux-kernel

At Sat, 20 Jun 2009 21:45:24 +0200,
Kay Sievers wrote:
> 
> The final piece of the driver core name limit. We are about to remove
> BUS_ID_SIZE.
> 
> Some patches may still be in your queue. Just to make sure, we will
> finish our task this time: David, David, James, Takashi, can you please
> give an update, or take care of removing the last instances, or let me
> know if you want a patch, or let us know, if we should just change it to
> "20".

Yep, I'm going to send a pull request including the fix for this soon
later.


thanks,

Takashi

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2009-06-20 19:45 Kay Sievers
  2009-06-21  9:04 ` Takashi Iwai
@ 2009-06-22 12:56 ` David Woodhouse
  1 sibling, 0 replies; 414+ messages in thread
From: David Woodhouse @ 2009-06-22 12:56 UTC (permalink / raw)
  To: Kay Sievers
  Cc: Greg KH, James Bottomley, Takashi Iwai, David S. Miller, linux-kernel

On Sat, 2009-06-20 at 21:45 +0200, Kay Sievers wrote:
> The final piece of the driver core name limit. We are about to remove
> BUS_ID_SIZE.
> 
> Some patches may still be in your queue. Just to make sure, we will
> finish our task this time: David, David, James, Takashi, can you please
> give an update, or take care of removing the last instances, or let me
> know if you want a patch, or let us know, if we should just change it to
> "20".

I have this queued for 2.6.31 but have been on jury duty for the last 2
weeks so I'm hoping to get the pull request to Linus today now that I'm
free.

Was very unimpressed with the first version of the patch I saw, which
would have given me a potential buffer overflow if I'd just hard-coded
the buffer size AFAICT.

http://git.infradead.org/mtd-2.6.git?a=commitdiff;h=81933046

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2009-01-11  3:41 Jose Luis Marchetti
  2009-01-11  5:44 ` Cooper Yuan
  0 siblings, 1 reply; 414+ messages in thread
From: Jose Luis Marchetti @ 2009-01-11  3:41 UTC (permalink / raw)
  To: linux-kernel

Hi,

I would like to open/read/write/close a regular file from my device
driver.
I think it would be possible, but I am confused, the "The Linux Kernel
Module Programming Guide" states that I can not use standard libraries
from within a module, I know the standard library ends up calling
system calls, but which calls should I use to deal with regular
files ?
I am developing a Ethernet driver and the Mac address configuration

Thanks in advance!

José Luís Marchetti

      Veja quais são os assuntos do momento no Yahoo! +Buscados
http://br.maisbuscados.yahoo.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2009-01-11  3:41 Jose Luis Marchetti
@ 2009-01-11  5:44 ` Cooper Yuan
  0 siblings, 0 replies; 414+ messages in thread
From: Cooper Yuan @ 2009-01-11  5:44 UTC (permalink / raw)
  To: joseluismarchetti; +Cc: linux-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 1392 bytes --]

Try the following code; I hope it is helpful:

unsigned long old_fs;
orig_open=sys_call_table(__NR_open);
orig_read=sys_call_table(__NR_read);
orig_close=sys_call_table(__NR_close);
old_fs = get_fs();
set_fs(get_ds());
fd=orig_open(pathname,O_RDWR,"rwx-rwx-rwx");
orig_read(fd,buffer,size);
orig_close(fd);
set_fs(old_fs);

On Sun, Jan 11, 2009 at 11:41 AM, Jose Luis Marchetti <joseluismarchetti@yahoo.com.br> wrote:
> Hi,
>
> I would like to open/read/write/close a regular file from my device
> driver.
> I think it would be possible, but I am confused, the "The Linux Kernel
> Module Programming Guide" states that I can not use standard libraries
> from within a module, I know the standard library ends up calling
> system calls, but which calls should I use to deal with regular
> files ?
> I am developing a Ethernet driver and the Mac address configuration
>
> Thanks in advance!
>
> José Luís Marchetti
>
>
>      Veja quais são os assuntos do momento no Yahoo! +Buscados
> http://br.maisbuscados.yahoo.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2008-11-30 11:23 Frank
  0 siblings, 0 replies; 414+ messages in thread
From: Frank @ 2008-11-30 11:23 UTC (permalink / raw)


I would like to invest in your country. I am a foreign investor and I would like to invest in your country. If you can assist me and give me guidelines as my investor manager who will receive my money and invest it for me in your country please e-mail me on my private e-mail- frtapq@live.com        so we can further discussions. Mr. Frank


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2008-10-11  7:30 Yudha Harimantoro T
  2008-10-11 15:12 ` Bill Davidsen
  0 siblings, 1 reply; 414+ messages in thread
From: Yudha Harimantoro T @ 2008-10-11  7:30 UTC (permalink / raw)
  To: linux-kernel; +Cc: davidsen

Date:	Fri, 10 Oct 2008 16:41:36 -0400
From:	Bill Davidsen


>Yudha Harimantoro T wrote:
>> Hi all,
>> Yesterday I build kernel 2.6.26.6 and it's run well. Today I got
>> 2.6.27 patch and try to build it.
>>
>> I build with `make oldconfig` and answer any question with default answer, I just press [enter].
>> I build with `make`. No error and all compiled.
>> Then I install with `make modules_install` and `make install`.
>>

> I realize that this is a low-probability thought, but did you:
> - apply the patch against 2.6.26 NOT 2.6.26.6
Yup, I've did it.
> - run make clean before applying the patch
I still need this for a pure 2.6.26? I'll try this.

>> After reboot the system I got 'kernel panic'.
>> This is the error picture :
>> http://www.ryht.co.cc/wordpress/wp-content/uploads/2008/10/pa101358.jpg
>> Is it a bugs?
>>
>> http://www.ryht.co.cc/wordpress/wp-content/uploads/2008/10/pa101353.jpg [2.6.26.6 run well]

> I keep configs in a separate place. I would first copy a 2.6.26 tree to a new directory (cp -rl linux-2.6.26 linux2.6.27) then be sure I had a clean copy with "make distclean" (or "make mrproper") and then apply the 2.6.27 patch. Then I would copy the 2.6.26 (or maybe 2.6.26.6) config to .config, and make the oldconfig.
> None of that is magic, it just keeps me from making common mistakes, lets me start with a clean 2.6.27, etc, etc.
> You mentioned oldconfig, but not starting back with a clean 2.6.26, which made me think of this.
Hm, I `cp /boot/config .config` in new kernel tree [2.6.27] for the
config. Do I make mistakes?

> --
> Bill Davidsen <davidsen@tmr.com>


Yudha_HT

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2008-10-11  7:30 Yudha Harimantoro T
@ 2008-10-11 15:12 ` Bill Davidsen
  2008-10-13  6:18   ` Re: Yudha Harimantoro T
  0 siblings, 1 reply; 414+ messages in thread
From: Bill Davidsen @ 2008-10-11 15:12 UTC (permalink / raw)
  To: Yudha Harimantoro T; +Cc: linux-kernel

Yudha Harimantoro T wrote:
> Date:	Fri, 10 Oct 2008 16:41:36 -0400
> From:	Bill Davidsen
>
>
>   
>> Yudha Harimantoro T wrote:
>>     
>>> Hi all,
>>> Yesterday I build kernel 2.6.26.6 and it's run well. Today I got
>>> 2.6.27 patch and try to build it.
>>>
>>> I build with `make oldconfig` and answer any question with default answer, I just press [enter].
>>> I build with `make`. No error and all compiled.
>>> Then I install with `make modules_install` and `make install`.
>>>
>>>       
>
>   
>> I realize that this is a low-probability thought, but did you:
>> - apply the patch against 2.6.26 NOT 2.6.26.6
>>     
> Yup, I've did it.
>   
>> - run make clean before applying the patch
>>     
> I still need this for a pure 2.6.26? I'll try this.
>
>   
>>> After reboot the system I got 'kernel panic'.
>>> This is the error picture :
>>> http://www.ryht.co.cc/wordpress/wp-content/uploads/2008/10/pa101358.jpg
>>> Is it a bugs?
>>>
>>> http://www.ryht.co.cc/wordpress/wp-content/uploads/2008/10/pa101353.jpg [2.6.26.6 run well]
>>>       
>
>   
>> I keep configs in a separate place. I would first copy a 2.6.26 tree to a new directory (cp -rl linux-2.6.26 linux2.6.27) then be sure I had a clean copy with "make distclean" (or "make mrproper") and then apply the 2.6.27 patch. Then I would copy the 2.6.26 (or maybe 2.6.26.6) config to .config, and make the oldconfig.
>> None of that is magic, it just keeps me from making common mistakes, lets me start with a clean 2.6.27, etc, etc.
>> You mentioned oldconfig, but not starting back with a clean 2.6.26, which made me think of this.
>>     
> Hm, I `cp /boot/config .config` in new kernel tree [2.6.27] for the
> config. Do I make mistakes?
>   

No, I was just noting that I try to start oldconfig with a known working 
config, to reduce the number of choices and possible errors. I wasn't 
sure if you did that, so I mentioned it.

-- 
Bill Davidsen <davidsen@tmr.com>
  "Woe unto the statesman who makes war without a reason that will still
  be valid when the war is over..." Otto von Bismark 



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2008-10-11 15:12 ` Bill Davidsen
@ 2008-10-13  6:18   ` Yudha Harimantoro T
  2008-10-13  8:29     ` Re: Yudha Harimantoro T
  0 siblings, 1 reply; 414+ messages in thread
From: Yudha Harimantoro T @ 2008-10-13  6:18 UTC (permalink / raw)
  To: Bill Davidsen; +Cc: linux-kernel

This morning I run `make mrproper` in the kernel tree and rebuild. But
it make the kernel panic at 9 s, with the same error.

Maybe I'll get the 2.6.27 from the kernel.org now.

Thx,

Yudha_HT

2008/10/11, Bill Davidsen <davidsen@tmr.com>:
> Yudha Harimantoro T wrote:
>> Date:	Fri, 10 Oct 2008 16:41:36 -0400
>> From:	Bill Davidsen
>>
>>
>>
>>> Yudha Harimantoro T wrote:
>>>
>>>> Hi all,
>>>> Yesterday I build kernel 2.6.26.6 and it's run well. Today I got
>>>> 2.6.27 patch and try to build it.
>>>>
>>>> I build with `make oldconfig` and answer any question with default
>>>> answer, I just press [enter].
>>>> I build with `make`. No error and all compiled.
>>>> Then I install with `make modules_install` and `make install`.
>>>>
>>>>
>>
>>
>>> I realize that this is a low-probability thought, but did you:
>>> - apply the patch against 2.6.26 NOT 2.6.26.6
>>>
>> Yup, I've did it.
>>
>>> - run make clean before applying the patch
>>>
>> I still need this for a pure 2.6.26? I'll try this.
>>
>>
>>>> After reboot the system I got 'kernel panic'.
>>>> This is the error picture :
>>>> http://www.ryht.co.cc/wordpress/wp-content/uploads/2008/10/pa101358.jpg
>>>> Is it a bugs?
>>>>
>>>> http://www.ryht.co.cc/wordpress/wp-content/uploads/2008/10/pa101353.jpg
>>>> [2.6.26.6 run well]
>>>>
>>
>>
>>> I keep configs in a separate place. I would first copy a 2.6.26 tree to a
>>> new directory (cp -rl linux-2.6.26 linux2.6.27) then be sure I had a
>>> clean copy with "make distclean" (or "make mrproper") and then apply the
>>> 2.6.27 patch. Then I would copy the 2.6.26 (or maybe 2.6.26.6) config to
>>> .config, and make the oldconfig.
>>> None of that is magic, it just keeps me from making common mistakes, lets
>>> me start with a clean 2.6.27, etc, etc.
>>> You mentioned oldconfig, but not starting back with a clean 2.6.26, which
>>> made me think of this.
>>>
>> Hm, I `cp /boot/config .config` in new kernel tree [2.6.27] for the
>> config. Do I make mistakes?
>>
>
> No, I was just noting that I try to start oldconfig with a known working
> config, to reduce the number of choices and possible errors. I wasn't
> sure if you did that, so I mentioned it.
>
> --
> Bill Davidsen <davidsen@tmr.com>
>   "Woe unto the statesman who makes war without a reason that will still
>   be valid when the war is over..." Otto von Bismark
>
>
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2008-10-13  6:18   ` Re: Yudha Harimantoro T
@ 2008-10-13  8:29     ` Yudha Harimantoro T
  2008-10-13 12:03       ` Re: Alan Jenkins
  0 siblings, 1 reply; 414+ messages in thread
From: Yudha Harimantoro T @ 2008-10-13  8:29 UTC (permalink / raw)
  To: Bill Davidsen; +Cc: linux-kernel

Can you explain to me what the error means?
I got the kernel panic with the
http://kernel.org/pub/linux/kernel/v2.6/linux-2.6.27.tar.bz2

<b>....
Kernel panic - not syncing: No init found. Try passing init= option to
kernel</b>

I still got the error.

Best regards,
Yudha_HT

2008/10/13, Yudha Harimantoro T <yudha.ht@gmail.com>:
> This morning I run `make mrproper` in the kernel tree and rebuild. But
> it make the kernel panic at 9 s, with the same error.
>
> Maybe I'll get the 2.6.27 from the kernel.org now.
>
> Thx,
>
> Yudha_HT
>
> 2008/10/11, Bill Davidsen <davidsen@tmr.com>:
>> Yudha Harimantoro T wrote:
>>> Date:	Fri, 10 Oct 2008 16:41:36 -0400
>>> From:	Bill Davidsen
>>>
>>>
>>>
>>>> Yudha Harimantoro T wrote:
>>>>
>>>>> Hi all,
>>>>> Yesterday I build kernel 2.6.26.6 and it's run well. Today I got
>>>>> 2.6.27 patch and try to build it.
>>>>>
>>>>> I build with `make oldconfig` and answer any question with default
>>>>> answer, I just press [enter].
>>>>> I build with `make`. No error and all compiled.
>>>>> Then I install with `make modules_install` and `make install`.
>>>>>
>>>>>
>>>
>>>
>>>> I realize that this is a low-probability thought, but did you:
>>>> - apply the patch against 2.6.26 NOT 2.6.26.6
>>>>
>>> Yup, I've did it.
>>>
>>>> - run make clean before applying the patch
>>>>
>>> I still need this for a pure 2.6.26? I'll try this.
>>>
>>>
>>>>> After reboot the system I got 'kernel panic'.
>>>>> This is the error picture :
>>>>> http://www.ryht.co.cc/wordpress/wp-content/uploads/2008/10/pa101358.jpg
>>>>> Is it a bugs?
>>>>>
>>>>> http://www.ryht.co.cc/wordpress/wp-content/uploads/2008/10/pa101353.jpg
>>>>> [2.6.26.6 run well]
>>>>>
>>>
>>>
>>>> I keep configs in a separate place. I would first copy a 2.6.26 tree to
>>>> a
>>>> new directory (cp -rl linux-2.6.26 linux2.6.27) then be sure I had a
>>>> clean copy with "make distclean" (or "make mrproper") and then apply the
>>>> 2.6.27 patch. Then I would copy the 2.6.26 (or maybe 2.6.26.6) config to
>>>> .config, and make the oldconfig.
>>>> None of that is magic, it just keeps me from making common mistakes,
>>>> lets
>>>> me start with a clean 2.6.27, etc, etc.
>>>> You mentioned oldconfig, but not starting back with a clean 2.6.26,
>>>> which
>>>> made me think of this.
>>>>
>>> Hm, I `cp /boot/config .config` in new kernel tree [2.6.27] for the
>>> config. Do I make mistakes?
>>>
>>
>> No, I was just noting that I try to start oldconfig with a known working
>> config, to reduce the number of choices and possible errors. I wasn't
>> sure if you did that, so I mentioned it.
>>
>> --
>> Bill Davidsen <davidsen@tmr.com>
>>   "Woe unto the statesman who makes war without a reason that will still
>>   be valid when the war is over..." Otto von Bismark
>>
>>
>>
>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2008-10-13  8:29     ` Re: Yudha Harimantoro T
@ 2008-10-13 12:03       ` Alan Jenkins
  0 siblings, 0 replies; 414+ messages in thread
From: Alan Jenkins @ 2008-10-13 12:03 UTC (permalink / raw)
  To: Yudha Harimantoro T; +Cc: Bill Davidsen, linux-kernel

Yudha Harimantoro T wrote:
> Can you explain to me what the error means?
> I got the kernel panic with the
> http://kernel.org/pub/linux/kernel/v2.6/linux-2.6.27.tar.bz2
> 
> <b>....
> Kernel panic - not syncing: No init found. Try passing init= option to
> kernel</b>
> 
> I still got the error.
> 
> Best regards,
> Yudha_HT


It literally means the kernel can't find /sbin/init, the first userspace
program, which starts everything else.  It's a less common error - it's more
common to fail to mount the root filesystem, for example because you forgot
to enable ext3 or SATA support.

My guess would be your kernel has mounted the wrong filesystem as root.  Which device (partition etc) is your root filesystem on, and what boot options do you pass to the kernel?


 
> 2008/10/13, Yudha Harimantoro T <yudha.ht@gmail.com>:
>> This morning I run `make mrproper` in the kernel tree and rebuild. But
>> it make the kernel panic at 9 s, with the same error.
>>
>> Maybe I'll get the 2.6.27 from the kernel.org now.
>>
>> Thx,
>>
>> Yudha_HT
>>
>> 2008/10/11, Bill Davidsen <davidsen@tmr.com>:
>>> Yudha Harimantoro T wrote:
>>>> Date:	Fri, 10 Oct 2008 16:41:36 -0400
>>>> From:	Bill Davidsen
>>>>
>>>>
>>>>
>>>>> Yudha Harimantoro T wrote:
>>>>>
>>>>>> Hi all,
>>>>>> Yesterday I build kernel 2.6.26.6 and it's run well. Today I got
>>>>>> 2.6.27 patch and try to build it.
>>>>>>
>>>>>> I build with `make oldconfig` and answer any question with default
>>>>>> answer, I just press [enter].
>>>>>> I build with `make`. No error and all compiled.
>>>>>> Then I install with `make modules_install` and `make install`.
>>>>>>
>>>>>>
>>>>
>>>>> I realize that this is a low-probability thought, but did you:
>>>>> - apply the patch against 2.6.26 NOT 2.6.26.6
>>>>>
>>>> Yup, I've did it.
>>>>
>>>>> - run make clean before applying the patch
>>>>>
>>>> I still need this for a pure 2.6.26? I'll try this.
>>>>
>>>>
>>>>>> After reboot the system I got 'kernel panic'.
>>>>>> This is the error picture :
>>>>>> http://www.ryht.co.cc/wordpress/wp-content/uploads/2008/10/pa101358.jpg
>>>>>> Is it a bugs?
>>>>>>
>>>>>> http://www.ryht.co.cc/wordpress/wp-content/uploads/2008/10/pa101353.jpg
>>>>>> [2.6.26.6 run well]
>>>>>>
>>>>
>>>>> I keep configs in a separate place. I would first copy a 2.6.26 tree to
>>>>> a
>>>>> new directory (cp -rl linux-2.6.26 linux2.6.27) then be sure I had a
>>>>> clean copy with "make distclean" (or "make mrproper") and then apply the
>>>>> 2.6.27 patch. Then I would copy the 2.6.26 (or maybe 2.6.26.6) config to
>>>>> .config, and make the oldconfig.
>>>>> None of that is magic, it just keeps me from making common mistakes,
>>>>> lets
>>>>> me start with a clean 2.6.27, etc, etc.
>>>>> You mentioned oldconfig, but not starting back with a clean 2.6.26,
>>>>> which
>>>>> made me think of this.
>>>>>
>>>> Hm, I `cp /boot/config .config` in new kernel tree [2.6.27] for the
>>>> config. Do I make mistakes?
>>>>
>>> No, I was just noting that I try to start oldconfig with a known working
>>> config, to reduce the number of choices and possible errors. I wasn't
>>> sure if you did that, so I mentioned it.
>>>
>>> --
>>> Bill Davidsen <davidsen@tmr.com>
>>>   "Woe unto the statesman who makes war without a reason that will still
>>>   be valid when the war is over..." Otto von Bismark
>>>
>>>
>>>


^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <0K6B0005EN54GNO0@l-daemon>]

* Re:
       [not found] <0K6B0005EN54GNO0@l-daemon>
@ 2008-08-29  0:14 ` Robert Hancock
  0 siblings, 0 replies; 414+ messages in thread
From: Robert Hancock @ 2008-08-29  0:14 UTC (permalink / raw)
  To: stock; +Cc: linux-kernel

stock@stokkie.net wrote:
>>> How about giving your sound device a proper seperate IRQ number?
>>> At least libata should like eth0 have its own kernel resources.
>> That's an issue with the way the motherboard IRQ lines are wired. 
>> There's nothing the kernel can do about it.
> 
> That sounds rather strange to me, as IRQ line 16 is a virtual
> IRQ as part of IO-APIC.

It's still generally wired that way on the motherboard, both devices are 
connected to the same IRQ line. Or at least, the kernel has no control 
over what devices are routed to what IRQs. It gets  the IRQ mapping from 
the BIOS and uses it.

> 
>> Normally I wouldn't expect that to make a big difference though..
> 
> fact is that when copying a iso from one SATA disk to the
> other results in flaky sound when playing online internet radio.
> Even starting firefox for the 1st time after booting results
> in flaky sound for a short time.
> 
>> You'd really have to try a newer kernel first in order to get much help, 
>> though. That's a pretty ancient kernel. Quite likely the situation is 
>> improved in newer versions.
> 
> I sure would like todo that. But why doesn't the linux-kernel community
> know howto pull virtual IRQ lines apart, as part of configuring
> a linux kernel? In the days of 386/486 cpu's, IRQ's of several
> add-on cards could be adjusted manually by jumpers. Why is there not
> such a thing for virtual IRQ's inside the IO-APIC system?
> 
> The libata driver should like eth0 on IRQ 19 have its own IRQ.

You'd have to take that up with your motherboard manufacturer, 
unfortunately :-)

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <alpine.LFD.1.10.0807271037190.3486@nehalem.linux-foundation.org>]

* RE:
       [not found] <alpine.LFD.1.10.0807271037190.3486@nehalem.linux-foundation.org>
@ 2008-07-27 22:37 ` Trond Myklebust
  0 siblings, 0 replies; 414+ messages in thread
From: Trond Myklebust @ 2008-07-27 22:37 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-nfs, linux-kernel

On Sun, 2008-07-27 at 10:40 -0700, Linus Torvalds wrote:
> Trond?
> 
> See 'http://lkml.org/lkml/2008/7/17/154'? It's been 10+ days, it's 
> apparently still there.
> 
> 		Linus

Sure thing...

Please pull from the "hotfixes" branch of the repository at

   git pull git://git.linux-nfs.org/projects/trondmy/nfs-2.6.git hotfixes

This will update the following files through the appended changesets.

  Cheers,
    Trond

----
 fs/nfs/super.c  |    6 +++---
 fs/nfs/unlink.c |    3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

commit 744d18dbfae07482ea461701b0aaec3a75ec9224
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Jul 27 18:03:19 2008 -0400

    NFS: Ensure we call nfs_sb_deactive() after releasing the directory inode
    
    In order to avoid the "Busy inodes after unmount" error message, we need to
    ensure that nfs_async_unlink_release() releases the super block after the
    call to nfs_free_unlinkdata().
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 31c9446993f412ecb7875e30bba4bc7f216ae016
Author: Marc Zyngier <maz@misterjones.org>
Date:   Thu Jul 17 13:21:55 2008 +0200

    nfs_remount oops when rebooting + possible fix
    
    Jeff, Trond,
    
    The commit
    
    48b605f83c920d8daa50e43fc2c7f718e04c7bfa (NFS: implement option checking
    when remounting NFS filesystems (resend))
    
    generates an Oops on my platform when rebooting while its root FS is on
    an NFS share (NFSv3, TCP):
    
    Unmounting local filesystems...done.
    Unable to handle kernel NULL pointer dereference at virtual address 00000000
    pgd = c3d00000
    [00000000] *pgd=a3d72031, *pte=00000000, *ppte=00000000
    Internal error: Oops: 17 [#1]
    Modules linked in: cpufreq_powersave cpufreq_ondemand cpufreq_userspace cpufreq_conservative ext3 jbd sd_mod pata_pcmcia libata scsi_mod pcmcia loop firmware_class pxafb cfbcopyarea cfbimgblt cfbfillrect pxa2xx_cs pxa2xx_core pcmcia_core snd_pxa2xx_ac97 snd_ac97_codec ac97_bus snd_pxa2xx_pcm snd_pcm_oss snd_mixer_oss snd_pcm snd_timer snd isp116x_hcd soundcore rtc_sa1100 snd_page_alloc pxa25x_udc usbcore rtc_ds1307 rtc_core
    CPU: 0    Not tainted  (2.6.26-03414-g33af79d-dirty #15)
    PC is at nfs_remount+0x40/0x264
    LR is at do_remount_sb+0x158/0x194
    pc : [<c00bbf54>]    lr : [<c0076c40>]    psr: 60000013
    sp : c2dd1e70  ip : c2dd1e98  fp : c2dd1e94
    r10: 00000040  r9 : c3d17000  r8 : c3c3fc40
    r7 : 00000000  r6 : 00000000  r5 : c3d2b200  r4 : 00000000
    r3 : 00000003  r2 : 00000000  r1 : c2dd1e9c  r0 : c3c3fc00
    Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
    Control: 0000397f  Table: a3d00000  DAC: 00000015
    Process mount (pid: 1462, stack limit = 0xc2dd0270)
    Stack: (0xc2dd1e70 to 0xc2dd2000)
    1e60:                                     00000000 c3c3fc00 00000000 00000000
    1e80: c3c3fc40 c3d17000 c2dd1ebc c2dd1e98 c0076c40 c00bbf20 c01c61e4 00000001
    1ea0: c2dd1ebc 00000001 c3c3fc00 c2dd1ef0 c2dd1ee4 c2dd1ec0 c008c6d8 c0076af4
    1ec0: 00000021 00000040 c2dd1ef0 c3d77000 c3eaa000 00000000 c2dd1f6c c2dd1ee8
    1ee0: c008d1bc c008c5f8 00000000 c2dd0000 c3c0c320 c3805b38 c002064c 0001f820
    1f00: 0001f810 00000001 00000001 00000000 c2dd0000 00000000 c2dd1f34 c2dd1f28
    1f20: c005ead8 c005e6f8 c2dd1f44 c2dd1f38 c005eaf8 c005ead0 c2dd1f6c c2dd1f48
    1f40: c008ae3c 00000000 c3d77000 0001f810 c0ed0021 c0020ca8 c2dd0000 00000000
    1f60: c2dd1fa4 c2dd1f70 c008d2d4 c008d0bc 00000000 0001f810 c2dd1f9c c3eaa000
    1f80: c3d17000 00000000 00000000 be8b6aa8 be8b6ad0 00000015 00000000 c2dd1fa8
    1fa0: c0020b00 c008d254 00000000 be8b6aa8 0001f810 0001f820 0001f830 c0ed0021
    1fc0: 00000000 be8b6aa8 be8b6ad0 00000015 00000000 be8b6ad0 0001f810 be8b6aa8
    1fe0: 0001f810 be8b6964 0000aab8 40125124 60000010 0001f810 00000000 00000000
    Backtrace:
    [<c00bbf14>] (nfs_remount+0x0/0x264) from [<c0076c40>] (do_remount_sb+0x158/0x194)
      r9:c3d17000 r8:c3c3fc40 r7:00000000 r6:00000000 r5:c3c3fc00
    r4:00000000
    [<c0076ae8>] (do_remount_sb+0x0/0x194) from [<c008c6d8>] (do_remount+0xec/0x118)
      r6:c2dd1ef0 r5:c3c3fc00 r4:00000001
    [<c008c5ec>] (do_remount+0x0/0x118) from [<c008d1bc>] (do_mount+0x10c/0x198)
    [<c008d0b0>] (do_mount+0x0/0x198) from [<c008d2d4>] (sys_mount+0x8c/0xd4)
    [<c008d248>] (sys_mount+0x0/0xd4) from [<c0020b00>] (ret_fast_syscall+0x0/0x2c)
      r7:00000015 r6:be8b6ad0 r5:be8b6aa8 r4:00000000
    Code: 0a000086 ea000006 e3530003 8a000004 (e5923000)
    ---[ end trace 55e1b689cf8c8a6a ]---
    ------------[ cut here ]------------
    WARNING: at kernel/exit.c:966 do_exit+0x3c/0x628()
    Modules linked in: cpufreq_powersave cpufreq_ondemand cpufreq_userspace cpufreq_conservative ext3 jbd sd_mod pata_pcmcia libata scsi_mod pcmcia loop firmware_class pxafb cfbcopyarea cfbimgblt cfbfillrect pxa2xx_cs pxa2xx_core pcmcia_core snd_pxa2xx_ac97 snd_ac97_codec ac97_bus snd_pxa2xx_pcm snd_pcm_oss snd_mixer_oss snd_pcm snd_timer snd isp116x_hcd soundcore rtc_sa1100 snd_page_alloc pxa25x_udc usbcore rtc_ds1307 rtc_core
    [<c0025168>] (dump_stack+0x0/0x14) from [<c0032154>] (warn_on_slowpath+0x4c/0x68)
    [<c0032108>] (warn_on_slowpath+0x0/0x68) from [<c003531c>] (do_exit+0x3c/0x628)
      r6:0000000b r5:c3c3dc80 r4:c2dd0000
    [<c00352e0>] (do_exit+0x0/0x628) from [<c0025004>] (die+0x2b0/0x30c)
    [<c0024d54>] (die+0x0/0x30c) from [<c00270bc>] (__do_kernel_fault+0x6c/0x80)
    [<c0027050>] (__do_kernel_fault+0x0/0x80) from [<c00272e0>] (do_page_fault+0x210/0x230)
      r7:c3fa7118 r6:c3c3dc80 r5:c3d166a8 r4:00010000
    [<c00270d0>] (do_page_fault+0x0/0x230) from [<c00201ec>] (do_DataAbort+0x3c/0xa0)
    [<c00201b0>] (do_DataAbort+0x0/0xa0) from [<c002064c>] (__dabt_svc+0x4c/0x60)
    Exception stack(0xc2dd1e28 to 0xc2dd1e70)
    1e20:                   c3c3fc00 c2dd1e9c 00000000 00000003 00000000 c3d2b200
    1e40: 00000000 00000000 c3c3fc40 c3d17000 00000040 c2dd1e94 c2dd1e98 c2dd1e70
    1e60: c0076c40 c00bbf54 60000013 ffffffff
      r8:c3c3fc40 r7:00000000 r6:00000000 r5:c2dd1e5c r4:ffffffff
    [<c00bbf14>] (nfs_remount+0x0/0x264) from [<c0076c40>] (do_remount_sb+0x158/0x194)
      r9:c3d17000 r8:c3c3fc40 r7:00000000 r6:00000000 r5:c3c3fc00
    r4:00000000
    [<c0076ae8>] (do_remount_sb+0x0/0x194) from [<c008c6d8>] (do_remount+0xec/0x118)
      r6:c2dd1ef0 r5:c3c3fc00 r4:00000001
    [<c008c5ec>] (do_remount+0x0/0x118) from [<c008d1bc>] (do_mount+0x10c/0x198)
    [<c008d0b0>] (do_mount+0x0/0x198) from [<c008d2d4>] (sys_mount+0x8c/0xd4)
    [<c008d248>] (sys_mount+0x0/0xd4) from [<c0020b00>] (ret_fast_syscall+0x0/0x2c)
      r7:00000015 r6:be8b6ad0 r5:be8b6aa8 r4:00000000
    ---[ end trace 55e1b689cf8c8a6a ]---
    /etc/rc6.d/S60umountroot: line 17:  1462 Segmentation fault      mount $MOUNT_FORCE_OPT -n -o remount,ro -t dummytype dummydev / 2> /dev/null
    
    The new super.c:nfs_remount function doesn't check the validity of the
    options/options4 pointers. Unfortunately, NULL pointers do seem to happen.
    The obvious patch seems to be to check the pointers, and not to do
    anything if they happen to be NULL.
    
    Tested on an XScale PXA255 system, latest git.
    
    Regards,
    
    	M.
    
    Signed-off-by: Marc Zyngier <marc.zyngier@altran.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 1b94e36..9abcd2b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1718,9 +1718,9 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
 	 * ones were explicitly specified. Fall back to legacy behavior and
 	 * just return success.
 	 */
-	if ((nfsvers == 4 && options4->version == 1) ||
-	    (nfsvers <= 3 && options->version >= 1 &&
-	     options->version <= 6))
+	if ((nfsvers == 4 && (!options4 || options4->version == 1)) ||
+	    (nfsvers <= 3 && (!options || (options->version >= 1 &&
+					   options->version <= 6))))
 		return 0;
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 3adf8b2..f089e58 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,10 +95,11 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
 static void nfs_async_unlink_release(void *calldata)
 {
 	struct nfs_unlinkdata	*data = calldata;
+	struct super_block *sb = data->dir->i_sb;
 
 	nfs_dec_sillycount(data->dir);
-	nfs_sb_deactive(NFS_SERVER(data->dir));
 	nfs_free_unlinkdata(data);
+	nfs_sb_deactive(NFS_SB(sb));
 }
 
 static const struct rpc_call_ops nfs_unlink_ops = {


-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* (no subject)
@ 2008-07-09 15:47 Mathieu Desnoyers
  2008-07-09 16:07 ` Eduard - Gabriel Munteanu
  0 siblings, 1 reply; 414+ messages in thread
From: Mathieu Desnoyers @ 2008-07-09 15:47 UTC (permalink / raw)
  To: Peter Zijlstra, Steven Rostedt
  Cc: Thomas Gleixner, Masami Hiramatsu, Frank Ch. Eigler, Hideo AOKI,
	Takashi Nishiie, Eduard - Gabriel Munteanu, akpm, Ingo Molnar,
	linux-kernel

Bcc: 
Subject: Re: [patch 05/15] LTTng instrumentation - scheduler (repost)
Reply-To: 
In-Reply-To: <20080709153434.GA9186@Krystal>
X-Editor: vi
X-Info: http://krystal.dyndns.org:8080
X-Operating-System: Linux/2.6.21.3-grsec (i686)
X-Uptime: 11:46:04 up 34 days, 20:27,  4 users,  load average: 2.95, 2.43,
	2.46

Hi Peter,

I noticed that my tracepoints are not exactly at the same location as
the trace_mark you have added for ftrace and that I do not pass the "rq"
parameter.  Should I change my patch to follow what you have in
linux-next ?

Mathieu

* Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) wrote:
> There were 2 rejects when I ported the patch to linux-next. Sorry. Here
> is a repost.
> 
> 
> Instrument the scheduler activity (sched_switch, migration, wakeups, wait for a
> task, signal delivery) and process/thread creation/destruction (fork, exit,
> kthread stop). Actually, kthread creation is not instrumented in this patch
> because it is architecture dependent. It allows to connect tracers such as
> ftrace which detects scheduling latencies, good/bad scheduler decisions. Tools
> like LTTng can export this scheduler information along with instrumentation of
> the rest of the kernel activity to perform post-mortem analysis on the scheduler
> activity.
> 
> About the performance impact of tracepoints (which is comparable to markers),
> even without immediate values optimizations, tests done by Hideo Aoki on ia64
> show no regression. His test case was using hackbench on a kernel where
> scheduler instrumentation (about 5 events in code scheduler code) was added.
> See the "Tracepoints" patch header for performance result detail.
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> CC: 'Peter Zijlstra' <peterz@infradead.org>
> CC: 'Steven Rostedt' <rostedt@goodmis.org>
> CC: Thomas Gleixner <tglx@linutronix.de>
> CC: Masami Hiramatsu <mhiramat@redhat.com>
> CC: "Frank Ch. Eigler" <fche@redhat.com>
> CC: 'Ingo Molnar' <mingo@elte.hu>
> CC: 'Hideo AOKI' <haoki@redhat.com>
> CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
> CC: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
> ---
>  kernel/exit.c        |    6 ++++++
>  kernel/fork.c        |    3 +++
>  kernel/kthread.c     |    5 +++++
>  kernel/sched-trace.h |   43 +++++++++++++++++++++++++++++++++++++++++++
>  kernel/sched.c       |   11 ++++++-----
>  kernel/signal.c      |    3 +++
>  6 files changed, 66 insertions(+), 5 deletions(-)
> 
> Index: linux-2.6-lttng/kernel/kthread.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/kthread.c	2008-07-09 11:27:01.000000000 -0400
> +++ linux-2.6-lttng/kernel/kthread.c	2008-07-09 11:27:08.000000000 -0400
> @@ -13,6 +13,7 @@
>  #include <linux/file.h>
>  #include <linux/module.h>
>  #include <linux/mutex.h>
> +#include "sched-trace.h"
>  
>  #define KTHREAD_NICE_LEVEL (-5)
>  
> @@ -187,6 +188,8 @@ int kthread_stop(struct task_struct *k)
>  	/* It could exit after stop_info.k set, but before wake_up_process. */
>  	get_task_struct(k);
>  
> +	trace_sched_kthread_stop(k);
> +
>  	/* Must init completion *before* thread sees kthread_stop_info.k */
>  	init_completion(&kthread_stop_info.done);
>  	smp_wmb();
> @@ -202,6 +205,8 @@ int kthread_stop(struct task_struct *k)
>  	ret = kthread_stop_info.err;
>  	mutex_unlock(&kthread_stop_lock);
>  
> +	trace_sched_kthread_stop_ret(ret);
> +
>  	return ret;
>  }
>  EXPORT_SYMBOL(kthread_stop);
> Index: linux-2.6-lttng/kernel/sched.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/sched.c	2008-07-09 11:27:01.000000000 -0400
> +++ linux-2.6-lttng/kernel/sched.c	2008-07-09 11:27:56.000000000 -0400
> @@ -71,6 +71,7 @@
>  #include <linux/debugfs.h>
>  #include <linux/ctype.h>
>  #include <linux/ftrace.h>
> +#include "sched-trace.h"
>  
>  #include <asm/tlb.h>
>  #include <asm/irq_regs.h>
> @@ -1987,6 +1988,7 @@ void wait_task_inactive(struct task_stru
>  		 * just go back and repeat.
>  		 */
>  		rq = task_rq_lock(p, &flags);
> +		trace_sched_wait_task(p);
>  		running = task_running(rq, p);
>  		on_rq = p->se.on_rq;
>  		task_rq_unlock(rq, &flags);
> @@ -2275,6 +2277,7 @@ static int try_to_wake_up(struct task_st
>  
>  	smp_wmb();
>  	rq = task_rq_lock(p, &flags);
> +	trace_sched_try_wakeup(p);
>  	old_state = p->state;
>  	if (!(old_state & state))
>  		goto out;
> @@ -2457,6 +2460,7 @@ void wake_up_new_task(struct task_struct
>  	struct rq *rq;
>  
>  	rq = task_rq_lock(p, &flags);
> +	trace_sched_wakeup_new_task(p);
>  	BUG_ON(p->state != TASK_RUNNING);
>  	update_rq_clock(rq);
>  
> @@ -2647,11 +2651,7 @@ context_switch(struct rq *rq, struct tas
>  	struct mm_struct *mm, *oldmm;
>  
>  	prepare_task_switch(rq, prev, next);
> -	trace_mark(kernel_sched_schedule,
> -		"prev_pid %d next_pid %d prev_state %ld "
> -		"## rq %p prev %p next %p",
> -		prev->pid, next->pid, prev->state,
> -		rq, prev, next);
> +	trace_sched_switch(prev, next);
>  	mm = next->mm;
>  	oldmm = prev->active_mm;
>  	/*
> @@ -2884,6 +2884,7 @@ static void sched_migrate_task(struct ta
>  	    || unlikely(cpu_is_offline(dest_cpu)))
>  		goto out;
>  
> +	trace_sched_migrate_task(p, dest_cpu);
>  	/* force the process onto the specified CPU */
>  	if (migrate_task(p, dest_cpu, &req)) {
>  		/* Need to wait for migration thread (might exit: take ref). */
> Index: linux-2.6-lttng/kernel/exit.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/exit.c	2008-07-09 11:27:01.000000000 -0400
> +++ linux-2.6-lttng/kernel/exit.c	2008-07-09 11:27:08.000000000 -0400
> @@ -46,6 +46,7 @@
>  #include <linux/resource.h>
>  #include <linux/blkdev.h>
>  #include <linux/task_io_accounting_ops.h>
> +#include "sched-trace.h"
>  
>  #include <asm/uaccess.h>
>  #include <asm/unistd.h>
> @@ -149,6 +150,7 @@ static void __exit_signal(struct task_st
>  
>  static void delayed_put_task_struct(struct rcu_head *rhp)
>  {
> +	trace_sched_process_free(container_of(rhp, struct task_struct, rcu));
>  	put_task_struct(container_of(rhp, struct task_struct, rcu));
>  }
>  
> @@ -1040,6 +1042,8 @@ NORET_TYPE void do_exit(long code)
>  
>  	if (group_dead)
>  		acct_process();
> +	trace_sched_process_exit(tsk);
> +
>  	exit_sem(tsk);
>  	exit_files(tsk);
>  	exit_fs(tsk);
> @@ -1524,6 +1528,8 @@ static long do_wait(enum pid_type type, 
>  	struct task_struct *tsk;
>  	int flag, retval;
>  
> +	trace_sched_process_wait(pid);
> +
>  	add_wait_queue(&current->signal->wait_chldexit,&wait);
>  repeat:
>  	/* If there is nothing that can match our critier just get out */
> Index: linux-2.6-lttng/kernel/fork.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/fork.c	2008-07-09 11:27:01.000000000 -0400
> +++ linux-2.6-lttng/kernel/fork.c	2008-07-09 11:27:08.000000000 -0400
> @@ -56,6 +56,7 @@
>  #include <linux/proc_fs.h>
>  #include <linux/blkdev.h>
>  #include <linux/magic.h>
> +#include "sched-trace.h"
>  
>  #include <asm/pgtable.h>
>  #include <asm/pgalloc.h>
> @@ -1362,6 +1363,8 @@ long do_fork(unsigned long clone_flags,
>  	if (!IS_ERR(p)) {
>  		struct completion vfork;
>  
> +		trace_sched_process_fork(current, p);
> +
>  		nr = task_pid_vnr(p);
>  
>  		if (clone_flags & CLONE_PARENT_SETTID)
> Index: linux-2.6-lttng/kernel/signal.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/signal.c	2008-07-09 11:25:24.000000000 -0400
> +++ linux-2.6-lttng/kernel/signal.c	2008-07-09 11:27:08.000000000 -0400
> @@ -26,6 +26,7 @@
>  #include <linux/freezer.h>
>  #include <linux/pid_namespace.h>
>  #include <linux/nsproxy.h>
> +#include "sched-trace.h"
>  
>  #include <asm/param.h>
>  #include <asm/uaccess.h>
> @@ -807,6 +808,8 @@ static int send_signal(int sig, struct s
>  	struct sigpending *pending;
>  	struct sigqueue *q;
>  
> +	trace_sched_signal_send(sig, t);
> +
>  	assert_spin_locked(&t->sighand->siglock);
>  	if (!prepare_signal(sig, t))
>  		return 0;
> Index: linux-2.6-lttng/kernel/sched-trace.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-lttng/kernel/sched-trace.h	2008-07-09 11:27:08.000000000 -0400
> @@ -0,0 +1,43 @@
> +#ifndef _SCHED_TRACE_H
> +#define _SCHED_TRACE_H
> +
> +#include <linux/tracepoint.h>
> +
> +DEFINE_TRACE(sched_kthread_stop,
> +	TPPROTO(struct task_struct *t),
> +	TPARGS(t));
> +DEFINE_TRACE(sched_kthread_stop_ret,
> +	TPPROTO(int ret),
> +	TPARGS(ret));
> +DEFINE_TRACE(sched_wait_task,
> +	TPPROTO(struct task_struct *p),
> +	TPARGS(p));
> +DEFINE_TRACE(sched_try_wakeup,
> +	TPPROTO(struct task_struct *p),
> +	TPARGS(p));
> +DEFINE_TRACE(sched_wakeup_new_task,
> +	TPPROTO(struct task_struct *p),
> +	TPARGS(p));
> +DEFINE_TRACE(sched_switch,
> +	TPPROTO(struct task_struct *prev, struct task_struct *next),
> +	TPARGS(prev, next));
> +DEFINE_TRACE(sched_migrate_task,
> +	TPPROTO(struct task_struct *p, int dest_cpu),
> +	TPARGS(p, dest_cpu));
> +DEFINE_TRACE(sched_process_free,
> +	TPPROTO(struct task_struct *p),
> +	TPARGS(p));
> +DEFINE_TRACE(sched_process_exit,
> +	TPPROTO(struct task_struct *p),
> +	TPARGS(p));
> +DEFINE_TRACE(sched_process_wait,
> +	TPPROTO(struct pid *pid),
> +	TPARGS(pid));
> +DEFINE_TRACE(sched_process_fork,
> +	TPPROTO(struct task_struct *parent, struct task_struct *child),
> +	TPARGS(parent, child));
> +DEFINE_TRACE(sched_signal_send,
> +	TPPROTO(int sig, struct task_struct *p),
> +	TPARGS(sig, p));
> +
> +#endif
> -- 
> Mathieu Desnoyers
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2008-07-09 15:47 Mathieu Desnoyers
@ 2008-07-09 16:07 ` Eduard - Gabriel Munteanu
  2008-07-09 16:35   ` Re: Mathieu Desnoyers
  0 siblings, 1 reply; 414+ messages in thread
From: Eduard - Gabriel Munteanu @ 2008-07-09 16:07 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Peter Zijlstra, Steven Rostedt, Thomas Gleixner,
	Masami Hiramatsu, Frank Ch. Eigler, Hideo AOKI, Takashi Nishiie,
	akpm, Ingo Molnar, linux-kernel

On Wed, 9 Jul 2008 11:47:53 -0400
Mathieu Desnoyers <compudj@krystal.dyndns.org> wrote:

> Bcc: 
> Subject: Re: [patch 05/15] LTTng instrumentation - scheduler (repost)
> Reply-To: 
> In-Reply-To: <20080709153434.GA9186@Krystal>
> X-Editor: vi
> X-Info: http://krystal.dyndns.org:8080
> X-Operating-System: Linux/2.6.21.3-grsec (i686)
> X-Uptime: 11:46:04 up 34 days, 20:27,  4 users,  load average: 2.95,
> 2.43, 2.46
> 

Perhaps you should resend this correctly.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2008-07-09 16:07 ` Eduard - Gabriel Munteanu
@ 2008-07-09 16:35   ` Mathieu Desnoyers
  0 siblings, 0 replies; 414+ messages in thread
From: Mathieu Desnoyers @ 2008-07-09 16:35 UTC (permalink / raw)
  To: Eduard - Gabriel Munteanu
  Cc: Peter Zijlstra, Steven Rostedt, Thomas Gleixner,
	Masami Hiramatsu, Frank Ch. Eigler, Hideo AOKI, Takashi Nishiie,
	akpm, Ingo Molnar, linux-kernel

* Eduard - Gabriel Munteanu (eduard.munteanu@linux360.ro) wrote:
> On Wed, 9 Jul 2008 11:47:53 -0400
> Mathieu Desnoyers <compudj@krystal.dyndns.org> wrote:
> 
> > Bcc: 
> > Subject: Re: [patch 05/15] LTTng instrumentation - scheduler (repost)
> > Reply-To: 
> > In-Reply-To: <20080709153434.GA9186@Krystal>
> > X-Editor: vi
> > X-Info: http://krystal.dyndns.org:8080
> > X-Operating-System: Linux/2.6.21.3-grsec (i686)
> > X-Uptime: 11:46:04 up 34 days, 20:27,  4 users,  load average: 2.95,
> > 2.43, 2.46
> > 
> 
> Perhaps you should resend this correctly.
> 

This email is superseded by "Re: [patch 05/15] LTTng instrumentation -
scheduler (merge ftrace markers)". I'll have to work on my
vim-header-editing skills. I've done too much C code and too few SMTP
header edit lately. ;) Thanks for pointing this out.

Mathieu

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2008-05-20 12:34 Lukas Hejtmanek
  2008-05-20 12:40 ` Oliver Neukum
  0 siblings, 1 reply; 414+ messages in thread
From: Lukas Hejtmanek @ 2008-05-20 12:34 UTC (permalink / raw)
  To: Oliver Neukum
  Cc: Rafael J. Wysocki, Linux Kernel Mailing List, stern, greg, linux-usb

<stern@rowland.harvard.edu>, Greg KH <greg@kroah.com>
Bcc: 
Subject: Re: [Bug #10630] USB devices plugged into dock are not discoverred
	until reload of ehci-hcd
Reply-To: 
In-Reply-To: <200805201327.34678.oliver@neukum.org>
X-echelon: NSA, CIA, CI5, MI5, FBI, KGB, BIS, Plutonium, Bin Laden, bomb

On Tue, May 20, 2008 at 01:27:34PM +0200, Oliver Neukum wrote:
> > done.
> > http://bugzilla.kernel.org/show_bug.cgi?id=10630
> 
> Aha. Thanks.
> Please recompile without CONFIG_USB_SUSPEND

Hm, without USB_SUSPEND it works. So what next, considered fixed or any
further investigation is needed?

-- 
Lukáš Hejtmánek

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2008-05-20 12:34 Lukas Hejtmanek
@ 2008-05-20 12:40 ` Oliver Neukum
  0 siblings, 0 replies; 414+ messages in thread
From: Oliver Neukum @ 2008-05-20 12:40 UTC (permalink / raw)
  To: Lukas Hejtmanek
  Cc: Rafael J. Wysocki, Linux Kernel Mailing List, stern, greg, linux-usb

Am Dienstag 20 Mai 2008 14:34:23 schrieb Lukas Hejtmanek:
> <stern@rowland.harvard.edu>, Greg KH <greg@kroah.com>
> Bcc: 
> Subject: Re: [Bug #10630] USB devices plugged into dock are not discoverred
> 	until reload of ehci-hcd
> Reply-To: 
> In-Reply-To: <200805201327.34678.oliver@neukum.org>
> X-echelon: NSA, CIA, CI5, MI5, FBI, KGB, BIS, Plutonium, Bin Laden, bomb
> 
> On Tue, May 20, 2008 at 01:27:34PM +0200, Oliver Neukum wrote:
> > > done.
> > > http://bugzilla.kernel.org/show_bug.cgi?id=10630
> > 
> > Aha. Thanks.
> > Please recompile without CONFIG_USB_SUSPEND
> 
> Hm, without USB_SUSPEND it works. So what next, considered fixed or any
> further investigation is needed?

It is by no means fixed!

Now we find out what exactly doesn't work. Please apply this patch
and provide "dmesg -c" before you plug in the device and after that.

	Regards
		Oliver

---

--- linux-2.6.25/drivers/usb/host/ehci-hcd.c	2008-05-20 10:07:45.585199135 +0200
+++ alt/drivers/usb/host/ehci-hcd.c	2008-05-20 11:11:53.614580823 +0200
@@ -712,11 +712,15 @@ static irqreturn_t ehci_irq (struct usb_
 		unsigned	i = HCS_N_PORTS (ehci->hcs_params);
 		pcd_status = status;
 
+		printk(KERN_ERR"Detected PCD bit set\n");
 		/* resume root hub? */
-		if (!(ehci_readl(ehci, &ehci->regs->command) & CMD_RUN))
+		if (!(ehci_readl(ehci, &ehci->regs->command) & CMD_RUN)) {
+			printk(KERN_ERR"About to resume root hub due to PCD\n");
 			usb_hcd_resume_root_hub(hcd);
+		}
 
 		while (i--) {
+			printk(KERN_ERR"Checking motherboard port %d\n", i);
 			int pstatus = ehci_readl(ehci,
 						 &ehci->regs->port_status [i]);
 
@@ -730,6 +734,7 @@ static irqreturn_t ehci_irq (struct usb_
 			 * and make khubd collect PORT_STAT_C_SUSPEND to
 			 * stop that signaling.
 			 */
+			printk(KERN_ERR"Starting timer for port %d\n", i);
 			ehci->reset_done [i] = jiffies + msecs_to_jiffies (20);
 			ehci_dbg (ehci, "port %d remote wakeup\n", i + 1);
 			mod_timer(&hcd->rh_timer, ehci->reset_done[i]);
--- linux-2.6.25/drivers/usb/core/hcd.c	2008-05-20 10:07:45.583199804 +0200
+++ alt/drivers/usb/core/hcd.c	2008-05-20 11:12:23.506597140 +0200
@@ -589,6 +589,8 @@ void usb_hcd_poll_rh_status(struct usb_h
 			hcd->poll_pending = 1;
 		}
 		spin_unlock_irqrestore(&hcd_root_hub_lock, flags);
+	} else {
+		printk(KERN_ERR"hub_status_data() returned 0\n");
 	}
 
 	/* The USB 2.0 spec says 256 ms.  This is close enough and won't

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2008-04-09  8:45 Andreas Grimm
  2008-04-10  1:14 ` Lee Revell
  0 siblings, 1 reply; 414+ messages in thread
From: Andreas Grimm @ 2008-04-09  8:45 UTC (permalink / raw)
  To: linux-kernel

Hello everybody,

 i got a weird problem with one of my servers. It's a Intel SR2500AL with 32GB of RAM. 
 Looking at the memory usage of the system, something is going totally wrong. The crucial numbers from /proc/meminfo are:

 MemTotal:     33265916 kB
 MemFree:        416168 kB
 Inactive:     24630428 kB   (24GB? whooaaa)

 Another system with only 16GB, same amount of users and load, shows a more normal behaviour:

 MemTotal:     16619808 kB
 MemFree:       6912676 kB
 Inactive:      1774364 kB

 Why does the 32GB system have this much inactive memory? Is there a way to find out what the kernel is holding in readiness (that's the definition of inactive memory afaik)?

 OS: SLES 10 SP1
 Kernel : 2.6.16.27-0.9-bigsmp

 Thanks in advance.

 Andreas Grimm

      ____________________________________________________________________________________
You rock. That's why Blockbuster's offering you one month of Blockbuster Total Access, No Cost.  
http://tc.deals.yahoo.com/tc/blockbuster/text5.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2008-04-09  8:45 Andreas Grimm
@ 2008-04-10  1:14 ` Lee Revell
  0 siblings, 0 replies; 414+ messages in thread
From: Lee Revell @ 2008-04-10  1:14 UTC (permalink / raw)
  To: Andreas Grimm; +Cc: linux-kernel

On Wed, Apr 9, 2008 at 4:45 AM, Andreas Grimm <agrimm61@yahoo.com> wrote:
> Hello everybody,
>
>   i got a weird problem with one of my servers. It's a Intel SR2500AL with 32GB of RAM.
>   Looking at the memory usage of the system, something is going totally wrong. The crucial numbers from /proc/meminfo are:
>
>   MemTotal:     33265916 kB
>   MemFree:        416168 kB
>   Inactive:     24630428 kB   (24GB? whooaaa)
>

Why did you start 3 separate threads for this issue?  That's more
likely to annoy people than to get them to help you.

Lee

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2008-02-03 11:13 am kara
  2008-02-03 18:23 ` Benny Halevy
  0 siblings, 1 reply; 414+ messages in thread
From: am kara @ 2008-02-03 11:13 UTC (permalink / raw)
  To: linux-kernel

hello,

If the kernel does a kmap_atomic (temporary kernel mapping)
on behalf of a process on a cpu, will the process
continue to run, with no other process able to be scheduled
in its place (until kunmap_atomic is done)?

 


      ____________________________________________________________________________________
Looking for last minute shopping deals?  
Find them fast with Yahoo! Search.  http://tools.search.yahoo.com/newsearch/category.php?category=shopping


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2008-02-03 11:13 am kara
@ 2008-02-03 18:23 ` Benny Halevy
  0 siblings, 0 replies; 414+ messages in thread
From: Benny Halevy @ 2008-02-03 18:23 UTC (permalink / raw)
  To: am kara; +Cc: linux-kernel

am kara wrote:
> hello,
> 
> If kernel does kmap_atomic(temporary kernel mapping)
> on behalf of a process by a cpu, does the process will
> continue to run and no other process can be scheduled
> to switch it off?(till kunmap_atomic is done)

Effectively, kmap_atomic implementations call pagefault_disable
and that in turn is equivalent to preempt_disable()
so the answer to your question seems to be "yes".

Benny


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2007-11-10  1:18 Luck, Tony
  2007-11-10  1:42 ` Eric Dumazet
  0 siblings, 1 reply; 414+ messages in thread
From: Luck, Tony @ 2007-11-10  1:18 UTC (permalink / raw)
  To: LKML; +Cc: dada1

Just pulled latest git tree from Linus and a few ia64 configurations
(anything with CONFIG_NUMA=y) won't build.

The offending commit appears to be:

    230140cffa7feae90ad50bf259db1fa07674f3a7

Here's the error messages from the compiler:

  CC [M]  drivers/infiniband/core/cma.o
In file included from include/net/tcp.h:35,
                 from drivers/infiniband/core/cma.c:40:
include/net/inet_hashtables.h: In function `inet_ehash_locks_alloc':
include/net/inet_hashtables.h:165: error: implicit declaration of
function `vmalloc'
include/net/inet_hashtables.h:165: warning: assignment makes pointer
from integer without a cast
include/net/inet_hashtables.h: In function `inet_ehash_locks_free':
include/net/inet_hashtables.h:186: error: implicit declaration of
function `vfree'
make[3]: *** [drivers/infiniband/core/cma.o] Error 1


-Tony

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2007-11-10  1:18 Luck, Tony
@ 2007-11-10  1:42 ` Eric Dumazet
  2007-11-11  5:18   ` Re: David Miller
  0 siblings, 1 reply; 414+ messages in thread
From: Eric Dumazet @ 2007-11-10  1:42 UTC (permalink / raw)
  To: Luck, Tony; +Cc: LKML, David S. Miller

[-- Attachment #1: Type: text/plain, Size: 1090 bytes --]

Luck, Tony a écrit :
> Just pulled latest git tree from Linus and a few ia64 configurations
> (anything with CONFIG_NUMA=y) won't build.
> 
> The offending commit appears to be:
> 
>     230140cffa7feae90ad50bf259db1fa07674f3a7
> 
> Here's the error messages from the compiler:
> 
>   CC [M]  drivers/infiniband/core/cma.o
> In file included from include/net/tcp.h:35,
>                  from drivers/infiniband/core/cma.c:40:
> include/net/inet_hashtables.h: In function `inet_ehash_locks_alloc':
> include/net/inet_hashtables.h:165: error: implicit declaration of
> function `vmalloc'
> include/net/inet_hashtables.h:165: warning: assignment makes pointer
> from integer without a cast
> include/net/inet_hashtables.h: In function `inet_ehash_locks_free':
> include/net/inet_hashtables.h:186: error: implicit declaration of
> function `vfree'
> make[3]: *** [drivers/infiniband/core/cma.o] Error 1
> 

Hi Tony

Seems an include is missing.

Could you please apply this patch ?

Thank you

[NET] adds a missing include <linux/vmalloc.h>

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>


[-- Attachment #2: include.patch --]
[-- Type: text/plain, Size: 372 bytes --]

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 8461cda..469216d 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -23,6 +23,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/wait.h>
+#include <linux/vmalloc.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_sock.h>

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re:
  2007-11-10  1:42 ` Eric Dumazet
@ 2007-11-11  5:18   ` David Miller
  0 siblings, 0 replies; 414+ messages in thread
From: David Miller @ 2007-11-11  5:18 UTC (permalink / raw)
  To: dada1; +Cc: tony.luck, linux-kernel

From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sat, 10 Nov 2007 02:42:42 +0100

> [NET] adds a missing include <linux/vmalloc.h>
> 
> Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

Applied, thanks Eric.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures
@ 2007-08-14 23:04 Chris Snook
  2007-08-15  6:49 ` Herbert Xu
  0 siblings, 1 reply; 414+ messages in thread
From: Chris Snook @ 2007-08-14 23:04 UTC (permalink / raw)
  To: Satyam Sharma
  Cc: Christoph Lameter, Linux Kernel Mailing List, linux-arch,
	torvalds, netdev, Andrew Morton, ak, heiko.carstens, davem,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx, rpjday,
	jesper.juhl, segher

Satyam Sharma wrote:
> 
> On Tue, 14 Aug 2007, Christoph Lameter wrote:
> 
>> On Thu, 9 Aug 2007, Chris Snook wrote:
>>
>>> This patchset makes the behavior of atomic_read uniform by removing the
>>> volatile keyword from all atomic_t and atomic64_t definitions that currently
>>> have it, and instead explicitly casts the variable as volatile in
>>> atomic_read().  This leaves little room for creative optimization by the
>>> compiler, and is in keeping with the principles behind "volatile considered
>>> harmful".
>> volatile is generally harmful even in atomic_read(). Barriers control
>> visibility and AFAICT things are fine.
> 
> Frankly, I don't see the need for this series myself either. Personal
> opinion (others may differ), but I consider "volatile" to be a sad /
> unfortunate wart in C (numerous threads on this list and on the gcc
> lists/bugzilla over the years stand testimony to this) and if we _can_
> steer clear of it, then why not -- why use this ill-defined primitive
> whose implementation has often differed over compiler versions and
> platforms? Granted, barrier() _is_ heavy-handed in that it makes the
> optimizer forget _everything_, but then somebody did post a forget()
> macro on this thread itself ...
> 
> [ BTW, why do we want the compiler to not optimize atomic_read()'s in
>   the first place? Atomic ops guarantee atomicity, which has nothing
>   to do with "volatility" -- users that expect "volatility" from
>   atomic ops are the ones who must be fixed instead, IMHO. ]

Because atomic operations are generally used for synchronization, which requires 
volatile behavior.  Most such codepaths currently use an inefficient barrier(). 
  Some forget to and we get bugs, because people assume that atomic_read() 
actually reads something, and atomic_write() actually writes something.  Worse, 
these are architecture-specific, even compiler version-specific bugs that are 
often difficult to track down.

	-- Chris

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures
  2007-08-14 23:04 [PATCH 0/24] make atomic_read() behave consistently across all architectures Chris Snook
@ 2007-08-15  6:49 ` Herbert Xu
  2007-08-15  8:18   ` Heiko Carstens
  0 siblings, 1 reply; 414+ messages in thread
From: Herbert Xu @ 2007-08-15  6:49 UTC (permalink / raw)
  To: Chris Snook
  Cc: satyam, clameter, linux-kernel, linux-arch, torvalds, netdev,
	akpm, ak, heiko.carstens, davem, schwidefsky, wensong, horms,
	wjiang, cfriesen, zlynx, rpjday, jesper.juhl, segher

Chris Snook <csnook@redhat.com> wrote:
> 
> Because atomic operations are generally used for synchronization, which requires 
> volatile behavior.  Most such codepaths currently use an inefficient barrier(). 
>  Some forget to and we get bugs, because people assume that atomic_read() 
> actually reads something, and atomic_write() actually writes something.  Worse, 
> these are architecture-specific, even compiler version-specific bugs that are 
> often difficult to track down.

I'm yet to see a single example from the current tree where
this patch series is the correct solution.  So far the only
example has been a buggy piece of code which has since been
fixed with a cpu_relax.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures
  2007-08-15  6:49 ` Herbert Xu
@ 2007-08-15  8:18   ` Heiko Carstens
  2007-08-15 13:53     ` Stefan Richter
  0 siblings, 1 reply; 414+ messages in thread
From: Heiko Carstens @ 2007-08-15  8:18 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Chris Snook, satyam, clameter, linux-kernel, linux-arch,
	torvalds, netdev, akpm, ak, davem, schwidefsky, wensong, horms,
	wjiang, cfriesen, zlynx, rpjday, jesper.juhl, segher

On Wed, Aug 15, 2007 at 02:49:03PM +0800, Herbert Xu wrote:
> Chris Snook <csnook@redhat.com> wrote:
> > 
> > Because atomic operations are generally used for synchronization, which requires 
> > volatile behavior.  Most such codepaths currently use an inefficient barrier(). 
> >  Some forget to and we get bugs, because people assume that atomic_read() 
> > actually reads something, and atomic_write() actually writes something.  Worse, 
> > these are architecture-specific, even compiler version-specific bugs that are 
> > often difficult to track down.
> 
> I'm yet to see a single example from the current tree where
> this patch series is the correct solution.  So far the only
> example has been a buggy piece of code which has since been
> fixed with a cpu_relax.

Btw.: we still have

include/asm-i386/mach-es7000/mach_wakecpu.h:  while (!atomic_read(deassert));
include/asm-i386/mach-default/mach_wakecpu.h: while (!atomic_read(deassert));

Looks like they need to be fixed as well.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures
  2007-08-15  8:18   ` Heiko Carstens
@ 2007-08-15 13:53     ` Stefan Richter
  2007-08-15 14:35       ` Satyam Sharma
  0 siblings, 1 reply; 414+ messages in thread
From: Stefan Richter @ 2007-08-15 13:53 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Herbert Xu, Chris Snook, satyam, clameter, linux-kernel,
	linux-arch, torvalds, netdev, akpm, ak, davem, schwidefsky,
	wensong, horms, wjiang, cfriesen, zlynx, rpjday, jesper.juhl,
	segher

On 8/15/2007 10:18 AM, Heiko Carstens wrote:
> On Wed, Aug 15, 2007 at 02:49:03PM +0800, Herbert Xu wrote:
>> Chris Snook <csnook@redhat.com> wrote:
>> > 
>> > Because atomic operations are generally used for synchronization, which requires 
>> > volatile behavior.  Most such codepaths currently use an inefficient barrier(). 
>> >  Some forget to and we get bugs, because people assume that atomic_read() 
>> > actually reads something, and atomic_write() actually writes something.  Worse, 
>> > these are architecture-specific, even compiler version-specific bugs that are 
>> > often difficult to track down.
>> 
>> I'm yet to see a single example from the current tree where
>> this patch series is the correct solution.  So far the only
>> example has been a buggy piece of code which has since been
>> fixed with a cpu_relax.
> 
> Btw.: we still have
> 
> include/asm-i386/mach-es7000/mach_wakecpu.h:  while (!atomic_read(deassert));
> include/asm-i386/mach-default/mach_wakecpu.h: while (!atomic_read(deassert));
> 
> Looks like they need to be fixed as well.


I don't know if this here is affected:

/* drivers/ieee1394/ieee1394_core.h */
static inline unsigned int get_hpsb_generation(struct hpsb_host *host)
{
	return atomic_read(&host->generation);
}

/* drivers/ieee1394/nodemgr.c */
static int nodemgr_host_thread(void *__hi)
{
	[...]

	for (;;) {
		[... sleep until bus reset event ...]

		/* Pause for 1/4 second in 1/16 second intervals,
		 * to make sure things settle down. */
		g = get_hpsb_generation(host);
		for (i = 0; i < 4 ; i++) {
			if (msleep_interruptible(63) ||
			    kthread_should_stop())
				goto exit;

	/* Now get the generation in which the node ID's we collect
	 * are valid.  During the bus scan we will use this generation
	 * for the read transactions, so that if another reset occurs
	 * during the scan the transactions will fail instead of
	 * returning bogus data. */

			generation = get_hpsb_generation(host);

	/* If we get a reset before we are done waiting, then
	 * start the waiting over again */

			if (generation != g)
				g = generation, i = 0;
		}

		[... scan bus, using generation ...]

	}
exit:
[...]
}



-- 
Stefan Richter
-=====-=-=== =--- -====
http://arcgraph.de/sr/

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures
  2007-08-15 13:53     ` Stefan Richter
@ 2007-08-15 14:35       ` Satyam Sharma
  2007-08-15 14:52         ` Herbert Xu
  0 siblings, 1 reply; 414+ messages in thread
From: Satyam Sharma @ 2007-08-15 14:35 UTC (permalink / raw)
  To: Stefan Richter
  Cc: Heiko Carstens, Herbert Xu, Chris Snook, clameter,
	Linux Kernel Mailing List, linux-arch, Linus Torvalds, netdev,
	Andrew Morton, ak, davem, schwidefsky, wensong, horms, wjiang,
	cfriesen, zlynx, rpjday, jesper.juhl, segher

Hi Stefan,

On Wed, 15 Aug 2007, Stefan Richter wrote:

> On 8/15/2007 10:18 AM, Heiko Carstens wrote:
> > On Wed, Aug 15, 2007 at 02:49:03PM +0800, Herbert Xu wrote:
> >> Chris Snook <csnook@redhat.com> wrote:
> >> > 
> >> > Because atomic operations are generally used for synchronization, which requires 
> >> > volatile behavior.  Most such codepaths currently use an inefficient barrier(). 
> >> >  Some forget to and we get bugs, because people assume that atomic_read() 
> >> > actually reads something, and atomic_write() actually writes something.  Worse, 
> >> > these are architecture-specific, even compiler version-specific bugs that are 
> >> > often difficult to track down.
> >> 
> >> I'm yet to see a single example from the current tree where
> >> this patch series is the correct solution.  So far the only
> >> example has been a buggy piece of code which has since been
> >> fixed with a cpu_relax.
> > 
> > Btw.: we still have
> > 
> > include/asm-i386/mach-es7000/mach_wakecpu.h:  while (!atomic_read(deassert));
> > include/asm-i386/mach-default/mach_wakecpu.h: while (!atomic_read(deassert));
> > 
> > Looks like they need to be fixed as well.
> 
> 
> I don't know if this here is affected:

Yes, I think it is. You're clearly expecting the read to actually happen
when you call get_hpsb_generation(). It's clearly not a busy-loop, so
cpu_relax() sounds pointless / wrong solution for this case, so I'm now
somewhat beginning to appreciate the motivation behind this series :-)

But as I said, there are ways to achieve the same goals of this series
without using "volatile".

I think I'll submit a RFC/patch or two on this myself (will also fix
the code pieces listed here).

> /* drivers/ieee1394/ieee1394_core.h */
> static inline unsigned int get_hpsb_generation(struct hpsb_host *host)
> {
> 	return atomic_read(&host->generation);
> }
> 
> /* drivers/ieee1394/nodemgr.c */
> static int nodemgr_host_thread(void *__hi)
> {
> 	[...]
> 
> 	for (;;) {
> 		[... sleep until bus reset event ...]
> 
> 		/* Pause for 1/4 second in 1/16 second intervals,
> 		 * to make sure things settle down. */
> 		g = get_hpsb_generation(host);
> 		for (i = 0; i < 4 ; i++) {
> 			if (msleep_interruptible(63) ||
> 			    kthread_should_stop())
> 				goto exit;

Totally unrelated, but this looks weird. IMHO you actually wanted:

	msleep_interruptible(63);
	if (kthread_should_stop())
		goto exit;

here, didn't you? Otherwise the thread will exit even when
kthread_should_stop() != TRUE (just because it received a signal),
and it is not good for a kthread to exit on its own if it uses
kthread_should_stop() or if some other piece of kernel code could
eventually call kthread_stop(tsk) on it.

Ok, probably the thread will never receive a signal in the first
place because it's spawned off kthreadd which ignores all signals
beforehand, but still ...

[PATCH] ieee1394: Fix kthread stopping in nodemgr_host_thread

The nodemgr host thread can exit on its own even when kthread_should_stop
is not true, on receiving a signal (might never happen in practice, as
it ignores signals). But considering kthread_stop() must not be mixed with
kthreads that can exit on their own, I think changing the code like this
is clearer. This change means the thread can cut its sleep short when
it receives a signal, but looking at the surrounding code, that sounds okay
(and again, it might never actually receive a signal in practice).

Signed-off-by: Satyam Sharma <satyam@infradead.org>

---

 drivers/ieee1394/nodemgr.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c
index 2ffd534..981a7da 100644
--- a/drivers/ieee1394/nodemgr.c
+++ b/drivers/ieee1394/nodemgr.c
@@ -1721,7 +1721,8 @@ static int nodemgr_host_thread(void *__hi)
 		 * to make sure things settle down. */
 		g = get_hpsb_generation(host);
 		for (i = 0; i < 4 ; i++) {
-			if (msleep_interruptible(63) || kthread_should_stop())
+			msleep_interruptible(63);
+			if (kthread_should_stop())
 				goto exit;

 			/* Now get the generation in which the node ID's we collect

^ permalink raw reply related	[flat|nested] 414+ messages in thread

* Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures
  2007-08-15 14:35       ` Satyam Sharma
@ 2007-08-15 14:52         ` Herbert Xu
  2007-08-15 16:09           ` Stefan Richter
  0 siblings, 1 reply; 414+ messages in thread
From: Herbert Xu @ 2007-08-15 14:52 UTC (permalink / raw)
  To: Satyam Sharma
  Cc: Stefan Richter, Heiko Carstens, Chris Snook, clameter,
	Linux Kernel Mailing List, linux-arch, Linus Torvalds, netdev,
	Andrew Morton, ak, davem, schwidefsky, wensong, horms, wjiang,
	cfriesen, zlynx, rpjday, jesper.juhl, segher

On Wed, Aug 15, 2007 at 08:05:38PM +0530, Satyam Sharma wrote:
>
> > I don't know if this here is affected:
> 
> Yes, I think it is. You're clearly expecting the read to actually happen
> when you call get_hpsb_generation(). It's clearly not a busy-loop, so
> cpu_relax() sounds pointless / wrong solution for this case, so I'm now
> somewhat beginning to appreciate the motivation behind this series :-)

Nope, we're calling schedule which is a rather heavy-weight
barrier.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures
  2007-08-15 14:52         ` Herbert Xu
@ 2007-08-15 16:09           ` Stefan Richter
  2007-08-15 16:27             ` Paul E. McKenney
  0 siblings, 1 reply; 414+ messages in thread
From: Stefan Richter @ 2007-08-15 16:09 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Satyam Sharma, Heiko Carstens, Chris Snook, clameter,
	Linux Kernel Mailing List, linux-arch, Linus Torvalds, netdev,
	Andrew Morton, ak, davem, schwidefsky, wensong, horms, wjiang,
	cfriesen, zlynx, rpjday, jesper.juhl, segher

Herbert Xu wrote:
> On Wed, Aug 15, 2007 at 08:05:38PM +0530, Satyam Sharma wrote:
>>> I don't know if this here is affected:

[...something like]
	b = atomic_read(a);
	for (i = 0; i < 4; i++) {
		msleep_interruptible(63);
		c = atomic_read(a);
		if (c != b) {
			b = c;
			i = 0;
		}
	}

> Nope, we're calling schedule which is a rather heavy-weight
> barrier.

How does the compiler know that msleep() has got barrier()s?
-- 
Stefan Richter
-=====-=-=== =--- -====
http://arcgraph.de/sr/

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures
  2007-08-15 16:09           ` Stefan Richter
@ 2007-08-15 16:27             ` Paul E. McKenney
  2007-08-15 18:31               ` Segher Boessenkool
  0 siblings, 1 reply; 414+ messages in thread
From: Paul E. McKenney @ 2007-08-15 16:27 UTC (permalink / raw)
  To: Stefan Richter
  Cc: Herbert Xu, Satyam Sharma, Heiko Carstens, Chris Snook, clameter,
	Linux Kernel Mailing List, linux-arch, Linus Torvalds, netdev,
	Andrew Morton, ak, davem, schwidefsky, wensong, horms, wjiang,
	cfriesen, zlynx, rpjday, jesper.juhl, segher

On Wed, Aug 15, 2007 at 06:09:35PM +0200, Stefan Richter wrote:
> Herbert Xu wrote:
> > On Wed, Aug 15, 2007 at 08:05:38PM +0530, Satyam Sharma wrote:
> >>> I don't know if this here is affected:
> 
> [...something like]
> 	b = atomic_read(a);
> 	for (i = 0; i < 4; i++) {
> 		msleep_interruptible(63);
> 		c = atomic_read(a);
> 		if (c != b) {
> 			b = c;
> 			i = 0;
> 		}
> 	}
> 
> > Nope, we're calling schedule which is a rather heavy-weight
> > barrier.
> 
> How does the compiler know that msleep() has got barrier()s?

Because msleep_interruptible() is in a separate compilation unit,
the compiler has to assume that it might modify any arbitrary global.
In many cases, the compiler also has to assume that msleep_interruptible()
might call back into a function in the current compilation unit, thus
possibly modifying global static variables.

						Thanx, Paul

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures
  2007-08-15 16:27             ` Paul E. McKenney
@ 2007-08-15 18:31               ` Segher Boessenkool
  2007-08-15 18:57                 ` Paul E. McKenney
  0 siblings, 1 reply; 414+ messages in thread
From: Segher Boessenkool @ 2007-08-15 18:31 UTC (permalink / raw)
  To: paulmck
  Cc: horms, Stefan Richter, Satyam Sharma, Linux Kernel Mailing List,
	rpjday, netdev, ak, cfriesen, Heiko Carstens, jesper.juhl,
	linux-arch, Andrew Morton, zlynx, clameter, schwidefsky,
	Chris Snook, Herbert Xu, davem, Linus Torvalds, wensong, wjiang

>> How does the compiler know that msleep() has got barrier()s?
>
> Because msleep_interruptible() is in a separate compilation unit,
> the compiler has to assume that it might modify any arbitrary global.

No; compilation units have nothing to do with it, GCC can optimise
across compilation unit boundaries just fine, if you tell it to
compile more than one compilation unit at once.

What you probably mean is that the compiler has to assume any code
it cannot currently see can do anything (insofar as allowed by the
relevant standards etc.)

> In many cases, the compiler also has to assume that 
> msleep_interruptible()
> might call back into a function in the current compilation unit, thus
> possibly modifying global static variables.

It most often is smart enough to see what compilation-unit-local
variables might be modified that way, though :-)


Segher


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures
  2007-08-15 18:31               ` Segher Boessenkool
@ 2007-08-15 18:57                 ` Paul E. McKenney
  2007-08-15 19:54                   ` Satyam Sharma
  0 siblings, 1 reply; 414+ messages in thread
From: Paul E. McKenney @ 2007-08-15 18:57 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: horms, Stefan Richter, Satyam Sharma, Linux Kernel Mailing List,
	rpjday, netdev, ak, cfriesen, Heiko Carstens, jesper.juhl,
	linux-arch, Andrew Morton, zlynx, clameter, schwidefsky,
	Chris Snook, Herbert Xu, davem, Linus Torvalds, wensong, wjiang

On Wed, Aug 15, 2007 at 08:31:25PM +0200, Segher Boessenkool wrote:
> >>How does the compiler know that msleep() has got barrier()s?
> >
> >Because msleep_interruptible() is in a separate compilation unit,
> >the compiler has to assume that it might modify any arbitrary global.
> 
> No; compilation units have nothing to do with it, GCC can optimise
> across compilation unit boundaries just fine, if you tell it to
> compile more than one compilation unit at once.

Last I checked, the Linux kernel build system did compile each .c file
as a separate compilation unit.

> What you probably mean is that the compiler has to assume any code
> it cannot currently see can do anything (insofar as allowed by the
> relevant standards etc.)

Indeed.

> >In many cases, the compiler also has to assume that 
> >msleep_interruptible()
> >might call back into a function in the current compilation unit, thus
> >possibly modifying global static variables.
> 
> It most often is smart enough to see what compilation-unit-local
> variables might be modified that way, though :-)

Yep.  For example, if it knows the current value of a given such local
variable, and if all code paths that would change some other variable
cannot be reached given that current value of the first variable.
At least given that gcc doesn't know about multiple threads of execution!

							Thanx, Paul

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures
  2007-08-15 18:57                 ` Paul E. McKenney
@ 2007-08-15 19:54                   ` Satyam Sharma
  2007-08-15 20:47                     ` Segher Boessenkool
  0 siblings, 1 reply; 414+ messages in thread
From: Satyam Sharma @ 2007-08-15 19:54 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Segher Boessenkool, horms, Stefan Richter,
	Linux Kernel Mailing List, rpjday, netdev, ak, cfriesen,
	Heiko Carstens, jesper.juhl, linux-arch, Andrew Morton, zlynx,
	clameter, schwidefsky, Chris Snook, Herbert Xu, davem,
	Linus Torvalds, wensong, wjiang

[ The Cc: list scares me. Should probably trim it. ]


On Wed, 15 Aug 2007, Paul E. McKenney wrote:

> On Wed, Aug 15, 2007 at 08:31:25PM +0200, Segher Boessenkool wrote:
> > >>How does the compiler know that msleep() has got barrier()s?
> > >
> > >Because msleep_interruptible() is in a separate compilation unit,
> > >the compiler has to assume that it might modify any arbitrary global.
> > 
> > No; compilation units have nothing to do with it, GCC can optimise
> > across compilation unit boundaries just fine, if you tell it to
> > compile more than one compilation unit at once.
> 
> Last I checked, the Linux kernel build system did compile each .c file
> as a separate compilation unit.
> 
> > What you probably mean is that the compiler has to assume any code
> > it cannot currently see can do anything (insofar as allowed by the
> > relevant standards etc.)

I think this was just terminology confusion here again. Isn't "any code
that it cannot currently see" the same as "another compilation unit",
and wouldn't the "compilation unit" itself expand if we ask gcc to
compile more than one unit at once? Or is there some more specific
"definition" for "compilation unit" (in gcc lingo, possibly?)

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures
  2007-08-15 19:54                   ` Satyam Sharma
@ 2007-08-15 20:47                     ` Segher Boessenkool
  2007-08-16  0:36                       ` Satyam Sharma
  0 siblings, 1 reply; 414+ messages in thread
From: Segher Boessenkool @ 2007-08-15 20:47 UTC (permalink / raw)
  To: Satyam Sharma
  Cc: horms, Stefan Richter, Linux Kernel Mailing List,
	Paul E. McKenney, ak, netdev, cfriesen, Heiko Carstens, rpjday,
	jesper.juhl, linux-arch, Andrew Morton, zlynx, clameter,
	schwidefsky, Chris Snook, Herbert Xu, davem, Linus Torvalds,
	wensong, wjiang

>>> What you probably mean is that the compiler has to assume any code
>>> it cannot currently see can do anything (insofar as allowed by the
>>> relevant standards etc.)
>
> I think this was just terminology confusion here again. Isn't "any code
> that it cannot currently see" the same as "another compilation unit",

It is not; try  gcc -combine  or the upcoming link-time optimisation
stuff, for example.

> and wouldn't the "compilation unit" itself expand if we ask gcc to
> compile more than one unit at once? Or is there some more specific
> "definition" for "compilation unit" (in gcc lingo, possibly?)

"compilation unit" is a C standard term.  It typically boils down
to "single .c file".


Segher


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
  2007-08-15 20:47                     ` Segher Boessenkool
@ 2007-08-16  0:36                       ` Satyam Sharma
  2007-08-16  1:38                         ` Segher Boessenkool
  0 siblings, 1 reply; 414+ messages in thread
From: Satyam Sharma @ 2007-08-16  0:36 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: horms, Stefan Richter, Linux Kernel Mailing List,
	Paul E. McKenney, ak, netdev, cfriesen, Heiko Carstens, rpjday,
	jesper.juhl, linux-arch, Andrew Morton, zlynx, clameter,
	schwidefsky, Chris Snook, Herbert Xu, davem, Linus Torvalds,
	wensong, wjiang

On Wed, 15 Aug 2007, Segher Boessenkool wrote:

> > > > What you probably mean is that the compiler has to assume any code
> > > > it cannot currently see can do anything (insofar as allowed by the
> > > > relevant standards etc.)
> > 
> > I think this was just terminology confusion here again. Isn't "any code
> > that it cannot currently see" the same as "another compilation unit",
> 
> It is not; try  gcc -combine  or the upcoming link-time optimisation
> stuff, for example.
> 
> > and wouldn't the "compilation unit" itself expand if we ask gcc to
> > compile more than one unit at once? Or is there some more specific
> > "definition" for "compilation unit" (in gcc lingo, possibly?)
> 
> "compilation unit" is a C standard term.  It typically boils down
> to "single .c file".

As you mentioned later, "single .c file with all the other files (headers
or other .c files) that it pulls in via #include" is actually "translation
unit", both in the C standard as well as gcc docs. "Compilation unit"
doesn't seem to be nearly as standard a term, though in most places it
is indeed meant to be same as "translation unit", but with the new gcc
inter-module-analysis stuff that you referred to above, I suspect one may
reasonably want to call a "compilation unit" as all that the compiler sees
at a given instant.

BTW I did some auditing (only inside include/asm-{i386,x86_64}/ and
arch/{i386,x86_64}/) and found a couple more callsites that don't use
cpu_relax():

arch/i386/kernel/crash.c:101
arch/x86_64/kernel/crash.c:97

that are:

	while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
		mdelay(1);
		msecs--;
	}

where mdelay() becomes __const_udelay() which happens to be in another
translation unit (arch/i386/lib/delay.c) and hence saves this callsite
from being a bug :-)

Curiously, __const_udelay() is still marked as "inline" where it is
implemented in lib/delay.c which is weird, considering it won't ever
be inlined, would it? With the kernel presently being compiled one
translation unit at a time, I don't see how the implementation would
be visible to any callsite out there to be able to inline it.

Satyam

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2007-08-16  0:36                       ` Satyam Sharma
@ 2007-08-16  1:38                         ` Segher Boessenkool
  0 siblings, 0 replies; 414+ messages in thread
From: Segher Boessenkool @ 2007-08-16  1:38 UTC (permalink / raw)
  To: Satyam Sharma
  Cc: horms, Stefan Richter, Linux Kernel Mailing List,
	Paul E. McKenney, ak, netdev, cfriesen, Heiko Carstens, rpjday,
	jesper.juhl, linux-arch, Andrew Morton, zlynx, clameter,
	schwidefsky, Chris Snook, Herbert Xu, davem, Linus Torvalds,
	wensong, wjiang

>> "compilation unit" is a C standard term.  It typically boils down
>> to "single .c file".
>
> As you mentioned later, "single .c file with all the other files 
> (headers
> or other .c files) that it pulls in via #include" is actually 
> "translation
> unit", both in the C standard as well as gcc docs.

Yeah.  "single .c file after preprocessing".  Same thing :-)

> "Compilation unit"
> doesn't seem to be nearly as standard a term, though in most places it
> is indeed meant to be same as "translation unit", but with the new gcc
> inter-module-analysis stuff that you referred to above, I suspect one 
> may
> reasonably want to call a "compilation unit" as all that the compiler 
> sees
> at a given instant.

That would be a bit confusing, would it not?  They'd better find
some better name for that if they want to name it at all (remember,
none of these optimisations should have any effect on the semantics
of the program, you just get fewer .o files etc.).


Segher


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2007-08-07 16:34 Brian J. Murrell
  2007-08-09 20:33 ` Mark Lord
  0 siblings, 1 reply; 414+ messages in thread
From: Brian J. Murrell @ 2007-08-07 16:34 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 11470 bytes --]

I am using Ubuntu Gutsy, which is the in-development branch heading for
their next stable release.

I have noticed that since some kernel release post-2.6.20 I have been
unable to mount my /boot partition:

$ sudo strace -f mount /dev/hda1 /mnt/foo
execve("/bin/mount", ["mount", "/dev/hda1", "/mnt/foo"], [/* 41 vars*/])= 0
brk(0)                                  = 0x8062000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
mmap2(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7fbc000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY)      = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=91976, ...}) = 0
mmap2(NULL, 91976, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7fa5000
close(4)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/tls/i686/cmov/libc.so.6", O_RDONLY) = 4
read(4, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\260a\1"..., 512) = 512
fstat64(4, {st_mode=S_IFREG|0644, st_size=1339816, ...}) = 0
mmap2(NULL, 1349136, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 4, 0) = 0xb7e5b000
mmap2(0xb7f9f000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 4, 0x143) = 0xb7f9f000
mmap2(0xb7fa2000, 9744, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7fa2000
close(4)                                = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7e5a000
set_thread_area({entry_number:-1 -> 6, base_addr:0xb7e5a6b0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}) = 0
mprotect(0xb7f9f000, 4096, PROT_READ)   = 0
munmap(0xb7fa5000, 91976)               = 0
brk(0)                                  = 0x8062000
brk(0x8083000)                          = 0x8083000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/locale.alias", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=2586, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7fbb000
read(4, "# Locale name alias data base.\n#"..., 1024) = 1024
read(4, "ies are case independent.\n\n# Not"..., 1024) = 1024
read(4, ".euc \tko_KR.eucKR\nko_KR\t\tko_KR.e"..., 1024) = 538
read(4, "", 1024)                       = 0
close(4)                                = 0
munmap(0xb7fbb000, 4096)                = 0
open("/usr/lib/locale/en_CA.UTF-8/LC_IDENTIFICATION", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/lib/locale/en_CA.utf8/LC_IDENTIFICATION", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=363, ...}) = 0
mmap2(NULL, 363, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7fbb000
close(4)                                = 0
open("/usr/lib/gconv/gconv-modules.cache", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=25486, ...}) = 0
mmap2(NULL, 25486, PROT_READ, MAP_SHARED, 4, 0) = 0xb7fb4000
close(4)                                = 0
open("/usr/lib/locale/en_CA.UTF-8/LC_MEASUREMENT", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/lib/locale/en_CA.utf8/LC_MEASUREMENT", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=23, ...}) = 0
mmap2(NULL, 23, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7fb3000
close(4)                                = 0
open("/usr/lib/locale/en_CA.UTF-8/LC_TELEPHONE", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/lib/locale/en_CA.utf8/LC_TELEPHONE", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=51, ...}) = 0
mmap2(NULL, 51, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7fb2000
close(4)                                = 0
open("/usr/lib/locale/en_CA.UTF-8/LC_ADDRESS", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/lib/locale/en_CA.utf8/LC_ADDRESS", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=127, ...}) = 0
mmap2(NULL, 127, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7fb1000
close(4)                                = 0
open("/usr/lib/locale/en_CA.UTF-8/LC_NAME", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/lib/locale/en_CA.utf8/LC_NAME", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=62, ...}) = 0
mmap2(NULL, 62, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7fb0000
close(4)                                = 0
open("/usr/lib/locale/en_CA.UTF-8/LC_PAPER", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/lib/locale/en_CA.utf8/LC_PAPER", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=34, ...}) = 0
mmap2(NULL, 34, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7faf000
close(4)                                = 0
open("/usr/lib/locale/en_CA.UTF-8/LC_MESSAGES", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/lib/locale/en_CA.utf8/LC_MESSAGES", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
close(4)                                = 0
open("/usr/lib/locale/en_CA.utf8/LC_MESSAGES/SYS_LC_MESSAGES", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=54, ...}) = 0
mmap2(NULL, 54, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7fae000
close(4)                                = 0
open("/usr/lib/locale/en_CA.UTF-8/LC_MONETARY", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/lib/locale/en_CA.utf8/LC_MONETARY", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=286, ...}) = 0
mmap2(NULL, 286, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7fad000
close(4)                                = 0
open("/usr/lib/locale/en_CA.UTF-8/LC_COLLATE", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/lib/locale/en_CA.utf8/LC_COLLATE", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=880094, ...}) = 0
mmap2(NULL, 880094, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7d83000
close(4)                                = 0
open("/usr/lib/locale/en_CA.UTF-8/LC_TIME", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/lib/locale/en_CA.utf8/LC_TIME", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=2451, ...}) = 0
mmap2(NULL, 2451, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7fac000
close(4)                                = 0
open("/usr/lib/locale/en_CA.UTF-8/LC_NUMERIC", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/lib/locale/en_CA.utf8/LC_NUMERIC", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=54, ...}) = 0
mmap2(NULL, 54, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7fab000
close(4)                                = 0
open("/usr/lib/locale/en_CA.UTF-8/LC_CTYPE", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/lib/locale/en_CA.utf8/LC_CTYPE", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=238336, ...}) = 0
mmap2(NULL, 238336, PROT_READ, MAP_PRIVATE, 4, 0) = 0xb7d48000
close(4)                                = 0
umask(022)                              = 022
open("/dev/null", O_RDWR|O_LARGEFILE)   = 4
close(4)                                = 0
getuid32()                              = 0
geteuid32()                             = 0
lstat64("/etc/mtab", {st_mode=S_IFREG|0644, st_size=1136, ...}) = 0
stat64("/dev/hda1", {st_mode=S_IFBLK|0660, st_rdev=makedev(3, 1), ...}) = 0
rt_sigprocmask(SIG_BLOCK, ~[TRAP SEGV RTMIN RT_1], NULL, 8) = 0
open("/dev/hda1", O_RDONLY|O_LARGEFILE) = 4
ioctl(4, BLKGETSIZE64, 0xbf80f540)      = 0
_llseek(4, 41025536, [41025536], SEEK_SET) = 0
read(4, "#=\353\177\374A\305\366d\332&\230\373\222\\\353\3331\r"..., 2048) = 2048
brk(0x80aa000)                          = 0x80aa000
_llseek(4, 0, [0], SEEK_SET)            = 0
read(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 2048) = 2048
_llseek(4, 0, [0], SEEK_SET)            = 0
read(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 6144) = 6144
_llseek(4, 41093632, [41093632], SEEK_SET) = 0
read(4, "e\262\321\332\273j\242\335\317*\nlb\237T\237\212\236\247"..., 512) = 512
_llseek(4, 41093120, [41093120], SEEK_SET) = 0
read(4, ">\270\20XK\177\307\233\334j\315\357ac\235\360\225\276\234"..., 512) = 512
_llseek(4, 41093632, [41093632], SEEK_SET) = 0
read(4, "e\262\321\332\273j\242\335\317*\nlb\237T\237\212\236\247"..., 512) = 512
_llseek(4, 41093120, [41093120], SEEK_SET) = 0
read(4, ">\270\20XK\177\307\233\334j\315\357ac\235\360\225\276\234"..., 512) = 512
_llseek(4, 41061888, [41061888], SEEK_SET) = 0
read(4, "\327\312Z\320s/\201\322\241\274\216\374*\263\1\7\\\220"..., 512) = 512
_llseek(4, 40963584, [40963584], SEEK_SET) = 0
read(4, "ODUX\237\253{I\24\216\372\204\213\3525\316\20\341\274\327"..., 512) = 512
_llseek(4, 40963072, [40963072], SEEK_SET) = 0
read(4, ":<\217\6\371F\16M\267f\260\265?uH\3656gNJ\v\30\253\350"..., 512) = 512
_llseek(4, 41085952, [41085952], SEEK_SET) = 0
read(4, "~\360\373F>\247\346bp\266gA\231\373\24\v0\307\37\222G$"..., 512) = 512
_llseek(4, 40889856, [40889856], SEEK_SET) = 0
read(4, "KD\276\330\26\36\266U\373\332\255=\3440\22c\323\276\266"..., 512) = 512
_llseek(4, 41088512, [41088512], SEEK_SET) = 0
read(4, "o\331\251yn\365\25669S!} \1\303\334\31\264K\27A)p\341s"..., 512) = 512
_llseek(4, 41093632, [41093632], SEEK_SET) = 0
read(4, "e\262\321\332\273j\242\335\317*\nlb\237T\237\212\236\247"..., 512) = 512
_llseek(4, 0, [0], SEEK_SET)            = 0
read(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
_llseek(4, 0, [0], SEEK_SET)            = 0
read(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384) = 16384
brk(0x8099000)                          = 0x8099000
brk(0x8089000)                          = 0x8089000
close(4)                                = 0
stat64("/sbin/mount.ext3", 0xbf80f484)  = -1 ENOENT (No such file or directory)
mount("/dev/hda1", "/mnt/foo", "ext3", MS_MGC_VAL, NULL) = -1 EBUSY (Device or resource busy)
rt_sigprocmask(SIG_UNBLOCK, ~[TRAP SEGV RTMIN RT_1], NULL, 8) = 0
write(2, "mount: /dev/hda1 already mounted"..., 50mount: /dev/hda1 already mounted or /mnt/foo busy) = 50
umask(077)                              = 022
open("/etc/mtab", O_RDONLY|O_LARGEFILE) = 4
umask(022)                              = 077
fstat64(4, {st_mode=S_IFREG|0644, st_size=1136, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7d47000
read(4, "/dev/dm-20 / ext3 rw,errors=remo"..., 1024) = 1024
read(4, "ib/nfs/rpc_pipefs rpc_pipefs rw "..., 1024) = 112
read(4, "", 1024)                       = 0
close(4)                                = 0
munmap(0xb7d47000, 4096)                = 0
exit_group(32)                          = ?
Process 27016 detached

All other block devices (all LVM LVs) seem to mount with no problems.

Certainly I leave open the possibility of pilot error (by either me or
Ubuntu) but I don't know how to even see why/where/what could be going
wrong.

/proc/mounts does not show the device mounted:

$ grep hda /proc/mounts
$ 

and fuser and lsof don't show anything using the mount point:

$ fuser /mnt/foo
$ sudo lsof | grep /mnt/foo
$ 

Nor does lsof show /dev/hda (device 3,1) in use:

$ sudo lsof | grep -e hda -e 3,1
$ 

So I'm kind of perplexed as to what to try next in order to debug
this.

Any ideas?  Thanx in advance.

b.

-- 
A day in the yard with my son is just like a day at work.  He goes
hunting around for stuff and brings it back to me and says: "Hey Dad,
look what I found.  The money is for me and the screw is for you."

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2007-08-07 16:34 Brian J. Murrell
@ 2007-08-09 20:33 ` Mark Lord
  2007-08-09 21:04   ` Re: Brian J. Murrell
  0 siblings, 1 reply; 414+ messages in thread
From: Mark Lord @ 2007-08-09 20:33 UTC (permalink / raw)
  To: Brian J. Murrell; +Cc: linux-kernel

Brian J. Murrell wrote:
> I am using Ubuntu Gutsy, which is the in-development branch heading for
> their next stable release.
> 
> I have noticed that since some kernel release post-2.6.20 I have been
> unable to mount my /boot partition:
> 
> $ sudo strace -f mount /dev/hda1 /mnt/foo
> execve("/bin/mount", ["mount", "/dev/hda1", "/mnt/foo"], [/* 41 vars*/])= 0
...                             = 0
> mount("/dev/hda1", "/mnt/foo", "ext3", MS_MGC_VAL, NULL) = -1 EBUSY (Device or resource busy)
> rt_sigprocmask(SIG_UNBLOCK, ~[TRAP SEGV RTMIN RT_1], NULL, 8) = 0
> write(2, "mount: /dev/hda1 already mounted"..., 50mount: /dev/hda1 already mounted or /mnt/foo busy) = 50
...
> All other block devices (all LVM LVs) seem to mount with no problems.

Probably something to do with either LVM or the Device Mapper having
the raw partition already "open" for some remapping purpose.
There was a note about that around here somewhere quite recently..

??


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: Re:
  2007-08-09 20:33 ` Mark Lord
@ 2007-08-09 21:04   ` Brian J. Murrell
  0 siblings, 0 replies; 414+ messages in thread
From: Brian J. Murrell @ 2007-08-09 21:04 UTC (permalink / raw)
  To: Mark Lord; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 496 bytes --]

On Thu, 2007-08-09 at 16:33 -0400, Mark Lord wrote:
> Probably something to do with either LVM or the Device Mapper having
> the raw partition already "open" for some remapping purpose.
> There was a note about that around here somewhere quite recently..

Indeed, that was it.  I posted a followup "Re: [SOLVED] problems while
mounting /boot partition" reporting such.

Thanx for the heads up though!

b.

-- 
My other computer is your Microsoft Windows server.

Brian J. Murrell

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <FC1D1B23302A22499C60C967336B2AE00186B15C@pdsmsx411.ccr.corp.intel.com>]

* Re:
       [not found] <FC1D1B23302A22499C60C967336B2AE00186B15C@pdsmsx411.ccr.corp.intel.com>
@ 2007-07-24 13:40 ` Shaohua Li
  0 siblings, 0 replies; 414+ messages in thread
From: Shaohua Li @ 2007-07-24 13:40 UTC (permalink / raw)
  To: Avi Kivity; +Cc: lkml, kvm-devel

> From: "Avi Kivity" <avi@qumranet.com>
> To: "Li, Shaohua" <shaohua.li@intel.com>
> Date: Tue, 24 Jul 2007 13:42:29 +0800
> Subject: Re: [RFC 0/8]KVM: swap out guest pages
>
>
> Shaohua Li wrote:
> > On Mon, 2007-07-23 at 18:27 +0800, Avi Kivity wrote:
> >
> >> Shaohua Li wrote:
> >>
> >>> This patch series make kvm guest pages be able to be swapped out and
> >>> dynamically allocated. Without it, all guest memory is allocated at
> >>> guest start time.
> >>>
> >>> patches are against latest git, and you need first patch Avi's
> >>>
> >> kvm-sch
> >>
> >>> integration patch
> >>>
> >>>
> >>
> (http://sourceforge.net/mailarchive/forum.php?thread_name=11841693332609-git-send-email-avi%40qumranet.com&forum_name=kvm-devel
> ).
> >>
> >>> Patch is quite stable in my test. With the patch, I can run a 256M
> >>> memory guest in a 300M memory host.
> >>>
> >> What about the opposite?
> >>
> >>
> >>> If guest is idle, the memory it used
> >>> can be less than 10M. I did a simple performance test (measure
> >>>
> >> kernel
> >>
> >>> build time in guest), if there is few swap, the performance w/wo the
> >>> patch difference isn't significent. If you have better measurement
> >>> approach, please let me try.
> >>>
> >>> Unresolved issue:
> >>> 1. swapoff doesn't work, we need a hook.
> >>> 2. SMP guest might not work, as kvm doesn't support smp till now.
> >>> 3. better algorithm to select swaped out guest pages according to
> >>> guest's memory usage.
> >>> Maybe more.
> >>>
> >>> Any suggests and comments are appreciated.
> >>>
> >>>
> >> The big question is whether to have kvm's own address_space or not.
> >>
> >> Having an address_space (like your patch does) is remarkably simple,
> >> and
> >> requires few hooks from the current vm.  However using existing vmas
> >> mapped by the user has many advantages:
> >>
> >> - compatible with s390 requirements
> >> - allows the user to use hugetlbfs pages, which have a performance
> >> advantage using ept/npt (but which are unswappable)
> >> - allows the user to map a file (which can be regarded as way to
> >> specify
> >> the swap device)
> >> - better ingration with the rest of the vm
> >>
> >> I am quite torn between the simplicity of your approach and the
> >> advantages of using generic vmas.  However, s390 pretty much forces
> >> our
> >> hand.
> >>
> >> What is your opinion of extending generic vmas to back kvm guest
> >> memory?
> >>
> > several issues:
> > 1. vma is to manage usersapce address, kvm guest uses full address
> > space.
> > 2. qemu itself must use some address space.
> >
>
> My idea is to keep the current slot concept, but instead of having kvm
> allocate pages for a slot, it would call get_user_pages() for a virtual
> address range.  Userspace doesn't directly talk about vmas, just virtual
> address ranges.
all the APIs need vma/page table handling. swap also needs vma for
rmap for generic file.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2007-02-09  6:29 Priyanka Sharma
  2007-02-10  2:41 ` hackmiester (Hunter Fuller)
  0 siblings, 1 reply; 414+ messages in thread
From: Priyanka Sharma @ 2007-02-09  6:29 UTC (permalink / raw)
  To: linux-kernel

unsubscribe linux-kernel

-- 
Priyanka
202.141.151.80/~priyanka

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2007-02-09  6:29 Priyanka Sharma
@ 2007-02-10  2:41 ` hackmiester (Hunter Fuller)
  0 siblings, 0 replies; 414+ messages in thread
From: hackmiester (Hunter Fuller) @ 2007-02-10  2:41 UTC (permalink / raw)
  To: Priyanka Sharma; +Cc: linux-kernel

You're doing it wrong. Please read the bottom of your emails.
On 9 February 2007, at 00:29, Priyanka Sharma wrote:

> unsubscribe linux-kernel
>
> --  
> Priyanka
> 202.141.151.80/~priyanka
> -
> To unsubscribe from this list: send the line "unsubscribe linux- 
> kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
hackmiester (Hunter Fuller)

<badaboom> who can help me ? i'm french and i don't know irc
<Paladine> can't help you with the being french part, you are screwed  
their mate




Phone
Voice: +1 251 589 6348
Fax: Call the voice number and ask.

Email
General chat: hackmiester@hackmiester.com
Large attachments: hackmiester@gmail.com
SPS-related stuff: hfuller@stpaulsmobile.net

IM
AIM: hackmiester1337
Skype: hackmiester31337
YIM: hackm1ester
Gtalk: hackmiester
MSN: hackmiester@hackmiester.com
Xfire: hackmiester







^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2006-08-16  9:30 shane
  0 siblings, 0 replies; 414+ messages in thread
From: shane @ 2006-08-16  9:30 UTC (permalink / raw)
  To: linux-kernel

Hello,

Your mail to shane@bcs.org.uk was caught by the
SpamAssassin filter running on the bcs.org.uk mail system.

To confirm that your mail is genuine, please click this
link, or paste it into your browser:
https://bcsnet.bcs.org.uk/approve.php?c=2c9bc71e222cc1265421a982

You will not have to do this again for any mail sent
to this recipient (shane@bcs.org.uk).

Thank you.

-- 
British Computer Society - www.bcs.org.uk
Email Services from gradwell dot com - www.gradwell.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2006-05-16 10:34 Chris Boot
  2006-05-16 12:34 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 414+ messages in thread
From: Chris Boot @ 2006-05-16 10:34 UTC (permalink / raw)
  To: kernel list, netdev; +Cc: grsecurity

Hi,

I've just seen the following assertions pop out of one of my servers  
running 2.6.16.9 with grsecurity. I've searched the archives of LKML  
and netdev and I've only found posts relating to 2.6.9, after which  
some related bugs were fixed... It looks like these bugs are related  
to e1000, which is the driver I'm using. The system was running 24  
days before these appeared and it's still running absolutely fine.

May 16 09:15:12 baldrick kernel: [6442250.504000] KERNEL: assertion (! 
sk->sk_forward_alloc) failed at net/core/stream.c (283)
May 16 09:15:12 baldrick kernel: [6442250.513000] KERNEL: assertion (! 
sk->sk_forward_alloc) failed at net/ipv4/af_inet.c (150)

baldrick bootc # ethtool -k eth0
Offload parameters for eth0:
rx-checksumming: on
tx-checksumming: on
scatter-gather: on
tcp segmentation offload: on

Many thanks,
Chris

PS: I'm not subscribed to netdev.

-- 
Chris Boot
bootc@bootc.net
http://www.bootc.net/

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2006-05-16 10:34 Chris Boot
@ 2006-05-16 12:34 ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 414+ messages in thread
From: Arnaldo Carvalho de Melo @ 2006-05-16 12:34 UTC (permalink / raw)
  To: Chris Boot; +Cc: kernel list, netdev, grsecurity

On 5/16/06, Chris Boot <bootc@bootc.net> wrote:
> Hi,
>
> I've just seen the following assertions pop out of one of my servers
> running 2.6.16.9 with grsecurity. I've searched the archives of LKML
> and netdev and I've only found posts relating to 2.6.9, after which
> some related bugs were fixed... It looks like these bugs are related
> to e1000, which is the driver I'm using. The system was running 24
> days before these appeared and it's still running absolutely fine.
>
> May 16 09:15:12 baldrick kernel: [6442250.504000] KERNEL: assertion (!
> sk->sk_forward_alloc) failed at net/core/stream.c (283)
> May 16 09:15:12 baldrick kernel: [6442250.513000] KERNEL: assertion (!
> sk->sk_forward_alloc) failed at net/ipv4/af_inet.c (150)
>
> baldrick bootc # ethtool -k eth0
> Offload parameters for eth0:
> rx-checksumming: on
> tx-checksumming: on
> scatter-gather: on
> tcp segmentation offload: on

I guess just disable TSO or use latest kernel from git, it has a fix for this.

- Arnaldo

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2006-03-11  1:00 Alec
  0 siblings, 0 replies; 414+ messages in thread
From: Alec @ 2006-03-11  1:00 UTC (permalink / raw)
  To: linux-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=unknown-8bit, Size: 909 bytes --]

Hey whats up,

All the maajor barands like aR0lex, aTag uHeuer, aCart ier etc.

Dite for the Rep lica wautches, L0w   ePRi ces we 0 ffer.

Your human instinct is to be recognized.

iAffordable imitations make you look erich, ufraction of the oC0St. 

========================================================================

COPY the Address below and paste in your WEBa browser:

afraidness.justworlds.com

========================================================================

I am the woman who worked in the field .
Despite overall sluggish wage growth,.
Not physiognomy alone, nor brain alone, is worthy for the muse—I say the-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: Re:
@ 2006-03-03 14:54 Kennedy
  0 siblings, 0 replies; 414+ messages in thread
From: Kennedy @ 2006-03-03 14:54 UTC (permalink / raw)
  To: linux-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="us-ascii", Size: 653 bytes --]

How have you been,

A11          Pre#scr!pt!0ns       are filled by          L!#cens#ed        Ph@r#m@!_sts.

We have Special    0ff3rss      and some  New       Pr0ducctss.

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

copy the address below and paste in e your web browser:

bibliosoph.iyodopack.com

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

A sour Scotswoman called Hooch..
A child—with a most knowing eye..
(With my lips soothing thee, adding, I whisper, .
push the "Perform Currency Conversion" button..
Still bar you the way, and deny you life --  .

Thanks Alot,

Dudley Templeton 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2006-02-23 12:16 Norberto
  0 siblings, 0 replies; 414+ messages in thread
From: Norberto @ 2006-02-23 12:16 UTC (permalink / raw)
  To: linux-kernel

Hey,

R3F1N4NC3         your current           L0A`NNNN.

R3F1N@NC3          your          m0rrt g@@gee          at a better      Ra=
a te.

$340k for 330 pm, we r    Justi    Giving    away

******************************************************************

COPY the Address below and paste in your BROiWSER:

Aphelinus.lowestpay.net

******************************************************************

She only looked away for a few seconds to guide her fingers in peeling the=
=20.
(With my lips soothing thee, adding, I whisper,=20.
Considerest thou alone the burial of the stars?=20.
After she'd done her ledger she would clamber into her hammock and read a =
book for a couple of hours..
If you'll just tell me so --.

Thanks,
Morris Kaufmann=20

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: Re:
@ 2006-02-18 16:04 Donne
  0 siblings, 0 replies; 414+ messages in thread
From: Donne @ 2006-02-18 16:04 UTC (permalink / raw)
  To: linux-kernel

Good day sir,

Loow cost name-brand prei scri iptions shipped to your door lightning quic=
k!

Fruee delivery and unique packaging.

--------------------------------

copy the address below and paste in a your web browser:

arrestor.newtechtown.com/?zz=3Dlowcost

--------------------------------

vali d  for 24 hars.

'Well if you had to think about it now, what would you think?'.
when we were sharing rooms as bachelors in Baker Street..
So shake the very Heaven on high.
Of Bobo the Bear..
His books are all jammed in the closet,.

Goodbye,

Hugh Jackson=20

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2006-02-04 14:33 Ira Jackson 
  0 siblings, 0 replies; 414+ messages in thread
From: Ira Jackson  @ 2006-02-04 14:33 UTC (permalink / raw)
  To: linux-admin, linux-fsdevel, linux-kernel

Hi,

We cut your payment by 45%

Re aF ainancing can give you extra acash for the things you've always want=
ed to do. Like starting a home business or paying for college.

U S  $ 300 ,000       aL0 aANS        are avai lable for only $277 / month=
! WE'RE aPRACT ICALLY aGIVIaNG aAWAY MOaNEY!

---------------------------------

COPY the Addreass below and paste in your WEaB BROaWSER:

carucal.realquikx.com

----------------------------------

V a l id for 24 Hrs.

I need not start -- you're sure --.
professor and former chief of GE's Crotonville leadership development prog=
ram..
Of Life immense in passion, pulse, and power,=20.
No safety , no love, no respect was I due..
Yet shining like the sun with love's true light.=20.

Thanks Alot,
Elsa Robb

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2006-01-27 10:05 sarat
  2006-01-27 10:09 ` Arjan van de Ven
  0 siblings, 1 reply; 414+ messages in thread
From: sarat @ 2006-01-27 10:05 UTC (permalink / raw)
  To: linux-kernel

please clarify the error

insmod: error inserting 'firewall.ko': -1 Invalid module format

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2006-01-27 10:05 sarat
@ 2006-01-27 10:09 ` Arjan van de Ven
  0 siblings, 0 replies; 414+ messages in thread
From: Arjan van de Ven @ 2006-01-27 10:09 UTC (permalink / raw)
  To: sarat; +Cc: linux-kernel

On Fri, 2006-01-27 at 15:35 +0530, sarat wrote:
> 
> insmod: error inserting 'firewall.ko': -1 Invalid module format

Your module is not compatible with the kernel you are running. dmesg
or /var/log/messages will have more information on the nature of the
incompatibility.



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
@ 2005-12-31  0:27 Alistair John Strachan
  2005-12-31  0:42 ` Mark v Wolher
  0 siblings, 1 reply; 414+ messages in thread
From: Alistair John Strachan @ 2005-12-31  0:27 UTC (permalink / raw)
  To: Mark v Wolher; +Cc: Lee Revell, Folkert van Heusden, Jesper Juhl, Linux Kernel

On Saturday 31 December 2005 00:20, Mark v Wolher wrote:
[snip]
> >
> > This is good news -- you stand a better chance of achieving the stability
> > you require by eliminating variables. VMWare and NVIDIA are useful
> > softwares, and I would not deny that, but they are closed source and thus
> > any conflicts resulting from their use are not necessary LKML material
> > (however, if the interaction is generic and is as a result of a kernel
> > bug, then the maintainer would very much like to hear it).
>
> Okay, i have something interesting now, i only had the nvidia module
> loaded so my x-configuration starts up as usual. (not saying the nvidia
> module is flawless, i'm sure it still contains bugs)
> But here is the crash info, this time it was mozilla, i think this
> speaks more hehe :
>
> Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 061f0c08.
> Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 06b96000.
> Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 18000bf8.
> Dec 31 00:55:28 localhost kernel: ------------[ cut here ]------------
> Dec 31 00:55:28 localhost kernel: kernel BUG at mm/mmap.c:2214!
> Dec 31 00:55:28 localhost kernel: invalid operand: 0000 [#1]
> Dec 31 00:55:28 localhost kernel: SMP
> Dec 31 00:55:28 localhost kernel: Modules linked in: nvidia

Steady and sure progress. Now, the trace below doesn't explicitly mention any 
nvidia symbols, but this line must disappear before anybody will bother to 
read your report.

Remove the module. This does not mean unload, this means "never load in the 
first place". Then reproduce the problem. If you are successful, send a new 
email (not pinned to this thread) with a subject a la "kernel BUG at 
mm/mmap.c:2214". State that the kernel is not tainted.

At this point all you can do is wait. Good luck!

-- 
Cheers,
Alistair.

'No sense being pessimistic, it probably wouldn't work anyway.'
Third year Computer Science undergraduate.
1F2 55 South Clerk Street, Edinburgh, UK.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2005-12-31  0:27 system keeps freezing once every 24 hours / random apps crashing Alistair John Strachan
@ 2005-12-31  0:42 ` Mark v Wolher
  2005-12-31  0:51   ` Alistair John Strachan
  0 siblings, 1 reply; 414+ messages in thread
From: Mark v Wolher @ 2005-12-31  0:42 UTC (permalink / raw)
  To: Alistair John Strachan
  Cc: Lee Revell, Folkert van Heusden, Jesper Juhl, Linux Kernel

Alistair John Strachan wrote:
> On Saturday 31 December 2005 00:20, Mark v Wolher wrote:
> [snip]
> 
>>>This is good news -- you stand a better chance of achieving the stability
>>>you require by eliminating variables. VMWare and NVIDIA are useful
>>>softwares, and I would not deny that, but they are closed source and thus
>>>any conflicts resulting from their use are not necessary LKML material
>>>(however, if the interaction is generic and is as a result of a kernel
>>>bug, then the maintainer would very much like to hear it).
>>
>>Okay, i have something interesting now, i only had the nvidia module
>>loaded so my x-configuration starts up as usual. (not saying the nvidia
>>module is flawless, i'm sure it still contains bugs)
>>But here is the crash info, this time it was mozilla, i think this
>>speaks more hehe :
>>
>>Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 061f0c08.
>>Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 06b96000.
>>Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 18000bf8.
>>Dec 31 00:55:28 localhost kernel: ------------[ cut here ]------------
>>Dec 31 00:55:28 localhost kernel: kernel BUG at mm/mmap.c:2214!
>>Dec 31 00:55:28 localhost kernel: invalid operand: 0000 [#1]
>>Dec 31 00:55:28 localhost kernel: SMP
>>Dec 31 00:55:28 localhost kernel: Modules linked in: nvidia
> 
> 
> Steady and sure progress. Now, the trace below doesn't explicitly mention any 
> nvidia symbols, but this line must disappear before anybody will bother to 
> read your report.
> 
> Remove the module. This does not mean unload, this means "never load in the 
> first place". Then reproduce the problem. If you are successful, send a new 
> email (not pinned to this thread) with a subject a la "kernel BUG at 
> mm/mmap.c:2214". State that the kernel is not tainted.
> 
> At this point all you can do is wait. Good luck!
> 

Well, i guess i'll have to do that to be sure. But i must say that i did
try the nv module and de-installed the nvidia binary module. It didn't
matter, the system froze but didn't leave anything in the logs, this
time it did. Doesn't that help at all ?

I'll try again, put nv up and wait for something to happen. If someone
has in the meantime more advice or maybe even could check out of
curiosity why it says kernel BUG i'd appreciate it of course.


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2005-12-31  0:42 ` Mark v Wolher
@ 2005-12-31  0:51   ` Alistair John Strachan
  2005-12-31  0:54     ` Mark v Wolher
  0 siblings, 1 reply; 414+ messages in thread
From: Alistair John Strachan @ 2005-12-31  0:51 UTC (permalink / raw)
  To: Mark v Wolher; +Cc: Lee Revell, Folkert van Heusden, Jesper Juhl, Linux Kernel

On Saturday 31 December 2005 00:42, Mark v Wolher wrote:
> Alistair John Strachan wrote:
> > On Saturday 31 December 2005 00:20, Mark v Wolher wrote:
> > [snip]
> >
> >>>This is good news -- you stand a better chance of achieving the
> >>> stability you require by eliminating variables. VMWare and NVIDIA are
> >>> useful softwares, and I would not deny that, but they are closed source
> >>> and thus any conflicts resulting from their use are not necessary LKML
> >>> material (however, if the interaction is generic and is as a result of
> >>> a kernel bug, then the maintainer would very much like to hear it).
> >>
> >>Okay, i have something interesting now, i only had the nvidia module
> >>loaded so my x-configuration starts up as usual. (not saying the nvidia
> >>module is flawless, i'm sure it still contains bugs)
> >>But here is the crash info, this time it was mozilla, i think this
> >>speaks more hehe :
> >>
> >>Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 061f0c08.
> >>Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 06b96000.
> >>Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 18000bf8.
> >>Dec 31 00:55:28 localhost kernel: ------------[ cut here ]------------
> >>Dec 31 00:55:28 localhost kernel: kernel BUG at mm/mmap.c:2214!
> >>Dec 31 00:55:28 localhost kernel: invalid operand: 0000 [#1]
> >>Dec 31 00:55:28 localhost kernel: SMP
> >>Dec 31 00:55:28 localhost kernel: Modules linked in: nvidia
> >
> > Steady and sure progress. Now, the trace below doesn't explicitly mention
> > any nvidia symbols, but this line must disappear before anybody will
> > bother to read your report.
> >
> > Remove the module. This does not mean unload, this means "never load in
> > the first place". Then reproduce the problem. If you are successful, send
> > a new email (not pinned to this thread) with a subject a la "kernel BUG
> > at mm/mmap.c:2214". State that the kernel is not tainted.
> >
> > At this point all you can do is wait. Good luck!
>
> Well, i guess i'll have to do that to be sure. But i must say that i did
> try the nv module and de-installed the nvidia binary module. It didn't
> matter, the system froze but didn't leave anything in the logs, this
> time it did. Doesn't that help at all ?
>
> I'll try again, put nv up and wait for a something to happen. If some
> one has in the meantime more advise or maybe even could check out of
> curiousity why it says kernel BUG i'd appreciate it ofcourse.

Probably upwards of 95% of BUGs in mm/ are due to defective memory in the 
system running the kernel. However, since you claim to have run other OSes 
successfully on this configuration, I did not suggest it.

However, I would highly recommend running memtest86 at least twice on the 
machine if you cannot track down the source of the problem.

It is always worth eliminating hardware.

-- 
Cheers,
Alistair.

'No sense being pessimistic, it probably wouldn't work anyway.'
Third year Computer Science undergraduate.
1F2 55 South Clerk Street, Edinburgh, UK.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2005-12-31  0:51   ` Alistair John Strachan
@ 2005-12-31  0:54     ` Mark v Wolher
  2005-12-31 10:31       ` Mark v Wolher
  0 siblings, 1 reply; 414+ messages in thread
From: Mark v Wolher @ 2005-12-31  0:54 UTC (permalink / raw)
  To: Alistair John Strachan
  Cc: Lee Revell, Folkert van Heusden, Jesper Juhl, Linux Kernel

Alistair John Strachan wrote:
> On Saturday 31 December 2005 00:42, Mark v Wolher wrote:
> 
>>Alistair John Strachan wrote:
>>
>>>On Saturday 31 December 2005 00:20, Mark v Wolher wrote:
>>>[snip]
>>>
>>>
>>>>>This is good news -- you stand a better chance of achieving the
>>>>>stability you require by eliminating variables. VMWare and NVIDIA are
>>>>>useful softwares, and I would not deny that, but they are closed source
>>>>>and thus any conflicts resulting from their use are not necessary LKML
>>>>>material (however, if the interaction is generic and is as a result of
>>>>>a kernel bug, then the maintainer would very much like to hear it).
>>>>
>>>>Okay, i have something interesting now, i only had the nvidia module
>>>>loaded so my x-configuration starts up as usual. (not saying the nvidia
>>>>module is flawless, i'm sure it still contains bugs)
>>>>But here is the crash info, this time it was mozilla, i think this
>>>>speaks more hehe :
>>>>
>>>>Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 061f0c08.
>>>>Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 06b96000.
>>>>Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 18000bf8.
>>>>Dec 31 00:55:28 localhost kernel: ------------[ cut here ]------------
>>>>Dec 31 00:55:28 localhost kernel: kernel BUG at mm/mmap.c:2214!
>>>>Dec 31 00:55:28 localhost kernel: invalid operand: 0000 [#1]
>>>>Dec 31 00:55:28 localhost kernel: SMP
>>>>Dec 31 00:55:28 localhost kernel: Modules linked in: nvidia
>>>
>>>Steady and sure progress. Now, the trace below doesn't explicitly mention
>>>any nvidia symbols, but this line must disappear before anybody will
>>>bother to read your report.
>>>
>>>Remove the module. This does not mean unload, this means "never load in
>>>the first place". Then reproduce the problem. If you are successful, send
>>>a new email (not pinned to this thread) with a subject a la "kernel BUG
>>>at mm/mmap.c:2214". State that the kernel is not tainted.
>>>
>>>At this point all you can do is wait. Good luck!
>>
>>Well, i guess i'll have to do that to be sure. But i must say that i did
>>try the nv module and de-installed the nvidia binary module. It didn't
>>matter, the system froze but didn't leave anything in the logs, this
>>time it did. Doesn't that help at all ?
>>
>>I'll try again, put nv up and wait for a something to happen. If some
>>one has in the meantime more advise or maybe even could check out of
>>curiousity why it says kernel BUG i'd appreciate it ofcourse.
> 
> 
> Probably upwards of 95% of BUGs in mm/ are due to defective memory in the 
> system running the kernel. However, since you claim to have run other OSes 
> successfully on this configuration, I did not suggest it.
> 
> However, I would highly recommend running memtest86 at least twice on the 
> machine if you cannot track down the source of the problem.
> 
> It is always worth eliminating hardware.
> 

Indeed, i'm going to get some sleep soon but will leave memtest86 running for
the night and when i wake up then i'll see if something is reported.
It's 2x256 pc2100 ECC memory. I also expect next week monday or tuesday
new memory, which i can use to replace this memory and exclude that
either way.

Thanks !








^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2005-12-31  0:54     ` Mark v Wolher
@ 2005-12-31 10:31       ` Mark v Wolher
  2005-12-31 11:08         ` Jesper Juhl
  0 siblings, 1 reply; 414+ messages in thread
From: Mark v Wolher @ 2005-12-31 10:31 UTC (permalink / raw)
  To: Mark v Wolher
  Cc: Alistair John Strachan, Lee Revell, Folkert van Heusden,
	Jesper Juhl, Linux Kernel

Mark v Wolher wrote:
> Alistair John Strachan wrote:
> 
>>On Saturday 31 December 2005 00:42, Mark v Wolher wrote:
>>
>>
>>>Alistair John Strachan wrote:
>>>
>>>
>>>>On Saturday 31 December 2005 00:20, Mark v Wolher wrote:
>>>>[snip]
>>>>
>>>>
>>>>
>>>>>>This is good news -- you stand a better chance of achieving the
>>>>>>stability you require by eliminating variables. VMWare and NVIDIA are
>>>>>>useful softwares, and I would not deny that, but they are closed source
>>>>>>and thus any conflicts resulting from their use are not necessary LKML
>>>>>>material (however, if the interaction is generic and is as a result of
>>>>>>a kernel bug, then the maintainer would very much like to hear it).
>>>>>
>>>>>Okay, i have something interesting now, i only had the nvidia module
>>>>>loaded so my x-configuration starts up as usual. (not saying the nvidia
>>>>>module is flawless, i'm sure it still contains bugs)
>>>>>But here is the crash info, this time it was mozilla, i think this
>>>>>speaks more hehe :
>>>>>
>>>>>Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 061f0c08.
>>>>>Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 06b96000.
>>>>>Dec 31 00:55:28 localhost kernel: mm/memory.c:106: bad pgd 18000bf8.
>>>>>Dec 31 00:55:28 localhost kernel: ------------[ cut here ]------------
>>>>>Dec 31 00:55:28 localhost kernel: kernel BUG at mm/mmap.c:2214!
>>>>>Dec 31 00:55:28 localhost kernel: invalid operand: 0000 [#1]
>>>>>Dec 31 00:55:28 localhost kernel: SMP
>>>>>Dec 31 00:55:28 localhost kernel: Modules linked in: nvidia
>>>>
>>>>Steady and sure progress. Now, the trace below doesn't explicitly mention
>>>>any nvidia symbols, but this line must disappear before anybody will
>>>>bother to read your report.
>>>>
>>>>Remove the module. This does not mean unload, this means "never load in
>>>>the first place". Then reproduce the problem. If you are successful, send
>>>>a new email (not pinned to this thread) with a subject a la "kernel BUG
>>>>at mm/mmap.c:2214". State that the kernel is not tainted.
>>>>
>>>>At this point all you can do is wait. Good luck!
>>>
>>>Well, i guess i'll have to do that to be sure. But i must say that i did
>>>try the nv module and de-installed the nvidia binary module. It didn't
>>>matter, the system froze but didn't leave anything in the logs, this
>>>time it did. Doesn't that help at all ?
>>>
>>>I'll try again, put nv up and wait for a something to happen. If some
>>>one has in the meantime more advise or maybe even could check out of
>>>curiousity why it says kernel BUG i'd appreciate it ofcourse.
>>
>>
>>Probably upwards of 95% of BUGs in mm/ are due to defective memory in the 
>>system running the kernel. However, since you claim to have run other OSes 
>>successfully on this configuration, I did not suggest it.
>>
>>However, I would highly recommend running memtest86 at least twice on the 
>>machine if you cannot track down the source of the problem.
>>
>>It is always worth eliminating hardware.
>>
> 
> 
> Indeed, i'm going soon to get some sleep but leave memtest86 running for
>  the night and when i wake up then i'll see if something is reported.
> It's 2x256 pc2100 ECC memory. I also expect next week monday or tuesday
> new memory, which i can use to replace this memory and exclude that
> eitherway.
> 
> Thanks !
> 
> 
> 
>

g'morning !

the memtest86 went 40 times over the memory, no errors detected.



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2005-12-31 10:31       ` Mark v Wolher
@ 2005-12-31 11:08         ` Jesper Juhl
  2005-12-31 11:40           ` Mark v Wolher
  0 siblings, 1 reply; 414+ messages in thread
From: Jesper Juhl @ 2005-12-31 11:08 UTC (permalink / raw)
  To: Mark v Wolher
  Cc: Alistair John Strachan, Lee Revell, Folkert van Heusden, Linux Kernel

On 12/31/05, Mark v Wolher <trilight@ns666.com> wrote:
>
> g'morning !
>
> the memtest86 went 40 times over the memory, no errors detected.
>
Give memtest86+ a spin (http://www.memtest.org/) as well. memtest86 is
good, but I've found in the past that memtest86+ sometimes finds
errors that memtest86 does not, so giving both a spin for an extended
period of time is usually a good idea.
Also, make sure you enable all the tests of both tools.

--
Jesper Juhl <jesper.juhl@gmail.com>
Don't top-post  http://www.catb.org/~esr/jargon/html/T/top-post.html
Plain text mails only, please      http://www.expita.com/nomime.html

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2005-12-31 11:08         ` Jesper Juhl
@ 2005-12-31 11:40           ` Mark v Wolher
  2005-12-31 11:49             ` Jesper Juhl
  0 siblings, 1 reply; 414+ messages in thread
From: Mark v Wolher @ 2005-12-31 11:40 UTC (permalink / raw)
  To: Jesper Juhl
  Cc: Alistair John Strachan, Lee Revell, Folkert van Heusden, Linux Kernel

Jesper Juhl wrote:
> On 12/31/05, Mark v Wolher <trilight@ns666.com> wrote:
> 
>>g'morning !
>>
>>the memtest86 went 40 times over the memory, no errors detected.
>>
> 
> Give memtest86+ a spin (http://www.memtest.org/) as well. memtest86 is
> good, but I've found in the past that memtest86+ sometimes finds
> errors that memtest86 does not, so giving both a sin fo an extended
> period of time is usually a good idea.
> Also, make sure you enable all the tests of both tools.

Hi Jesper,

Oh i thought they were the same, i used memtest86+ which comes with
debian and not the "older" memtest86.

Right now i booted the kernel with nomce since one never knows with dell
machines as i saw on some redhat list. Furthermore i installed the
microcode32 utility which loaded new microcode in the cpu. So i'm now
going to continue put some good load on the system, tv on and so on, see
what happens.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2005-12-31 11:40           ` Mark v Wolher
@ 2005-12-31 11:49             ` Jesper Juhl
  2005-12-31 12:46               ` Mark v Wolher
  0 siblings, 1 reply; 414+ messages in thread
From: Jesper Juhl @ 2005-12-31 11:49 UTC (permalink / raw)
  To: Mark v Wolher
  Cc: Alistair John Strachan, Lee Revell, Folkert van Heusden, Linux Kernel

On 12/31/05, Mark v Wolher <trilight@ns666.com> wrote:
> Jesper Juhl wrote:
> > On 12/31/05, Mark v Wolher <trilight@ns666.com> wrote:
> >
> >>g'morning !
> >>
> >>the memtest86 went 40 times over the memory, no errors detected.
> >>
> >
> > Give memtest86+ a spin (http://www.memtest.org/) as well. memtest86 is
> > good, but I've found in the past that memtest86+ sometimes finds
> > errors that memtest86 does not, so giving both a sin fo an extended
> > period of time is usually a good idea.
> > Also, make sure you enable all the tests of both tools.
>
> Hi Jesper,
>
> Oh i thought they were the same, i used memtest86+ which comes with
> debian and not the "older" memtest86.
>
> Right now i booted the kernel with nomce since one never knows with dell

Suppressing MCEs (Machine Check Exceptions) is usually a really bad
idea. MCEs indicate a hardware problem, so unless it's known that
a certain MCE is reported wrongly they should *not* be ignored.

--
Jesper Juhl <jesper.juhl@gmail.com>
Don't top-post  http://www.catb.org/~esr/jargon/html/T/top-post.html
Plain text mails only, please      http://www.expita.com/nomime.html

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2005-12-31 11:49             ` Jesper Juhl
@ 2005-12-31 12:46               ` Mark v Wolher
  2005-12-31 15:18                 ` Mark v Wolher
  0 siblings, 1 reply; 414+ messages in thread
From: Mark v Wolher @ 2005-12-31 12:46 UTC (permalink / raw)
  To: Jesper Juhl
  Cc: Alistair John Strachan, Lee Revell, Folkert van Heusden, Linux Kernel

[-- Attachment #1: Type: text/plain, Size: 17421 bytes --]

Jesper Juhl wrote:
> On 12/31/05, Mark v Wolher <trilight@ns666.com> wrote:
> 
>>Jesper Juhl wrote:
>>
>>>On 12/31/05, Mark v Wolher <trilight@ns666.com> wrote:
>>>
>>>
>>>>g'morning !
>>>>
>>>>the memtest86 went 40 times over the memory, no errors detected.
>>>>
>>>
>>>Give memtest86+ a spin (http://www.memtest.org/) as well. memtest86 is
>>>good, but I've found in the past that memtest86+ sometimes finds
>>>errors that memtest86 does not, so giving both a sin fo an extended
>>>period of time is usually a good idea.
>>>Also, make sure you enable all the tests of both tools.
>>
>>Hi Jesper,
>>
>>Oh i thought they were the same, i used memtest86+ which comes with
>>debian and not the "older" memtest86.
>>
>>Right now i booted the kernel with nomce since one never knows with dell
> 
> 
> Surpressing MCE's (Machine Check Exceptions) is a really bad idea
> usually. MCE's indicate a hardware problem, so unless it's known that
> a certain MCE is reported wrongly they should *not* be ignored.

Hi Jesper,

Yes, I'd rather not disable it, but I found some reports that Dell
machines do not always follow the standard, which caused false
exceptions on them. I'll re-enable it and see if the update of the
Intel microcode made a difference. I now have only the nv module
loaded. If a crash occurs I'll open the box and remove the tvcard.

Also, I wonder: I downloaded the DSDT table from the BIOS, and when I
recompiled it with IASL from Intel it showed 7 errors, one of them
related to DMA. From what I have read, a lot of companies like Dell use
Microsoft compilers, which easily skip such errors or do not report them.

I'm pasting the DSDT errors that occurred during the recompile; who
knows, this could also help a little bit.

DSDT Table / Recompile:

Intel ACPI Component Architecture
ASL Optimizing Compiler version 20050930 [Dec 15 2005]
Copyright (C) 2000 - 2005 Intel Corporation
Supports ACPI Specification Revision 3.0

dsdt.dsl   338:         Notify (\_SB.PCI0.USB0, 0x02)
Error    1061 -        Object does not exist ^  (\_SB.PCI0.USB0)

dsdt.dsl   351:         Notify (\_SB.PCI0.USB1, 0x02)
Error    1061 -        Object does not exist ^  (\_SB.PCI0.USB1)

dsdt.dsl   364:         Notify (\_SB.PCI0.USB2, 0x02)
Error    1061 -        Object does not exist ^  (\_SB.PCI0.USB2)

dsdt.dsl   377:         Notify (\_SB.PCI0, 0x02)
Error    1061 -   Object does not exist ^  (\_SB.PCI0)

dsdt.dsl   384:         Notify (\_SB.PCI0.PCI4, 0x02)
Error    1061 -        Object does not exist ^  (\_SB.PCI0.PCI4)

dsdt.dsl   400:         Notify (\_SB.PCI0.ISA.KBD, 0x02)
Error    1061 -           Object does not exist ^  (\_SB.PCI0.ISA.KBD)

dsdt.dsl  1784:                 Device (DMA)
Error    1094 -                           ^ syntax error, unexpected
PARSEOP_DMA, expecting PARSEOP_NAMESEG or PARSEOP_NAMESTRING

ASL Input:  dsdt.dsl - 3096 lines, 93624 bytes, 515 keywords
Compilation complete. 7 Errors, 0 Warnings, 0 Remarks, 53 Optimizations


====

LSUSB:
Bus 004 Device 002: ID 0d8c:0001 C-Media Electronics, Inc.
Bus 004 Device 001: ID 0000:0000
Bus 003 Device 003: ID 051d:0002 American Power Conversion Back-UPS Pro
500/1000/1500
Bus 003 Device 002: ID 046d:c00e Logitech, Inc. Optical Mouse
Bus 003 Device 001: ID 0000:0000
Bus 002 Device 001: ID 0000:0000
Bus 001 Device 001: ID 0000:0000


=====

cat /proc/meminfo:

MemTotal:       512548 kB
MemFree:         10684 kB
Buffers:         17252 kB
Cached:         221508 kB
SwapCached:      10120 kB
Active:         355392 kB
Inactive:        49652 kB
HighTotal:           0 kB
HighFree:            0 kB
LowTotal:       512548 kB
LowFree:         10684 kB
SwapTotal:     4883680 kB
SwapFree:      4739048 kB
Dirty:             132 kB
Writeback:           0 kB
Mapped:         347756 kB
Slab:            49344 kB
CommitLimit:   5139952 kB
Committed_AS:   635544 kB
PageTables:       2108 kB
VmallocTotal:   515796 kB
VmallocUsed:     25556 kB
VmallocChunk:   486608 kB

=====

cat /proc/cpuinfo:

processor       : 0
vendor_id       : GenuineIntel
cpu family      : 15
model           : 2
model name      : Intel(R) Xeon(TM) CPU 2.40GHz
stepping        : 9
cpu MHz         : 2392.630
cache size      : 512 KB
physical id     : 0
siblings        : 2
core id         : 0
cpu cores       : 1
fdiv_bug        : no
hlt_bug         : no
f00f_bug        : no
coma_bug        : no
fpu             : yes
fpu_exception   : yes
cpuid level     : 2
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca
cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid xtpr
bogomips        : 4791.93

processor       : 1
vendor_id       : GenuineIntel
cpu family      : 15
model           : 2
model name      : Intel(R) Xeon(TM) CPU 2.40GHz
stepping        : 9
cpu MHz         : 2392.630
cache size      : 512 KB
physical id     : 0
siblings        : 2
core id         : 0
cpu cores       : 1
fdiv_bug        : no
hlt_bug         : no
f00f_bug        : no
coma_bug        : no
fpu             : yes
fpu_exception   : yes
cpuid level     : 2
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca
cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid xtpr
bogomips        : 4784.99

processor       : 2
vendor_id       : GenuineIntel
cpu family      : 15
model           : 2
model name      : Intel(R) Xeon(TM) CPU 2.40GHz
stepping        : 9
cpu MHz         : 2392.630
cache size      : 512 KB
physical id     : 3
siblings        : 2
core id         : 3
cpu cores       : 1
fdiv_bug        : no
hlt_bug         : no
f00f_bug        : no
coma_bug        : no
fpu             : yes
fpu_exception   : yes
cpuid level     : 2
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca
cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid xtpr
bogomips        : 4785.12

processor       : 3
vendor_id       : GenuineIntel
cpu family      : 15
model           : 2
model name      : Intel(R) Xeon(TM) CPU 2.40GHz
stepping        : 9
cpu MHz         : 2392.630
cache size      : 512 KB
physical id     : 3
siblings        : 2
core id         : 3
cpu cores       : 1
fdiv_bug        : no
hlt_bug         : no
f00f_bug        : no
coma_bug        : no
fpu             : yes
fpu_exception   : yes
cpuid level     : 2
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca
cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid xtpr
bogomips        : 4785.12

=====

lspci -v:
0000:00:00.0 Host bridge: Intel Corporation E7505 Memory Controller Hub
(rev 03)
        Subsystem: Dell: Unknown device 012c
        Flags: bus master, fast devsel, latency 0
        Memory at e8000000 (32-bit, prefetchable) [size=128M]
        Capabilities: [40] #09 [0104]
        Capabilities: [a0] AGP version 3.0

0000:00:01.0 PCI bridge: Intel Corporation E7505/E7205 PCI-to-AGP Bridge
(rev 03) (prog-if 00 [Normal decode])
        Flags: bus master, 66MHz, fast devsel, latency 64
        Memory at e0000000 (32-bit, prefetchable) [size=128M]
        Bus: primary=00, secondary=01, subordinate=01, sec-latency=64
        Memory behind bridge: fc000000-fdffffff
        Prefetchable memory behind bridge: f0000000-f7ffffff
        Capabilities: [60] #0e [0035]

0000:00:02.0 PCI bridge: Intel Corporation E7505 Hub Interface B
PCI-to-PCI Bridge (rev 03) (prog-if 00 [Normal decode])
        Flags: bus master, 66MHz, fast devsel, latency 64
        Bus: primary=00, secondary=02, subordinate=04, sec-latency=0
        I/O behind bridge: 0000e000-0000efff
        Memory behind bridge: fe300000-fe6fffff

0000:00:1d.0 USB Controller: Intel Corporation 82801DB/DBL/DBM
(ICH4/ICH4-L/ICH4-M) USB UHCI Controller #1 (rev 01) (prog-if 00 [UHCI])
        Subsystem: Dell: Unknown device 012c
        Flags: bus master, medium devsel, latency 0, IRQ 21
        I/O ports at ff80 [size=32]

0000:00:1d.1 USB Controller: Intel Corporation 82801DB/DBL/DBM
(ICH4/ICH4-L/ICH4-M) USB UHCI Controller #2 (rev 01) (prog-if 00 [UHCI])
        Subsystem: Dell: Unknown device 012c
        Flags: bus master, medium devsel, latency 0, IRQ 22
        I/O ports at ff60 [size=32]

0000:00:1d.2 USB Controller: Intel Corporation 82801DB/DBL/DBM
(ICH4/ICH4-L/ICH4-M) USB UHCI Controller #3 (rev 01) (prog-if 00 [UHCI])
        Subsystem: Dell: Unknown device 012c
        Flags: bus master, medium devsel, latency 0, IRQ 18
        I/O ports at ff40 [size=32]

0000:00:1d.7 USB Controller: Intel Corporation 82801DB/DBM (ICH4/ICH4-M)
USB2 EHCI Controller (rev 01) (prog-if 20 [EHCI])
        Subsystem: Dell: Unknown device 012c
        Flags: bus master, medium devsel, latency 0, IRQ 20
        Memory at fe700800 (32-bit, non-prefetchable) [size=1K]
        Capabilities: [50] Power Management version 2
        Capabilities: [58] #0a [2080]

0000:00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 81)
(prog-if 00 [Normal decode])
        Flags: bus master, fast devsel, latency 0
        Bus: primary=00, secondary=05, subordinate=05, sec-latency=32
        I/O behind bridge: 0000d000-0000dfff
        Memory behind bridge: fe100000-fe2fffff
        Prefetchable memory behind bridge: f8000000-f80fffff

0000:00:1f.0 ISA bridge: Intel Corporation 82801DB/DBL (ICH4/ICH4-L) LPC
Interface Bridge (rev 01)
        Flags: bus master, medium devsel, latency 0
0000:00:1f.1 IDE interface: Intel Corporation 82801DB (ICH4) IDE
Controller (rev 01) (prog-if 8a [Master SecP PriP])
        Subsystem: Dell: Unknown device 012c
        Flags: bus master, medium devsel, latency 0, IRQ 18
        I/O ports at <unassigned>
        I/O ports at <unassigned>
        I/O ports at <unassigned>
        I/O ports at <unassigned>
        I/O ports at ffa0 [size=16]
        Memory at 30000000 (32-bit, non-prefetchable) [size=1K]

0000:00:1f.3 SMBus: Intel Corporation 82801DB/DBL/DBM
(ICH4/ICH4-L/ICH4-M) SMBus Controller (rev 01)
        Subsystem: Dell: Unknown device 012c
        Flags: medium devsel, IRQ 4
        I/O ports at cc80 [size=32]

0000:00:1f.5 Multimedia audio controller: Intel Corporation
82801DB/DBL/DBM (ICH4/ICH4-L/ICH4-M) AC'97 Audio Controller (rev 01)
        Subsystem: Dell: Unknown device 012c
        Flags: bus master, medium devsel, latency 0, IRQ 23
        I/O ports at c800 [size=256]
        I/O ports at cc40 [size=64]
        Memory at fe700400 (32-bit, non-prefetchable) [size=512]
        Memory at fe700000 (32-bit, non-prefetchable) [size=256]
        Capabilities: [50] Power Management version 2

0000:01:00.0 VGA compatible controller: nVidia Corporation NV34GL
[Quadro FX 500/600 PCI] (rev a1) (prog-if 00 [VGA])
        Subsystem: nVidia Corporation: Unknown device 01ba
        Flags: bus master, 66MHz, medium devsel, latency 248, IRQ 21
        Memory at fc000000 (32-bit, non-prefetchable) [size=16M]
        Memory at f0000000 (32-bit, prefetchable) [size=128M]
        Expansion ROM at fd000000 [disabled] [size=128K]
        Capabilities: [60] Power Management version 2
        Capabilities: [44] AGP version 3.0

0000:02:1c.0 PIC: Intel Corporation 82870P2 P64H2 I/OxAPIC (rev 04)
(prog-if 20 [IO(X)-APIC])
        Subsystem: Dell: Unknown device 012c
        Flags: bus master, 66MHz, fast devsel, latency 0
        Memory at fe3ff000 (32-bit, non-prefetchable) [size=4K]
        Capabilities: [50] PCI-X non-bridge device.

0000:02:1d.0 PCI bridge: Intel Corporation 82870P2 P64H2 Hub PCI Bridge
(rev 04) (prog-if 00 [Normal decode])
        Flags: bus master, 66MHz, fast devsel, latency 64
        Bus: primary=02, secondary=03, subordinate=03, sec-latency=48
        I/O behind bridge: 0000e000-0000efff
        Memory behind bridge: fe500000-fe6fffff
        Capabilities: [50] PCI-X bridge device.

0000:02:1e.0 PIC: Intel Corporation 82870P2 P64H2 I/OxAPIC (rev 04)
(prog-if 20 [IO(X)-APIC])
        Subsystem: Dell: Unknown device 012c
        Flags: bus master, 66MHz, fast devsel, latency 0
        Memory at fe3fe000 (32-bit, non-prefetchable) [size=4K]
        Capabilities: [50] PCI-X non-bridge device.

0000:02:1f.0 PCI bridge: Intel Corporation 82870P2 P64H2 Hub PCI Bridge
(rev 04) (prog-if 00 [Normal decode])
        Flags: bus master, 66MHz, fast devsel, latency 64
        Bus: primary=02, secondary=04, subordinate=04, sec-latency=64
        Capabilities: [50] PCI-X bridge device.

0000:03:0d.0 Mass storage controller: Promise Technology, Inc. 20269
(rev 02) (prog-if 85)
        Subsystem: Promise Technology, Inc. Ultra133TX2
        Flags: bus master, 66MHz, slow devsel, latency 64, IRQ 19
        I/O ports at ecf8 [size=8]
        I/O ports at ecf0 [size=4]
        I/O ports at ece0 [size=8]
        I/O ports at ecd8 [size=4]
        I/O ports at ecc0 [size=16]
        Memory at fe5fc000 (32-bit, non-prefetchable) [size=16K]
        Expansion ROM at fe600000 [disabled] [size=16K]
        Capabilities: [60] Power Management version 1

0000:03:0e.0 Ethernet controller: Intel Corporation 82545EM Gigabit
Ethernet Controller (Copper) (rev 01)
        Subsystem: Dell: Unknown device 012c
        Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 16
        Memory at fe5c0000 (64-bit, non-prefetchable) [size=128K]
        I/O ports at ec80 [size=64]
        Capabilities: [dc] Power Management version 2
        Capabilities: [e4] PCI-X non-bridge device.
        Capabilities: [f0] Message Signalled Interrupts: 64bit+
Queue=0/0 Enable-

0000:05:0c.0 FireWire (IEEE 1394): Texas Instruments TSB43AB22/A
IEEE-1394a-2000 Controller (PHY/Link) (prog-if 10 [OHCI])
        Subsystem: Dell: Unknown device 012c
        Flags: bus master, medium devsel, latency 64, IRQ 4
        Memory at fe1ff800 (32-bit, non-prefetchable) [size=2K]
        Memory at fe1f8000 (32-bit, non-prefetchable) [size=16K]
        Capabilities: [44] Power Management version 2

0000:05:0d.0 Multimedia audio controller: Creative Labs SB Live! EMU10k1
(rev 07)
        Subsystem: Creative Labs SBLive! 5.1 Model SB0100
        Flags: bus master, medium devsel, latency 64, IRQ 24
        I/O ports at dce0 [size=32]
        Capabilities: [dc] Power Management version 1

0000:05:0d.1 Input device controller: Creative Labs SB Live! MIDI/Game
Port (rev 07)
        Subsystem: Creative Labs Gameport Joystick
        Flags: bus master, medium devsel, latency 64
        I/O ports at dcd8 [size=8]
        Capabilities: [dc] Power Management version 1

0000:05:0e.0 Multimedia video controller: Brooktree Corporation Bt878
Video Capture (rev 02)
        Subsystem: TERRATEC Electronic GmbH: Unknown device 1134
        Flags: bus master, medium devsel, latency 64, IRQ 17
        Memory at f80ff000 (32-bit, prefetchable) [size=4K]

0000:05:0e.1 Multimedia controller: Brooktree Corporation Bt878 Audio
Capture (rev 02)
        Subsystem: TERRATEC Electronic GmbH: Unknown device 1134
        Flags: bus master, medium devsel, latency 64, IRQ 10
        Memory at f80fe000 (32-bit, prefetchable) [size=4K]


====

ver_linux script output:
If some fields are empty or look unusual you may have an old version.
Compare to the current minimal requirements in Documentation/Changes.

Linux sigma-9 2.6.14.5 #5 SMP Fri Dec 30 19:50:12 CET 2005 i686 GNU/Linux

Gnu C                  3.3.5
Gnu make               3.80
binutils               2.15
util-linux             2.12p
mount                  2.12p
module-init-tools      3.2-pre1
e2fsprogs              1.37
reiserfsprogs          line
reiser4progs           line
PPP                    2.4.3
nfs-utils              1.0.6
Linux C Library        2.3.2
Dynamic linker (ldd)   2.3.2
Procps                 3.2.1
Net-tools              1.60
Console-tools          0.2.3
Sh-utils               5.2.1
udev                   056
Modules Loaded         nv


====

results of memtest86+ after 40 passes with all tests enabled: no errors

====

cat /proc/interrupts:
           CPU0       CPU1       CPU2       CPU3
  0:     501324     492735     492754     492100    IO-APIC-edge  timer
  1:       2555       2761       2861       2451    IO-APIC-edge  i8042
  7:          0          0          0          0    IO-APIC-edge  parport0
  8:    2369118    2386295    2363140    2356586    IO-APIC-edge  rtc
  9:          0          0          0          0   IO-APIC-level  acpi
 14:         21          0          0          0    IO-APIC-edge  ide0
 15:         13          0          0          0    IO-APIC-edge  ide1
 16:      28924          0          0          0   IO-APIC-level  eth0
 17:      97407     105474     103650     103304   IO-APIC-level  bttv0
 18:         48          4          0          7   IO-APIC-level
uhci_hcd:usb4
 19:      28880      54020      48433      23791   IO-APIC-level  ide2, ide3
 20:          6          0          1          0   IO-APIC-level
ehci_hcd:usb1
 21:     398859     319390     317707     425780   IO-APIC-level
uhci_hcd:usb2, nv
 22:     200970     244113     220837     191613   IO-APIC-level
uhci_hcd:usb3
 23:          0          0          0          0   IO-APIC-level  Intel
82801DB-ICH4
 24:       9460       9468      12491       8706   IO-APIC-level  EMU10K1
NMI:          0          0          0          0
LOC:    1978858    1979111    1979110    1979109
ERR:          0
MIS:          0


====

2.6.14.5 vanilla kernel .config file see attachment

====

I hope this gives a more complete picture of the current running setup.



[-- Attachment #2: config-31-12-05 --]
[-- Type: text/plain, Size: 35420 bytes --]

#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.14.5-grsec
# Fri Dec 30 19:44:17 2005
#
CONFIG_X86=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_MMU=y
CONFIG_UID16=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y

#
# Code maturity level options
#
CONFIG_EXPERIMENTAL=y
CONFIG_CLEAN_COMPILE=y
CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32

#
# General setup
#
CONFIG_LOCALVERSION=""
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
# CONFIG_BSD_PROCESS_ACCT_V3 is not set
CONFIG_SYSCTL=y
# CONFIG_AUDIT is not set
CONFIG_HOTPLUG=y
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
# CONFIG_CPUSETS is not set
CONFIG_INITRAMFS_SOURCE=""
# CONFIG_EMBEDDED is not set
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
CONFIG_CC_ALIGN_FUNCTIONS=0
CONFIG_CC_ALIGN_LABELS=0
CONFIG_CC_ALIGN_LOOPS=0
CONFIG_CC_ALIGN_JUMPS=0
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0

#
# Loadable module support
#
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_OBSOLETE_MODPARM=y
CONFIG_MODVERSIONS=y
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y

#
# Processor type and features
#
CONFIG_X86_PC=y
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
# CONFIG_X86_NUMAQ is not set
# CONFIG_X86_SUMMIT is not set
# CONFIG_X86_BIGSMP is not set
# CONFIG_X86_VISWS is not set
# CONFIG_X86_GENERICARCH is not set
# CONFIG_X86_ES7000 is not set
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
# CONFIG_M686 is not set
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
CONFIG_MPENTIUM4=y
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
# CONFIG_MGEODEGX1 is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_X86_GENERIC is not set
CONFIG_X86_CMPXCHG=y
CONFIG_X86_XADD=y
CONFIG_X86_L1_CACHE_SHIFT=7
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
CONFIG_X86_ALIGNMENT_16=y
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_SMP=y
CONFIG_NR_CPUS=4
CONFIG_SCHED_SMT=y
# CONFIG_PREEMPT_NONE is not set
CONFIG_PREEMPT_VOLUNTARY=y
# CONFIG_PREEMPT is not set
# CONFIG_PREEMPT_BKL is not set
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_X86_TSC=y
CONFIG_X86_MCE=y
CONFIG_X86_MCE_NONFATAL=y
CONFIG_X86_MCE_P4THERMAL=y
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
# CONFIG_X86_REBOOTFIXUPS is not set
CONFIG_MICROCODE=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y

#
# Firmware Drivers
#
# CONFIG_EDD is not set
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
CONFIG_NOHIGHMEM=y
# CONFIG_HIGHMEM4G is not set
# CONFIG_HIGHMEM64G is not set
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
# CONFIG_SPARSEMEM_MANUAL is not set
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
# CONFIG_MATH_EMULATION is not set
CONFIG_MTRR=y
# CONFIG_EFI is not set
# CONFIG_IRQBALANCE is not set
# CONFIG_REGPARM is not set
CONFIG_SECCOMP=y
# CONFIG_HZ_100 is not set
CONFIG_HZ_250=y
# CONFIG_HZ_1000 is not set
CONFIG_HZ=250
CONFIG_PHYSICAL_START=0x100000
# CONFIG_KEXEC is not set

#
# Power management options (ACPI, APM)
#
CONFIG_PM=y
# CONFIG_PM_DEBUG is not set

#
# ACPI (Advanced Configuration and Power Interface) Support
#
CONFIG_ACPI=y
# CONFIG_ACPI_AC is not set
# CONFIG_ACPI_BATTERY is not set
# CONFIG_ACPI_BUTTON is not set
# CONFIG_ACPI_VIDEO is not set
# CONFIG_ACPI_HOTKEY is not set
CONFIG_ACPI_FAN=y
CONFIG_ACPI_PROCESSOR=y
CONFIG_ACPI_THERMAL=y
# CONFIG_ACPI_ASUS is not set
# CONFIG_ACPI_IBM is not set
# CONFIG_ACPI_TOSHIBA is not set
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
CONFIG_ACPI_EC=y
CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
# CONFIG_X86_PM_TIMER is not set
CONFIG_ACPI_CONTAINER=m

#
# APM (Advanced Power Management) BIOS Support
#
# CONFIG_APM is not set

#
# CPU Frequency scaling
#
# CONFIG_CPU_FREQ is not set

#
# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
#
CONFIG_PCI=y
# CONFIG_PCI_GOBIOS is not set
# CONFIG_PCI_GOMMCONFIG is not set
# CONFIG_PCI_GODIRECT is not set
CONFIG_PCI_GOANY=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_MMCONFIG=y
# CONFIG_PCIEPORTBUS is not set
# CONFIG_PCI_MSI is not set
# CONFIG_PCI_LEGACY_PROC is not set
CONFIG_ISA_DMA_API=y
# CONFIG_ISA is not set
# CONFIG_MCA is not set
# CONFIG_SCx200 is not set
# CONFIG_HOTPLUG_CPU is not set

#
# PCCARD (PCMCIA/CardBus) support
#
# CONFIG_PCCARD is not set

#
# PCI Hotplug Support
#
# CONFIG_HOTPLUG_PCI is not set

#
# Executable file formats
#
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_AOUT=y
CONFIG_BINFMT_MISC=y

#
# Networking
#
CONFIG_NET=y

#
# Networking options
#
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
CONFIG_XFRM=y
CONFIG_XFRM_USER=y
CONFIG_NET_KEY=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
# CONFIG_IP_ADVANCED_ROUTER is not set
CONFIG_IP_FIB_HASH=y
# CONFIG_IP_PNP is not set
# CONFIG_NET_IPIP is not set
CONFIG_NET_IPGRE=y
CONFIG_NET_IPGRE_BROADCAST=y
CONFIG_IP_MROUTE=y
# CONFIG_IP_PIMSM_V1 is not set
# CONFIG_IP_PIMSM_V2 is not set
# CONFIG_ARPD is not set
CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=y
CONFIG_INET_ESP=y
CONFIG_INET_IPCOMP=y
CONFIG_INET_TUNNEL=y
CONFIG_INET_DIAG=y
CONFIG_INET_TCP_DIAG=y
CONFIG_TCP_CONG_ADVANCED=y

#
# TCP congestion control
#
CONFIG_TCP_CONG_BIC=y
CONFIG_TCP_CONG_WESTWOOD=y
CONFIG_TCP_CONG_HTCP=y
CONFIG_TCP_CONG_HSTCP=y
CONFIG_TCP_CONG_HYBLA=y
CONFIG_TCP_CONG_VEGAS=y
CONFIG_TCP_CONG_SCALABLE=y

#
# IP: Virtual Server Configuration
#
# CONFIG_IP_VS is not set
CONFIG_IPV6=y
CONFIG_IPV6_PRIVACY=y
CONFIG_INET6_AH=y
CONFIG_INET6_ESP=y
CONFIG_INET6_IPCOMP=y
CONFIG_INET6_TUNNEL=y
CONFIG_IPV6_TUNNEL=y
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
CONFIG_NETFILTER_NETLINK=y
CONFIG_NETFILTER_NETLINK_QUEUE=y
CONFIG_NETFILTER_NETLINK_LOG=y

#
# IP: Netfilter Configuration
#
CONFIG_IP_NF_CONNTRACK=y
CONFIG_IP_NF_CT_ACCT=y
CONFIG_IP_NF_CONNTRACK_MARK=y
CONFIG_IP_NF_CONNTRACK_EVENTS=y
CONFIG_IP_NF_CONNTRACK_NETLINK=y
CONFIG_IP_NF_CT_PROTO_SCTP=y
CONFIG_IP_NF_FTP=y
CONFIG_IP_NF_IRC=y
# CONFIG_IP_NF_NETBIOS_NS is not set
CONFIG_IP_NF_TFTP=y
CONFIG_IP_NF_AMANDA=y
CONFIG_IP_NF_PPTP=y
CONFIG_IP_NF_QUEUE=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_MATCH_LIMIT=y
CONFIG_IP_NF_MATCH_IPRANGE=y
CONFIG_IP_NF_MATCH_MAC=y
CONFIG_IP_NF_MATCH_PKTTYPE=y
CONFIG_IP_NF_MATCH_MARK=y
CONFIG_IP_NF_MATCH_MULTIPORT=y
CONFIG_IP_NF_MATCH_TOS=y
CONFIG_IP_NF_MATCH_RECENT=y
CONFIG_IP_NF_MATCH_ECN=y
CONFIG_IP_NF_MATCH_DSCP=y
CONFIG_IP_NF_MATCH_AH_ESP=y
CONFIG_IP_NF_MATCH_LENGTH=y
CONFIG_IP_NF_MATCH_TTL=y
CONFIG_IP_NF_MATCH_TCPMSS=y
CONFIG_IP_NF_MATCH_STEALTH=y
CONFIG_IP_NF_MATCH_HELPER=y
CONFIG_IP_NF_MATCH_STATE=y
CONFIG_IP_NF_MATCH_CONNTRACK=y
CONFIG_IP_NF_MATCH_OWNER=y
CONFIG_IP_NF_MATCH_ADDRTYPE=y
CONFIG_IP_NF_MATCH_REALM=y
CONFIG_IP_NF_MATCH_SCTP=y
CONFIG_IP_NF_MATCH_DCCP=y
CONFIG_IP_NF_MATCH_COMMENT=y
CONFIG_IP_NF_MATCH_CONNMARK=y
CONFIG_IP_NF_MATCH_CONNBYTES=y
CONFIG_IP_NF_MATCH_HASHLIMIT=y
CONFIG_IP_NF_MATCH_STRING=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
CONFIG_IP_NF_TARGET_LOG=y
CONFIG_IP_NF_TARGET_ULOG=y
CONFIG_IP_NF_TARGET_TCPMSS=y
CONFIG_IP_NF_TARGET_NFQUEUE=y
CONFIG_IP_NF_NAT=y
CONFIG_IP_NF_NAT_NEEDED=y
CONFIG_IP_NF_TARGET_MASQUERADE=y
CONFIG_IP_NF_TARGET_REDIRECT=y
CONFIG_IP_NF_TARGET_NETMAP=y
CONFIG_IP_NF_TARGET_SAME=y
CONFIG_IP_NF_NAT_SNMP_BASIC=y
CONFIG_IP_NF_NAT_IRC=y
CONFIG_IP_NF_NAT_FTP=y
CONFIG_IP_NF_NAT_TFTP=y
CONFIG_IP_NF_NAT_AMANDA=y
CONFIG_IP_NF_NAT_PPTP=y
CONFIG_IP_NF_MANGLE=y
CONFIG_IP_NF_TARGET_TOS=y
CONFIG_IP_NF_TARGET_ECN=y
CONFIG_IP_NF_TARGET_DSCP=y
CONFIG_IP_NF_TARGET_MARK=y
CONFIG_IP_NF_TARGET_CLASSIFY=y
CONFIG_IP_NF_TARGET_TTL=y
CONFIG_IP_NF_TARGET_CONNMARK=y
CONFIG_IP_NF_TARGET_CLUSTERIP=y
CONFIG_IP_NF_RAW=y
CONFIG_IP_NF_TARGET_NOTRACK=y
CONFIG_IP_NF_ARPTABLES=y
CONFIG_IP_NF_ARPFILTER=y
CONFIG_IP_NF_ARP_MANGLE=y

#
# IPv6: Netfilter Configuration (EXPERIMENTAL)
#
CONFIG_IP6_NF_QUEUE=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_LIMIT=m
CONFIG_IP6_NF_MATCH_MAC=m
CONFIG_IP6_NF_MATCH_RT=m
CONFIG_IP6_NF_MATCH_OPTS=m
CONFIG_IP6_NF_MATCH_FRAG=m
CONFIG_IP6_NF_MATCH_HL=m
CONFIG_IP6_NF_MATCH_MULTIPORT=m
CONFIG_IP6_NF_MATCH_OWNER=m
CONFIG_IP6_NF_MATCH_MARK=m
CONFIG_IP6_NF_MATCH_IPV6HEADER=m
CONFIG_IP6_NF_MATCH_AHESP=m
CONFIG_IP6_NF_MATCH_LENGTH=m
CONFIG_IP6_NF_MATCH_EUI64=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_LOG=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP6_NF_TARGET_NFQUEUE=m
CONFIG_IP6_NF_MANGLE=m
CONFIG_IP6_NF_TARGET_MARK=m
CONFIG_IP6_NF_TARGET_HL=m
CONFIG_IP6_NF_RAW=m

#
# DCCP Configuration (EXPERIMENTAL)
#
CONFIG_IP_DCCP=m
CONFIG_INET_DCCP_DIAG=m

#
# DCCP CCIDs Configuration (EXPERIMENTAL)
#
CONFIG_IP_DCCP_CCID3=m
CONFIG_IP_DCCP_TFRC_LIB=m

#
# SCTP Configuration (EXPERIMENTAL)
#
CONFIG_IP_SCTP=m
# CONFIG_SCTP_DBG_MSG is not set
# CONFIG_SCTP_DBG_OBJCNT is not set
# CONFIG_SCTP_HMAC_NONE is not set
# CONFIG_SCTP_HMAC_SHA1 is not set
CONFIG_SCTP_HMAC_MD5=y
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_CLK_JIFFIES=y
# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
# CONFIG_NET_SCH_CLK_CPU is not set
CONFIG_NET_SCH_CBQ=y
CONFIG_NET_SCH_HTB=y
CONFIG_NET_SCH_HFSC=y
CONFIG_NET_SCH_PRIO=y
CONFIG_NET_SCH_RED=y
CONFIG_NET_SCH_SFQ=y
CONFIG_NET_SCH_TEQL=y
CONFIG_NET_SCH_TBF=y
CONFIG_NET_SCH_GRED=y
CONFIG_NET_SCH_DSMARK=y
CONFIG_NET_SCH_NETEM=y
CONFIG_NET_SCH_INGRESS=y
CONFIG_NET_QOS=y
CONFIG_NET_ESTIMATOR=y
CONFIG_NET_CLS=y
CONFIG_NET_CLS_BASIC=y
CONFIG_NET_CLS_TCINDEX=y
CONFIG_NET_CLS_ROUTE4=y
CONFIG_NET_CLS_ROUTE=y
CONFIG_NET_CLS_FW=y
CONFIG_NET_CLS_U32=y
# CONFIG_CLS_U32_PERF is not set
# CONFIG_NET_CLS_IND is not set
# CONFIG_CLS_U32_MARK is not set
CONFIG_NET_CLS_RSVP=y
CONFIG_NET_CLS_RSVP6=y
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_STACK=32
CONFIG_NET_EMATCH_CMP=y
CONFIG_NET_EMATCH_NBYTE=y
CONFIG_NET_EMATCH_U32=y
CONFIG_NET_EMATCH_META=y
CONFIG_NET_EMATCH_TEXT=y
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=y
CONFIG_NET_ACT_GACT=y
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=y
CONFIG_NET_ACT_IPT=y
CONFIG_NET_ACT_PEDIT=y
CONFIG_NET_ACT_SIMP=y

#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_IEEE80211 is not set

#
# Device Drivers
#

#
# Generic Driver Options
#
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y

#
# Connector - unified userspace <-> kernelspace linker
#
# CONFIG_CONNECTOR is not set

#
# Memory Technology Devices (MTD)
#
# CONFIG_MTD is not set

#
# Parallel port support
#
CONFIG_PARPORT=y
CONFIG_PARPORT_PC=y
# CONFIG_PARPORT_SERIAL is not set
# CONFIG_PARPORT_PC_FIFO is not set
# CONFIG_PARPORT_PC_SUPERIO is not set
# CONFIG_PARPORT_GSC is not set
CONFIG_PARPORT_1284=y

#
# Plug and Play support
#
CONFIG_PNP=y
# CONFIG_PNP_DEBUG is not set

#
# Protocols
#
CONFIG_PNPACPI=y

#
# Block devices
#
CONFIG_BLK_DEV_FD=y
# CONFIG_PARIDE is not set
# CONFIG_BLK_CPQ_DA is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
# CONFIG_BLK_DEV_DAC960 is not set
# CONFIG_BLK_DEV_UMEM is not set
# CONFIG_BLK_DEV_COW_COMMON is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_CRYPTOLOOP=y
CONFIG_BLK_DEV_NBD=y
# CONFIG_BLK_DEV_SX8 is not set
# CONFIG_BLK_DEV_UB is not set
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=4096
CONFIG_BLK_DEV_INITRD=y
# CONFIG_LBD is not set
CONFIG_CDROM_PKTCDVD=y
CONFIG_CDROM_PKTCDVD_BUFFERS=8
# CONFIG_CDROM_PKTCDVD_WCACHE is not set

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
# CONFIG_ATA_OVER_ETH is not set

#
# ATA/ATAPI/MFM/RLL support
#
CONFIG_IDE=y
CONFIG_BLK_DEV_IDE=y

#
# Please see Documentation/ide.txt for help/info on IDE drives
#
# CONFIG_BLK_DEV_IDE_SATA is not set
# CONFIG_BLK_DEV_HD_IDE is not set
CONFIG_BLK_DEV_IDEDISK=y
# CONFIG_IDEDISK_MULTI_MODE is not set
CONFIG_BLK_DEV_IDECD=y
# CONFIG_BLK_DEV_IDETAPE is not set
# CONFIG_BLK_DEV_IDEFLOPPY is not set
CONFIG_BLK_DEV_IDESCSI=y
# CONFIG_IDE_TASK_IOCTL is not set

#
# IDE chipset support/bugfixes
#
CONFIG_IDE_GENERIC=y
# CONFIG_BLK_DEV_CMD640 is not set
# CONFIG_BLK_DEV_IDEPNP is not set
CONFIG_BLK_DEV_IDEPCI=y
CONFIG_IDEPCI_SHARE_IRQ=y
# CONFIG_BLK_DEV_OFFBOARD is not set
CONFIG_BLK_DEV_GENERIC=y
# CONFIG_BLK_DEV_OPTI621 is not set
# CONFIG_BLK_DEV_RZ1000 is not set
CONFIG_BLK_DEV_IDEDMA_PCI=y
# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
CONFIG_IDEDMA_PCI_AUTO=y
# CONFIG_IDEDMA_ONLYDISK is not set
# CONFIG_BLK_DEV_AEC62XX is not set
# CONFIG_BLK_DEV_ALI15X3 is not set
# CONFIG_BLK_DEV_AMD74XX is not set
# CONFIG_BLK_DEV_ATIIXP is not set
# CONFIG_BLK_DEV_CMD64X is not set
# CONFIG_BLK_DEV_TRIFLEX is not set
# CONFIG_BLK_DEV_CY82C693 is not set
# CONFIG_BLK_DEV_CS5520 is not set
# CONFIG_BLK_DEV_CS5530 is not set
# CONFIG_BLK_DEV_HPT34X is not set
# CONFIG_BLK_DEV_HPT366 is not set
# CONFIG_BLK_DEV_SC1200 is not set
CONFIG_BLK_DEV_PIIX=y
# CONFIG_BLK_DEV_IT821X is not set
# CONFIG_BLK_DEV_NS87415 is not set
# CONFIG_BLK_DEV_PDC202XX_OLD is not set
CONFIG_BLK_DEV_PDC202XX_NEW=y
CONFIG_PDC202XX_FORCE=y
# CONFIG_BLK_DEV_SVWKS is not set
# CONFIG_BLK_DEV_SIIMAGE is not set
# CONFIG_BLK_DEV_SIS5513 is not set
# CONFIG_BLK_DEV_SLC90E66 is not set
# CONFIG_BLK_DEV_TRM290 is not set
# CONFIG_BLK_DEV_VIA82CXXX is not set
# CONFIG_IDE_ARM is not set
CONFIG_BLK_DEV_IDEDMA=y
# CONFIG_IDEDMA_IVB is not set
CONFIG_IDEDMA_AUTO=y
# CONFIG_BLK_DEV_HD is not set

#
# SCSI device support
#
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
CONFIG_SCSI_PROC_FS=y

#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
# CONFIG_CHR_DEV_OSST is not set
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
# CONFIG_CHR_DEV_SCH is not set

#
# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
#
CONFIG_SCSI_MULTI_LUN=y
# CONFIG_SCSI_CONSTANTS is not set
# CONFIG_SCSI_LOGGING is not set

#
# SCSI Transport Attributes
#
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_FC_ATTRS=m
# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SAS_ATTRS is not set

#
# SCSI low-level drivers
#
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AACRAID is not set
# CONFIG_SCSI_AIC7XXX is not set
# CONFIG_SCSI_AIC7XXX_OLD is not set
# CONFIG_SCSI_AIC79XX is not set
# CONFIG_SCSI_DPT_I2O is not set
# CONFIG_MEGARAID_NEWGEN is not set
# CONFIG_MEGARAID_LEGACY is not set
# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_SATA is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_IPS is not set
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_PPA is not set
# CONFIG_SCSI_IMM is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_QLOGIC_FC is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
CONFIG_SCSI_QLA2XXX=y
# CONFIG_SCSI_QLA21XX is not set
# CONFIG_SCSI_QLA22XX is not set
# CONFIG_SCSI_QLA2300 is not set
# CONFIG_SCSI_QLA2322 is not set
# CONFIG_SCSI_QLA6312 is not set
# CONFIG_SCSI_QLA24XX is not set
# CONFIG_SCSI_LPFC is not set
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_NSP32 is not set
# CONFIG_SCSI_DEBUG is not set

#
# Multi-device support (RAID and LVM)
#
CONFIG_MD=y
# CONFIG_BLK_DEV_MD is not set
CONFIG_BLK_DEV_DM=y
CONFIG_DM_CRYPT=y
# CONFIG_DM_SNAPSHOT is not set
# CONFIG_DM_MIRROR is not set
# CONFIG_DM_ZERO is not set
# CONFIG_DM_MULTIPATH is not set

#
# Fusion MPT device support
#
CONFIG_FUSION=y
CONFIG_FUSION_SPI=m
# CONFIG_FUSION_FC is not set
# CONFIG_FUSION_SAS is not set
CONFIG_FUSION_MAX_SGE=128
# CONFIG_FUSION_CTL is not set

#
# IEEE 1394 (FireWire) support
#
CONFIG_IEEE1394=m

#
# Subsystem Options
#
# CONFIG_IEEE1394_VERBOSEDEBUG is not set
# CONFIG_IEEE1394_OUI_DB is not set
# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
# CONFIG_IEEE1394_EXPORT_FULL_API is not set

#
# Device Drivers
#
# CONFIG_IEEE1394_PCILYNX is not set
# CONFIG_IEEE1394_OHCI1394 is not set

#
# Protocol Drivers
#
# CONFIG_IEEE1394_SBP2 is not set
# CONFIG_IEEE1394_ETH1394 is not set
CONFIG_IEEE1394_RAWIO=m
# CONFIG_IEEE1394_CMP is not set

#
# I2O device support
#
CONFIG_I2O=y
CONFIG_I2O_EXT_ADAPTEC=y
# CONFIG_I2O_CONFIG is not set
# CONFIG_I2O_BUS is not set
# CONFIG_I2O_BLOCK is not set
# CONFIG_I2O_SCSI is not set
# CONFIG_I2O_PROC is not set

#
# Network device support
#
CONFIG_NETDEVICES=y
CONFIG_DUMMY=y
# CONFIG_BONDING is not set
# CONFIG_EQUALIZER is not set
# CONFIG_TUN is not set
# CONFIG_NET_SB1000 is not set

#
# ARCnet devices
#
# CONFIG_ARCNET is not set

#
# PHY device support
#

#
# Ethernet (10 or 100Mbit)
#
# CONFIG_NET_ETHERNET is not set

#
# Ethernet (1000 Mbit)
#
# CONFIG_ACENIC is not set
# CONFIG_DL2K is not set
CONFIG_E1000=y
# CONFIG_E1000_NAPI is not set
# CONFIG_NS83820 is not set
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
# CONFIG_R8169 is not set
# CONFIG_SIS190 is not set
# CONFIG_SKGE is not set
# CONFIG_SK98LIN is not set
# CONFIG_TIGON3 is not set
# CONFIG_BNX2 is not set

#
# Ethernet (10000 Mbit)
#
# CONFIG_CHELSIO_T1 is not set
# CONFIG_IXGB is not set
# CONFIG_S2IO is not set

#
# Token Ring devices
#
# CONFIG_TR is not set

#
# Wireless LAN (non-hamradio)
#
# CONFIG_NET_RADIO is not set

#
# Wan interfaces
#
# CONFIG_WAN is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
# CONFIG_PLIP is not set
# CONFIG_PPP is not set
# CONFIG_SLIP is not set
# CONFIG_NET_FC is not set
CONFIG_SHAPER=m
# CONFIG_NETCONSOLE is not set
# CONFIG_NETPOLL is not set
# CONFIG_NET_POLL_CONTROLLER is not set

#
# ISDN subsystem
#
# CONFIG_ISDN is not set

#
# Telephony Support
#
# CONFIG_PHONE is not set

#
# Input device support
#
CONFIG_INPUT=y

#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
CONFIG_INPUT_MOUSEDEV_PSAUX=y
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
# CONFIG_INPUT_JOYDEV is not set
# CONFIG_INPUT_TSDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_EVBUG is not set

#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
CONFIG_KEYBOARD_ATKBD=y
# CONFIG_KEYBOARD_SUNKBD is not set
# CONFIG_KEYBOARD_LKKBD is not set
# CONFIG_KEYBOARD_XTKBD is not set
# CONFIG_KEYBOARD_NEWTON is not set
CONFIG_INPUT_MOUSE=y
CONFIG_MOUSE_PS2=y
CONFIG_MOUSE_SERIAL=y
# CONFIG_MOUSE_VSXXXAA is not set
# CONFIG_INPUT_JOYSTICK is not set
# CONFIG_INPUT_TOUCHSCREEN is not set
CONFIG_INPUT_MISC=y
CONFIG_INPUT_PCSPKR=y
# CONFIG_INPUT_UINPUT is not set

#
# Hardware I/O ports
#
CONFIG_SERIO=y
CONFIG_SERIO_I8042=y
CONFIG_SERIO_SERPORT=y
# CONFIG_SERIO_CT82C710 is not set
# CONFIG_SERIO_PARKBD is not set
# CONFIG_SERIO_PCIPS2 is not set
CONFIG_SERIO_LIBPS2=y
# CONFIG_SERIO_RAW is not set
# CONFIG_GAMEPORT is not set

#
# Character devices
#
CONFIG_VT=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
# CONFIG_SERIAL_NONSTANDARD is not set

#
# Serial drivers
#
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_SERIAL_8250_ACPI is not set
CONFIG_SERIAL_8250_NR_UARTS=4
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
# CONFIG_SERIAL_8250_DETECT_IRQ is not set
# CONFIG_SERIAL_8250_RSA is not set

#
# Non-8250 serial port support
#
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=256
# CONFIG_PRINTER is not set
# CONFIG_PPDEV is not set
# CONFIG_TIPAR is not set

#
# IPMI
#
# CONFIG_IPMI_HANDLER is not set

#
# Watchdog Cards
#
# CONFIG_WATCHDOG is not set
CONFIG_HW_RANDOM=y
CONFIG_NVRAM=y
CONFIG_RTC=y
# CONFIG_DTLK is not set
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
# CONFIG_SONYPI is not set

#
# Ftape, the floppy tape device driver
#
CONFIG_AGP=y
# CONFIG_AGP_ALI is not set
# CONFIG_AGP_ATI is not set
# CONFIG_AGP_AMD is not set
# CONFIG_AGP_AMD64 is not set
CONFIG_AGP_INTEL=y
# CONFIG_AGP_NVIDIA is not set
# CONFIG_AGP_SIS is not set
# CONFIG_AGP_SWORKS is not set
# CONFIG_AGP_VIA is not set
# CONFIG_AGP_EFFICEON is not set
# CONFIG_DRM is not set
# CONFIG_MWAVE is not set
# CONFIG_RAW_DRIVER is not set
CONFIG_HPET=y
# CONFIG_HPET_RTC_IRQ is not set
CONFIG_HPET_MMAP=y
CONFIG_HANGCHECK_TIMER=y

#
# TPM devices
#
# CONFIG_TCG_TPM is not set

#
# I2C support
#
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y

#
# I2C Algorithms
#
CONFIG_I2C_ALGOBIT=y
# CONFIG_I2C_ALGOPCF is not set
# CONFIG_I2C_ALGOPCA is not set

#
# I2C Hardware Bus support
#
# CONFIG_I2C_ALI1535 is not set
# CONFIG_I2C_ALI1563 is not set
# CONFIG_I2C_ALI15X3 is not set
# CONFIG_I2C_AMD756 is not set
# CONFIG_I2C_AMD8111 is not set
# CONFIG_I2C_I801 is not set
# CONFIG_I2C_I810 is not set
CONFIG_I2C_PIIX4=y
# CONFIG_I2C_NFORCE2 is not set
# CONFIG_I2C_PARPORT is not set
# CONFIG_I2C_PARPORT_LIGHT is not set
# CONFIG_I2C_PROSAVAGE is not set
# CONFIG_I2C_SAVAGE4 is not set
# CONFIG_SCx200_ACB is not set
# CONFIG_I2C_SIS5595 is not set
# CONFIG_I2C_SIS630 is not set
# CONFIG_I2C_SIS96X is not set
# CONFIG_I2C_STUB is not set
# CONFIG_I2C_VIA is not set
# CONFIG_I2C_VIAPRO is not set
# CONFIG_I2C_VOODOO3 is not set
# CONFIG_I2C_PCA_ISA is not set

#
# Miscellaneous I2C Chip support
#
# CONFIG_SENSORS_DS1337 is not set
# CONFIG_SENSORS_DS1374 is not set
# CONFIG_SENSORS_EEPROM is not set
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_SENSORS_PCA9539 is not set
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_RTC8564 is not set
# CONFIG_SENSORS_MAX6875 is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# CONFIG_I2C_DEBUG_CHIP is not set

#
# Dallas's 1-wire bus
#
# CONFIG_W1 is not set

#
# Hardware Monitoring support
#
# CONFIG_HWMON is not set
# CONFIG_HWMON_VID is not set

#
# Misc devices
#
# CONFIG_IBM_ASM is not set

#
# Multimedia Capabilities Port drivers
#

#
# Multimedia devices
#
CONFIG_VIDEO_DEV=y

#
# Video For Linux
#

#
# Video Adapters
#
CONFIG_VIDEO_BT848=y
# CONFIG_VIDEO_SAA6588 is not set
# CONFIG_VIDEO_BWQCAM is not set
# CONFIG_VIDEO_CQCAM is not set
# CONFIG_VIDEO_W9966 is not set
# CONFIG_VIDEO_CPIA is not set
# CONFIG_VIDEO_SAA5246A is not set
# CONFIG_VIDEO_SAA5249 is not set
# CONFIG_TUNER_3036 is not set
# CONFIG_VIDEO_STRADIS is not set
# CONFIG_VIDEO_ZORAN is not set
# CONFIG_VIDEO_SAA7134 is not set
# CONFIG_VIDEO_MXB is not set
# CONFIG_VIDEO_DPC is not set
# CONFIG_VIDEO_HEXIUM_ORION is not set
# CONFIG_VIDEO_HEXIUM_GEMINI is not set
# CONFIG_VIDEO_CX88 is not set
# CONFIG_VIDEO_OVCAMCHIP is not set

#
# Radio Adapters
#
# CONFIG_RADIO_GEMTEK_PCI is not set
# CONFIG_RADIO_MAXIRADIO is not set
# CONFIG_RADIO_MAESTRO is not set

#
# Digital Video Broadcasting Devices
#
# CONFIG_DVB is not set
CONFIG_VIDEO_TUNER=y
CONFIG_VIDEO_BUF=y
CONFIG_VIDEO_BTCX=y
CONFIG_VIDEO_IR=y
CONFIG_VIDEO_TVEEPROM=y

#
# Graphics support
#
CONFIG_FB=y
CONFIG_FB_CFB_FILLRECT=y
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
CONFIG_FB_SOFT_CURSOR=y
# CONFIG_FB_MACMODES is not set
CONFIG_FB_MODE_HELPERS=y
# CONFIG_FB_TILEBLITTING is not set
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_PM2 is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_ARC is not set
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
CONFIG_FB_VGA16=y
CONFIG_FB_VESA=y
# CONFIG_VIDEO_SELECT is not set
# CONFIG_FB_HGA is not set
CONFIG_FB_NVIDIA=m
# CONFIG_FB_RIVA is not set
# CONFIG_FB_I810 is not set
# CONFIG_FB_INTEL is not set
# CONFIG_FB_MATROX is not set
# CONFIG_FB_RADEON_OLD is not set
# CONFIG_FB_RADEON is not set
# CONFIG_FB_ATY128 is not set
# CONFIG_FB_ATY is not set
# CONFIG_FB_SAVAGE is not set
# CONFIG_FB_SIS is not set
# CONFIG_FB_NEOMAGIC is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_3DFX is not set
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_CYBLA is not set
# CONFIG_FB_TRIDENT is not set
# CONFIG_FB_GEODE is not set
# CONFIG_FB_S1D13XXX is not set
# CONFIG_FB_VIRTUAL is not set

#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
CONFIG_DUMMY_CONSOLE=y
# CONFIG_FRAMEBUFFER_CONSOLE is not set

#
# Logo configuration
#
# CONFIG_LOGO is not set
# CONFIG_BACKLIGHT_LCD_SUPPORT is not set

#
# Sound
#
CONFIG_SOUND=y

#
# Advanced Linux Sound Architecture
#
CONFIG_SND=y
CONFIG_SND_TIMER=y
CONFIG_SND_PCM=y
CONFIG_SND_HWDEP=y
CONFIG_SND_RAWMIDI=y
CONFIG_SND_SEQUENCER=y
# CONFIG_SND_SEQ_DUMMY is not set
CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_RTCTIMER=y
CONFIG_SND_SEQ_RTCTIMER_DEFAULT=y
# CONFIG_SND_VERBOSE_PRINTK is not set
# CONFIG_SND_DEBUG is not set

#
# Generic devices
#
# CONFIG_SND_DUMMY is not set
# CONFIG_SND_VIRMIDI is not set
# CONFIG_SND_MTPAV is not set
# CONFIG_SND_SERIAL_U16550 is not set
# CONFIG_SND_MPU401 is not set
CONFIG_SND_AC97_CODEC=y
CONFIG_SND_AC97_BUS=y

#
# PCI devices
#
# CONFIG_SND_ALI5451 is not set
# CONFIG_SND_ATIIXP is not set
# CONFIG_SND_ATIIXP_MODEM is not set
# CONFIG_SND_AU8810 is not set
# CONFIG_SND_AU8820 is not set
# CONFIG_SND_AU8830 is not set
# CONFIG_SND_AZT3328 is not set
CONFIG_SND_BT87X=y
# CONFIG_SND_BT87X_OVERCLOCK is not set
# CONFIG_SND_CS46XX is not set
# CONFIG_SND_CS4281 is not set
CONFIG_SND_EMU10K1=y
# CONFIG_SND_EMU10K1X is not set
# CONFIG_SND_CA0106 is not set
# CONFIG_SND_KORG1212 is not set
# CONFIG_SND_MIXART is not set
# CONFIG_SND_NM256 is not set
# CONFIG_SND_RME32 is not set
# CONFIG_SND_RME96 is not set
# CONFIG_SND_RME9652 is not set
# CONFIG_SND_HDSP is not set
# CONFIG_SND_HDSPM is not set
# CONFIG_SND_TRIDENT is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SND_AD1889 is not set
# CONFIG_SND_ALS4000 is not set
# CONFIG_SND_CMIPCI is not set
# CONFIG_SND_ENS1370 is not set
# CONFIG_SND_ENS1371 is not set
# CONFIG_SND_ES1938 is not set
# CONFIG_SND_ES1968 is not set
# CONFIG_SND_MAESTRO3 is not set
# CONFIG_SND_FM801 is not set
# CONFIG_SND_ICE1712 is not set
# CONFIG_SND_ICE1724 is not set
CONFIG_SND_INTEL8X0=y
# CONFIG_SND_INTEL8X0M is not set
# CONFIG_SND_SONICVIBES is not set
# CONFIG_SND_VIA82XX is not set
# CONFIG_SND_VIA82XX_MODEM is not set
# CONFIG_SND_VX222 is not set
# CONFIG_SND_HDA_INTEL is not set

#
# USB devices
#
CONFIG_SND_USB_AUDIO=y
# CONFIG_SND_USB_USX2Y is not set

#
# Open Sound System
#
# CONFIG_SOUND_PRIME is not set

#
# USB support
#
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
CONFIG_USB=y
# CONFIG_USB_DEBUG is not set

#
# Miscellaneous USB options
#
CONFIG_USB_DEVICEFS=y
CONFIG_USB_BANDWIDTH=y
# CONFIG_USB_DYNAMIC_MINORS is not set
# CONFIG_USB_SUSPEND is not set
# CONFIG_USB_OTG is not set

#
# USB Host Controller Drivers
#
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_SPLIT_ISO=y
CONFIG_USB_EHCI_ROOT_HUB_TT=y
# CONFIG_USB_ISP116X_HCD is not set
CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OHCI_BIG_ENDIAN is not set
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set

#
# USB Device Class drivers
#
# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
# CONFIG_USB_BLUETOOTH_TTY is not set
# CONFIG_USB_ACM is not set
CONFIG_USB_PRINTER=y

#
# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
#
CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_DEBUG is not set
# CONFIG_USB_STORAGE_DATAFAB is not set
# CONFIG_USB_STORAGE_FREECOM is not set
# CONFIG_USB_STORAGE_ISD200 is not set
# CONFIG_USB_STORAGE_DPCM is not set
# CONFIG_USB_STORAGE_USBAT is not set
# CONFIG_USB_STORAGE_SDDR09 is not set
# CONFIG_USB_STORAGE_SDDR55 is not set
# CONFIG_USB_STORAGE_JUMPSHOT is not set
# CONFIG_USB_STORAGE_ONETOUCH is not set

#
# USB Input Devices
#
CONFIG_USB_HID=y
CONFIG_USB_HIDINPUT=y
# CONFIG_HID_FF is not set
CONFIG_USB_HIDDEV=y
# CONFIG_USB_AIPTEK is not set
# CONFIG_USB_WACOM is not set
# CONFIG_USB_ACECAD is not set
# CONFIG_USB_KBTAB is not set
# CONFIG_USB_POWERMATE is not set
# CONFIG_USB_MTOUCH is not set
# CONFIG_USB_ITMTOUCH is not set
# CONFIG_USB_EGALAX is not set
# CONFIG_USB_YEALINK is not set
# CONFIG_USB_XPAD is not set
# CONFIG_USB_ATI_REMOTE is not set
# CONFIG_USB_KEYSPAN_REMOTE is not set
# CONFIG_USB_APPLETOUCH is not set

#
# USB Imaging devices
#
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set

#
# USB Multimedia devices
#
# CONFIG_USB_DABUSB is not set
# CONFIG_USB_VICAM is not set
# CONFIG_USB_DSBR is not set
# CONFIG_USB_IBMCAM is not set
# CONFIG_USB_KONICAWC is not set
# CONFIG_USB_OV511 is not set
# CONFIG_USB_SE401 is not set
# CONFIG_USB_SN9C102 is not set
# CONFIG_USB_STV680 is not set
CONFIG_USB_PWC=m

#
# USB Network Adapters
#
# CONFIG_USB_CATC is not set
# CONFIG_USB_KAWETH is not set
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET is not set
# CONFIG_USB_MON is not set

#
# USB port drivers
#
# CONFIG_USB_USS720 is not set

#
# USB Serial Converter support
#
# CONFIG_USB_SERIAL is not set

#
# USB Miscellaneous drivers
#
# CONFIG_USB_EMI62 is not set
# CONFIG_USB_EMI26 is not set
# CONFIG_USB_AUERSWALD is not set
# CONFIG_USB_RIO500 is not set
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
# CONFIG_USB_LED is not set
# CONFIG_USB_CYTHERM is not set
# CONFIG_USB_PHIDGETKIT is not set
# CONFIG_USB_PHIDGETSERVO is not set
# CONFIG_USB_IDMOUSE is not set
# CONFIG_USB_SISUSBVGA is not set
# CONFIG_USB_LD is not set
# CONFIG_USB_TEST is not set

#
# USB DSL modem support
#

#
# USB Gadget Support
#
# CONFIG_USB_GADGET is not set

#
# MMC/SD Card support
#
CONFIG_MMC=y
# CONFIG_MMC_DEBUG is not set
CONFIG_MMC_BLOCK=y
# CONFIG_MMC_WBSD is not set

#
# InfiniBand support
#
# CONFIG_INFINIBAND is not set

#
# SN Devices
#

#
# File systems
#
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
CONFIG_EXT2_FS_XIP=y
CONFIG_FS_XIP=y
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_XATTR=y
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set
CONFIG_FS_MBCACHE=y
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
CONFIG_FS_POSIX_ACL=y
# CONFIG_XFS_FS is not set
CONFIG_MINIX_FS=y
CONFIG_ROMFS_FS=y
CONFIG_INOTIFY=y
CONFIG_QUOTA=y
CONFIG_QFMT_V1=y
CONFIG_QFMT_V2=y
CONFIG_QUOTACTL=y
CONFIG_DNOTIFY=y
# CONFIG_AUTOFS_FS is not set
# CONFIG_AUTOFS4_FS is not set
# CONFIG_FUSE_FS is not set

#
# CD-ROM/DVD Filesystems
#
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
CONFIG_ZISOFS_FS=y
CONFIG_UDF_FS=y
CONFIG_UDF_NLS=y

#
# DOS/FAT/NT Filesystems
#
CONFIG_FAT_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_FAT_DEFAULT_CODEPAGE=437
CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
CONFIG_NTFS_FS=y
# CONFIG_NTFS_DEBUG is not set
CONFIG_NTFS_RW=y

#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
# CONFIG_HUGETLBFS is not set
# CONFIG_HUGETLB_PAGE is not set
CONFIG_RAMFS=y
CONFIG_RELAYFS_FS=y

#
# Miscellaneous filesystems
#
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
CONFIG_CRAMFS=y
# CONFIG_VXFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set

#
# Network File Systems
#
# CONFIG_NFS_FS is not set
# CONFIG_NFSD is not set
# CONFIG_SMB_FS is not set
# CONFIG_CIFS is not set
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
# CONFIG_9P_FS is not set

#
# Partition Types
#
CONFIG_PARTITION_ADVANCED=y
# CONFIG_ACORN_PARTITION is not set
# CONFIG_OSF_PARTITION is not set
# CONFIG_AMIGA_PARTITION is not set
# CONFIG_ATARI_PARTITION is not set
# CONFIG_MAC_PARTITION is not set
CONFIG_MSDOS_PARTITION=y
# CONFIG_BSD_DISKLABEL is not set
# CONFIG_MINIX_SUBPARTITION is not set
# CONFIG_SOLARIS_X86_PARTITION is not set
# CONFIG_UNIXWARE_DISKLABEL is not set
CONFIG_LDM_PARTITION=y
# CONFIG_LDM_DEBUG is not set
# CONFIG_SGI_PARTITION is not set
# CONFIG_ULTRIX_PARTITION is not set
# CONFIG_SUN_PARTITION is not set
# CONFIG_EFI_PARTITION is not set

#
# Native Language Support
#
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="cp437"
CONFIG_NLS_CODEPAGE_437=y
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
CONFIG_NLS_CODEPAGE_850=y
CONFIG_NLS_CODEPAGE_852=y
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
# CONFIG_NLS_CODEPAGE_862 is not set
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
# CONFIG_NLS_CODEPAGE_866 is not set
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
# CONFIG_NLS_CODEPAGE_1251 is not set
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
# CONFIG_NLS_ISO8859_5 is not set
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
# CONFIG_NLS_KOI8_R is not set
# CONFIG_NLS_KOI8_U is not set
CONFIG_NLS_UTF8=y

#
# Profiling support
#
# CONFIG_PROFILING is not set

#
# Kernel hacking
#
# CONFIG_PRINTK_TIME is not set
# CONFIG_DEBUG_KERNEL is not set
CONFIG_LOG_BUF_SHIFT=15
CONFIG_DEBUG_BUGVERBOSE=y
CONFIG_EARLY_PRINTK=y
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y

#
# Cryptographic options
#
CONFIG_CRYPTO=y
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_NULL=y
CONFIG_CRYPTO_MD4=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_SHA1=y
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
CONFIG_CRYPTO_WP512=y
CONFIG_CRYPTO_TGR192=y
CONFIG_CRYPTO_DES=y
CONFIG_CRYPTO_BLOWFISH=y
CONFIG_CRYPTO_TWOFISH=y
CONFIG_CRYPTO_SERPENT=y
CONFIG_CRYPTO_AES_586=y
CONFIG_CRYPTO_CAST5=y
CONFIG_CRYPTO_CAST6=y
CONFIG_CRYPTO_TEA=y
CONFIG_CRYPTO_ARC4=y
CONFIG_CRYPTO_KHAZAD=y
CONFIG_CRYPTO_ANUBIS=y
CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRYPTO_MICHAEL_MIC=y
CONFIG_CRYPTO_CRC32C=y
CONFIG_CRYPTO_TEST=y

#
# Hardware crypto devices
#
# CONFIG_CRYPTO_DEV_PADLOCK is not set

#
# Library routines
#
CONFIG_CRC_CCITT=y
CONFIG_CRC16=y
CONFIG_CRC32=y
CONFIG_LIBCRC32C=y
CONFIG_ZLIB_INFLATE=y
CONFIG_ZLIB_DEFLATE=y
CONFIG_TEXTSEARCH=y
CONFIG_TEXTSEARCH_KMP=y
CONFIG_TEXTSEARCH_BM=y
CONFIG_TEXTSEARCH_FSM=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_X86_SMP=y
CONFIG_X86_HT=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
CONFIG_PC=y

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2005-12-31 12:46               ` Mark v Wolher
@ 2005-12-31 15:18                 ` Mark v Wolher
  2005-12-31 16:34                   ` Sami Farin
  0 siblings, 1 reply; 414+ messages in thread
From: Mark v Wolher @ 2005-12-31 15:18 UTC (permalink / raw)
  To: Mark v Wolher
  Cc: Jesper Juhl, Alistair John Strachan, Lee Revell,
	Folkert van Heusden, Linux Kernel

Mark v Wolher wrote:
> Jesper Juhl wrote:
> 
>>On 12/31/05, Mark v Wolher <trilight@ns666.com> wrote:
>>
>>
>>>Jesper Juhl wrote:
>>>
>>>
>>>>On 12/31/05, Mark v Wolher <trilight@ns666.com> wrote:
>>>>
>>>>
>>>>
>>>>>g'morning !
>>>>>
>>>>>the memtest86 went 40 times over the memory, no errors detected.
>>>>>
>>>>
>>>>Give memtest86+ a spin (http://www.memtest.org/) as well. memtest86 is
>>>>good, but I've found in the past that memtest86+ sometimes finds
>>>>errors that memtest86 does not, so giving both a spin for an extended
>>>>period of time is usually a good idea.
>>>>Also, make sure you enable all the tests of both tools.
>>>
>>>Hi Jesper,
>>>
>>>Oh i thought they were the same, i used memtest86+ which comes with
>>>debian and not the "older" memtest86.
>>>
>>>Right now i booted the kernel with nomce since one never knows with dell
>>
>>
>>Suppressing MCEs (Machine Check Exceptions) is usually a really bad
>>idea. MCEs indicate a hardware problem, so unless it's known that
>>a certain MCE is reported wrongly they should *not* be ignored.
> 
> 
> Hi Jesper,
> 
> Yes, I'd rather not disable it, but I found some reports that Dell
> machines do not always follow the standard, which causes false
> exceptions on them. I'll re-enable it and see if the update of the
> Intel microcode made a difference. I now have only the nv module
> loaded. If a crash occurs I'll open the box and remove the TV card.
> 
> Also, I wonder: I downloaded the DSDT table from the BIOS, and when I
> recompiled it with IASL from Intel it showed 7 errors, one of them
> related to DMA. It is known that a lot of companies like Dell use
> Microsoft compilers, which easily skip such errors or do not report
> them; at least, this is what I read.
> 
> I'm pasting the DSDT errors that occurred during the recompile; who
> knows, this could also help a little bit.
> 
> DSDT Table / Recompile:
> 
> Intel ACPI Component Architecture
> ASL Optimizing Compiler version 20050930 [Dec 15 2005]
> Copyright (C) 2000 - 2005 Intel Corporation
> Supports ACPI Specification Revision 3.0
> 
> dsdt.dsl   338:         Notify (\_SB.PCI0.USB0, 0x02)
> Error    1061 -        Object does not exist ^  (\_SB.PCI0.USB0)
> 
> dsdt.dsl   351:         Notify (\_SB.PCI0.USB1, 0x02)
> Error    1061 -        Object does not exist ^  (\_SB.PCI0.USB1)
> 
> dsdt.dsl   364:         Notify (\_SB.PCI0.USB2, 0x02)
> Error    1061 -        Object does not exist ^  (\_SB.PCI0.USB2)
> 
> dsdt.dsl   377:         Notify (\_SB.PCI0, 0x02)
> Error    1061 -   Object does not exist ^  (\_SB.PCI0)
> 
> dsdt.dsl   384:         Notify (\_SB.PCI0.PCI4, 0x02)
> Error    1061 -        Object does not exist ^  (\_SB.PCI0.PCI4)
> 
> dsdt.dsl   400:         Notify (\_SB.PCI0.ISA.KBD, 0x02)
> Error    1061 -           Object does not exist ^  (\_SB.PCI0.ISA.KBD)
> 
> dsdt.dsl  1784:                 Device (DMA)
> Error    1094 -                           ^ syntax error, unexpected
> PARSEOP_DMA, expecting PARSEOP_NAMESEG or PARSEOP_NAMESTRING
> 
> ASL Input:  dsdt.dsl - 3096 lines, 93624 bytes, 515 keywords
> Compilation complete. 7 Errors, 0 Warnings, 0 Remarks, 53 Optimizations
> 
> 
> ====
> 
> LSUSB:
> Bus 004 Device 002: ID 0d8c:0001 C-Media Electronics, Inc.
> Bus 004 Device 001: ID 0000:0000
> Bus 003 Device 003: ID 051d:0002 American Power Conversion Back-UPS Pro
> 500/1000/1500
> Bus 003 Device 002: ID 046d:c00e Logitech, Inc. Optical Mouse
> Bus 003 Device 001: ID 0000:0000
> Bus 002 Device 001: ID 0000:0000
> Bus 001 Device 001: ID 0000:0000
> 
> 
> =====
> 
> cat /proc/meminfo:
> 
> MemTotal:       512548 kB
> MemFree:         10684 kB
> Buffers:         17252 kB
> Cached:         221508 kB
> SwapCached:      10120 kB
> Active:         355392 kB
> Inactive:        49652 kB
> HighTotal:           0 kB
> HighFree:            0 kB
> LowTotal:       512548 kB
> LowFree:         10684 kB
> SwapTotal:     4883680 kB
> SwapFree:      4739048 kB
> Dirty:             132 kB
> Writeback:           0 kB
> Mapped:         347756 kB
> Slab:            49344 kB
> CommitLimit:   5139952 kB
> Committed_AS:   635544 kB
> PageTables:       2108 kB
> VmallocTotal:   515796 kB
> VmallocUsed:     25556 kB
> VmallocChunk:   486608 kB
> 
> =====
> 
> cat /proc/cpuinfo:
> 
> processor       : 0
> vendor_id       : GenuineIntel
> cpu family      : 15
> model           : 2
> model name      : Intel(R) Xeon(TM) CPU 2.40GHz
> stepping        : 9
> cpu MHz         : 2392.630
> cache size      : 512 KB
> physical id     : 0
> siblings        : 2
> core id         : 0
> cpu cores       : 1
> fdiv_bug        : no
> hlt_bug         : no
> f00f_bug        : no
> coma_bug        : no
> fpu             : yes
> fpu_exception   : yes
> cpuid level     : 2
> wp              : yes
> flags           : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca
> cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid xtpr
> bogomips        : 4791.93
> 
> processor       : 1
> vendor_id       : GenuineIntel
> cpu family      : 15
> model           : 2
> model name      : Intel(R) Xeon(TM) CPU 2.40GHz
> stepping        : 9
> cpu MHz         : 2392.630
> cache size      : 512 KB
> physical id     : 0
> siblings        : 2
> core id         : 0
> cpu cores       : 1
> fdiv_bug        : no
> hlt_bug         : no
> f00f_bug        : no
> coma_bug        : no
> fpu             : yes
> fpu_exception   : yes
> cpuid level     : 2
> wp              : yes
> flags           : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca
> cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid xtpr
> bogomips        : 4784.99
> 
> processor       : 2
> vendor_id       : GenuineIntel
> cpu family      : 15
> model           : 2
> model name      : Intel(R) Xeon(TM) CPU 2.40GHz
> stepping        : 9
> cpu MHz         : 2392.630
> cache size      : 512 KB
> physical id     : 3
> siblings        : 2
> core id         : 3
> cpu cores       : 1
> fdiv_bug        : no
> hlt_bug         : no
> f00f_bug        : no
> coma_bug        : no
> fpu             : yes
> fpu_exception   : yes
> cpuid level     : 2
> wp              : yes
> flags           : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca
> cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid xtpr
> bogomips        : 4785.12
> 
> processor       : 3
> vendor_id       : GenuineIntel
> cpu family      : 15
> model           : 2
> model name      : Intel(R) Xeon(TM) CPU 2.40GHz
> stepping        : 9
> cpu MHz         : 2392.630
> cache size      : 512 KB
> physical id     : 3
> siblings        : 2
> core id         : 3
> cpu cores       : 1
> fdiv_bug        : no
> hlt_bug         : no
> f00f_bug        : no
> coma_bug        : no
> fpu             : yes
> fpu_exception   : yes
> cpuid level     : 2
> wp              : yes
> flags           : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca
> cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid xtpr
> bogomips        : 4785.12
> 
> =====
> 
> lspci -v:
> 0000:00:00.0 Host bridge: Intel Corporation E7505 Memory Controller Hub
> (rev 03)
>         Subsystem: Dell: Unknown device 012c
>         Flags: bus master, fast devsel, latency 0
>         Memory at e8000000 (32-bit, prefetchable) [size=128M]
>         Capabilities: [40] #09 [0104]
>         Capabilities: [a0] AGP version 3.0
> 
> 0000:00:01.0 PCI bridge: Intel Corporation E7505/E7205 PCI-to-AGP Bridge
> (rev 03) (prog-if 00 [Normal decode])
>         Flags: bus master, 66MHz, fast devsel, latency 64
>         Memory at e0000000 (32-bit, prefetchable) [size=128M]
>         Bus: primary=00, secondary=01, subordinate=01, sec-latency=64
>         Memory behind bridge: fc000000-fdffffff
>         Prefetchable memory behind bridge: f0000000-f7ffffff
>         Capabilities: [60] #0e [0035]
> 
> 0000:00:02.0 PCI bridge: Intel Corporation E7505 Hub Interface B
> PCI-to-PCI Bridge (rev 03) (prog-if 00 [Normal decode])
>         Flags: bus master, 66MHz, fast devsel, latency 64
>         Bus: primary=00, secondary=02, subordinate=04, sec-latency=0
>         I/O behind bridge: 0000e000-0000efff
>         Memory behind bridge: fe300000-fe6fffff
> 
> 0000:00:1d.0 USB Controller: Intel Corporation 82801DB/DBL/DBM
> (ICH4/ICH4-L/ICH4-M) USB UHCI Controller #1 (rev 01) (prog-if 00 [UHCI])
>         Subsystem: Dell: Unknown device 012c
>         Flags: bus master, medium devsel, latency 0, IRQ 21
>         I/O ports at ff80 [size=32]
> 
> 0000:00:1d.1 USB Controller: Intel Corporation 82801DB/DBL/DBM
> (ICH4/ICH4-L/ICH4-M) USB UHCI Controller #2 (rev 01) (prog-if 00 [UHCI])
>         Subsystem: Dell: Unknown device 012c
>         Flags: bus master, medium devsel, latency 0, IRQ 22
>         I/O ports at ff60 [size=32]
> 
> 0000:00:1d.2 USB Controller: Intel Corporation 82801DB/DBL/DBM
> (ICH4/ICH4-L/ICH4-M) USB UHCI Controller #3 (rev 01) (prog-if 00 [UHCI])
>         Subsystem: Dell: Unknown device 012c
>         Flags: bus master, medium devsel, latency 0, IRQ 18
>         I/O ports at ff40 [size=32]
> 
> 0000:00:1d.7 USB Controller: Intel Corporation 82801DB/DBM (ICH4/ICH4-M)
> USB2 EHCI Controller (rev 01) (prog-if 20 [EHCI])
>         Subsystem: Dell: Unknown device 012c
>         Flags: bus master, medium devsel, latency 0, IRQ 20
>         Memory at fe700800 (32-bit, non-prefetchable) [size=1K]
>         Capabilities: [50] Power Management version 2
>         Capabilities: [58] #0a [2080]
> 
> 0000:00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 81)
> (prog-if 00 [Normal decode])
>         Flags: bus master, fast devsel, latency 0
>         Bus: primary=00, secondary=05, subordinate=05, sec-latency=32
>         I/O behind bridge: 0000d000-0000dfff
>         Memory behind bridge: fe100000-fe2fffff
>         Prefetchable memory behind bridge: f8000000-f80fffff
> 
> 0000:00:1f.0 ISA bridge: Intel Corporation 82801DB/DBL (ICH4/ICH4-L) LPC
> Interface Bridge (rev 01)
>         Flags: bus master, medium devsel, latency 0
> 0000:00:1f.1 IDE interface: Intel Corporation 82801DB (ICH4) IDE
> Controller (rev 01) (prog-if 8a [Master SecP PriP])
>         Subsystem: Dell: Unknown device 012c
>         Flags: bus master, medium devsel, latency 0, IRQ 18
>         I/O ports at <unassigned>
>         I/O ports at <unassigned>
>         I/O ports at <unassigned>
>         I/O ports at <unassigned>
>         I/O ports at ffa0 [size=16]
>         Memory at 30000000 (32-bit, non-prefetchable) [size=1K]
> 
> 0000:00:1f.3 SMBus: Intel Corporation 82801DB/DBL/DBM
> (ICH4/ICH4-L/ICH4-M) SMBus Controller (rev 01)
>         Subsystem: Dell: Unknown device 012c
>         Flags: medium devsel, IRQ 4
>         I/O ports at cc80 [size=32]
> 
> 0000:00:1f.5 Multimedia audio controller: Intel Corporation
> 82801DB/DBL/DBM (ICH4/ICH4-L/ICH4-M) AC'97 Audio Controller (rev 01)
>         Subsystem: Dell: Unknown device 012c
>         Flags: bus master, medium devsel, latency 0, IRQ 23
>         I/O ports at c800 [size=256]
>         I/O ports at cc40 [size=64]
>         Memory at fe700400 (32-bit, non-prefetchable) [size=512]
>         Memory at fe700000 (32-bit, non-prefetchable) [size=256]
>         Capabilities: [50] Power Management version 2
> 
> 0000:01:00.0 VGA compatible controller: nVidia Corporation NV34GL
> [Quadro FX 500/600 PCI] (rev a1) (prog-if 00 [VGA])
>         Subsystem: nVidia Corporation: Unknown device 01ba
>         Flags: bus master, 66MHz, medium devsel, latency 248, IRQ 21
>         Memory at fc000000 (32-bit, non-prefetchable) [size=16M]
>         Memory at f0000000 (32-bit, prefetchable) [size=128M]
>         Expansion ROM at fd000000 [disabled] [size=128K]
>         Capabilities: [60] Power Management version 2
>         Capabilities: [44] AGP version 3.0
> 
> 0000:02:1c.0 PIC: Intel Corporation 82870P2 P64H2 I/OxAPIC (rev 04)
> (prog-if 20 [IO(X)-APIC])
>         Subsystem: Dell: Unknown device 012c
>         Flags: bus master, 66MHz, fast devsel, latency 0
>         Memory at fe3ff000 (32-bit, non-prefetchable) [size=4K]
>         Capabilities: [50] PCI-X non-bridge device.
> 
> 0000:02:1d.0 PCI bridge: Intel Corporation 82870P2 P64H2 Hub PCI Bridge
> (rev 04) (prog-if 00 [Normal decode])
>         Flags: bus master, 66MHz, fast devsel, latency 64
>         Bus: primary=02, secondary=03, subordinate=03, sec-latency=48
>         I/O behind bridge: 0000e000-0000efff
>         Memory behind bridge: fe500000-fe6fffff
>         Capabilities: [50] PCI-X bridge device.
> 
> 0000:02:1e.0 PIC: Intel Corporation 82870P2 P64H2 I/OxAPIC (rev 04)
> (prog-if 20 [IO(X)-APIC])
>         Subsystem: Dell: Unknown device 012c
>         Flags: bus master, 66MHz, fast devsel, latency 0
>         Memory at fe3fe000 (32-bit, non-prefetchable) [size=4K]
>         Capabilities: [50] PCI-X non-bridge device.
> 
> 0000:02:1f.0 PCI bridge: Intel Corporation 82870P2 P64H2 Hub PCI Bridge
> (rev 04) (prog-if 00 [Normal decode])
>         Flags: bus master, 66MHz, fast devsel, latency 64
>         Bus: primary=02, secondary=04, subordinate=04, sec-latency=64
>         Capabilities: [50] PCI-X bridge device.
> 
> 0000:03:0d.0 Mass storage controller: Promise Technology, Inc. 20269
> (rev 02) (prog-if 85)
>         Subsystem: Promise Technology, Inc. Ultra133TX2
>         Flags: bus master, 66MHz, slow devsel, latency 64, IRQ 19
>         I/O ports at ecf8 [size=8]
>         I/O ports at ecf0 [size=4]
>         I/O ports at ece0 [size=8]
>         I/O ports at ecd8 [size=4]
>         I/O ports at ecc0 [size=16]
>         Memory at fe5fc000 (32-bit, non-prefetchable) [size=16K]
>         Expansion ROM at fe600000 [disabled] [size=16K]
>         Capabilities: [60] Power Management version 1
> 
> 0000:03:0e.0 Ethernet controller: Intel Corporation 82545EM Gigabit
> Ethernet Controller (Copper) (rev 01)
>         Subsystem: Dell: Unknown device 012c
>         Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 16
>         Memory at fe5c0000 (64-bit, non-prefetchable) [size=128K]
>         I/O ports at ec80 [size=64]
>         Capabilities: [dc] Power Management version 2
>         Capabilities: [e4] PCI-X non-bridge device.
>         Capabilities: [f0] Message Signalled Interrupts: 64bit+
> Queue=0/0 Enable-
> 
> 0000:05:0c.0 FireWire (IEEE 1394): Texas Instruments TSB43AB22/A
> IEEE-1394a-2000 Controller (PHY/Link) (prog-if 10 [OHCI])
>         Subsystem: Dell: Unknown device 012c
>         Flags: bus master, medium devsel, latency 64, IRQ 4
>         Memory at fe1ff800 (32-bit, non-prefetchable) [size=2K]
>         Memory at fe1f8000 (32-bit, non-prefetchable) [size=16K]
>         Capabilities: [44] Power Management version 2
> 
> 0000:05:0d.0 Multimedia audio controller: Creative Labs SB Live! EMU10k1
> (rev 07)
>         Subsystem: Creative Labs SBLive! 5.1 Model SB0100
>         Flags: bus master, medium devsel, latency 64, IRQ 24
>         I/O ports at dce0 [size=32]
>         Capabilities: [dc] Power Management version 1
> 
> 0000:05:0d.1 Input device controller: Creative Labs SB Live! MIDI/Game
> Port (rev 07)
>         Subsystem: Creative Labs Gameport Joystick
>         Flags: bus master, medium devsel, latency 64
>         I/O ports at dcd8 [size=8]
>         Capabilities: [dc] Power Management version 1
> 
> 0000:05:0e.0 Multimedia video controller: Brooktree Corporation Bt878
> Video Capture (rev 02)
>         Subsystem: TERRATEC Electronic GmbH: Unknown device 1134
>         Flags: bus master, medium devsel, latency 64, IRQ 17
>         Memory at f80ff000 (32-bit, prefetchable) [size=4K]
> 
> 0000:05:0e.1 Multimedia controller: Brooktree Corporation Bt878 Audio
> Capture (rev 02)
>         Subsystem: TERRATEC Electronic GmbH: Unknown device 1134
>         Flags: bus master, medium devsel, latency 64, IRQ 10
>         Memory at f80fe000 (32-bit, prefetchable) [size=4K]
> 
> 
> ====
> 
> ver_linux script output:
> If some fields are empty or look unusual you may have an old version.
> Compare to the current minimal requirements in Documentation/Changes.
> 
> Linux sigma-9 2.6.14.5 #5 SMP Fri Dec 30 19:50:12 CET 2005 i686 GNU/Linux
> 
> Gnu C                  3.3.5
> Gnu make               3.80
> binutils               2.15
> util-linux             2.12p
> mount                  2.12p
> module-init-tools      3.2-pre1
> e2fsprogs              1.37
> reiserfsprogs          line
> reiser4progs           line
> PPP                    2.4.3
> nfs-utils              1.0.6
> Linux C Library        2.3.2
> Dynamic linker (ldd)   2.3.2
> Procps                 3.2.1
> Net-tools              1.60
> Console-tools          0.2.3
> Sh-utils               5.2.1
> udev                   056
> Modules Loaded         nv
> 
> 
> ====
> 
> results of memtest86+ after 40 passes with all tests enabled: no errors
> 
> ====
> 
> cat /proc/interrupts:
>            CPU0       CPU1       CPU2       CPU3
>   0:     501324     492735     492754     492100    IO-APIC-edge  timer
>   1:       2555       2761       2861       2451    IO-APIC-edge  i8042
>   7:          0          0          0          0    IO-APIC-edge  parport0
>   8:    2369118    2386295    2363140    2356586    IO-APIC-edge  rtc
>   9:          0          0          0          0   IO-APIC-level  acpi
>  14:         21          0          0          0    IO-APIC-edge  ide0
>  15:         13          0          0          0    IO-APIC-edge  ide1
>  16:      28924          0          0          0   IO-APIC-level  eth0
>  17:      97407     105474     103650     103304   IO-APIC-level  bttv0
>  18:         48          4          0          7   IO-APIC-level
> uhci_hcd:usb4
>  19:      28880      54020      48433      23791   IO-APIC-level  ide2, ide3
>  20:          6          0          1          0   IO-APIC-level
> ehci_hcd:usb1
>  21:     398859     319390     317707     425780   IO-APIC-level
> uhci_hcd:usb2, nv
>  22:     200970     244113     220837     191613   IO-APIC-level
> uhci_hcd:usb3
>  23:          0          0          0          0   IO-APIC-level  Intel
> 82801DB-ICH4
>  24:       9460       9468      12491       8706   IO-APIC-level  EMU10K1
> NMI:          0          0          0          0
> LOC:    1978858    1979111    1979110    1979109
> ERR:          0
> MIS:          0
> 
> 
> ====
> 
> 2.6.14.5 vanilla kernel .config file see attachment
> 
> ====
> 
> I hope this gives more complete picture of the current running setup.
> 
> 

OK, I got some more data now: I recompiled the kernel with a lot of
debugging options turned on in the kernel hacking section.

Maybe because of those debugging options, the system doesn't freeze quickly
but rather displays the errors and continues to run -- because of
detect_soft_lockups and the NMI watchdog, I think.

Here is new data, this time it had to do with bttv:

Dec 31 16:11:35 localhost kernel: Unable to handle kernel paging request at
virtual address d162e000
Dec 31 16:11:35 localhost kernel: printing eip:
Dec 31 16:11:35 localhost kernel: c036037a
Dec 31 16:11:35 localhost kernel: *pgd = 46063
Dec 31 16:11:35 localhost kernel: *pmd = 46063
Dec 31 16:11:35 localhost kernel: *pte = 1162e000
Dec 31 16:11:35 localhost kernel: Oops: 0002 [#1]
Dec 31 16:11:35 localhost kernel: SMP DEBUG_PAGEALLOC
Dec 31 16:11:35 localhost kernel: Modules linked in: nv
Dec 31 16:11:35 localhost kernel: CPU:    2
Dec 31 16:11:35 localhost kernel: EIP:    0060:[bttv_risc_packed+394/432]
Not tainted VLI
Dec 31 16:11:35 localhost kernel: EFLAGS: 00210202   (2.6.14.5)
Dec 31 16:11:35 localhost kernel: eax: 14000008   ebx: d5ce9800   ecx:
d162e000   edx: 00000008
Dec 31 16:11:35 localhost kernel: esi: 00000008   edi: 000000ff   ebp:
cd06dde8   esp: cd06ddd0
Dec 31 16:11:35 localhost kernel: ds: 007b   es: 007b   ss: 0068
Dec 31 16:11:35 localhost kernel: Process xawtv (pid: 31110,
threadinfo=cd06c000 task=ca871aa0)
Dec 31 16:11:35 localhost kernel: Stack: df80bbf8 c3b25fbc 00000fd0 00000c00
000d8000 c3b25ef8 cd06de40 c0361b0b
Dec 31 16:11:35 localhost kernel: c06ccba0 c3b25fbc d5ce8000 00000c00
00000c00 00000c00 00000120 000001b1
Dec 31 16:11:35 localhost kernel: 00000008 c3b25f1c c06cd168 00000000
cd06de40 c037022a df80bbf8 c3b25f1c
Dec 31 16:11:35 localhost kernel: Call Trace:
Dec 31 16:11:35 localhost kernel: [show_stack+127/160]
Dec 31 16:11:35 localhost kernel: [show_registers+347/448]
Dec 31 16:11:35 localhost kernel: [die+256/384]
Dec 31 16:11:35 localhost kernel: [do_page_fault+1084/2083]
Dec 31 16:11:35 localhost kernel: [error_code+79/96]
Dec 31 16:11:35 localhost kernel: [bttv_buffer_risc+1371/1696]
Dec 31 16:11:35 localhost kernel: [bttv_prepare_buffer+268/464]
Dec 31 16:11:35 localhost kernel: [buffer_prepare+69/80]
Dec 31 16:11:35 localhost kernel: [videobuf_read_zerocopy+108/304]
Dec 31 16:11:35 localhost kernel: [videobuf_read_one+522/560]
Dec 31 16:11:35 localhost kernel: [bttv_read+272/352]
Dec 31 16:11:35 localhost kernel: [vfs_read+213/432]
Dec 31 16:11:35 localhost kernel: [sys_read+75/128]
Dec 31 16:11:35 localhost kernel: [syscall_call+7/11]
Dec 31 16:11:35 localhost kernel: Code: 00 0d 00 00 00 10 89 01 8b 43 08 83
c1 04 89 01 8b 43 0c 83 c1 04 83 c3 10 29 c2 8b 43 0c 39 c2 77 df 89 d0 89
d6 0d 00 00 00 14 <89> 01 8b 43 08 83 c1 04 89 01 83 c1 04 eb 8a 8d b4 26 00
00 00






^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2005-12-31 15:18                 ` Mark v Wolher
@ 2005-12-31 16:34                   ` Sami Farin
  2005-12-31 16:48                     ` Mark v Wolher
  0 siblings, 1 reply; 414+ messages in thread
From: Sami Farin @ 2005-12-31 16:34 UTC (permalink / raw)
  To: Linux Kernel

On Sat, Dec 31, 2005 at 04:18:38PM +0100, Mark v Wolher wrote:
...
> Here is new data, this time it had to do with bttv:
> 
> Dec 31 16:11:35 localhost kernel: Unable to handle kernel paging request at
> virtual address d162e000
> Dec 31 16:11:35 localhost kernel: printing eip:
> Dec 31 16:11:35 localhost kernel: c036037a
> Dec 31 16:11:35 localhost kernel: *pgd = 46063
> Dec 31 16:11:35 localhost kernel: *pmd = 46063
> Dec 31 16:11:35 localhost kernel: *pte = 1162e000
> Dec 31 16:11:35 localhost kernel: Oops: 0002 [#1]
> Dec 31 16:11:35 localhost kernel: SMP DEBUG_PAGEALLOC
> Dec 31 16:11:35 localhost kernel: Modules linked in: nv
> Dec 31 16:11:35 localhost kernel: CPU:    2
> Dec 31 16:11:35 localhost kernel: EIP:    0060:[bttv_risc_packed+394/432]

Can you check how many seconds it takes to get an Oops/crash when you start
pressing 'v' in xawtv (video capture on/off)?
For me, not very many.

This happens with every 2.6 kernel.  And my hardware is OK.

> Not tainted VLI
> Dec 31 16:11:35 localhost kernel: EFLAGS: 00210202   (2.6.14.5)
> Dec 31 16:11:35 localhost kernel: eax: 14000008   ebx: d5ce9800   ecx:
> d162e000   edx: 00000008
> Dec 31 16:11:35 localhost kernel: esi: 00000008   edi: 000000ff   ebp:
> cd06dde8   esp: cd06ddd0
> Dec 31 16:11:35 localhost kernel: ds: 007b   es: 007b   ss: 0068
> Dec 31 16:11:35 localhost kernel: Process xawtv (pid: 31110,
> threadinfo=cd06c000 task=ca871aa0)
> Dec 31 16:11:35 localhost kernel: Stack: df80bbf8 c3b25fbc 00000fd0 00000c00
> 000d8000 c3b25ef8 cd06de40 c0361b0b
> Dec 31 16:11:35 localhost kernel: c06ccba0 c3b25fbc d5ce8000 00000c00
> 00000c00 00000c00 00000120 000001b1
> Dec 31 16:11:35 localhost kernel: 00000008 c3b25f1c c06cd168 00000000
> cd06de40 c037022a df80bbf8 c3b25f1c
> Dec 31 16:11:35 localhost kernel: Call Trace:
> Dec 31 16:11:35 localhost kernel: [show_stack+127/160]
> Dec 31 16:11:35 localhost kernel: [show_registers+347/448]
> Dec 31 16:11:35 localhost kernel: [die+256/384]
> Dec 31 16:11:35 localhost kernel: [do_page_fault+1084/2083]
> Dec 31 16:11:35 localhost kernel: [error_code+79/96]
> Dec 31 16:11:35 localhost kernel: [bttv_buffer_risc+1371/1696]
> Dec 31 16:11:35 localhost kernel: [bttv_prepare_buffer+268/464]
> Dec 31 16:11:35 localhost kernel: [buffer_prepare+69/80]
> Dec 31 16:11:35 localhost kernel: [videobuf_read_zerocopy+108/304]
> Dec 31 16:11:35 localhost kernel: [videobuf_read_one+522/560]
> Dec 31 16:11:35 localhost kernel: [bttv_read+272/352]
> Dec 31 16:11:35 localhost kernel: [vfs_read+213/432]
> Dec 31 16:11:35 localhost kernel: [sys_read+75/128]
> Dec 31 16:11:35 localhost kernel: [syscall_call+7/11]
> Dec 31 16:11:35 localhost kernel: Code: 00 0d 00 00 00 10 89 01 8b 43 08 83
> c1 04 89 01 8b 43 0c 83 c1 04 83 c3 10 29 c2 8b 43 0c 39 c2 77 df 89 d0 89
> d6 0d 00 00 00 14 <89> 01 8b 43 08 83 c1 04 89 01 83 c1 04 eb 8a 8d b4 26 00
> 00 00

-- 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2005-12-31 16:34                   ` Sami Farin
@ 2005-12-31 16:48                     ` Mark v Wolher
  2006-01-01  2:26                       ` Mark v Wolher
  0 siblings, 1 reply; 414+ messages in thread
From: Mark v Wolher @ 2005-12-31 16:48 UTC (permalink / raw)
  To: Sami Farin; +Cc: Linux Kernel

Sami Farin wrote:
> On Sat, Dec 31, 2005 at 04:18:38PM +0100, Mark v Wolher wrote:
> ...
> 
>>Here is new data, this time it had to do with bttv:
>>
>>Dec 31 16:11:35 localhost kernel: Unable to handle kernel paging request at
>>virtual address d162e000
>>Dec 31 16:11:35 localhost kernel: printing eip:
>>Dec 31 16:11:35 localhost kernel: c036037a
>>Dec 31 16:11:35 localhost kernel: *pgd = 46063
>>Dec 31 16:11:35 localhost kernel: *pmd = 46063
>>Dec 31 16:11:35 localhost kernel: *pte = 1162e000
>>Dec 31 16:11:35 localhost kernel: Oops: 0002 [#1]
>>Dec 31 16:11:35 localhost kernel: SMP DEBUG_PAGEALLOC
>>Dec 31 16:11:35 localhost kernel: Modules linked in: nv
>>Dec 31 16:11:35 localhost kernel: CPU:    2
>>Dec 31 16:11:35 localhost kernel: EIP:    0060:[bttv_risc_packed+394/432]
> 
> 
> Can you try how many seconds it takes to get Oops/crash when you start
> pressing 'v' in xawtv (video capture on/off).
> For me, not very many.
> 
> This happens with every 2.6 kernel.  And my hardware is OK.
> 
> 
>>Not tainted VLI
>>Dec 31 16:11:35 localhost kernel: EFLAGS: 00210202   (2.6.14.5)
>>Dec 31 16:11:35 localhost kernel: eax: 14000008   ebx: d5ce9800   ecx:
>>d162e000   edx: 00000008
>>Dec 31 16:11:35 localhost kernel: esi: 00000008   edi: 000000ff   ebp:
>>cd06dde8   esp: cd06ddd0
>>Dec 31 16:11:35 localhost kernel: ds: 007b   es: 007b   ss: 0068
>>Dec 31 16:11:35 localhost kernel: Process xawtv (pid: 31110,
>>threadinfo=cd06c000 task=ca871aa0)
>>Dec 31 16:11:35 localhost kernel: Stack: df80bbf8 c3b25fbc 00000fd0 00000c00
>>000d8000 c3b25ef8 cd06de40 c0361b0b
>>Dec 31 16:11:35 localhost kernel: c06ccba0 c3b25fbc d5ce8000 00000c00
>>00000c00 00000c00 00000120 000001b1
>>Dec 31 16:11:35 localhost kernel: 00000008 c3b25f1c c06cd168 00000000
>>cd06de40 c037022a df80bbf8 c3b25f1c
>>Dec 31 16:11:35 localhost kernel: Call Trace:
>>Dec 31 16:11:35 localhost kernel: [show_stack+127/160]
>>Dec 31 16:11:35 localhost kernel: [show_registers+347/448]
>>Dec 31 16:11:35 localhost kernel: [die+256/384]
>>Dec 31 16:11:35 localhost kernel: [do_page_fault+1084/2083]
>>Dec 31 16:11:35 localhost kernel: [error_code+79/96]
>>Dec 31 16:11:35 localhost kernel: [bttv_buffer_risc+1371/1696]
>>Dec 31 16:11:35 localhost kernel: [bttv_prepare_buffer+268/464]
>>Dec 31 16:11:35 localhost kernel: [buffer_prepare+69/80]
>>Dec 31 16:11:35 localhost kernel: [videobuf_read_zerocopy+108/304]
>>Dec 31 16:11:35 localhost kernel: [videobuf_read_one+522/560]
>>Dec 31 16:11:35 localhost kernel: [bttv_read+272/352]
>>Dec 31 16:11:35 localhost kernel: [vfs_read+213/432]
>>Dec 31 16:11:35 localhost kernel: [sys_read+75/128]
>>Dec 31 16:11:35 localhost kernel: [syscall_call+7/11]
>>Dec 31 16:11:35 localhost kernel: Code: 00 0d 00 00 00 10 89 01 8b 43 08 83
>>c1 04 89 01 8b 43 0c 83 c1 04 83 c3 10 29 c2 8b 43 0c 39 c2 77 df 89 d0 89
>>d6 0d 00 00 00 14 <89> 01 8b 43 08 83 c1 04 89 01 83 c1 04 eb 8a 8d b4 26 00
>>00 00
> 
> 

Hi Sami,

That also caused a crash. I kept pressing the 'v' key and within 15
seconds it crashed; then I saw the crash info appear in the log, and when
I clicked on Mozilla it crashed too, but without crash info, and the
system froze totally.

Below the crash info:

Dec 31 17:38:32 localhost kernel: Unable to handle kernel paging request
at virtual address c8111000
Dec 31 17:38:32 localhost kernel:  printing eip:
Dec 31 17:38:32 localhost kernel: c036037a
Dec 31 17:38:32 localhost kernel: *pgd = 21063
Dec 31 17:38:32 localhost kernel: *pmd = 21063
Dec 31 17:38:32 localhost kernel: *pte = 8111000
Dec 31 17:38:32 localhost kernel: Oops: 0002 [#4]
Dec 31 17:38:32 localhost kernel: SMP DEBUG_PAGEALLOC
Dec 31 17:38:32 localhost kernel: Modules linked in:
Dec 31 17:38:32 localhost kernel: CPU:    3
Dec 31 17:38:32 localhost kernel: EIP:
0060:[bttv_risc_packed+394/432]    Not tainted VLI
Dec 31 17:38:32 localhost kernel: EFLAGS: 00210202   (2.6.14.5)
Dec 31 17:38:32 localhost kernel: eax: 14000008   ebx: d3a09800   ecx:
c8111000   edx: 00000008
Dec 31 17:38:32 localhost kernel: esi: 00000008   edi: 000000ff   ebp:
d3c0be38   esp: d3c0be20
Dec 31 17:38:32 localhost kernel: ds: 007b   es: 007b   ss: 0068
Dec 31 17:38:32 localhost kernel: Process xawtv (pid: 1703,
threadinfo=d3c0a000 task=cfba2aa0)
Dec 31 17:38:32 localhost kernel: Stack: df80bbf8 c6a5cfbc 00000fd0
00000c00 000d8000 c6a5cef8 d3c0be90 c0361b0b
Dec 31 17:38:32 localhost kernel:        c06ccba0 c6a5cfbc d3a08000
00000c00 00000c00 00000c00 00000120 000001b1
Dec 31 17:38:32 localhost kernel:        00000008 c6a5cf1c c06cd168
00000000 d3c0be90 c037022a df80bbf8 c6a5cf1c
Dec 31 17:38:32 localhost kernel: Call Trace:
Dec 31 17:38:32 localhost kernel:  [show_stack+127/160]
Dec 31 17:38:32 localhost kernel:  [show_registers+347/448]
Dec 31 17:38:32 localhost kernel:  [die+256/384]
Dec 31 17:38:32 localhost kernel:  [do_page_fault+1084/2083]
Dec 31 17:38:32 localhost kernel:  [error_code+79/96]
Dec 31 17:38:32 localhost kernel:  [bttv_buffer_risc+1371/1696]
Dec 31 17:38:32 localhost kernel:  [bttv_prepare_buffer+268/464]
Dec 31 17:38:32 localhost kernel:  [buffer_prepare+69/80]
Dec 31 17:38:32 localhost kernel:  [videobuf_read_zerocopy+108/304]
Dec 31 17:38:32 localhost kernel:  [videobuf_read_one+522/560]
Dec 31 17:38:32 localhost kernel:  [bttv_read+272/352]
Dec 31 17:38:32 localhost kernel:  [vfs_read+213/432]
Dec 31 17:38:32 localhost kernel:  [sys_read+75/128]
Dec 31 17:38:32 localhost kernel:  [syscall_call+7/11]
Dec 31 17:38:32 localhost kernel: Code: 00 0d 00 00 00 10 89 01 8b 43 08
83 c1 04 89 01 8b 43 0c 83 c1 04 83 c3 10 29 c2 8b 43 0c 39 c2 77 df 89
d0 89 d



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2005-12-31 16:48                     ` Mark v Wolher
@ 2006-01-01  2:26                       ` Mark v Wolher
  2006-01-01 13:06                         ` Mark v Wolher
  0 siblings, 1 reply; 414+ messages in thread
From: Mark v Wolher @ 2006-01-01  2:26 UTC (permalink / raw)
  To: Jiri Slaby; +Cc: Sami Farin, Linux Kernel

Jiri Slaby wrote:
>>Hi Sami,
>>
>>That caused also a crash, i kept pressing the v key and within 15
>>seconds it crashed, then i saw the crash-info appear in the log and when
>>i clicked on mozilla then it crashed too but without crahs info and
>>system froze totally.
>>
>>Below the crash info:
>>
>>Dec 31 17:38:32 localhost kernel: Unable to handle kernel paging request
>>at virtual address c8111000
>>Dec 31 17:38:32 localhost kernel:  printing eip:
>>Dec 31 17:38:32 localhost kernel: c036037a
>>Dec 31 17:38:32 localhost kernel: *pgd = 21063
>>Dec 31 17:38:32 localhost kernel: *pmd = 21063
>>Dec 31 17:38:32 localhost kernel: *pte = 8111000
>>Dec 31 17:38:32 localhost kernel: Oops: 0002 [#4]
> 
> [snip]
> Could you try the attached patch?
> 
> --
> diff --git a/drivers/media/video/bttv-risc.c b/drivers/media/video/bttv-risc.c
> --- a/drivers/media/video/bttv-risc.c
> +++ b/drivers/media/video/bttv-risc.c
> @@ -53,7 +53,7 @@ bttv_risc_packed(struct bttv *btv, struc
>  	/* estimate risc mem: worst case is one write per page border +
>  	   one write per scan line + sync + jump (all 2 dwords) */
>  	instructions  = (bpl * lines) / PAGE_SIZE + lines;
> -	instructions += 2;
> +	instructions += 4;
>  	if ((rc = btcx_riscmem_alloc(btv->c.pci,risc,instructions*8)) < 0)
>  		return rc;
>  
> 
> 


Hi Jiri,

Tried it, but it does indeed seem to crash faster, and this time it didn't
leave traces in the log.

I appreciate your help either way!

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2006-01-01  2:26                       ` Mark v Wolher
@ 2006-01-01 13:06                         ` Mark v Wolher
  2006-01-01 14:47                           ` Mark v Wolher
  0 siblings, 1 reply; 414+ messages in thread
From: Mark v Wolher @ 2006-01-01 13:06 UTC (permalink / raw)
  To: Mark v Wolher
  Cc: Jiri Slaby, Sami Farin, Linux Kernel, arjan, jesper.juhl,
	s0348365, rlrevell

Mark v Wolher wrote:
> Jiri Slaby wrote:
> 
>>>Hi Sami,
>>>
>>>That caused also a crash, i kept pressing the v key and within 15
>>>seconds it crashed, then i saw the crash-info appear in the log and when
>>>i clicked on mozilla then it crashed too but without crahs info and
>>>system froze totally.
>>>
>>>Below the crash info:
>>>
>>>Dec 31 17:38:32 localhost kernel: Unable to handle kernel paging request
>>>at virtual address c8111000
>>>Dec 31 17:38:32 localhost kernel:  printing eip:
>>>Dec 31 17:38:32 localhost kernel: c036037a
>>>Dec 31 17:38:32 localhost kernel: *pgd = 21063
>>>Dec 31 17:38:32 localhost kernel: *pmd = 21063
>>>Dec 31 17:38:32 localhost kernel: *pte = 8111000
>>>Dec 31 17:38:32 localhost kernel: Oops: 0002 [#4]
>>
>>[snip]
>>Could you try the attached patch?
>>
>>--
>>diff --git a/drivers/media/video/bttv-risc.c b/drivers/media/video/bttv-risc.c
>>--- a/drivers/media/video/bttv-risc.c
>>+++ b/drivers/media/video/bttv-risc.c
>>@@ -53,7 +53,7 @@ bttv_risc_packed(struct bttv *btv, struc
>> 	/* estimate risc mem: worst case is one write per page border +
>> 	   one write per scan line + sync + jump (all 2 dwords) */
>> 	instructions  = (bpl * lines) / PAGE_SIZE + lines;
>>-	instructions += 2;
>>+	instructions += 4;
>> 	if ((rc = btcx_riscmem_alloc(btv->c.pci,risc,instructions*8)) < 0)
>> 		return rc;
>> 
>>
>>
> 
> 
> 
> Hi Jiri,
> 
> Tried it but it seems to crash indeed faster, and this time it didn't
> leave traces in the log.
> 
> Appreciate your help eitherway !
> -


Hiya all,

First of all happy new year ! :-)


I might have discovered something interesting which might be responsible
for all those lockups/freezes/crashes!

Right now, I'm putting a huge load on the system: disk I/O, heavy
swapping, a virus scan, number crunching with ssh-keygen moduli, etc.

5 hours have passed with this load and not a single crash/freeze/lockup has
happened! Normally, with all this load, sooner or later something would have
happened.

What did I do?

I disabled bttv support in the kernel, so no TV for me at the moment.
I'm planning to let this run for at least another 5 hours under heavy
load and see if still nothing happens...

Keeping you informed.

Mark







^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2006-01-01 13:06                         ` Mark v Wolher
@ 2006-01-01 14:47                           ` Mark v Wolher
  2006-01-01 18:38                             ` Jiri Slaby
  0 siblings, 1 reply; 414+ messages in thread
From: Mark v Wolher @ 2006-01-01 14:47 UTC (permalink / raw)
  To: Mark v Wolher
  Cc: Jiri Slaby, Sami Farin, Linux Kernel, arjan, jesper.juhl,
	s0348365, rlrevell

Mark v Wolher wrote:
> Mark v Wolher wrote:
> 
>>Jiri Slaby wrote:
>>
>>
>>>>Hi Sami,
>>>>
>>>>That caused also a crash, i kept pressing the v key and within 15
>>>>seconds it crashed, then i saw the crash-info appear in the log and when
>>>>i clicked on mozilla then it crashed too but without crahs info and
>>>>system froze totally.
>>>>
>>>>Below the crash info:
>>>>
>>>>Dec 31 17:38:32 localhost kernel: Unable to handle kernel paging request
>>>>at virtual address c8111000
>>>>Dec 31 17:38:32 localhost kernel:  printing eip:
>>>>Dec 31 17:38:32 localhost kernel: c036037a
>>>>Dec 31 17:38:32 localhost kernel: *pgd = 21063
>>>>Dec 31 17:38:32 localhost kernel: *pmd = 21063
>>>>Dec 31 17:38:32 localhost kernel: *pte = 8111000
>>>>Dec 31 17:38:32 localhost kernel: Oops: 0002 [#4]
>>>
>>>[snip]
>>>Could you try the attached patch?
>>>
>>>--
>>>diff --git a/drivers/media/video/bttv-risc.c b/drivers/media/video/bttv-risc.c
>>>--- a/drivers/media/video/bttv-risc.c
>>>+++ b/drivers/media/video/bttv-risc.c
>>>@@ -53,7 +53,7 @@ bttv_risc_packed(struct bttv *btv, struc
>>>	/* estimate risc mem: worst case is one write per page border +
>>>	   one write per scan line + sync + jump (all 2 dwords) */
>>>	instructions  = (bpl * lines) / PAGE_SIZE + lines;
>>>-	instructions += 2;
>>>+	instructions += 4;
>>>	if ((rc = btcx_riscmem_alloc(btv->c.pci,risc,instructions*8)) < 0)
>>>		return rc;
>>>
>>>
>>>
>>
>>
>>
>>Hi Jiri,
>>
>>Tried it but it seems to crash indeed faster, and this time it didn't
>>leave traces in the log.
>>
>>Appreciate your help eitherway !
>>-
> 
> 
> 
> Hiya all,
> 
> First of all happy new year ! :-)
> 
> 
> I might have discovered something interesting which might be responsible
> for all those lockups/freezes/crashes !
> 
> Right now, i'm putting a huge load on the system, disk i/o, swapping
> high, virusscan, number crushing with ssh-keygen moduli etc ...
> 
> 5 hours passed with this load and no single crash/freeze/lockup happened
> ! Normally with all this load sooner or later something would have
> happened.
> 
> What did i do ?
> 
> I disabled bttv support in the kernel, so no tv for me at this moment.
> I'm planning to let this run for at least another 5 hours with heavy
> load and see if still nothing happens...
> 
> Keeping you informed.
> 
> Mark
> 
> 
> 
> 

Still no crashes or other irregularities! I will let it run for a few
more hours. This test is being done with the binary nvidia module loaded
and bttv disabled. The next test will use nv for X instead of the binary
module, with bttv enabled; if crashes and such start to occur, then it's
very likely that the problem lies in the bttv code.












^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
@ 2006-01-01 18:38                             ` Jiri Slaby
  2006-01-01 18:49                               ` Mark v Wolher
  0 siblings, 1 reply; 414+ messages in thread
From: Jiri Slaby @ 2006-01-01 18:38 UTC (permalink / raw)
  To: Mark v Wolher
  Cc: Jiri Slaby, Sami Farin, Linux Kernel, arjan, jesper.juhl,
	s0348365, rlrevell, mchehab, video4linux-list

Mark v Wolher wrote:
>> Still no crashes or irregular things happened ! Will let it go for a few
>> more hours. This test is being done with the binary nvidia module loaded
>> and bttv disabled. The next test will be nv for X instead of the binary
>> module with bttv enabled, if crashes and such start to occur then it's
>> very likely that the problem sits in the bttv code.
>
>
>Okay, here are the test results:
>
>
>- heavy load + nvidia (binary module) + bttv with grabdisplay = crash
>- heavy load + nv (not tainted kernel) + bttv with grabdisplay = crash
>
>- heavy load + nvidia (binary module) + bttv with overlay = OK
>- heavy load + nv (not tainted kernel) + bttv with overlay = OK
>
>Adding vmware on top of it will cause the system sooner to freeze/crash
>(using grabdisplay)
>
>So what you think guys ?
Hi,
we still think that there is a problem in bttv_risc_packed in computing
estimated size. My patch was bad, I see it now, but still don't understand, how
it is computed and how it should be:
        instructions  = (bpl * lines) / PAGE_SIZE + lines;
        instructions += 2;
and here it crashes (the first line, the (*rp)) -- actually after while loop.
	*(rp++)=cpu_to_le32(BT848_RISC_WRITE|BT848_RISC_EOL|todo);
	*(rp++)=cpu_to_le32(sg_dma_address(sg));
So, Mauro (or somebody from list), have you any idea, what could be wrong?  

thanks,
-- 
Jiri Slaby         www.fi.muni.cz/~xslaby
\_.-^-._   jirislaby@gmail.com   _.-^-._/
B67499670407CE62ACC8 22A032CC55C339D47A7E

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re: system keeps freezing once every 24 hours / random apps crashing
  2006-01-01 18:38                             ` Jiri Slaby
@ 2006-01-01 18:49                               ` Mark v Wolher
  2006-01-01 19:12                                 ` Jiri Slaby
  0 siblings, 1 reply; 414+ messages in thread
From: Mark v Wolher @ 2006-01-01 18:49 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: Sami Farin, Linux Kernel, arjan, jesper.juhl, s0348365, rlrevell,
	mchehab, video4linux-list

Jiri Slaby wrote:
> Mark v Wolher wrote:
> 
>>>Still no crashes or irregular things happened ! Will let it go for a few
>>>more hours. This test is being done with the binary nvidia module loaded
>>>and bttv disabled. The next test will be nv for X instead of the binary
>>>module with bttv enabled, if crashes and such start to occur then it's
>>>very likely that the problem sits in the bttv code.
>>
>>
>>Okay, here are the test results:
>>
>>
>>- heavy load + nvidia (binary module) + bttv with grabdisplay = crash
>>- heavy load + nv (not tainted kernel) + bttv with grabdisplay = crash
>>
>>- heavy load + nvidia (binary module) + bttv with overlay = OK
>>- heavy load + nv (not tainted kernel) + bttv with overlay = OK
>>
>>Adding vmware on top of it will cause the system sooner to freeze/crash
>>(using grabdisplay)
>>
>>So what you think guys ?
> 
> Hi,
> we still think that there is a problem in bttv_risc_packed in computing
> estimated size. My patch was bad, I see it now, but still don't understand, how
> it is computed and how it should be:
>         instructions  = (bpl * lines) / PAGE_SIZE + lines;
>         instructions += 2;
> and here it crashes (the first line, the (*rp)) -- actually after while loop.
> 	*(rp++)=cpu_to_le32(BT848_RISC_WRITE|BT848_RISC_EOL|todo);
> 	*(rp++)=cpu_to_le32(sg_dma_address(sg));
> So, Mauro (or somebody from list), have you any idea, what could be wrong?  
> 
> thanks,

Well, i'd like to help in any way i can, but i'm not really a programmer :(

It seems also that xawtv on nvidia cards (using either nvidia binary
module or nv), at least, somehow doesn't know how to use the hardware to
scale the image in overlay mode. So if you use tvtime which i just
installed and running it is now fullscreen in overlay mode using the
card hardware (quite technical stuff so i'm not sure what else to say).

But back to grabdisplay, this causes the freezes/crashes, especially
under heavy load it'll happen very quick. It seems maybe that other
hardware combinations maybe do not suffer quickly from these things or
ppl with other videocards maybe (?)

It seems to be a combination of factors which might lead to these
issue's, maybe some bug in the bttv code, combined with nvidia cards for
example and xawtv using grabdisplay causes the freezes/crashes.

I'm now currently using tvtime instead of xawtv, overlay mode (i hope),
fullscreen which is basically why i had to use grabdisplay with xawtv in
the first place. I'm putting now alot of load on the system and hope
this is the solution (for now) ...

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2006-01-01 19:12                                 ` Jiri Slaby
  2006-01-01 19:37                                   ` Mark v Wolher
  0 siblings, 1 reply; 414+ messages in thread
From: Jiri Slaby @ 2006-01-01 19:12 UTC (permalink / raw)
  To: Mauro Carvalho Chehab
  Cc: Jiri Slaby, Sami Farin, jesper.juhl, s0348365, Linux Kernel,
	rlrevell, arjan, Linux and Kernel Video

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 689 bytes --]

Mauro Carvalho Chehab wrote:
>Em Dom, 2006-01-01 às 19:49 +0100, Mark v Wolher escreveu:
>> So, Mauro (or somebody from list), have you any idea, what could be
>> wrong?  
>	hmm.. have you sent the patch to the list?
Yes, it was only a (bad) try to solve the problem. The point is, that there is
some weird problem in the estimating, or something (number of loops?).

The oops and the patch are on lkml site in this thread, I would give you a
link, but lkml seems to be down for me.
[the patch helps in some way, but didn't solve the problem]

all the best,
-- 
Jiri Slaby         www.fi.muni.cz/~xslaby
\_.-^-._   jirislaby@gmail.com   _.-^-._/
B67499670407CE62ACC8 22A032CC55C339D47A7E

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2006-01-01 19:12                                 ` Jiri Slaby
@ 2006-01-01 19:37                                   ` Mark v Wolher
  0 siblings, 0 replies; 414+ messages in thread
From: Mark v Wolher @ 2006-01-01 19:37 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: Mauro Carvalho Chehab, Sami Farin, jesper.juhl, s0348365,
	Linux Kernel, rlrevell, arjan, Linux and Kernel Video

Jiri Slaby wrote:
> Mauro Carvalho Chehab wrote:
> 
>>Em Dom, 2006-01-01 às 19:49 +0100, Mark v Wolher escreveu:
>>
>>>So, Mauro (or somebody from list), have you any idea, what could be
>>>wrong?  
>>
>>	hmm.. have you sent the patch to the list?
> 
> Yes, it was only a (bad) try to solve the problem. The point is, that there is
> some weird problem in the estimating, or something (number of loops?).
> 
> The oops and the patch are on lkml site in this thread, I would give you a
> link, but lkml seems to be down for me.
> [the patch helps in some way, but didn't solve the problem]
> 
> all the best,

But i wonder, can you think of something why grabdisplay causes crashes
and overlay doesn't ? This needed patch, would it solve this problem too ?

Thanks


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2005-12-02 16:03 Yu, Luming
  2005-12-02 16:46 ` Dmitry Torokhov
  2005-12-02 20:11 ` Re: Miloslav Trmac
  0 siblings, 2 replies; 414+ messages in thread
From: Yu, Luming @ 2005-12-02 16:03 UTC (permalink / raw)
  To: Dmitry Torokhov, Linus Torvalds
  Cc: Vojtech Pavlik, Andrew Morton, Linux Kernel Mailing List

>Subject: [git pull 02/14] Add Wistron driver
>
>Input: add Wistron driver
>
>A driver for laptop buttons using an x86 BIOS interface that is
>apparently used on quite a few laptops and seems to be originating
>from Wistron.
>
>This driver currently "knows" only about Fujitsu-Siemens Amilo 
>Pro V2000
>(i.e. it can detect the laptop using DMI and it contains the
>keycode->key meaning mapping for this laptop) and Xeron SonicPro X 155G
>(probably can't be reliably autodetected, requires a module parameter),
>adding other laptops should be easy.
>
>In addition to reporting button presses to the input layer the driver
>also allows enabling/disabling the embedded wireless NIC (using the
>"Wifi" button); this is done using the same BIOS interface, so it seems
>only logical to keep the implementation together.  Any flexibility
>possibly gained by allowing users to remap the function of the "Wifi"
>button is IMHO not worth it when weighted against the necessity to run
>an user-space daemon to convert button presses to wifi state changes.
>
>Signed-off-by: Miloslav Trmac <mitr@volny.cz>
>Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
>---
>

I just tested module wistron_btn on  one Acer Aspire laptop after 
adding one dmi entry.  The wistron_btn found BIOS interfaces.
One visible error is the bluetooth light won't turn on upon 
stroking bluetooth button.
Without wistron_btn module, the bluetooth light works.
 (with acpi enabled, I didn't try acpi disabled)

wistron_btn polls a cmos address to detect hotkey event.  It 
is not necessary, because there do have ACPI interrupt triggered upon 
hotkeys.  

So, my suggestion is to disable this module when ACPI enabled.
We need to implement hotkey support from ACPI subsystem for my
Acer aspire laptop.

--- linux-2.6.15-rc3/drivers/input/misc/Kconfig.0	2005-12-02
10:08:33.000000000 -0700
+++ linux-2.6.15-rc3/drivers/input/misc/Kconfig	2005-12-02
10:08:58.000000000 -0700
@@ -42,7 +42,7 @@
 
 config INPUT_WISTRON_BTNS
 	tristate "x86 Wistron laptop button interface"
-	depends on X86 && !X86_64
+	depends on X86 && !X86_64 && !ACPI
 	help
 	  Say Y here for support of Winstron laptop button interface,
used on
 	  laptops of various brands, including Acer and Fujitsu-Siemens.


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2005-12-02 16:03 Yu, Luming
@ 2005-12-02 16:46 ` Dmitry Torokhov
  2005-12-02 20:11 ` Re: Miloslav Trmac
  1 sibling, 0 replies; 414+ messages in thread
From: Dmitry Torokhov @ 2005-12-02 16:46 UTC (permalink / raw)
  To: Yu, Luming
  Cc: Linus Torvalds, Vojtech Pavlik, Andrew Morton, Linux Kernel Mailing List

On 12/2/05, Yu, Luming <luming.yu@intel.com> wrote:
> I just tested module wistron_btn on  one Acer Aspire laptop after
> adding one dmi entry.  The wistron_btn found BIOS interfaces.
> One visible error is the bluetooth light won't turn on upon
> stroking bluetooth button.
> Without wistron_btn module, the bluetooth light works.
>  with acpi enabled, I didn't try acpi disabled)
>

Did you add the new keymap table with KE_BLUETOOTH to go with that DMI entry?

> wistron_btn polls a cmos address to detect hotkey event.  It
> is not necessary, because there do have ACPI interrupt triggered upon
> hotkeys.
>

Unfortunately ACPI does not route these events through the input layer
so aside from special buttons (like sleep) it is not very useful.

> So, my suggestion is to disable this module when ACPI enabled.
> We need to implement hotkey support from ACPI subsystem for my
> Acer aspire laptop.

I do not agree.

--
Dmitry

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2005-12-02 16:03 Yu, Luming
  2005-12-02 16:46 ` Dmitry Torokhov
@ 2005-12-02 20:11 ` Miloslav Trmac
  1 sibling, 0 replies; 414+ messages in thread
From: Miloslav Trmac @ 2005-12-02 20:11 UTC (permalink / raw)
  To: Yu, Luming
  Cc: Dmitry Torokhov, Linus Torvalds, Vojtech Pavlik, Andrew Morton,
	Linux Kernel Mailing List

Yu, Luming wrote:
> I just tested module wistron_btn on  one Acer Aspire laptop after 
> adding one dmi entry.  The wistron_btn found BIOS interfaces.
> One visible error is the bluetooth light won't turn on upon 
> stroking bluetooth button.
> Without wistron_btn module, the bluetooth light works.
>  with acpi enabled, I didn't try acpi disabled)
> 
> wistron_btn polls a cmos address to detect hotkey event.  It 
> is not necessary, because there do have ACPI interrupt triggered upon 
> hotkeys.
There are many different laptops using similar interfaces.
It is a mess :(

If your laptop provides the hotkey events via ACPI, simply don't use
wistron_btns.

> So, my suggestion is to disable this module when ACPI enabled.
I have a laptop that needs this module (hotkeys are not supported via
ACPI), but supports ACPI.
	Mirek

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2005-11-09 16:13 Nestor Velazquez
  2005-11-09 16:17 ` Alejandro Bonilla
  0 siblings, 1 reply; 414+ messages in thread
From: Nestor Velazquez @ 2005-11-09 16:13 UTC (permalink / raw)
  To: linux-kernel

Hola necesito informacion sobre linux gracias


Velazquez Nestor

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2005-11-09 16:13 Nestor Velazquez
@ 2005-11-09 16:17 ` Alejandro Bonilla
  0 siblings, 0 replies; 414+ messages in thread
From: Alejandro Bonilla @ 2005-11-09 16:17 UTC (permalink / raw)
  To: Nestor Velazquez, linux-kernel

On Wed, 09 Nov 2005 13:13:34 -0300, Nestor Velazquez wrote
> Hola necesito informacion sobre linux gracias

Nestor,

Esta es una mailing list en ingles. Porfavor escriba en ingles,
adicionalmente, esta lista es solo para desarrollo, porfavor si desea
informacion, busque en libros de Linux y en paginas de internet.

(Told him to speak english and to read books of Linux)

.Alejandro
 
> Velazquez Nestor



^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2005-09-21 13:20 Robert.Boermans
  2005-09-21 13:27 ` Denis Vlasenko
  0 siblings, 1 reply; 414+ messages in thread
From: Robert.Boermans @ 2005-09-21 13:20 UTC (permalink / raw)
  To: linux-kernel

Hello, 

I noticed that the bogomips results for the two cores on my machine are 
consistently not the same, the second one is always reported slightly 
faster, it's a small difference and I saw the same in a posted dmesg from 
somebody else on the list. Which made me wonder: 

Shouldn't they be the same, as the cores run from the same clock? 
Could it be a bug in the bogomips calculation which could make some of the 
short time-out stuff fail?
Could this be related to the tsc synchronisation stuff mentioned in the 
lost ticks - TSC timer thread? 

Regards, 

Robert Boermans. 
PS nothing actually fails on my system because of this, I just thought it 
was odd. Although I do sometimes get the clock runs at double speed 
problem but only after at least one day uptime, but I reboot most days for 
games anyway. 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2005-09-21 13:20 Robert.Boermans
@ 2005-09-21 13:27 ` Denis Vlasenko
  0 siblings, 0 replies; 414+ messages in thread
From: Denis Vlasenko @ 2005-09-21 13:27 UTC (permalink / raw)
  To: Robert.Boermans; +Cc: linux-kernel

On Wednesday 21 September 2005 16:20, Robert.Boermans@uk.telex.com wrote:
> Hello, 
> 
> I noticed that the bogomips results for the two cores on my machine are 
> consistently not the same, the second one is always reported slightly 
> faster, it's a small difference and I saw the same in a posted dmesg from 
> somebody else on the list. Which made me wonder: 

I guess it's a cache warming effect. Please show the numbers.
--
vda

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2005-06-28  9:18 d binderman
  2005-06-28 11:03 ` Andrew Morton
  0 siblings, 1 reply; 414+ messages in thread
From: d binderman @ 2005-06-28  9:18 UTC (permalink / raw)
  To: linux-kernel

Hello there,

I just tried to compile the Linux Kernel version 2.6.11.12
with the gcc 4.0 compiler. The compiler said

drivers/net/depca.c:1829: warning: operation on 'i' may be undefined

The source code is

for (i = entry; i != end; i = (++i) & lp->txRingMask) {

I agree with the compiler. Better code is

for (i = entry; i != end; i = (i + 1) & lp->txRingMask) {

Regards

David Binderman

_________________________________________________________________
Be the first to hear what's new at MSN - sign up to our free newsletters! 
http://www.msn.co.uk/newsletters

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2005-06-28  9:18 d binderman
@ 2005-06-28 11:03 ` Andrew Morton
  0 siblings, 0 replies; 414+ messages in thread
From: Andrew Morton @ 2005-06-28 11:03 UTC (permalink / raw)
  To: d binderman; +Cc: linux-kernel

"d binderman" <dcb314@hotmail.com> wrote:
>
> 
> Hello there,
> 
> I just tried to compile the Linux Kernel version 2.6.11.12
> with the gcc 4.0 compiler. The compiler said
> 
> drivers/net/depca.c:1829: warning: operation on 'i' may be undefined
> 
> The source code is
> 
> for (i = entry; i != end; i = (++i) & lp->txRingMask) {
> 
> I agree with the compiler. Better code is
> 
> for (i = entry; i != end; i = (i + 1) & lp->txRingMask) {
> 

Someone already fixed it.

		/* set up the buffer descriptors */
		len = (skb->len < ETH_ZLEN) ? ETH_ZLEN : skb->len;
		for (i = entry; i != end; i = (i+1) & lp->txRingMask) {


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2005-06-28  9:15 d binderman
  2005-06-28 11:00 ` Andrew Morton
  0 siblings, 1 reply; 414+ messages in thread
From: d binderman @ 2005-06-28  9:15 UTC (permalink / raw)
  To: linux-kernel

Hello there,

I just tried to compile the Linux Kernel version 2.6.11.12
with the most excellent Intel C compiler. It said

drivers/usb/host/ohci-hub.c(424): warning #175: subscript out of range
        desc->bitmap [2] = desc->bitmap [3] = 0xff;
                           ^

This is clearly broken code, since there are only up to 16 ports.

Suggest avoid trying to initialise bitmap[ 3].

Regards

David Binderman

_________________________________________________________________
It's fast, it's easy and it's free. Get MSN Messenger 7.0 today! 
http://messenger.msn.co.uk

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2005-06-28  9:15 d binderman
@ 2005-06-28 11:00 ` Andrew Morton
  0 siblings, 0 replies; 414+ messages in thread
From: Andrew Morton @ 2005-06-28 11:00 UTC (permalink / raw)
  To: d binderman; +Cc: linux-kernel

"d binderman" <dcb314@hotmail.com> wrote:
>
> I just tried to compile the Linux Kernel version 2.6.11.12
>  with the most excellent Intel C compiler. It said
> 
>  drivers/usb/host/ohci-hub.c(424): warning #175: subscript out of range
>          desc->bitmap [2] = desc->bitmap [3] = 0xff;
>                             ^
> 
>  This is clearly broken code, since there are only up to 16 ports.
> 
>  Suggest avoid trying to initialise bitmap[ 3].

This is queued in -mm:


From: "KAMBAROV, ZAUR" <kambarov@berkeley.edu>

The length of the array desc->bitmap is 3, and not 4:

Definitions involved:

In drivers/usb/core/hcd.h

464  	#define bitmap 	DeviceRemovable

In drivers/usb/host/ohci-hub.c

395  		struct usb_hub_descriptor	*desc

In drivers/usb/core/hub.h

130  	struct usb_hub_descriptor {
131  		__u8  bDescLength;
132  		__u8  bDescriptorType;
133  		__u8  bNbrPorts;
134  		__u16 wHubCharacteristics;
135  		__u8  bPwrOn2PwrGood;
136  		__u8  bHubContrCurrent;
137  		    	/* add 1 bit for hub status change; round to bytes */
138  		__u8  DeviceRemovable[(USB_MAXCHILDREN + 1 + 7) / 8];
139  		__u8  PortPwrCtrlMask[(USB_MAXCHILDREN + 1 + 7) / 8];
140  	} __attribute__ ((packed));

In include/linux/usb.h

306  	#define USB_MAXCHILDREN		(16)

This defect was found automatically by Coverity Prevent, a static analysis
tool.

(akpm: this code should be shot.  Field `bitmap' doesn't exist in struct
usb_hub_descriptor.  And this .c file is #included in
drivers/usb/host/ohci-hcd.c, and someone somewhere #defines `bitmap' to
`DeviceRemovable'.

>From a maintainability POV it would be better to memset the whole array
beforehand - I changed the patch to do that)

Signed-off-by: Zaur Kambarov <zkambarov@coverity.com>
Cc: <linux-usb-devel@lists.sourceforge.net>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 drivers/usb/host/ohci-hub.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletion(-)

diff -puN drivers/usb/host/ohci-hub.c~coverity-desc-bitmap-overrun-fix drivers/usb/host/ohci-hub.c
--- 25/drivers/usb/host/ohci-hub.c~coverity-desc-bitmap-overrun-fix	2005-06-24 22:11:00.000000000 -0700
+++ 25-akpm/drivers/usb/host/ohci-hub.c	2005-06-24 22:19:48.000000000 -0700
@@ -419,10 +419,11 @@ ohci_hub_descriptor (
 
 	/* two bitmaps:  ports removable, and usb 1.0 legacy PortPwrCtrlMask */
 	rh = roothub_b (ohci);
+	memset(desc->bitmap, 0xff, sizeof(desc->bitmap));
 	desc->bitmap [0] = rh & RH_B_DR;
 	if (ports > 7) {
 		desc->bitmap [1] = (rh & RH_B_DR) >> 8;
-		desc->bitmap [2] = desc->bitmap [3] = 0xff;
+		desc->bitmap [2] = 0xff;
 	} else
 		desc->bitmap [1] = 0xff;
 }
_


^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <360D47F92A8ACCH7@vger.kernel.org>]

* Re:
       [not found] <360D47F92A8ACCH7@vger.kernel.org>
@ 2005-05-30  2:49 ` radej
  0 siblings, 0 replies; 414+ messages in thread
From: radej @ 2005-05-30  2:49 UTC (permalink / raw)
  To: Linux-kernel

Hey man, here's that site I was telling you about. They are offering huge discounts now on Penis Enhancement Patches

http://www.poqz.com/md/

A top team of British scientists and medical doctors have worked to develop the state-of-the-art Penis Enlargement Patch delivery system which automatically increases penis size up to 3-4 full inches. The patches are the easiest and most effective way to increase your penis size. You won't have to take pills, get under the knife to perform expensive and very painful surgery, use any pumps or other devices. No one will ever find out that you are using our product. Just apply one patch on your body and wear it for 3 days and you will start noticing dramatic results.

Millions of men are taking advantage of this revolutionary new product - Don't be left behind!

As an added incentive, they are offering huge discount specials right now, check out the site to see for yourself !

http://www.poqz.com/md/

u n s u b s c r i b e  
http://www.yzewa.com/un.php 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2005-05-06 20:23 Edison Giovanny Mendoza
  2005-05-06 20:32 ` Alejandro Bonilla
  0 siblings, 1 reply; 414+ messages in thread
From: Edison Giovanny Mendoza @ 2005-05-06 20:23 UTC (permalink / raw)
  To: linux-kernel


SALUDOS;

FAVOR ENVIARME TODA LA INFORMACION QUE DISPONGAN DE LINUX LES CONFESARE QUE 
NO CONOSCO NADA .

MUCHAS  GRACIAS

AT.
EDISON MENDOZA
ECUADOR -QUITO

_________________________________________________________________
Charla con tus amigos en línea mediante MSN Messenger: 
http://messenger.latam.msn.com/


^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
  2005-05-06 20:23 Edison Giovanny Mendoza
@ 2005-05-06 20:32 ` Alejandro Bonilla
  0 siblings, 0 replies; 414+ messages in thread
From: Alejandro Bonilla @ 2005-05-06 20:32 UTC (permalink / raw)
  To: 'Edison Giovanny Mendoza', linux-kernel


|SALUDOS;
|
|FAVOR ENVIARME TODA LA INFORMACION QUE DISPONGAN DE LINUX LES
|CONFESARE QUE
|NO CONOSCO NADA .
|

Edison,

La mayor informacion que haya esta en google.com ademas en lugares como
kernel.org linux.org y otros websites como el de la distribucion que te
interesa

Esta es una lista de desarrollo, no de preguntas basicas y de solo Ingles.

Gracias,

- Alejandro.

(I'm telling him that this is english only and that this is not a normal
help questions ML)


|MUCHAS  GRACIAS
|
|AT.
|EDISON MENDOZA
|ECUADOR -QUITO


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2005-03-25  7:03 Søren Lott
  2005-03-25  7:18 ` Jeff Garzik
  0 siblings, 1 reply; 414+ messages in thread
From: Søren Lott @ 2005-03-25  7:03 UTC (permalink / raw)
  To: linux-kernel; +Cc: jgarzik

in the SATA kconfig menu, the help message from  
Intel PIIX/ICH SATA support says:

 CONFIG_SCSI_ATA_PIIX:

This option enables support for ICH5 Serial ATA.
 If PATA support was enabled previously, this enables
 support for select Intel PIIX/ICH PATA host controllers.

anyone care to clarify if this mean that having enabled:

CONFIG_IDE=y
CONFIG_BLK_DEV_IDE=y

i can use the PATA ports on a ICH5 controller through libata ?
if not, which is exactly the meaning of "If PATA support was enabled
previously" on this message ?

thanks.

-SL.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2005-03-25  7:03 Søren Lott
@ 2005-03-25  7:18 ` Jeff Garzik
  0 siblings, 0 replies; 414+ messages in thread
From: Jeff Garzik @ 2005-03-25  7:18 UTC (permalink / raw)
  To: Søren Lott; +Cc: linux-kernel

Søren Lott wrote:
> in the SATA kconfig menu, the help message from  
> Intel PIIX/ICH SATA support says:
> 
>  CONFIG_SCSI_ATA_PIIX:
> 
> This option enables support for ICH5 Serial ATA.
>  If PATA support was enabled previously, this enables
>  support for select Intel PIIX/ICH PATA host controllers.
> 
> anyone care to clarify if this mean that having enabled:
> 
> CONFIG_IDE=y
> CONFIG_BLK_DEV_IDE=y
> 
> i can use the PATA ports on a ICH5 controller through libata ?
> if not, which is exactly the meaning of "If PATA support was enabled
> previously" on this message ?

I agree it is quite confusing wording.  Probably should remove all 
reference to PATA in the CONFIG_SCSI_ATA_PIIX Kconfig entry.

The comment is referring to the somewhat-hidden fact that if you define 
ATA_ENABLE_PATA in include/linux/libata.h, then libata will support your 
Intel PIIX PATA controllers, in addition to the Intel PIIX SATA controllers.

However, since ATAPI support isn't yet stable, this is of limited 
usefulness.

	Jeff

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2005-03-20  5:24 info
  0 siblings, 0 replies; 414+ messages in thread
From: info @ 2005-03-20  5:24 UTC (permalink / raw)
  To: linux-kernel

^[$B40A4L5NA3NDj!*!*!*^[(B
^[$B:#$^$G!"El5~8BDj$@$C$?%5%$%H$,^[(B
^[$B9%I>$K$D$-!"A49q3HBg!*!*:#$,%A%c%s%9$G$9!#^[(B
^[$B"(%3%3$KEPO?$7$F$k=w$N;R$OK\Ev$G$9!#^[(B
1.^[$B5U!{=u4uK>=w@-^[(B
2.^[$B#S#M4uK>=w@-^[(B
3.^[$B:#F|=P2q$$$?$$=w@-^[(B
4.^[$BITNQ4uK>=w@-^[(B
^[$B$J$I$N=w@-=P2q$$J|Bj^[(B

^[$BAa$/$7$J$$$H#S#O#L#D!!#O#U#T^[(B
http://loves.qsv20.com/



^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2005-03-08 16:32 Peter W. Morreale
  2005-03-08 19:32 ` Ross Biro
  0 siblings, 1 reply; 414+ messages in thread
From: Peter W. Morreale @ 2005-03-08 16:32 UTC (permalink / raw)
  To: linux-kernel

In a driver I am reviewing I found the following locking constructs.
Notice how 'foo" is being called while we have suspended interrupts.

This seems wrong since we've mixed locking primitives.

Is it?

Thanks in advance.

-PWM

---------------------snip--------------------------------------
spin_lock_irqsave(global_lock, &flags);
....
foo()
{
    unsigned long lflags;

    spin_unlock(global_lock);
    ...
    {
        spin_lock_irqsave(global_lock, &lflags);
                .
                .
        spin_unlock_irqrestore(global_lock, &lflags);
    }

    spin_lock_irq(global_lock);
}

spin_unlock_irqrestore(global_lock, &flags);



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2005-03-08 16:32 Peter W. Morreale
@ 2005-03-08 19:32 ` Ross Biro
  0 siblings, 0 replies; 414+ messages in thread
From: Ross Biro @ 2005-03-08 19:32 UTC (permalink / raw)
  To: Peter W. Morreale; +Cc: linux-kernel

On Tue, 08 Mar 2005 09:32:48 -0700, Peter W. Morreale
<peter_w_morreale@hotmail.com> wrote:
> 
> This seems wrong since we've mixed locking primitives.
> 
> Is it?

It's not really wrong, it just wastes time turning interrupts off over
and over again.
> ....
> foo()
> {
>     unsigned long lflags;

At this point, interrupts are off, the lock is held.
> 
>     spin_unlock(global_lock);

Interrupts are still off, the lock is no longer held.
>     ...
>     {
>         spin_lock_irqsave(global_lock, &lflags);

Interrupts are still off, and just to be sure we turned them off
again.  The lock is held.
>                 .
>                 .
>         spin_unlock_irqrestore(global_lock, &lflags);
Interrupts are still off, but we restored them to the off state they
were in before
we grabbed the lock the last time.  The lock is no longer held.
>     }
> 
>     spin_lock_irq(global_lock);
Turn off interrupts again just to be extra sure they are off.  The
lock is held again.

>From the looks of this code, the locking will work.  But it's not what
it should be.

If you know foo is only called with interrupts off, then there is no
reason to turn them off over and over again.  Just use the standard
spin_lock and spin_unlock and comment that interrupts are already off.

You should also question if interrupts need to be disabled at all.  If
the spin lock is never grabbed at interrupt time (and probably won't
be in the near future), then there is no point in turning interrupts
on and off at all.

    Ross

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2005-03-05 10:11 Raffaele Ianniello
  2005-03-05 18:14 ` Randy.Dunlap
  0 siblings, 1 reply; 414+ messages in thread
From: Raffaele Ianniello @ 2005-03-05 10:11 UTC (permalink / raw)
  To: linux-kernel


I have a problem compiling a module that I am porting from 2.4 to 2.6 linux kernel.

Compiling with this Makefile:
 
DEBUG = y
 
KERNELDIR = /usr/src/linix.2.6.9
SUBDIR = $(KERNELDIR)/drivers/snoop
INCLUDEDIR = $(KERNELDIR)/include
 
obj-m := snoop.o
 
modules: $(MAKE) -C $(KERNELDIR) SUBDIR=$(SUBDIR) modules
 
clean:
        rm -f *.o
        rm -f *.ko

some lines like these appear:
***Warning: "snoop_ip_forward" [drivers/snoop/snoop] is COMMON symbol
***Warning: "snoop_ip_forward_finish" [drivers/snoop/snoop] is COMMON symbol
 
and I have insert in ip_forward.c some lines:
 
    extern int (* snoop_ip_forward_finish) (struct sk_buff *);

and this is in function ip_forward():
       if(snoop_ip_forward && (*snoop_ip_forward)(skb) == -6)
                goto drop;
 
then when I try to install the module it replies with:
    insmod: error inserting 'snoop.ko': -1 Invalid module format

and in /var/log/message some lines like these appear:
    kernel: snoop: Unknown symbol __floatsidf
    kernel: snoop: Unknown symbol __fixunsdfsi
    kernel: snoop: Unknown symbol __adddf3
    kernel: snoop: Unknown symbol __muldf3

I will be very pleased if you can help me in some way.
thank you for your time

regards,
Raffaele 


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2005-03-05 10:11 Raffaele Ianniello
@ 2005-03-05 18:14 ` Randy.Dunlap
  0 siblings, 0 replies; 414+ messages in thread
From: Randy.Dunlap @ 2005-03-05 18:14 UTC (permalink / raw)
  To: Raffaele Ianniello; +Cc: linux-kernel

Raffaele Ianniello wrote:
> I have a problem compiling a module that I am porting from the 2.4 to the 2.6 Linux kernel.
> 
> Compiling with this Makefile:
>  
> DEBUG = y
>  
> KERNELDIR = /usr/src/linix.2.6.9
> SUBDIR = $(KERNELDIR)/drivers/snoop
> INCLUDEDIR = $(KERNELDIR)/include
>  
> obj-m := snoop.o
>  
> modules: $(MAKE) -C $(KERNELDIR) SUBDIR=$(SUBDIR) modules
>  
> clean:
>         rm -f *.o
>         rm -f *.ko
> 
> some lines like these appear:
> ***Warning: "snoop_ip_forward" [drivers/snoop/snoop] is COMMON symbol
> ***Warning: "snoop_ip_forward_finish" [drivers/snoop/snoop] is COMMON symbol
>  
> and I have inserted some lines in ip_forward.c:
>  
>     extern int (* snoop_ip_forward_finish) (struct sk_buff *);
> 
> and this is in function ip_forward():
>        if(snoop_ip_forward && (*snoop_ip_forward)(skb) == -6)
>                 goto drop;
>  
> then when I try to install the module it replies with:
>     insmod: error inserting 'snoop.ko': -1 Invalid module format
> 
> and in /var/log/message some lines like these appear:
>     kernel: snoop: Unknown symbol __floatsidf
>     kernel: snoop: Unknown symbol __fixunsdfsi
>     kernel: snoop: Unknown symbol __adddf3
>     kernel: snoop: Unknown symbol __muldf3
> 
> I will be very pleased if you can help me in some way.
> thank you for your time

I don't know what the ***Warning's are, but the first 2
problems to solve are:

a.  use a proper 2.6 Makefile:  see Documentation/kbuild/*
     and http://lwn.net/Articles/driver-porting/
     or see an example at
     http://www.xenotime.net/linux/modprms/Makefile
IOW, you need to use the 2.6 build system.

b.  The code is being generated with some floating point
     operations in it.  Linux kernel does not (generally)
     allow/support FP operations in kernel code, so you'll
     need to use some other method for those calculations.
     (unless this problem goes away because of using the
     correct 2.6 build system tools)

-- 
~Randy

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2005-02-26 14:57 Yong Haynes
  0 siblings, 0 replies; 414+ messages in thread
From: Yong Haynes @ 2005-02-26 14:57 UTC (permalink / raw)
  To: linux-kernel

              





^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2005-02-17 17:14 Deepti Patel
  2005-02-17 17:46 ` Matthias-Christian Ott
  0 siblings, 1 reply; 414+ messages in thread
From: Deepti Patel @ 2005-02-17 17:14 UTC (permalink / raw)
  To: linux-kernel; +Cc: pateldeepti

Hi 
I am getting an error while inserting a hello world program.

[deepti@marieke deepti]$ /sbin/insmod hello-2.ko
insmod: error inserting 'hello-2.ko': -1 Operation not permitted

I haven't logged in as root. To insert a module, do I need to be logged in as root?
I would really appreciate any suggestions.

Thanks in advance



-- 
_______________________________________________
Find what you are looking for with the Lycos Yellow Pages
http://r.lycos.com/r/yp_emailfooter/http://yellowpages.lycos.com/default.asp?SRC=lycos10


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2005-02-17 17:14 Deepti Patel
@ 2005-02-17 17:46 ` Matthias-Christian Ott
  0 siblings, 0 replies; 414+ messages in thread
From: Matthias-Christian Ott @ 2005-02-17 17:46 UTC (permalink / raw)
  To: Deepti Patel; +Cc: linux-kernel

Deepti Patel wrote:

>Hi 
>I am getting an error while inserting a hello world program.
>
>[deepti@marieke deepti]$ /sbin/insmod hello-2.ko
>insmod: error inserting 'hello-2.ko': -1 Operation not permitted
>
>I haven't logged in as root. To insert a module, do I need to be logged in as root?
>I would really appreciate any suggestions.
>
>Thanks in advance
>
>
>
>  
>
Yep, you need to be root.

Matthias-Christian Ott

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2005-01-19 14:25 Gmail
  2005-01-19 15:22 ` Paolo Ornati
  0 siblings, 1 reply; 414+ messages in thread
From: Gmail @ 2005-01-19 14:25 UTC (permalink / raw)
  To: linux-kernel

--------------------------------------------------------------
 * Building module-init-tools...
./configure --prefix=/usr --host=i486-slackware-linux --mandir=//usr/share/man --infodir=//usr/share/info --datadir=//usr/share --sysconfdir=//etc --localstatedir=//var/lib --prefix=/ --enable-zlib
configure: WARNING: If you wanted to set the --build type, don't use --host.
    If a cross compiler is detected then cross compile mode will be used.
checking build system type... i586-pc-linux-gnu
checking host system type... i486-slackware-linux-gnu
checking target system type... i486-slackware-linux-gnu
checking for a BSD-compatible install... /usr/bin/ginstall -c
checking whether build environment is sane... yes
checking for gawk... gawk
checking whether make sets $(MAKE)... yes
checking for i486-slackware-linux-strip... no
checking for strip... strip
checking for i486-slackware-linux-gcc... gcc
checking for C compiler default output file name... a.out
checking whether the C compiler works... yes
checking whether we are cross compiling... no
checking for suffix of executables...
checking for suffix of object files... o
checking whether we are using the GNU C compiler... yes
checking whether gcc accepts -g... yes
checking for gcc option to accept ANSI C... none needed
checking for style of include used by make... GNU
checking dependency style of gcc... gcc3
configure: Adding gcc options: -O2 -mcpu=i686 -pipe -Wunused -Wall
configure: creating ./config.status
config.status: creating Makefile
config.status: executing depfiles commands
Makefile:385: *** missing separator.  Stop.

!!! ERROR: sys-apps/module-init-tools-3.0-r2 failed.
!!! Function src_compile, Line 1980, Exitcode 2
!!! emake module-init-tools failed
!!! If you need support, post the topmost build error, NOT this status message.

phases failed
-------------------------------------------------------------------

[1] When we reach the 'make' command.. it prints that error...
Makefile:385. I'm not familiar at all with this kind of stuff... So I
just CAN'T help myself! And that makes me nervous...

[2]
[3] module-init-tools
[4] Linux version 2.4.26 (root@tree) (gcc version 3.3.4) #6 Mon Jun 14 19:07:27 PDT 2004
[6] bash# make


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2005-01-19 14:25 Gmail
@ 2005-01-19 15:22 ` Paolo Ornati
  0 siblings, 0 replies; 414+ messages in thread
From: Paolo Ornati @ 2005-01-19 15:22 UTC (permalink / raw)
  To: Gmail; +Cc: linux-kernel

On Wed, 19 Jan 2005 16:25:22 +0200
Gmail <todor.t@gmail.com> wrote:

> !!! ERROR: sys-apps/module-init-tools-3.0-r2 failed.
> !!! Function src_compile, Line 1980, Exitcode 2
> !!! emake module-init-tools failed
> !!! If you need support, post the topmost build error, NOT this status
> message.
> 
> phases failed

Can you explain to me what this has to do with the Linux kernel?

You are using Gentoo and a compilation failed, go here:

http://bugs.gentoo.org/

and search for "ALL module-init-tools", if you don't find the solution
then post a new BUG report.


-- 
	Paolo Ornati
	Gentoo Linux (kernel 2.6.10-gentoo-r4)

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2004-11-08  7:39 Marcelo Tosatti
  2004-11-08 11:08 ` Paolo Ciarrocchi
  0 siblings, 1 reply; 414+ messages in thread
From: Marcelo Tosatti @ 2004-11-08  7:39 UTC (permalink / raw)
  To: linux-kernel


unsubscribe linux-kernel

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2004-11-08  7:39 Marcelo Tosatti
@ 2004-11-08 11:08 ` Paolo Ciarrocchi
  2004-11-08  8:34   ` Re: Marcelo Tosatti
  0 siblings, 1 reply; 414+ messages in thread
From: Paolo Ciarrocchi @ 2004-11-08 11:08 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: linux-kernel

On Mon, 8 Nov 2004 05:39:54 -0200, Marcelo Tosatti
<marcelo.tosatti@cyclades.com> wrote:
> 
> unsubscribe linux-kernel

Marcelo,
what are you doing ? ;-)

-- 
Paolo

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2004-11-08 11:08 ` Paolo Ciarrocchi
@ 2004-11-08  8:34   ` Marcelo Tosatti
  2004-11-08 22:08     ` Re: Guennadi Liakhovetski
  0 siblings, 1 reply; 414+ messages in thread
From: Marcelo Tosatti @ 2004-11-08  8:34 UTC (permalink / raw)
  To: Paolo Ciarrocchi; +Cc: linux-kernel

On Mon, Nov 08, 2004 at 12:08:24PM +0100, Paolo Ciarrocchi wrote:
> On Mon, 8 Nov 2004 05:39:54 -0200, Marcelo Tosatti
> <marcelo.tosatti@cyclades.com> wrote:
> > 
> > unsubscribe linux-kernel
> 
> Marcelo,
> what are you doing ? ;-)

Jesus, what a shame. 

I'm subscribed twice, so I tried to remove one of the
subscriptions.

/me digs a hole...

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2004-11-08  8:34   ` Re: Marcelo Tosatti
@ 2004-11-08 22:08     ` Guennadi Liakhovetski
  0 siblings, 0 replies; 414+ messages in thread
From: Guennadi Liakhovetski @ 2004-11-08 22:08 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Paolo Ciarrocchi, linux-kernel

On Mon, 8 Nov 2004, Marcelo Tosatti wrote:

> On Mon, Nov 08, 2004 at 12:08:24PM +0100, Paolo Ciarrocchi wrote:
> > On Mon, 8 Nov 2004 05:39:54 -0200, Marcelo Tosatti
> > <marcelo.tosatti@cyclades.com> wrote:
> > > 
> > > unsubscribe linux-kernel
> > 
> > Marcelo,
> > what are you doing ? ;-)
> 
> Jesus, what a shame. 
> 
> I'm subscribed twice, so I tried to remove one of the
> subscriptions.
> 
> /me digs a hole...

:-))) I vote for the best out-of-1st-of-April joke of the year for 
Marcelo!:-))

Guennadi
---
Guennadi Liakhovetski


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2004-09-19 12:29 plt
       [not found] ` <200409191508.33537.Norbert@edusupport.nl>
  0 siblings, 1 reply; 414+ messages in thread
From: plt @ 2004-09-19 12:29 UTC (permalink / raw)
  To: linux-kernel

Question: Are you guys going to work on cleaning up some of the errors in
the code, please, so we can get a cleaner compile?



drivers/mtd/nftlmount.c:44: warning: unused variable `oob'

----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.


^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <200409191508.33537.Norbert@edusupport.nl>]

[parent not found: <1095607945.414da6891fc94@webmail.taylorassociate.com>]

* Re:
       [not found]   ` <1095607945.414da6891fc94@webmail.taylorassociate.com>
@ 2004-09-19 16:31     ` Norbert van Nobelen
  0 siblings, 0 replies; 414+ messages in thread
From: Norbert van Nobelen @ 2004-09-19 16:31 UTC (permalink / raw)
  To: plt; +Cc: linux-kernel

Assumption:
You are doing "make modules_install"
You are installing a new version of the kernel, not a recompile of the
current kernel.

Is the base directory /lib/modules/2.6.8 present for the modules to be
installed in?


On Sunday 19 September 2004 17:32, you wrote:
> I am compiling the newest kernel on Red Hat Fedora 2 and I am getting
> this error when I am running make modules_install.  Do you know how I can
> fix this problem, please?
>
> Phillip Taylor
>
> INSTALL sound/pci/ymfpci/snd-ymfpci.ko
>   INSTALL sound/pcmcia/pdaudiocf/snd-pdaudiocf.ko
>   INSTALL sound/soundcore.ko
>   INSTALL sound/synth/emux/snd-emux-synth.ko
>   INSTALL sound/synth/snd-util-mem.ko
>   INSTALL sound/usb/snd-usb-audio.ko
> if [ -r System.map ]; then /sbin/depmod -ae -F System.map  2.6.8; fi
> make: *** [_modinst_post] Error 143
> You have new mail in /var/spool/mail/root
> [root@localhost linux-2.6.8]#
>
> Quoting Norbert van Nobelen <Norbert@edusupport.nl>:
> > Warnings are not errors.
> >
> > On Sunday 19 September 2004 14:29, you wrote:
> > > Question: Are you guys going to work on cleaning up some of the
> > > errors in the code, please, so we can get a cleaner compile?
> > >
> > >
> > >
> > > drivers/mtd/nftlmount.c:44: warning: unused variable `oob'
> > >
> > > ----------------------------------------------------------------
> > > This message was sent using IMP, the Internet Messaging Program.
> > >
> > > -
> > > To unsubscribe from this list: send the line "unsubscribe linux-kernel"
> > > in the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > Please read the FAQ at  http://www.tux.org/lkml/
>
> ----------------------------------------------------------------
> This message was sent using IMP, the Internet Messaging Program.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2004-06-27 14:18 Vinu Moses
  2004-06-27 20:14 ` Vinu Moses
  0 siblings, 1 reply; 414+ messages in thread
From: Vinu Moses @ 2004-06-27 14:18 UTC (permalink / raw)
  To: linux-kernel

unsubscribe linux-kernel

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2004-06-27 14:18 Vinu Moses
@ 2004-06-27 20:14 ` Vinu Moses
  0 siblings, 0 replies; 414+ messages in thread
From: Vinu Moses @ 2004-06-27 20:14 UTC (permalink / raw)
  To: linux-kernel

On Sunday 27 June 2004 07:48 pm, Vinu Moses wrote:
> unsubscribe linux-kernel

Eeps! Sorry! Guess my caffeine levels are low again :-(

^ permalink raw reply	[flat|nested] 414+ messages in thread

* re:
@ 2004-03-17 22:03 Kendrick Logan
  0 siblings, 0 replies; 414+ messages in thread
From: Kendrick Logan @ 2004-03-17 22:03 UTC (permalink / raw)
  To: linux-kernel-owner; +Cc: linux-kernel, linux-msdos, linux-net, linux-scsi

[-- Attachment #1: Type: text/plain, Size: 1262 bytes --]

Paradise SEX Island Awaits! Tropical 1 week vacations where anything 
goes!

We have lots of WOMEN, SEX, ALCOHOL, ETC!

Every man's dream awaits on this island of pleasure.

Ever wonder what a Fantasy Sex Holiday would be like? 

If it was available at a reasonable cost.........would you go? 

Check out more information on our site & we can make your dream 
vacation a reality....

*All contact, reservations, billings, are strcitly confidential & are 
discussed directly with the client only.

**Group discounts are available. ie. Bachelor parties, etc.

MARCH/APRIL BONUS now available.

http://www.intimate-travelclub.com

This communication is privileged and contains confidential information 
intended only for the person(s) to whom it is addressed.  Any 
unauthorized disclosure, copying, other distribution  of this 
communication or taking any action on its contents is strictly  prohibited. If you have 
received this message in error, please notify us immediately OR remove 
yourself from our list if there is no interest in regards to our 
services.

http://www.intimate-travelclub.com/remove/remove.html

8
kittenish ponce paso titanic decreeing duma conflagrate expansible carbide salk phone echidna excommunicate template 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2004-03-07 20:08 Michael Frank
  2004-03-07 20:26 ` John Bradford
  0 siblings, 1 reply; 414+ messages in thread
From: Michael Frank @ 2004-03-07 20:08 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kernel mailing list

Marcelo,

Here is a patch to update 2.4 Codingstyle.

It is equivalent to the 2.6 version except for
Chapter 9 Configuration files which is unchanged

Please apply.

Regards
Michael

diff -uN linux-2.4.25-mhf184/Documentation/CodingStyle.mhf.orig linux-2.4.25-mhf184/Documentation/CodingStyle
--- linux-2.4.25-mhf184/Documentation/CodingStyle.mhf.orig	2004-02-27 12:30:50.000000000 +0800
+++ linux-2.4.25-mhf184/Documentation/CodingStyle	2004-03-08 04:04:11.000000000 +0800
@@ -1,42 +1,75 @@

-		Linux kernel coding style
+		Linux kernel coding style

  This is a short document describing the preferred coding style for the
  linux kernel.  Coding style is very personal, and I won't _force_ my
  views on anybody, but this is what goes for anything that I have to be
  able to maintain, and I'd prefer it for most other things too.  Please
-at least consider the points made here.
+at least consider the points made here.

  First off, I'd suggest printing out a copy of the GNU coding standards,
-and NOT read it.  Burn them, it's a great symbolic gesture.
+and NOT read it.  Burn them, it's a great symbolic gesture.

  Anyway, here goes:


  	 	Chapter 1: Indentation

-Tabs are 8 characters, and thus indentations are also 8 characters.
+Tabs are 8 characters, and thus indentations are also 8 characters.
  There are heretic movements that try to make indentations 4 (or even 2!)
  characters deep, and that is akin to trying to define the value of PI to
-be 3.
+be 3.

  Rationale: The whole idea behind indentation is to clearly define where
  a block of control starts and ends.  Especially when you've been looking
  at your screen for 20 straight hours, you'll find it a lot easier to see
-how the indentation works if you have large indentations.
+how the indentation works if you have large indentations.

  Now, some people will claim that having 8-character indentations makes
  the code move too far to the right, and makes it hard to read on a
  80-character terminal screen.  The answer to that is that if you need
  more than 3 levels of indentation, you're screwed anyway, and should fix
-your program.
+your program.

  In short, 8-char indents make things easier to read, and have the added
-benefit of warning you when you're nesting your functions too deep.
-Heed that warning.
+benefit of warning you when you're nesting your functions too deep.
+Heed that warning.

+Don't put multiple statements on a single line unless you have
+something to hide:

-		Chapter 2: Placing Braces
+	if (condition) do_this;
+	  do_something_everytime;
+
+Outside of comments, documentation and except in Kconfig, spaces are never
+used for indentation, and the above example is deliberately broken.
+
+Get a decent editor and don't leave whitespace at the end of lines.
+
+
+		Chapter 2: Breaking long lines and strings
+
+Coding style is all about readability and maintainability using commonly
+available tools.
+
+The limit on the length of lines is 80 columns and this is a hard limit.
+
+Statements longer than 80 columns will be broken into sensible chunks.
+Descendants are always substantially shorter than the parent and are placed
+substantially to the right. The same applies to function headers with a long
+argument list. Long strings are as well broken into shorter strings.
+
+void fun(int a, int b, int c)
+{
+	if (condition)
+		printk(KERN_WARNING "Warning this is a long printk with "
+						"3 parameters a: %u b: %u "
+						"c: %u \n", a, b, c);
+	else
+		next_statement;
+}
+
+		Chapter 3: Placing Braces

  The other issue that always comes up in C styling is the placement of
  braces.  Unlike the indent size, there are few technical reasons to
@@ -59,7 +92,7 @@
  Heretic people all over the world have claimed that this inconsistency
  is ...  well ...  inconsistent, but all right-thinking people know that
  (a) K&R are _right_ and (b) K&R are right.  Besides, functions are
-special anyway (you can't nest them in C).
+special anyway (you can't nest them in C).

  Note that the closing brace is empty on a line of its own, _except_ in
  the cases where it is followed by a continuation of the same statement,
@@ -79,60 +112,60 @@
  	} else {
  		....
  	}
-			
-Rationale: K&R.
+
+Rationale: K&R.

  Also, note that this brace-placement also minimizes the number of empty
  (or almost empty) lines, without any loss of readability.  Thus, as the
  supply of new-lines on your screen is not a renewable resource (think
  25-line terminal screens here), you have more empty lines to put
-comments on.
+comments on.


-		Chapter 3: Naming
+		Chapter 4: Naming

  C is a Spartan language, and so should your naming be.  Unlike Modula-2
  and Pascal programmers, C programmers do not use cute names like
  ThisVariableIsATemporaryCounter.  A C programmer would call that
  variable "tmp", which is much easier to write, and not the least more
-difficult to understand.
+difficult to understand.

  HOWEVER, while mixed-case names are frowned upon, descriptive names for
  global variables are a must.  To call a global function "foo" is a
-shooting offense.
+shooting offense.

  GLOBAL variables (to be used only if you _really_ need them) need to
  have descriptive names, as do global functions.  If you have a function
  that counts the number of active users, you should call that
-"count_active_users()" or similar, you should _not_ call it "cntusr()".
+"count_active_users()" or similar, you should _not_ call it "cntusr()".

  Encoding the type of a function into the name (so-called Hungarian
  notation) is brain damaged - the compiler knows the types anyway and can
  check those, and it only confuses the programmer.  No wonder MicroSoft
-makes buggy programs.
+makes buggy programs.

  LOCAL variable names should be short, and to the point.  If you have
-some random integer loop counter, it should probably be called "i".
+some random integer loop counter, it should probably be called "i".
  Calling it "loop_counter" is non-productive, if there is no chance of it
  being mis-understood.  Similarly, "tmp" can be just about any type of
-variable that is used to hold a temporary value.
+variable that is used to hold a temporary value.

  If you are afraid to mix up your local variable names, you have another
-problem, which is called the function-growth-hormone-imbalance syndrome.
-See next chapter.
+problem, which is called the function-growth-hormone-imbalance syndrome.
+See next chapter.

-		
-		Chapter 4: Functions
+
+		Chapter 5: Functions

  Functions should be short and sweet, and do just one thing.  They should
  fit on one or two screenfuls of text (the ISO/ANSI screen size is 80x24,
-as we all know), and do one thing and do that well.
+as we all know), and do one thing and do that well.

  The maximum length of a function is inversely proportional to the
  complexity and indentation level of that function.  So, if you have a
  conceptually simple function that is just one long (but simple)
  case-statement, where you have to do lots of small things for a lot of
-different cases, it's OK to have a longer function.
+different cases, it's OK to have a longer function.

  However, if you have a complex function, and you suspect that a
  less-than-gifted first-year high-school student might not even
@@ -140,41 +173,78 @@
  maximum limits all the more closely.  Use helper functions with
  descriptive names (you can ask the compiler to in-line them if you think
  it's performance-critical, and it will probably do a better job of it
-that you would have done).
+than you would have done).

  Another measure of the function is the number of local variables.  They
  shouldn't exceed 5-10, or you're doing something wrong.  Re-think the
  function, and split it into smaller pieces.  A human brain can
  generally easily keep track of about 7 different things, anything more
  and it gets confused.  You know you're brilliant, but maybe you'd like
-to understand what you did 2 weeks from now.
+to understand what you did 2 weeks from now.
+
+
+		Chapter 6: Centralized exiting of functions

+Albeit deprecated by some people, the equivalent of the goto statement is
+used frequently by compilers in form of the unconditional jump instruction.

-		Chapter 5: Commenting
+The goto statement comes in handy when a function exits from multiple
+locations and some common work such as cleanup has to be done.
+
+The rationale is:
+
+- unconditional statements are easier to understand and follow
+- nesting is reduced
+- errors by not updating individual exit points when making
+    modifications are prevented
+- saves the compiler work to optimize redundant code away ;)
+
+int fun(int )
+{
+	int result = 0;
+	char *buffer = kmalloc(SIZE);
+
+	if (buffer == NULL)
+		return -ENOMEM;
+
+	if (condition1) {
+		while (loop1) {
+			...
+		}
+		result = 1;
+		goto out;
+	}
+	...
+out:
+	kfree(buffer);
+	return result;
+}
+
+		Chapter 7: Commenting

  Comments are good, but there is also a danger of over-commenting.  NEVER
  try to explain HOW your code works in a comment: it's much better to
  write the code so that the _working_ is obvious, and it's a waste of
-time to explain badly written code.
+time to explain badly written code.

-Generally, you want your comments to tell WHAT your code does, not HOW.
+Generally, you want your comments to tell WHAT your code does, not HOW.
  Also, try to avoid putting comments inside a function body: if the
  function is so complex that you need to separately comment parts of it,
-you should probably go back to chapter 4 for a while.  You can make
+you should probably go back to chapter 5 for a while.  You can make
  small comments to note or warn about something particularly clever (or
  ugly), but try to avoid excess.  Instead, put the comments at the head
  of the function, telling people what it does, and possibly WHY it does
-it.
+it.


-		Chapter 6: You've made a mess of it
+		Chapter 8: You've made a mess of it

  That's OK, we all do.  You've probably been told by your long-time Unix
  user helper that "GNU emacs" automatically formats the C sources for
  you, and you've noticed that yes, it does do that, but the defaults it
  uses are less than desirable (in fact, they are worse than random
-typing - a infinite number of monkeys typing into GNU emacs would never
-make a good program).
+typing - an infinite number of monkeys typing into GNU emacs would never
+make a good program).

  So, you can either get rid of GNU emacs, or change it to use saner
  values.  To do the latter, you can stick the following in your .emacs file:
@@ -192,7 +262,7 @@
  to add

  (setq auto-mode-alist (cons '("/usr/src/linux.*/.*\\.[ch]$" . linux-c-mode)
-                       auto-mode-alist))
+			auto-mode-alist))

  to your .emacs file if you want to have linux-c-mode switched on
  automagically when you edit source files under /usr/src/linux.
@@ -200,19 +270,20 @@
  But even if you fail in getting emacs to do sane formatting, not
  everything is lost: use "indent".

-Now, again, GNU indent has the same brain dead settings that GNU emacs
-has, which is why you need to give it a few command line options.
+Now, again, GNU indent has the same brain-dead settings that GNU emacs
+has, which is why you need to give it a few command line options.
  However, that's not too bad, because even the makers of GNU indent
  recognize the authority of K&R (the GNU people aren't evil, they are
  just severely misguided in this matter), so you just give indent the
-options "-kr -i8" (stands for "K&R, 8 character indents").
+options "-kr -i8" (stands for "K&R, 8 character indents"), or use
+"scripts/Lindent", which indents in the latest style.

  "indent" has a lot of options, and especially when it comes to comment
-re-formatting you may want to take a look at the manual page.  But
-remember: "indent" is not a fix for bad programming.
+re-formatting you may want to take a look at the man page.  But
+remember: "indent" is not a fix for bad programming.


-		Chapter 7: Configuration-files
+		Chapter 9: Configuration-files

  For configuration options (arch/xxx/config.in, and all the Config.in files),
  somewhat different indentation is used.
@@ -235,20 +306,20 @@
  Experimental options should be denoted (EXPERIMENTAL).


-		Chapter 8: Data structures
+		Chapter 10: Data structures

  Data structures that have visibility outside the single-threaded
  environment they are created and destroyed in should always have
  reference counts.  In the kernel, garbage collection doesn't exist (and
  outside the kernel garbage collection is slow and inefficient), which
-means that you absolutely _have_ to reference count all your uses.
+means that you absolutely _have_ to reference count all your uses.

  Reference counting means that you can avoid locking, and allows multiple
  users to have access to the data structure in parallel - and not having
  to worry about the structure suddenly going away from under them just
-because they slept or did something else for a while.
+because they slept or did something else for a while.

-Note that locking is _not_ a replacement for reference counting.
+Note that locking is _not_ a replacement for reference counting.
  Locking is used to keep data structures coherent, while reference
  counting is a memory management technique.  Usually both are needed, and
  they are not to be confused with each other.
@@ -258,9 +329,99 @@
  the number of subclass users, and decrements the global count just once
  when the subclass count goes to zero.

-Examples of this kind of "multi-reference-counting" can be found in
+Examples of this kind of "multi-level-reference-counting" can be found in
  memory management ("struct mm_struct": mm_users and mm_count), and in
  filesystem code ("struct super_block": s_count and s_active).

  Remember: if another thread can find your data structure, and you don't
  have a reference count on it, you almost certainly have a bug.
+
+
+		Chapter 11: Macros, Enums, Inline functions and RTL
+
+Names of macros defining constants and labels in enums are capitalized.
+
+#define CONSTANT 0x12345
+
+Enums are preferred when defining several related constants.
+
+CAPITALIZED macro names are appreciated but macros resembling functions
+may be named in lower case.
+
+Generally, inline functions are preferable to macros resembling functions.
+
+Macros with multiple statements should be enclosed in a do - while block:
+
+#define macrofun(a,b,c) 			\
+	do {					\
+		if (a == 5)			\
+			do_this(b,c);		\
+	} while (0)
+
+Things to avoid when using macros:
+
+1) macros that affect control flow:
+
+#define FOO(x)					\
+	do {					\
+		if (blah(x) < 0)		\
+			return -EBUGGERED;	\
+	} while(0)
+
+is a _very_ bad idea.  It looks like a function call but exits the "calling"
+function; don't break the internal parsers of those who will read the code.
+
+2) macros that depend on having a local variable with a magic name:
+
+#define FOO(val) bar(index, val)
+
+might look like a good thing, but it's confusing as hell when one reads the
+code and it's prone to breakage from seemingly innocent changes.
+
+3) macros with arguments that are used as l-values: FOO(x) = y; will
+bite you if somebody e.g. turns FOO into an inline function.
+
+4) forgetting about precedence: macros defining constants using expressions
+must enclose the expression in parentheses. Beware of similar issues with
+macros using parameters.
+
+#define CONSTANT 0x4000
+#define CONSTEXP (CONSTANT | 3)
+
+The cpp manual deals with macros exhaustively. The gcc internals manual also
+covers RTL which is used frequently with assembly language in the kernel.
+
+
+		Chapter 12: Printing kernel messages
+
+Kernel developers like to be seen as literate. Do mind the spelling
+of kernel messages to make a good impression. Do not use crippled
+words like "dont" and use "do not" or "don't" instead.
+
+Kernel messages do not have to be terminated with a period.
+
+Printing numbers in parentheses (%d) adds no value and should be avoided.
+
+
+		Chapter 13: References
+
+The C Programming Language, Second Edition
+by Brian W. Kernighan and Dennis M. Ritchie.
+Prentice Hall, Inc., 1988.
+ISBN 0-13-110362-8 (paperback), 0-13-110370-9 (hardback).
+URL: http://cm.bell-labs.com/cm/cs/cbook/
+
+The Practice of Programming
+by Brian W. Kernighan and Rob Pike.
+Addison-Wesley, Inc., 1999.
+ISBN 0-201-61586-X.
+URL: http://cm.bell-labs.com/cm/cs/tpop/
+
+GNU manuals - where in compliance with K&R and this text - for cpp, gcc,
+gcc internals and indent, all available from http://www.gnu.org
+
+WG14 is the international standardization working group for the programming
+language C, URL: http://std.dkuug.dk/JTC1/SC22/WG14/
+
+--
+Last updated on 16 February 2004 by a community effort on LKML.


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2004-03-07 20:08 Michael Frank
@ 2004-03-07 20:26 ` John Bradford
  0 siblings, 0 replies; 414+ messages in thread
From: John Bradford @ 2004-03-07 20:26 UTC (permalink / raw)
  To: Michael Frank, Marcelo Tosatti; +Cc: kernel mailing list

> Here is a patch to update 2.4 Codingstyle.
> 
> It is equivalent to the 2.6 version except for
> Chapter 9 Configuration files which is unchanged
> 
> Please apply.

[snip]

> -and NOT read it.  Burn them, it's a great symbolic gesture.
> +and NOT read it.  Burn them, it's a great symbolic gesture.

Should be:

+and NOT reading it.  Burn them, it's a great symbolic gesture.

John.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2004-02-22 17:51 redzic fadil
  2004-02-22 18:48 ` Larry Reaves
  0 siblings, 1 reply; 414+ messages in thread
From: redzic fadil @ 2004-02-22 17:51 UTC (permalink / raw)
  To: linux-kernel

hello

I hope I don't disturb,

I have tried to compile the hello.c module under kernel 2.6.3.
And I'd like to insert the hello.o module in the kernel.
But this doesn't work with kernel 2.6.3 .

I have compiled this module with kernel 2.4.* and it is well.

Also I cannot include the header file module.h, because I get error 
messages.

my module:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>

int initial_module (void)
{
	printk("\ninitial module\n");
	return (0);
}

void delete_module (void)
{
	printk("\ndelete module\n");
}

module_init(initial_module);
module_exit(delete_module);

my Makefile:
CC=gcc
CFLAGS=-isystem /lib/modules/`uname -r`/build/include -O2 -D__KERNEL__ 
-DMODULE
all: hello.o

If you have any idea please send an E-Mail:  redzic_fadil@hotmail.com

thanks

_________________________________________________________________
Die ultimative Fan-Seite für den MSN Messenger http://www.ilovemessenger.de 
Emoticons und Hintergründe kostenlos downloaden!

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2004-02-22 17:51 redzic fadil
@ 2004-02-22 18:48 ` Larry Reaves
  0 siblings, 0 replies; 414+ messages in thread
From: Larry Reaves @ 2004-02-22 18:48 UTC (permalink / raw)
  To: redzic fadil

I suggest that you read
http://linuxdevices.com/articles/AT4389927951.html it is an article
about the differences between modules for 2.4 and 2.6.  Basically all
you need to do to get it to work is delete your Makefile and create a
new one with only this line:
obj-m := hello.o
and then issue the following command:
make -C /usr/src/linux-2.6.3 SUBDIRS=$PWD modules
your module compiles fine on my box using this method

On Sun, 2004-02-22 at 12:51, redzic fadil wrote:
> hello
> 
> 
> I hope I don't disturb,
> 
> 
> I have tried to compile the hello.c module under kernel 2.6.3.
> And I'd like to insert the hello.o module in the kernel.
> But this doesn't work with kernel 2.6.3 .
> 
> I have compiled this module with kernel 2.4.* and it is well.
> 
> Also I cannot include the header file module.h, because I get error 
> messages.
> 
> my module:
> #include <linux/kernel.h>
> #include <linux/module.h>
> #include <linux/init.h>
> 
> 
> int initial_module (void)
> {
> 	printk("\ninitial module\n");
> 	return (0);
> }
> 
> void delete_module (void)
> {
> 	printk("\ndelete module\n");
> }
> 
> module_init(initial_module);
> module_exit(delete_module);
> 
> 
> my Makefile:
> CC=gcc
> CFLAGS=-isystem /lib/modules/`uname -r`/build/include -O2 -D__KERNEL__ 
> -DMODULE
> all: hello.o
> 
> If you have any idea please send an E-Mail:  redzic_fadil@hotmail.com
> 
> thanks
> 
> _________________________________________________________________
> Die ultimative Fan-Seite für den MSN Messenger http://www.ilovemessenger.de
> Emoticons und Hintergründe kostenlos downloaden!
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
-- 
Larry Reaves <larry@moonshinecomputers.com>


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2004-02-14 23:17 Alexandr Chernyy
  0 siblings, 0 replies; 414+ messages in thread
From: Alexandr Chernyy @ 2004-02-14 23:17 UTC (permalink / raw)
  To: linux-kernel

On Monday 09 February 2004 22:05, Alexandr Chernyy wrote:

 >> Hello All! Can you help me! I write module for kernel 2.4.22 and have
 >> some problems! I need to read some information form file, create
 >> directory and etc. (Did kerlen have some stdio.h like function - fopen,
 >> fgets, fclose......)!!! Please help me.


 > what you are looking for is not usually needed by 'normal' kernel 
modules.
 > could you please be more specific on what is exactly your porpouse?

 > alessandro

For example, I need to create a directory on every mounted device when 
the module loads,
and read some information from /proc/mounts.

WBR, Alexandr Chernyy



^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <7A25937D23A1E64C8E93CB4A50509C2A0310F099@stca204a.bus.sc.rolm.com>]

* Re:
       [not found] <7A25937D23A1E64C8E93CB4A50509C2A0310F099@stca204a.bus.sc.rolm.com>
@ 2004-02-05 17:02 ` Tommy Reynolds
  0 siblings, 0 replies; 414+ messages in thread
From: Tommy Reynolds @ 2004-02-05 17:02 UTC (permalink / raw)
  To: linux-kernel; +Cc: Bloch, Jack

[-- Attachment #1: Type: text/plain, Size: 893 bytes --]

Uttered "Bloch, Jack" <Jack.Bloch@icn.siemens.com>, spake thus:

Please do not include me in the CC: list as I belong to this mailing
list.

Do not reply privately to this message, keep it all out in the open ;-)

> It is not really a device, simply a file loaded into memory by another
> process at a fixed location.

Userland processes _do_ have resource limits.  Try a "man getrlimit"
for some hints here.

There is a max process size limit (RLIMIT_DATA) and the number of VM
pages kept in RAM (RLIMIT_RSS).

Keep in mind that a child processes usually inherits whatever rlimits
are left unused by the parent process, so if your program is being
started from another program near its limits, the child process
begins with those limitations as well.

1)  Can a standalone test program, started from the shell, map the
    data?

2)  How was the kernel memory used to hold the data obtained?

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2003-12-11 23:37 Hettinger Tamas
  2003-12-12  1:29 ` Jonathan Corbet
  0 siblings, 1 reply; 414+ messages in thread
From: Hettinger Tamas @ 2003-12-11 23:37 UTC (permalink / raw)
  To: linux-kernel

Hi everybody !

I've been developing a kernel module and I needed some timer functions. I
found init_timer() add_timer() del_timer() etc... but I would ask some
questions about them.

1) When I set a timer, it is added to a timer_list chain with add_timer().
If the time is up and the scheduled function is called, should I remove the
timer_list struct from the chain via del_timer() ? Or is it removed
automatically ?

2) How can a module safely removed if it has some running timers ? I have to
call del_timer() in cleanup_module() for each running timer ? And what's the
purpose of the timer_pending function ?

thanks

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2003-12-11 23:37 Hettinger Tamas
@ 2003-12-12  1:29 ` Jonathan Corbet
  0 siblings, 0 replies; 414+ messages in thread
From: Jonathan Corbet @ 2003-12-12  1:29 UTC (permalink / raw)
  To: Hettinger Tamas; +Cc: linux-kernel

> 1) When I set a timer, it is added to a timer_list chain with add_timer().
> If the time is up and the scheduled function is called, should I remove the
> timer_list struct from the chain via del_timer() ? Or is it removed
> automatically ?

It will be removed automatically, just before your timer function is called.

> 2) How can a module safely removed if it has some running timers ? I have to
> call del_timer() in cleanup_module() for each running timer ? 

You cannot remove a module (safely) if there are outstanding timers.  Use
del_timer_sync() to get rid of them and ensure they aren't running on
another processor.

Chapter 6 of Linux Device Drivers covers this topic; see:

	http://www.xml.com/ldd/chapter/book/ch06.html

jon

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2003-12-05 17:36 gmack
  2003-12-05 23:16 ` Oliver Hunt
  0 siblings, 1 reply; 414+ messages in thread
From: gmack @ 2003-12-05 17:36 UTC (permalink / raw)


>From gmack@innerfire.net  Fri Dec  5 12:36:11 2003
Received: from localhost (sendmail-bs@127.0.0.1)
  by localhost with SMTP; 5 Dec 2003 17:36:11 -0000
Date: Fri, 5 Dec 2003 12:36:11 -0500 (EST)
From: Gerhard Mack <gmack@innerfire.net>
To: Linus Torvalds <torvalds@osdl.org>
cc: David Schwartz <davids@webmaster.com>, Valdis.Kletnieks@vt.edu, 
    Peter Chubb <peter@chubb.wattle.id.au>, linux-kernel@vger.kernel.org
Subject: RE: Linux GPL and binary module exception clause? 
In-Reply-To: <Pine.LNX.4.58.0312042245350.9125@home.osdl.org>
Message-ID: <Pine.LNX.4.58.0312051232530.16547@innerfire.net>
References: <MDEHLPKNGKAHNMBLJOLKMEIDIHAA.davids@webmaster.com>
 <Pine.LNX.4.58.0312042245350.9125@home.osdl.org>
MIME-Version: 1.0
Content-Type: TEXT/PLAIN; charset=US-ASCII
X-Spam-Status: No, hits=-104.5 required=4.0 tests=IN_REP_TO,SUBJ_ENDS_IN_Q_MARK,USER_IN_WHITELIST version=2.20
X-Spam-Level: 

Those views are scary when you consider that webmaster Inc sells closed
source software that works exactly like IRC (which is GPL)

On Thu, 4 Dec 2003, Linus Torvalds wrote:

> Date: Thu, 4 Dec 2003 22:58:09 -0800 (PST)
> From: Linus Torvalds <torvalds@osdl.org>
> To: David Schwartz <davids@webmaster.com>
> Cc: Valdis.Kletnieks@vt.edu, Peter Chubb <peter@chubb.wattle.id.au>,
>      linux-kernel@vger.kernel.org
> Subject: RE: Linux GPL and binary module exception clause?
>
>
>
> On Thu, 4 Dec 2003, David Schwartz wrote:
> >
> > The GPL gives you the unrestricted right to *use* the original work.
> > This implicitly includes the right to peform any step necessary to use
> > the work.
>
> No it doesn't.
>
> Your logic is fundamentally flawed, and/or your reading skills are
> deficient.
>
> The GPL expressly states that the license does not restrict the act of
> "running the Program" in any way, and yes, in that sense you may "use" the
> program in whatever way you want.
>
> But that "use" is clearly limited to running the resultant program. It
> very much does NOT say that you can "use the header files in any way you
> want, including building non-GPL'd programs with them".
>
> In fact, it very much says the reverse. If you use the source code to
> build a new program, the GPL _explicitly_ says that that new program has
> to be GPL'd too.
>
> > Please tell me how you use a kernel header file, other than by including
> > it in a code file, compiling that code file, and executing the result.
>
> You are a weasel, and you are trying to make the world look the way you
> want it to, rather than the way it _is_.
>
> You use the word "use" in a sense that is not compatible with the GPL. You
> claim that the GPL says that you can "use the program any way you want",
> but that is simply not accurate or even _close_ to accurate. Go back and
> read the GPL again. It says:
>
> 	"The act of running the Program is not restricted"
>
> and it very much does NOT say
>
> 	"The act of using parts of the source code of the Program is not
> 	 restricted"
>
> In short: you do _NOT_ have the right to use a kernel header file (or any
> other part of the kernel sources), unless that use results in a GPL'd
> program.
>
> What you _do_ have the right is to _run_ the kernel any way you please
> (this is the part you would like to redefine as "use the source code",
> but that definition simply isn't allowed by the license, however much you
> protest to the contrary).
>
> So you can run the kernel and create non-GPL'd programs while running it
> to your hearts content. You can use it to control a nuclear submarine, and
> that's totally outside the scope of the license (but if you do, please
> note that the license does not imply any kind of warranty or similar).
>
> BUT YOU CAN NOT USE THE KERNEL HEADER FILES TO CREATE NON-GPL'D BINARIES.
>
> Comprende?
>
> 		Linus
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

--
Gerhard Mack

gmack@innerfire.net

<>< As a computer I find your faith in technology amusing.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2003-12-05 17:36 gmack
@ 2003-12-05 23:16 ` Oliver Hunt
  0 siblings, 0 replies; 414+ messages in thread
From: Oliver Hunt @ 2003-12-05 23:16 UTC (permalink / raw)
  To: gmack, linux-kernel

No, it's not; doing something similar (or identical) to a GPL'd program 
is fine. If you were to use a GPL'd IRC server/client as the base for 
your own code, then you'd need to publish using the GPL...

IRC would be in the realm of software patents really - it's a protocol - 
the client/serverside code comes under copyright laws :)

--Oliver

gmack@innerfire.net wrote:

>From gmack@innerfire.net  Fri Dec  5 12:36:11 2003
>Received: from localhost (sendmail-bs@127.0.0.1)
>  by localhost with SMTP; 5 Dec 2003 17:36:11 -0000
>Date: Fri, 5 Dec 2003 12:36:11 -0500 (EST)
>From: Gerhard Mack <gmack@innerfire.net>
>To: Linus Torvalds <torvalds@osdl.org>
>cc: David Schwartz <davids@webmaster.com>, Valdis.Kletnieks@vt.edu, 
>    Peter Chubb <peter@chubb.wattle.id.au>, linux-kernel@vger.kernel.org
>Subject: RE: Linux GPL and binary module exception clause? 
>In-Reply-To: <Pine.LNX.4.58.0312042245350.9125@home.osdl.org>
>Message-ID: <Pine.LNX.4.58.0312051232530.16547@innerfire.net>
>References: <MDEHLPKNGKAHNMBLJOLKMEIDIHAA.davids@webmaster.com>
> <Pine.LNX.4.58.0312042245350.9125@home.osdl.org>
>MIME-Version: 1.0
>Content-Type: TEXT/PLAIN; charset=US-ASCII
>X-Spam-Status: No, hits=-104.5 required=4.0 tests=IN_REP_TO,SUBJ_ENDS_IN_Q_MARK,USER_IN_WHITELIST version=2.20
>X-Spam-Level: 
>
>Those views are scary when you consider that webmaster Inc sells closed
>source software that works exactly like IRC (wich is GPL)
>
>On Thu, 4 Dec 2003, Linus Torvalds wrote:
>
>  
>
>>Date: Thu, 4 Dec 2003 22:58:09 -0800 (PST)
>>From: Linus Torvalds <torvalds@osdl.org>
>>To: David Schwartz <davids@webmaster.com>
>>Cc: Valdis.Kletnieks@vt.edu, Peter Chubb <peter@chubb.wattle.id.au>,
>>     linux-kernel@vger.kernel.org
>>Subject: RE: Linux GPL and binary module exception clause?
>>
>>
>>
>>On Thu, 4 Dec 2003, David Schwartz wrote:
>>    
>>
>>>The GPL gives you the unrestricted right to *use* the original work.
>>>This implicitly includes the right to peform any step necessary to use
>>>the work.
>>>      
>>>
>>No it doesn't.
>>
>>Your logic is fundamentally flawed, and/or your reading skills are
>>deficient.
>>
>>The GPL expressly states that the license does not restrict the act of
>>"running the Program" in any way, and yes, in that sense you may "use" the
>>program in whatever way you want.
>>
>>But that "use" is clearly limited to running the resultant program. It
>>very much does NOT say that you can "use the header files in any way you
>>want, including building non-GPL'd programs with them".
>>
>>In fact, it very much says the reverse. If you use the source code to
>>build a new program, the GPL _explicitly_ says that that new program has
>>to be GPL'd too.
>>
>>    
>>
>>>Please tell me how you use a kernel header file, other than by including
>>>it in a code file, compiling that code file, and executing the result.
>>>      
>>>
>>You are a weasel, and you are trying to make the world look the way you
>>want it to, rather than the way it _is_.
>>
>>You use the word "use" in a sense that is not compatible with the GPL. You
>>claim that the GPL says that you can "use the program any way you want",
>>but that is simply not accurate or even _close_ to accurate. Go back and
>>read the GPL again. It says:
>>
>>	"The act of running the Program is not restricted"
>>
>>and it very much does NOT say
>>
>>	"The act of using parts of the source code of the Program is not
>>	 restricted"
>>
>>In short: you do _NOT_ have the right to use a kernel header file (or any
>>other part of the kernel sources), unless that use results in a GPL'd
>>program.
>>
>>What you _do_ have the right is to _run_ the kernel any way you please
>>(this is the part you would like to redefine as "use the source code",
>>but that definition simply isn't allowed by the license, however much you
>>protest to the contrary).
>>
>>So you can run the kernel and create non-GPL'd programs while running it
>>to your hearts content. You can use it to control a nuclear submarine, and
>>that's totally outside the scope of the license (but if you do, please
>>note that the license does not imply any kind of warranty or similar).
>>
>>BUT YOU CAN NOT USE THE KERNEL HEADER FILES TO CREATE NON-GPL'D BINARIES.
>>
>>Comprende?
>>
>>		Linus
>>-
>>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>>the body of a message to majordomo@vger.kernel.org
>>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>Please read the FAQ at  http://www.tux.org/lkml/
>>
>>    
>>
>
>--
>Gerhard Mack
>
>gmack@innerfire.net
>
><>< As a computer I find your faith in technology amusing.
>-
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at  http://www.tux.org/lkml/
>  
>


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2003-12-03 15:08 Bloch, Jack
  2003-12-04  4:56 ` Raj
  0 siblings, 1 reply; 414+ messages in thread
From: Bloch, Jack @ 2003-12-03 15:08 UTC (permalink / raw)
  To: linux-kernel

I try to open a non-existent device driver node file. The Kernel returns a
value of -1 (expected). However, when I read the value of errno it contains
a value of 29. A call to the perror function does print out the correct
error message (a value of 2). Why does this happen?

Jack Bloch 
Siemens ICN
phone                (561) 923-6550
e-mail                jack.bloch@icn.siemens.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2003-12-03 15:08 Bloch, Jack
@ 2003-12-04  4:56 ` Raj
  0 siblings, 0 replies; 414+ messages in thread
From: Raj @ 2003-12-04  4:56 UTC (permalink / raw)
  To: Bloch, Jack; +Cc: linux-kernel

Bloch, Jack wrote:

>I try to open a non-existan device driver node file. The Kernel returns a
>value of -1 (expected). However, when I read the value of errno it contains
>a value of 29. A call to the perror functrion does print out the correct
>error message (a value of 2). Why does this happen?
>
>  
>
I tried this on a 2.6.0-test11 and it works fine. Pls specify your 
kernel version and attach the program if possible.

/Raj


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2003-09-10  2:20 John Yau
  2003-09-10  2:31 ` Nick Piggin
  0 siblings, 1 reply; 414+ messages in thread
From: John Yau @ 2003-09-10  2:20 UTC (permalink / raw)
  To: piggin; +Cc: linux-kernel

>Your mechanism is basically "backboost". Its how you get X to keep a
>high piroirity, but quite unpredictable. Giving a boost to a process
>holding a semaphore is an interesting idea, but it doesn't address the
>X problem.

Hmm...I'm actually curious why you called it "backboosting".  In academia
this approach first described in the paper here:

L. Sha, R. Rajkumar, and J. P. Lehoczky. Priority Inheritance Protocols: An
Approach to Real-Time Synchronization. In IEEE Transactions on Computers,
vol. 39, pp. 1175-1185, Sep. 1990.

is referred to as priority inheritance.  Is there significant difference
between your implementation and priority inheritance schemes implemented in
other OSes?  If so, why backboosting?

I was under the impression that pipes and IPC in general are synchronized
using some sort of semaphores/mutex...or does Linux use a different
mechanism for IPC and does away with user space synchronization all together
(e.g. flip-flop buffers with the kernel arbitrating all contention)?  IIRC
processes don't write to X directly and has to send data to X via IPC.  If
some futex derivative is used to synchronize the producers with X, then
making priority inheritable futexes would solve the problem.

>The scheduler in Linus' tree is basically obsolete now, so there isn't
>any point testing it really. Test Con's or my patches, and let us know
>if you're still having problems with sir dumps-a-lot.

Okay enough said, you and Con should get your patches merged into that tree
ASAP if they're ready.

John Yau

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2003-09-10  2:20 John Yau
@ 2003-09-10  2:31 ` Nick Piggin
  0 siblings, 0 replies; 414+ messages in thread
From: Nick Piggin @ 2003-09-10  2:31 UTC (permalink / raw)
  To: John Yau; +Cc: linux-kernel



John Yau wrote:

>>Your mechanism is basically "backboost". Its how you get X to keep a
>>high piroirity, but quite unpredictable. Giving a boost to a process
>>holding a semaphore is an interesting idea, but it doesn't address the
>>X problem.
>>
>
>Hmm...I'm actually curious why you called it "backboosting".  In academia
>this approach first described in the paper here:
>
>L. Sha, R. Rajkumar, and J. P. Lehoczky. Priority Inheritance Protocols: An
>Approach to Real-Time Synchronization. In IEEE Transactions on Computers,
>vol. 39, pp. 1175-1185, Sep. 1990.
>
>is referred to as priority inheritance.  Is there significant difference
>between your implementation and priority inheritance schemes implemented in
>other OSes?  If so, why backboosting?
>

Well I haven't read the paper, but I'm guessing this is semaphore
priority inheritance.

>
>I was under the impression that pipes and IPC in general are synchronized
>using some sort of semaphores/mutex...or does Linux use a different
>mechanism for IPC and does away with user space synchronization all together
>(e.g. flip-flop buffers with the kernel arbitrating all contention)?  IIRC
>processes don't write to X directly and has to send data to X via IPC.  If
>some futex derivative is used to synchronize the producers with X, then
>making priority inheritable futexes would solve the problem.
>

I _think_ communication with X will mostly be done with waitqueues.
Someone has a priority inheritance futex patch around. I'm not sure
that it is such an open and shut case as you think though. Even if you
could use futexes in communication with X.

>
>>The scheduler in Linus' tree is basically obsolete now, so there isn't
>>any point testing it really. Test Con's or my patches, and let us know
>>if you're still having problems with sir dumps-a-lot.
>>
>
>Okay enough said, you and Con should get your patches merged into that tree
>ASAP if they're ready.
>
>

I think Con's is ready (I think mine is as well, but nobody else does!)



^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2003-08-25 13:53 Marcelo Tosatti
  2003-08-25 14:12 ` Nick Piggin
  0 siblings, 1 reply; 414+ messages in thread
From: Marcelo Tosatti @ 2003-08-25 13:53 UTC (permalink / raw)
  To: Nick Piggin; +Cc: lkml


>
>
> Matthias Andree wrote:
>
> >On Mon, 25 Aug 2003, Marcelo Tosatti wrote:
> >
> >
> >>- 2.4.22-rc4 was released as 2.4.22 with no changes.
> >>
> >
> >What are the plans for 2.4.23? XFS merge perhaps <hint>?
> >
>
> Maybe some of Andrea's VM stuff?

Definitely. That's the first thing I'm going to do after looking through
the "2.4.23-pre-patches" folder.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2003-08-25 13:53 Marcelo Tosatti
@ 2003-08-25 14:12 ` Nick Piggin
  0 siblings, 0 replies; 414+ messages in thread
From: Nick Piggin @ 2003-08-25 14:12 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: lkml



Marcelo Tosatti wrote:

>>Matthias Andree wrote:
>>
>>    
>>
>>>On Mon, 25 Aug 2003, Marcelo Tosatti wrote:
>>>
>>>
>>>      
>>>
>>>>- 2.4.22-rc4 was released as 2.4.22 with no changes.
>>>>
>>>>        
>>>>
>>>What are the plans for 2.4.23? XFS merge perhaps <hint>?
>>>
>>>      
>>>
>>Maybe some of Andrea's VM stuff?
>>    
>>
>
>Definately. Thats the first thing I'm going to do after looking through
>"2.4.23-pre-patches" folder.
>
>  
>

Excellent choice IMO ;)



^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2003-08-12 13:55 Catalin BOIE
  2003-08-12 17:05 ` Ian Hastie
  0 siblings, 1 reply; 414+ messages in thread
From: Catalin BOIE @ 2003-08-12 13:55 UTC (permalink / raw)
  To: linux-kernel

Hello!

"cat drivers/built-in.o > /dev/null" gives me i/o error.

Can I suspect a bad sector?
I use reiserfs.

Thanks!

---
Catalin(ux) BOIE
catab@deuroconsult.ro

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2003-08-12 13:55 Catalin BOIE
@ 2003-08-12 17:05 ` Ian Hastie
  0 siblings, 0 replies; 414+ messages in thread
From: Ian Hastie @ 2003-08-12 17:05 UTC (permalink / raw)
  To: linux-kernel

On Tuesday 12 Aug 2003 14:55, Catalin BOIE wrote:
> Hello!
>
> "cat drivers/built-in.o > /dev/null" gives me i/o error.
>
> Can I suspect a bad sector?
> I use reiserfs.

Can't say.  What i/o error does it give you?  Anything useful in 
/var/log/messages?  Or perhaps /var/log/kern.log?

-- 
Ian.


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2003-07-16 18:36 Sir Ace
  2003-07-16 23:10 ` jiho
  0 siblings, 1 reply; 414+ messages in thread
From: Sir Ace @ 2003-07-16 18:36 UTC (permalink / raw)
  To: linux-kernel

unsubscribe linux-kernel

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2003-07-16 18:36 Sir Ace
@ 2003-07-16 23:10 ` jiho
  0 siblings, 0 replies; 414+ messages in thread
From: jiho @ 2003-07-16 23:10 UTC (permalink / raw)
  To: Sir Ace; +Cc: linux-kernel

You sent this to the wrong e-mail address; send it to majordomo, as shown at the bottom.

Sir Ace wrote:

> unsubscribe linux-kernel
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
> 



^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2003-06-30  3:16 usenet
  2003-06-30  8:09 ` Bruce Harada
  0 siblings, 1 reply; 414+ messages in thread
From: usenet @ 2003-06-30  3:16 UTC (permalink / raw)
  To: linux-kernel



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2003-06-30  3:16 usenet
@ 2003-06-30  8:09 ` Bruce Harada
  2003-06-30  8:23   ` Re: Matti Aarnio
  0 siblings, 1 reply; 414+ messages in thread
From: Bruce Harada @ 2003-06-30  8:09 UTC (permalink / raw)
  To: matti.aarnio; +Cc: linux-kernel


Matt,

What is this guy doing? I count 23 in the last few hours.


On 30 Jun 2003 03:16:53 -0000
usenet@mailgate.netsplit.com wrote:

> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2003-06-30  8:09 ` Bruce Harada
@ 2003-06-30  8:23   ` Matti Aarnio
  0 siblings, 0 replies; 414+ messages in thread
From: Matti Aarnio @ 2003-06-30  8:23 UTC (permalink / raw)
  To: Bruce Harada; +Cc: matti.aarnio, linux-kernel

Always do ask   POSTMASTER@vger.kernel.org   about this kind
of thing...     It is summer-time, and even your ever-watchful
postmasters are going off-net for extended periods of time..
Contacting the ROLE contact will reach multiple people, hopefully
at least one of us is at hand.

On Mon, Jun 30, 2003 at 05:09:47PM +0900, Bruce Harada wrote:
> Date:	Mon, 30 Jun 2003 17:09:47 +0900
> From:	Bruce Harada <bharada@coral.ocn.ne.jp>
> To:	matti.aarnio@zmailer.org
> Cc:	linux-kernel@vger.kernel.org
> Subject: Re:
> 
> Matt,

I wonder who is that..

> What is this guy doing? I count 23 in the last few hours.

Do find out, and tell us.
I blocked messages coming from there couple hours ago.

You have seen nothing, though.   DaveM had a horror situation,
as one misconfigured Norwegian site keeps pushing error messages
to listowner some 15-30 times a minute.  That has been going on
for about 20 hours now:
  http://vger.kernel.org/z/zmailer-rrd-vger_SNMP_SS_SMTPconnects.html

> On 30 Jun 2003 03:16:53 -0000
> usenet@mailgate.netsplit.com wrote:
> 
> > -

/Matti Aarnio  -- one of  <postmaster@vger.kernel.org>

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2003-06-03 23:51 Justin T. Gibbs
  2003-06-03 23:58 ` Marc-Christian Petersen
  0 siblings, 1 reply; 414+ messages in thread
From: Justin T. Gibbs @ 2003-06-03 23:51 UTC (permalink / raw)
  To: linux-scsi, linux-kernel; +Cc: Linus Torvalds, Alan Cox, Marcelo Tosatti

Folks,

I've just uploaded version 1.3.10 of the aic79xx driver and version 
6.2.36 of the aic7xxx driver.  Both are available for 2.4.X and
2.5.X kernels in either bk send format or as a tarball from here:
 
http://people.FreeBSD.org/~gibbs/linux/SRC/

The change sets relative to the 2.5.X tree are:

ChangeSet@1.1275, 2003-06-03 17:35:01-06:00, gibbs@overdrive.btc.adaptec.com
  Update Aic79xx Readme

ChangeSet@1.1274, 2003-06-03 17:22:05-06:00, gibbs@overdrive.btc.adaptec.com
  Aic7xxx Driver Update
   o Bump version number to 6.2.36
   o Document recent aic7xxx driver releases

ChangeSet@1.1273, 2003-06-03 17:20:14-06:00, gibbs@overdrive.btc.adaptec.com
  Aic79xx Driver Update
   o Bump driver version to 1.3.10
   o Document recent releases in driver readme.

ChangeSet@1.1272, 2003-05-31 21:12:09-06:00, gibbs@overdrive.btc.adaptec.com
  Aic7xxx and Aic79xx Driver Update
   o Work around negotiation firmware bug in the Quantum Atlas 10K
   o Clear stale PCI errors in our register mapping test to avoid
     false positives from rogue accesses to our registers that occur
     prior to our driver attach.

ChangeSet@1.1271, 2003-05-31 18:34:01-06:00, gibbs@overdrive.btc.adaptec.com
  Aic79xx Driver Update
   o Implement suspend and resume

ChangeSet@1.1270, 2003-05-31 18:32:36-06:00, gibbs@overdrive.btc.adaptec.com
  Aic7xxx Driver Update
   o Fix some suspend and resume bugs

ChangeSet@1.1269, 2003-05-31 18:27:09-06:00, gibbs@overdrive.btc.adaptec.com
  Aic7xxx Driver Update
   o Correct the type of the DV settings array.

ChangeSet@1.1268, 2003-05-31 18:25:28-06:00, gibbs@overdrive.btc.adaptec.com
  Aic7xxx and Aic79xx driver Update
   o Remove unnecessary and incorrect use of ~0 as a mask.

ChangeSet@1.1267, 2003-05-30 13:50:00-06:00, gibbs@overdrive.btc.adaptec.com
  Aic7xxx and Aic79xx Driver Update
   o Adapt to 2.5.X SCSI proc interface change while maintaining
     compatibility with earlier kernels.

ChangeSet@1.1266, 2003-05-30 11:01:02-06:00, gibbs@overdrive.btc.adaptec.com
  Merge http://linux.bkbits.net/linux-2.5
  into overdrive.btc.adaptec.com:/usr/home/gibbs/bk/linux-2.5

ChangeSet@1.1215.4.6, 2003-05-30 10:50:17-06:00, gibbs@overdrive.btc.adaptec.com
  Aic7xxx Driver Update
   o Bring in aic7xxx_reg_print.c update that was missed the
     last time the firmware was regenerated.  The old file worked
     fine, so this is mostly a cosmetic change.

ChangeSet@1.1215.4.5, 2003-05-30 10:48:31-06:00, gibbs@overdrive.btc.adaptec.com
  Aic79xx Driver Update
   o Correct non-zero lun output on post Rev A4 hardware
     in packetized mode.

ChangeSet@1.1215.4.4, 2003-05-30 10:46:03-06:00, gibbs@overdrive.btc.adaptec.com
  Aic79xx Driver Update
   o Return to using 16-byte alignment for the SCB_TAG field in our SCB.
     The hardware seems to corrupt SCBs on some PCI platforms with the
     tag field in its old location.

ChangeSet@1.1215.4.3, 2003-05-30 10:43:20-06:00, gibbs@overdrive.btc.adaptec.com
  Aic7xxx Driver Update
   o Adopt 2.5.X EISA framework for probing aic7770 controllers

ChangeSet@1.1215.4.2, 2003-05-30 10:31:04-06:00, gibbs@overdrive.btc.adaptec.com
  Aic7xxx Driver Update
   o Correct card identification string for the 2920C



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2003-06-03 23:51 Justin T. Gibbs
@ 2003-06-03 23:58 ` Marc-Christian Petersen
  0 siblings, 0 replies; 414+ messages in thread
From: Marc-Christian Petersen @ 2003-06-03 23:58 UTC (permalink / raw)
  To: Justin T. Gibbs, linux-scsi, linux-kernel
  Cc: Linus Torvalds, Alan Cox, Marcelo Tosatti

On Wednesday 04 June 2003 01:51, Justin T. Gibbs wrote:

Hi Justin,

> I've just uploaded version 1.3.10 of the aic79xx driver and version
> 6.2.36 of the aic7xxx driver.  Both are available for 2.4.X and
> 2.5.X kernels in either bk send format or as a tarball from here:
many thanks! I'll update them for my tree (as always with your updates 
:-)

ciao, Marc


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2003-04-30 21:39 Mauricio Oliveira Carneiro
  2003-05-01  0:03 ` Eyal Lebedinsky
  0 siblings, 1 reply; 414+ messages in thread
From: Mauricio Oliveira Carneiro @ 2003-04-30 21:39 UTC (permalink / raw)
  To: linux-kernel

Hi everyone,

I'm almost hanging myself.. it's been a week trying without success :-( 
Maybe someone here could help me out. Sorry for the disturbance ! 

I have bought a USB FLASH MEMORY device from Kmit Co. The model is : "Unity 
ITS". I bought it from FRY's Electronics in Palo Alto, CA (USA)

It works perfectly with windows, but i'm having problems with linux 2.4.20- 
9 kernel.

It detects the existence of the USB FLASH MEMORY device, as of my 
/proc/bus/usb/device file says :

C:* #Ifs= 2 Cfg#= 1 Atr=a0 MxPwr= 98mA
I:  If#= 0 Alt= 0 #EPs= 1 Cls=03(HID  ) Sub=01 Prot=01 Driver=hid
E:  Ad=81(I) Atr=03(Int.) MxPS=   8 Ivl=10ms
I:  If#= 1 Alt= 0 #EPs= 1 Cls=03(HID  ) Sub=01 Prot=02 Driver=hid
E:  Ad=82(I) Atr=03(Int.) MxPS=   8 Ivl=10ms
T:  Bus=02 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#=  4 Spd=12  MxCh= 0
D:  Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=16 #Cfgs=  1
P:  Vendor=09a6 ProdID=8001 Rev= 1.00
S:  Manufacturer=KMIT CO.,LTD
S:  Product=KM USB Removable Disk
S:  SerialNumber=20021226113657-00

At the /proc/scsi/usb-storage-0/0 file I get :

   Host scsi0: usb-storage
       Vendor: KMIT CO.,LTD
      Product: KM USB Removable Disk
Serial Number: 20021226113657-00
     Protocol: 8070i
    Transport: Bulk
         GUID: 09a68001002122fffffff600
     Attached: Yes

In /etc/mtab :

usbdevfs on /proc/bus/usb type usbdevfs (rw)

But I can't see it mounted anywhere in my system, nor can I mount it by 
hand since I don't know the device filename (/dev/?) .

I believe it works the same way for every flash memory drive.

if someone have already suffered of the same cause, I thank for the help :)

Mauricio Oliveira Carneiro

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2003-04-30 21:39 Mauricio Oliveira Carneiro
@ 2003-05-01  0:03 ` Eyal Lebedinsky
  0 siblings, 0 replies; 414+ messages in thread
From: Eyal Lebedinsky @ 2003-05-01  0:03 UTC (permalink / raw)
  To: Mauricio Oliveira Carneiro; +Cc: linux-kernel

Mauricio Oliveira Carneiro wrote:

> But I can't see it mounted anywhere in my system, nor can I mount it by
> hand since I don't know the device filename (/dev/?) .

The device is the next available scsi, e.g.:
	mount /dev/sda1 /mnt

My /etc/fstab has this:

# USB camera as mass-storage, Nikon Coolpix 2000
#
none               /proc/bus/usb        usbdevfs  noauto          0 0
/dev/sda1          /nikon               msdos     noauto          0 0

--
Eyal Lebedinsky (eyal@eyal.emu.id.au) <http://samba.org/eyal/>

^ permalink raw reply	[flat|nested] 414+ messages in thread

[parent not found: <001e01c2d9ef$01cdc970$0200a8c0@wsl3>]

* Re:
       [not found] <001e01c2d9ef$01cdc970$0200a8c0@wsl3>
@ 2003-02-21 21:34 ` b_adlakha
  0 siblings, 0 replies; 414+ messages in thread
From: b_adlakha @ 2003-02-21 21:34 UTC (permalink / raw)
  To: vlad; +Cc: linux-kernel

Vlad@geekizoid.com writes: 

> Send your subscribe message to majordomo@vger.kernel.org. 
> 
> -- 
> 
>  /"\                         / For information and quotes, email us at
>  \ /  ASCII RIBBON CAMPAIGN / info@lrsehosting.com
>   X   AGAINST HTML MAIL    / http://www.lrsehosting.com/
>  / \  AND POSTINGS        / vlad@lrsehosting.com
> ------------------------------------------------------------------------- 
> 
>> -----Original Message-----
>> From: linux-kernel-owner@vger.kernel.org
>> [mailto:linux-kernel-owner@vger.kernel.org]On Behalf Of
>> b_adlakha@softhome.net
>> Sent: Friday, February 21, 2003 3:14 PM
>> To: linux-kernel@vger.kernel.org
>> Subject:  
>> 
>> 
>> subscribe 
>> -
>> To unsubscribe from this list: send the line "unsubscribe 
>> linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
 

srry 


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2003-02-08 10:40 Manfred Spraul
  0 siblings, 0 replies; 414+ messages in thread
From: Manfred Spraul @ 2003-02-08 10:40 UTC (permalink / raw)
  To: Jun Sun; +Cc: linux-kernel

Jun wrote:

>cpu B:
>        get the ipi and (WITHOUT CHECKING cpu_vm_mask again)
>        go ahead doing tlb flushing.
>
>I am not sure if any disastrous result will happen, but apparently
>an unintended flush has happened.
>
Yes, that's possible. It should be rare (the window is a few 
instructions long), and on i386 it doesn't hurt.

>In MIPS such a hole could
>cause two processes using the same TLB entries which yields all kinds
>of interesting crashes.
>
What is your problem? Do your mips cpus have mmu contexts (the ability 
to store tlb entries from multiple processes), and you load tlb entries 
with the wrong context id?
The i386 implementation knows that i386 cpus don't support mmu contexts, 
i.e. the whole tlb is flushed during process switches.

--
    Manfred



^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2003-01-12 13:28 Philip K.F. Hölzenspies
  2003-01-12 17:57 ` Shawn Starr
  0 siblings, 1 reply; 414+ messages in thread
From: Philip K.F. Hölzenspies @ 2003-01-12 13:28 UTC (permalink / raw)
  To: linux-kernel
  Cc: 'Pete Zaitcev', 'Shawn Starr',
	'Bayard R. Coolidge'

I do have the following line in my fstab file (Bayard):

none  /proc/bus/usb  usbfs  defaults 0 0

I believe usbdevfs is deprecated (although - should I use it when my
hosthub is picked up by the OHCI driver in stead of the EHCI driver?).

My full dmesg is attached below (Pete), I'ld say the relevant section
is:

usb.c: registered new driver usbdevfs
usb.c: registered new driver hub
PCI: Enabling device 02:08.2 (0014 -> 0016)
PCI: No IRQ known for interrupt pin C of device 02:08.2. Probably buggy
MP table.
hcd.c: Found HC with no IRQ.  Check BIOS/PCI 02:08.2 setup!
uhci.c: USB Universal Host Controller Interface driver v1.1
PCI: Enabling device 02:08.0 (0014 -> 0016)
PCI: No IRQ known for interrupt pin A of device 02:08.0. Probably buggy
MP table.
usb-ohci.c: found OHCI device with no IRQ assigned. check BIOS settings!
PCI: Enabling device 02:08.1 (0014 -> 0016)
PCI: No IRQ known for interrupt pin B of device 02:08.1. Probably buggy
MP table.
usb-ohci.c: found OHCI device with no IRQ assigned. check BIOS settings!
usb.c: registered new driver hiddev
usb.c: registered new driver hid
hid-core.c: v1.8.1 Andreas Gal, Vojtech Pavlik <vojtech@suse.cz>
hid-core.c: USB HID support drivers
usb.c: registered new driver usblp
printer.c: v0.11: USB Printer Device Class driver
Initializing USB Mass Storage driver...
usb.c: registered new driver usb-storage
USB Mass Storage support registered.

Does anybody else with the A7M266-D have that "Probably buggy MP table."

For the OHCI 'found device with no IRQ assigned' I don't really get it.
I have all my PCI slots set to auto assign IRQ and I didn't reserve any
IRQ for Legacy Devices, so that shouldn't be the problem.

> I have this board, but the problem is the newer A7M266-D boards have
the USB
> 1.x pins removed.
(Shawn)

I don't know what would be considered a "newer" board, but mine is a 
03/07/2002-ASUS-A7M266-D

B.T.W.
I use BIOS rev. 1005.

Regards,

Philip

P.S.
Would configuring the kernel with >1GB mem support remove that
" Warning only 896MB will be used." from my dmesg?



Linux version 2.4.20 (root@tomwaits) (gcc version 3.2) #1 SMP Sat Jan 11
18:46:51 CET 2003
BIOS-provided physical RAM map:
 BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
 BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
 BIOS-e820: 00000000000f0000 - 0000000000100000 (reserved)
 BIOS-e820: 0000000000100000 - 000000003ffec000 (usable)
 BIOS-e820: 000000003ffec000 - 000000003ffef000 (ACPI data)
 BIOS-e820: 000000003ffef000 - 000000003ffff000 (reserved)
 BIOS-e820: 000000003ffff000 - 0000000040000000 (ACPI NVS)
 BIOS-e820: 00000000fec00000 - 00000000fec01000 (reserved)
 BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
 BIOS-e820: 00000000ffff0000 - 0000000100000000 (reserved)
Warning only 896MB will be used.
Use a HIGHMEM enabled kernel.
896MB LOWMEM available.
found SMP MP-table at 000f6d10
hm, page 000f6000 reserved twice.
hm, page 000f7000 reserved twice.
hm, page 000f6000 reserved twice.
hm, page 000f7000 reserved twice.
On node 0 totalpages: 229376
zone(0): 4096 pages.
zone(1): 225280 pages.
zone(2): 0 pages.
Intel MultiProcessor Specification v1.4
    Virtual Wire compatibility mode.
OEM ID: ASUS     Product ID: PROD00000000 APIC at: 0xFEE00000
Processor #0 Pentium(tm) Pro APIC version 16
Processor #1 Pentium(tm) Pro APIC version 16
I/O APIC #2 Version 17 at 0xFEC00000.
Processors: 2
Kernel command line: BOOT_IMAGE=lfs ro root=305
Initializing CPU#0
Detected 1533.431 MHz processor.
Console: colour VGA+ 80x25
Calibrating delay loop... 3060.53 BogoMIPS
Memory: 904324k/917504k available (1707k kernel code, 12792k reserved,
604k data, 124k init, 0k highmem)
Dentry cache hash table entries: 131072 (order: 8, 1048576 bytes)
Inode cache hash table entries: 65536 (order: 7, 524288 bytes)
Mount-cache hash table entries: 16384 (order: 5, 131072 bytes)
Buffer-cache hash table entries: 65536 (order: 6, 262144 bytes)
Page-cache hash table entries: 262144 (order: 8, 1048576 bytes)
CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 256K (64 bytes/line)
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#0.
CPU:     After generic, caps: 0383fbff c1cbfbff 00000000 00000000
CPU:             Common caps: 0383fbff c1cbfbff 00000000 00000000
Enabling fast FPU save and restore... done.
Enabling unmasked SIMD FPU exception support... done.
Checking 'hlt' instruction... OK.
POSIX conformance testing by UNIFIX
mtrr: v1.40 (20010327) Richard Gooch (rgooch@atnf.csiro.au)
mtrr: detected mtrr type: Intel
CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 256K (64 bytes/line)
Intel machine check reporting enabled on CPU#0.
CPU:     After generic, caps: 0383fbff c1cbfbff 00000000 00000000
CPU:             Common caps: 0383fbff c1cbfbff 00000000 00000000
CPU0: AMD Athlon(TM) MP 1800+ stepping 02
per-CPU timeslice cutoff: 731.39 usecs.
enabled ExtINT on CPU#0
ESR value before enabling vector: 00000000
ESR value after enabling vector: 00000000
Booting processor 1/1 eip 2000
Initializing CPU#1
masked ExtINT on CPU#1
ESR value before enabling vector: 00000000
ESR value after enabling vector: 00000000
Calibrating delay loop... 3060.53 BogoMIPS
CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 256K (64 bytes/line)
Intel machine check reporting enabled on CPU#1.
CPU:     After generic, caps: 0383fbff c1cbfbff 00000000 00000000
CPU:             Common caps: 0383fbff c1cbfbff 00000000 00000000
CPU1: AMD Athlon(TM) MP 1800+ stepping 02
Total of 2 processors activated (6121.06 BogoMIPS).
ENABLING IO-APIC IRQs
Setting 2 in the phys_id_present_map
...changing IO-APIC physical APIC ID to 2 ... ok.
init IO_APIC IRQs
 IO-APIC (apicid-pin) 2-0, 2-5, 2-9, 2-10, 2-11, 2-17, 2-20, 2-21, 2-22,
2-23 not connected.
..TIMER: vector=0x31 pin1=2 pin2=0
number of MP IRQ sources: 16.
number of IO-APIC #2 registers: 24.
testing the IO APIC.......................

IO APIC #2......
.... register #00: 02000000
.......    : physical APIC id: 02
.... register #01: 00170011
.......     : max redirection entries: 0017
.......     : PRQ implemented: 0
.......     : IO APIC version: 0011
.... register #02: 00000000
.......     : arbitration: 00
.... IRQ redirection table:
 NR Log Phy Mask Trig IRR Pol Stat Dest Deli Vect:   
 00 000 00  1    0    0   0   0    0    0    00
 01 003 03  0    0    0   0   0    1    1    39
 02 003 03  0    0    0   0   0    1    1    31
 03 003 03  0    0    0   0   0    1    1    41
 04 003 03  0    0    0   0   0    1    1    49
 05 000 00  1    0    0   0   0    0    0    00
 06 003 03  0    0    0   0   0    1    1    51
 07 003 03  0    0    0   0   0    1    1    59
 08 003 03  0    0    0   0   0    1    1    61
 09 000 00  1    0    0   0   0    0    0    00
 0a 000 00  1    0    0   0   0    0    0    00
 0b 000 00  1    0    0   0   0    0    0    00
 0c 003 03  0    0    0   0   0    1    1    69
 0d 003 03  0    0    0   0   0    1    1    71
 0e 003 03  0    0    0   0   0    1    1    79
 0f 003 03  0    0    0   0   0    1    1    81
 10 003 03  1    1    0   1   0    1    1    89
 11 000 00  1    0    0   0   0    0    0    00
 12 003 03  1    1    0   1   0    1    1    91
 13 003 03  1    1    0   1   0    1    1    99
 14 000 00  1    0    0   0   0    0    0    00
 15 000 00  1    0    0   0   0    0    0    00
 16 000 00  1    0    0   0   0    0    0    00
 17 000 00  1    0    0   0   0    0    0    00
IRQ to pin mappings:
IRQ0 -> 0:2
IRQ1 -> 0:1
IRQ3 -> 0:3
IRQ4 -> 0:4
IRQ6 -> 0:6
IRQ7 -> 0:7
IRQ8 -> 0:8
IRQ12 -> 0:12
IRQ13 -> 0:13
IRQ14 -> 0:14
IRQ15 -> 0:15
IRQ16 -> 0:16
IRQ18 -> 0:18
IRQ19 -> 0:19
.................................... done.
Using local APIC timer interrupts.
calibrating APIC timer ...
..... CPU clock speed is 1533.4929 MHz.
..... host bus clock speed is 266.6942 MHz.
cpu: 0, clocks: 2666942, slice: 888980
CPU0<T0:2666928,T1:1777936,D:12,S:888980,C:2666942>
cpu: 1, clocks: 2666942, slice: 888980
CPU1<T0:2666928,T1:888960,D:8,S:888980,C:2666942>
checking TSC synchronization across CPUs: passed.
Waiting on wait_init_idle (map = 0x2)
All processors have done init_idle
mtrr: your CPUs had inconsistent fixed MTRR settings
mtrr: probably your BIOS does not setup all CPUs
PCI: PCI BIOS revision 2.10 entry at 0xf0de0, last bus=2
PCI: Using configuration type 1
PCI: Probing PCI hardware
PCI: Using IRQ router AMD768 [1022/7443] at 00:07.3
PCI->APIC IRQ transform: (B1,I5,P0) -> 16
PCI->APIC IRQ transform: (B2,I5,P0) -> 18
BIOS failed to enable PCI standards compliance, fixing this error.
Linux NET4.0 for Linux 2.4
Based upon Swansea University Computer Society NET3.039
Initializing RT netlink socket
Starting kswapd
Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
NTFS driver v1.1.22 [Flags: R/O]
pty: 256 Unix98 ptys configured
Serial driver version 5.05c (2001-07-08) with MANY_PORTS SHARE_IRQ
SERIAL_PCI enabled
ttyS00 at 0x03f8 (irq = 4) is a 16550A
ttyS01 at 0x02f8 (irq = 3) is a 16550A
amd768_rng: AMD768 system management I/O registers at 0xE400.
amd768_rng hardware driver 0.1.0 loaded
Uniform Multi-Platform E-IDE driver Revision: 6.31
ide: Assuming 33MHz system bus speed for PIO modes; override with
idebus=xx
AMD7441: IDE controller on PCI bus 00 dev 39
AMD7441: chipset revision 4
AMD7441: not 100% native mode: will probe irqs later
AMD7441: disabling single-word DMA support (revision < C4)
    ide0: BM-DMA at 0xd800-0xd807, BIOS settings: hda:DMA, hdb:DMA
    ide1: BM-DMA at 0xd808-0xd80f, BIOS settings: hdc:DMA, hdd:DMA
hda: WDC WD800JB-00CRA1, ATA DISK drive
hdb: WDC WD307AA-00BAA0, ATA DISK drive
hdc: LITEON DVD-ROM LTD163D, ATAPI CD/DVD-ROM drive
hdd: AOPEN CD-RW CRW3248 1.10 20020301, ATAPI CD/DVD-ROM drive
ide0 at 0x1f0-0x1f7,0x3f6 on irq 14
ide1 at 0x170-0x177,0x376 on irq 15
blk: queue c03b9124, I/O limit 4095Mb (mask 0xffffffff)
hda: 156301488 sectors (80026 MB) w/8192KiB Cache, CHS=9729/255/63,
UDMA(100)
blk: queue c03b9270, I/O limit 4095Mb (mask 0xffffffff)
hdb: 60074784 sectors (30758 MB) w/2048KiB Cache, CHS=3739/255/63,
UDMA(66)
hdc: ATAPI 48X DVD-ROM drive, 512kB Cache, UDMA(33)
Uniform CD-ROM driver Revision: 3.12
hdd: ATAPI 48X CD-ROM CD-R/RW drive, 8192kB Cache, UDMA(33)
Partition check:
 hda: hda1 hda2 < hda5 >
 hdb: hdb1
Floppy drive(s): fd0 is 1.44M
FDC 0 is a post-1991 82077
loop: loaded (max 8 devices)
3c59x: Donald Becker and others. www.scyld.com/network/vortex.html
02:05.0: 3Com PCI 3c982 Dual Port Server Cyclone at 0xc800. Vers
LK1.1.16
Linux agpgart interface v0.99 (c) Jeff Hartmann
agpgart: Maximum main memory to use for agp memory: 816M
agpgart: Detected AMD 760MP chipset
agpgart: AGP aperture is 32M @ 0xfc000000
SCSI subsystem driver Revision: 1.00
kmod: failed to exec /sbin/modprobe -s -k scsi_hostadapter, errno = 2
es1371: version v0.30 time 18:48:06 Jan 11 2003
usb.c: registered new driver usbdevfs
usb.c: registered new driver hub
PCI: Enabling device 02:08.2 (0014 -> 0016)
PCI: No IRQ known for interrupt pin C of device 02:08.2. Probably buggy
MP table.
hcd.c: Found HC with no IRQ.  Check BIOS/PCI 02:08.2 setup!
uhci.c: USB Universal Host Controller Interface driver v1.1
PCI: Enabling device 02:08.0 (0014 -> 0016)
PCI: No IRQ known for interrupt pin A of device 02:08.0. Probably buggy
MP table.
usb-ohci.c: found OHCI device with no IRQ assigned. check BIOS settings!
PCI: Enabling device 02:08.1 (0014 -> 0016)
PCI: No IRQ known for interrupt pin B of device 02:08.1. Probably buggy
MP table.
usb-ohci.c: found OHCI device with no IRQ assigned. check BIOS settings!
usb.c: registered new driver hiddev
usb.c: registered new driver hid
hid-core.c: v1.8.1 Andreas Gal, Vojtech Pavlik <vojtech@suse.cz>
hid-core.c: USB HID support drivers
usb.c: registered new driver usblp
printer.c: v0.11: USB Printer Device Class driver
Initializing USB Mass Storage driver...
usb.c: registered new driver usb-storage
USB Mass Storage support registered.
mice: PS/2 mouse device common for all mice
NET4: Linux TCP/IP 1.0 for NET4.0
IP Protocols: ICMP, UDP, TCP, IGMP
IP: routing cache hash table of 8192 buckets, 64Kbytes
TCP: Hash tables configured (established 262144 bind 65536)
NET4: Unix domain sockets 1.0/SMP for Linux NET4.0.
VFS: Mounted root (ext2 filesystem) readonly.
Freeing unused kernel memory: 124k freed



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2003-01-12 13:28 Philip K.F. Hölzenspies
@ 2003-01-12 17:57 ` Shawn Starr
  0 siblings, 0 replies; 414+ messages in thread
From: Shawn Starr @ 2003-01-12 17:57 UTC (permalink / raw)
  To: Philip K.F. Hölzenspies, linux-kernel
  Cc: 'Pete Zaitcev', 'Bayard R. Coolidge'

Same bug I get with 2.4.20 :-(

On Sunday 12 January 2003 8:28 am, Philip K.F. Hölzenspies wrote:
> I do have the following line in my fstab file (Bayard):
>
> none  /proc/bus/usb  usbfs  defaults 0 0
>
> I believe usbdevfs is deprecated (although - should I use it when my
> hosthub is picked up by the OHCI driver in stead of the EHCI driver?).
>
> My full dmesg is attached below (Pete), I'ld say the relevant section
> is:
>
> usb.c: registered new driver usbdevfs
> usb.c: registered new driver hub
> PCI: Enabling device 02:08.2 (0014 -> 0016)
> PCI: No IRQ known for interrupt pin C of device 02:08.2. Probably buggy
> MP table.
> hcd.c: Found HC with no IRQ.  Check BIOS/PCI 02:08.2 setup!
> uhci.c: USB Universal Host Controller Interface driver v1.1
> PCI: Enabling device 02:08.0 (0014 -> 0016)
> PCI: No IRQ known for interrupt pin A of device 02:08.0. Probably buggy
> MP table.
> usb-ohci.c: found OHCI device with no IRQ assigned. check BIOS settings!
> PCI: Enabling device 02:08.1 (0014 -> 0016)
> PCI: No IRQ known for interrupt pin B of device 02:08.1. Probably buggy
> MP table.
> usb-ohci.c: found OHCI device with no IRQ assigned. check BIOS settings!
> usb.c: registered new driver hiddev
> usb.c: registered new driver hid
> hid-core.c: v1.8.1 Andreas Gal, Vojtech Pavlik <vojtech@suse.cz>
> hid-core.c: USB HID support drivers
> usb.c: registered new driver usblp
> printer.c: v0.11: USB Printer Device Class driver
> Initializing USB Mass Storage driver...
> usb.c: registered new driver usb-storage
> USB Mass Storage support registered.
>
> Does anybody else with the A7M266-D have that "Probably buggy MP table."
>
> For the OHCI 'found device with no IRQ assigned' I don't really get it.
> I have all my PCI slots set to auto assign IRQ and I didn't reserve any
> IRQ for Legacy Devices, so that shouldn't be the problem.
>
> > I have this board, but the problem is the newer A7M266-D boards have
>
> the USB
>
> > 1.x pins removed.
>
> (Shawn)
>
> I don't know what would be considered a "newer" board, but mine is a
> 03/07/2002-ASUS-A7M266-D
>
> B.T.W.
> I use BIOS rev. 1005.
>
> Regards,
>
> Philip
>
> P.S.
> Would configuring the kernel with >1GB mem support remove that
> " Warning only 896MB will be used." from my dmesg?
>
>
>
> Linux version 2.4.20 (root@tomwaits) (gcc version 3.2) #1 SMP Sat Jan 11
> 18:46:51 CET 2003
> BIOS-provided physical RAM map:
>  BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
>  BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
>  BIOS-e820: 00000000000f0000 - 0000000000100000 (reserved)
>  BIOS-e820: 0000000000100000 - 000000003ffec000 (usable)
>  BIOS-e820: 000000003ffec000 - 000000003ffef000 (ACPI data)
>  BIOS-e820: 000000003ffef000 - 000000003ffff000 (reserved)
>  BIOS-e820: 000000003ffff000 - 0000000040000000 (ACPI NVS)
>  BIOS-e820: 00000000fec00000 - 00000000fec01000 (reserved)
>  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  BIOS-e820: 00000000ffff0000 - 0000000100000000 (reserved)
> Warning only 896MB will be used.
> Use a HIGHMEM enabled kernel.
> 896MB LOWMEM available.
> found SMP MP-table at 000f6d10
> hm, page 000f6000 reserved twice.
> hm, page 000f7000 reserved twice.
> hm, page 000f6000 reserved twice.
> hm, page 000f7000 reserved twice.
> On node 0 totalpages: 229376
> zone(0): 4096 pages.
> zone(1): 225280 pages.
> zone(2): 0 pages.
> Intel MultiProcessor Specification v1.4
>     Virtual Wire compatibility mode.
> OEM ID: ASUS     Product ID: PROD00000000 APIC at: 0xFEE00000
> Processor #0 Pentium(tm) Pro APIC version 16
> Processor #1 Pentium(tm) Pro APIC version 16
> I/O APIC #2 Version 17 at 0xFEC00000.
> Processors: 2
> Kernel command line: BOOT_IMAGE=lfs ro root=305
> Initializing CPU#0
> Detected 1533.431 MHz processor.
> Console: colour VGA+ 80x25
> Calibrating delay loop... 3060.53 BogoMIPS
> Memory: 904324k/917504k available (1707k kernel code, 12792k reserved,
> 604k data, 124k init, 0k highmem)
> Dentry cache hash table entries: 131072 (order: 8, 1048576 bytes)
> Inode cache hash table entries: 65536 (order: 7, 524288 bytes)
> Mount-cache hash table entries: 16384 (order: 5, 131072 bytes)
> Buffer-cache hash table entries: 65536 (order: 6, 262144 bytes)
> Page-cache hash table entries: 262144 (order: 8, 1048576 bytes)
> CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
> CPU: L2 Cache: 256K (64 bytes/line)
> Intel machine check architecture supported.
> Intel machine check reporting enabled on CPU#0.
> CPU:     After generic, caps: 0383fbff c1cbfbff 00000000 00000000
> CPU:             Common caps: 0383fbff c1cbfbff 00000000 00000000
> Enabling fast FPU save and restore... done.
> Enabling unmasked SIMD FPU exception support... done.
> Checking 'hlt' instruction... OK.
> POSIX conformance testing by UNIFIX
> mtrr: v1.40 (20010327) Richard Gooch (rgooch@atnf.csiro.au)
> mtrr: detected mtrr type: Intel
> CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
> CPU: L2 Cache: 256K (64 bytes/line)
> Intel machine check reporting enabled on CPU#0.
> CPU:     After generic, caps: 0383fbff c1cbfbff 00000000 00000000
> CPU:             Common caps: 0383fbff c1cbfbff 00000000 00000000
> CPU0: AMD Athlon(TM) MP 1800+ stepping 02
> per-CPU timeslice cutoff: 731.39 usecs.
> enabled ExtINT on CPU#0
> ESR value before enabling vector: 00000000
> ESR value after enabling vector: 00000000
> Booting processor 1/1 eip 2000
> Initializing CPU#1
> masked ExtINT on CPU#1
> ESR value before enabling vector: 00000000
> ESR value after enabling vector: 00000000
> Calibrating delay loop... 3060.53 BogoMIPS
> CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
> CPU: L2 Cache: 256K (64 bytes/line)
> Intel machine check reporting enabled on CPU#1.
> CPU:     After generic, caps: 0383fbff c1cbfbff 00000000 00000000
> CPU:             Common caps: 0383fbff c1cbfbff 00000000 00000000
> CPU1: AMD Athlon(TM) MP 1800+ stepping 02
> Total of 2 processors activated (6121.06 BogoMIPS).
> ENABLING IO-APIC IRQs
> Setting 2 in the phys_id_present_map
> ...changing IO-APIC physical APIC ID to 2 ... ok.
> init IO_APIC IRQs
>  IO-APIC (apicid-pin) 2-0, 2-5, 2-9, 2-10, 2-11, 2-17, 2-20, 2-21, 2-22,
> 2-23 not connected.
> ..TIMER: vector=0x31 pin1=2 pin2=0
> number of MP IRQ sources: 16.
> number of IO-APIC #2 registers: 24.
> testing the IO APIC.......................
>
> IO APIC #2......
> .... register #00: 02000000
> .......    : physical APIC id: 02
> .... register #01: 00170011
> .......     : max redirection entries: 0017
> .......     : PRQ implemented: 0
> .......     : IO APIC version: 0011
> .... register #02: 00000000
> .......     : arbitration: 00
> .... IRQ redirection table:
>  NR Log Phy Mask Trig IRR Pol Stat Dest Deli Vect:
>  00 000 00  1    0    0   0   0    0    0    00
>  01 003 03  0    0    0   0   0    1    1    39
>  02 003 03  0    0    0   0   0    1    1    31
>  03 003 03  0    0    0   0   0    1    1    41
>  04 003 03  0    0    0   0   0    1    1    49
>  05 000 00  1    0    0   0   0    0    0    00
>  06 003 03  0    0    0   0   0    1    1    51
>  07 003 03  0    0    0   0   0    1    1    59
>  08 003 03  0    0    0   0   0    1    1    61
>  09 000 00  1    0    0   0   0    0    0    00
>  0a 000 00  1    0    0   0   0    0    0    00
>  0b 000 00  1    0    0   0   0    0    0    00
>  0c 003 03  0    0    0   0   0    1    1    69
>  0d 003 03  0    0    0   0   0    1    1    71
>  0e 003 03  0    0    0   0   0    1    1    79
>  0f 003 03  0    0    0   0   0    1    1    81
>  10 003 03  1    1    0   1   0    1    1    89
>  11 000 00  1    0    0   0   0    0    0    00
>  12 003 03  1    1    0   1   0    1    1    91
>  13 003 03  1    1    0   1   0    1    1    99
>  14 000 00  1    0    0   0   0    0    0    00
>  15 000 00  1    0    0   0   0    0    0    00
>  16 000 00  1    0    0   0   0    0    0    00
>  17 000 00  1    0    0   0   0    0    0    00
> IRQ to pin mappings:
> IRQ0 -> 0:2
> IRQ1 -> 0:1
> IRQ3 -> 0:3
> IRQ4 -> 0:4
> IRQ6 -> 0:6
> IRQ7 -> 0:7
> IRQ8 -> 0:8
> IRQ12 -> 0:12
> IRQ13 -> 0:13
> IRQ14 -> 0:14
> IRQ15 -> 0:15
> IRQ16 -> 0:16
> IRQ18 -> 0:18
> IRQ19 -> 0:19
> .................................... done.
> Using local APIC timer interrupts.
> calibrating APIC timer ...
> ..... CPU clock speed is 1533.4929 MHz.
> ..... host bus clock speed is 266.6942 MHz.
> cpu: 0, clocks: 2666942, slice: 888980
> CPU0<T0:2666928,T1:1777936,D:12,S:888980,C:2666942>
> cpu: 1, clocks: 2666942, slice: 888980
> CPU1<T0:2666928,T1:888960,D:8,S:888980,C:2666942>
> checking TSC synchronization across CPUs: passed.
> Waiting on wait_init_idle (map = 0x2)
> All processors have done init_idle
> mtrr: your CPUs had inconsistent fixed MTRR settings
> mtrr: probably your BIOS does not setup all CPUs
> PCI: PCI BIOS revision 2.10 entry at 0xf0de0, last bus=2
> PCI: Using configuration type 1
> PCI: Probing PCI hardware
> PCI: Using IRQ router AMD768 [1022/7443] at 00:07.3
> PCI->APIC IRQ transform: (B1,I5,P0) -> 16
> PCI->APIC IRQ transform: (B2,I5,P0) -> 18
> BIOS failed to enable PCI standards compliance, fixing this error.
> Linux NET4.0 for Linux 2.4
> Based upon Swansea University Computer Society NET3.039
> Initializing RT netlink socket
> Starting kswapd
> Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
> NTFS driver v1.1.22 [Flags: R/O]
> pty: 256 Unix98 ptys configured
> Serial driver version 5.05c (2001-07-08) with MANY_PORTS SHARE_IRQ
> SERIAL_PCI enabled
> ttyS00 at 0x03f8 (irq = 4) is a 16550A
> ttyS01 at 0x02f8 (irq = 3) is a 16550A
> amd768_rng: AMD768 system management I/O registers at 0xE400.
> amd768_rng hardware driver 0.1.0 loaded
> Uniform Multi-Platform E-IDE driver Revision: 6.31
> ide: Assuming 33MHz system bus speed for PIO modes; override with
> idebus=xx
> AMD7441: IDE controller on PCI bus 00 dev 39
> AMD7441: chipset revision 4
> AMD7441: not 100% native mode: will probe irqs later
> AMD7441: disabling single-word DMA support (revision < C4)
>     ide0: BM-DMA at 0xd800-0xd807, BIOS settings: hda:DMA, hdb:DMA
>     ide1: BM-DMA at 0xd808-0xd80f, BIOS settings: hdc:DMA, hdd:DMA
> hda: WDC WD800JB-00CRA1, ATA DISK drive
> hdb: WDC WD307AA-00BAA0, ATA DISK drive
> hdc: LITEON DVD-ROM LTD163D, ATAPI CD/DVD-ROM drive
> hdd: AOPEN CD-RW CRW3248 1.10 20020301, ATAPI CD/DVD-ROM drive
> ide0 at 0x1f0-0x1f7,0x3f6 on irq 14
> ide1 at 0x170-0x177,0x376 on irq 15
> blk: queue c03b9124, I/O limit 4095Mb (mask 0xffffffff)
> hda: 156301488 sectors (80026 MB) w/8192KiB Cache, CHS=9729/255/63,
> UDMA(100)
> blk: queue c03b9270, I/O limit 4095Mb (mask 0xffffffff)
> hdb: 60074784 sectors (30758 MB) w/2048KiB Cache, CHS=3739/255/63,
> UDMA(66)
> hdc: ATAPI 48X DVD-ROM drive, 512kB Cache, UDMA(33)
> Uniform CD-ROM driver Revision: 3.12
> hdd: ATAPI 48X CD-ROM CD-R/RW drive, 8192kB Cache, UDMA(33)
> Partition check:
>  hda: hda1 hda2 < hda5 >
>  hdb: hdb1
> Floppy drive(s): fd0 is 1.44M
> FDC 0 is a post-1991 82077
> loop: loaded (max 8 devices)
> 3c59x: Donald Becker and others. www.scyld.com/network/vortex.html
> 02:05.0: 3Com PCI 3c982 Dual Port Server Cyclone at 0xc800. Vers
> LK1.1.16
> Linux agpgart interface v0.99 (c) Jeff Hartmann
> agpgart: Maximum main memory to use for agp memory: 816M
> agpgart: Detected AMD 760MP chipset
> agpgart: AGP aperture is 32M @ 0xfc000000
> SCSI subsystem driver Revision: 1.00
> kmod: failed to exec /sbin/modprobe -s -k scsi_hostadapter, errno = 2
> es1371: version v0.30 time 18:48:06 Jan 11 2003
> usb.c: registered new driver usbdevfs
> usb.c: registered new driver hub
> PCI: Enabling device 02:08.2 (0014 -> 0016)
> PCI: No IRQ known for interrupt pin C of device 02:08.2. Probably buggy
> MP table.
> hcd.c: Found HC with no IRQ.  Check BIOS/PCI 02:08.2 setup!
> uhci.c: USB Universal Host Controller Interface driver v1.1
> PCI: Enabling device 02:08.0 (0014 -> 0016)
> PCI: No IRQ known for interrupt pin A of device 02:08.0. Probably buggy
> MP table.
> usb-ohci.c: found OHCI device with no IRQ assigned. check BIOS settings!
> PCI: Enabling device 02:08.1 (0014 -> 0016)
> PCI: No IRQ known for interrupt pin B of device 02:08.1. Probably buggy
> MP table.
> usb-ohci.c: found OHCI device with no IRQ assigned. check BIOS settings!
> usb.c: registered new driver hiddev
> usb.c: registered new driver hid
> hid-core.c: v1.8.1 Andreas Gal, Vojtech Pavlik <vojtech@suse.cz>
> hid-core.c: USB HID support drivers
> usb.c: registered new driver usblp
> printer.c: v0.11: USB Printer Device Class driver
> Initializing USB Mass Storage driver...
> usb.c: registered new driver usb-storage
> USB Mass Storage support registered.
> mice: PS/2 mouse device common for all mice
> NET4: Linux TCP/IP 1.0 for NET4.0
> IP Protocols: ICMP, UDP, TCP, IGMP
> IP: routing cache hash table of 8192 buckets, 64Kbytes
> TCP: Hash tables configured (established 262144 bind 65536)
> NET4: Unix domain sockets 1.0/SMP for Linux NET4.0.
> VFS: Mounted root (ext2 filesystem) readonly.
> Freeing unused kernel memory: 124k freed


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2002-10-17  7:41 Rusty Russell
  2002-10-17 14:49 ` Roman Zippel
  0 siblings, 1 reply; 414+ messages in thread
From: Rusty Russell @ 2002-10-17  7:41 UTC (permalink / raw)
  To: Daniel Phillips, S; +Cc: Roman Zippel, linux-kernel

In message <E181zuY-0004Fl-00@starship> you write:
> On Thursday 17 October 2002 00:48, Rusty Russell wrote:
> > > On Wednesday 16 October 2002 08:11, Rusty Russell wrote:
> > > > It needs to be turned off when dealing with any interface which might
> > > > be used by one of the hard modules.  Which is pretty bad.
> > > 
> > > As far as I can see, preemption only has to be disabled during the 
> > > synchronize_kernel phase of unloading that one module, and this requirement
> > > is inherited neither by dependant or depending modules.
> > 
> > No, someone could already have been preempted before you start
> > synchronize_kernel().
> 
> I don't get that.  The sequence is:
> 
>   - turn off preemption
>   - unhook call points
>   - synchronize_kernel
>   - ...
> 
> which doesn't leave any preemption hole that I can see, so I can't comment
> on a couple of the other points until you clear that one up.

You mean that "turn off preemption" also wakes up anyone currently
preempted?  Otherwise they're preempted just inside one of those call
points.

> > Still a race between the zero check and the can't-increment state
> > setting.
> 
> But that one is easy: the zero check just takes the same spinlock as 
> TRY_INC_MOD_COUNT, then sets can't-increment only in the case the count
> is zero, considerably simpler than:

The current spinlock is horrible.  You could use a brlock, of course,
but I didn't mainly because of code bloat and speed.  My current code
looks like:

static inline int try_module_get(struct module *module)
{
	int ret = 1;

	if (module) {
		unsigned int cpu = get_cpu();
		if (likely(module->ref[cpu].live))
			local_inc(&module->ref[cpu].counter);
		else
			ret = 0;
		put_cpu();
	}
	return ret;
}

Which is small enough to be inlined quite nicely, and very fast.
Adding br_read_lock_irqsave() starts to get big and slow (at that
point it's more likely we want to move the module case out of line).

> > This is what my current code does: rmmod itself checks (if
> > /proc/modules available), then the kernel sets the module to
> > can't-increment, then checks again.  If the non-blocking flag is set,
> > it then re-animates the module and fails, otherwise it waits.
> 
> and leaves no window for spurious failure.  The still-initializing case is
> also easy, e.g., a filesystem module simply doesn't call register_filesystem
> until it's completely ready to service calls, so nobody is able to do
> TRY_INC_MOD_COUNT.

Consider some code which needs to know when cpus go up and down, so
registers a notifier.  If the notifier fires before the init is
finished, the notifier code will fail to "try_inc_mod_count()" and
won't call it (it doesn't do try_inc_mod_count at the moment, but
that's a bug).

I don't know of any code which does this now, but it is at least a
theoretical problem.

> > BTW, current patchset (2.5.43):
> 
> Thanks, I'll read them all on the 21st ;-)  The other thing I need to read
> closely is Roman's strategy for changing the module format, and the weird
> linker connections.

Roman dislikes linking in the kernel.  So did I until I wrote it: it's
really trivial (esp. compared with the code to coordinate with the
userspace linker properly).  And it exists today.  The linking takes
around 200 lines.  But, let's say his solution is 500 lines shorter
than mine.

For those five hundred lines, the new parameter infrastructure and
module versioning changes can be done *without* requiring any changes
in modutils.  If you've been following the module changes closely in
the last couple of years, you'll realize what a pain it has been to
introduce changes like licensing, etc.  This frees up our hand.

IMHO, the benefits of having it in-kernel outweigh the slight extra
size.

> > ...The second is the "die-mother-fucker-die"
> > version, which taints the kernel and just removes the damn thing.  For
> > most people, this is better than a reboot, and will usually "work".
> 
> Is there a case where removing a module would actually help?  What is
> the user going to do next, try to reinsert the same module?

Insert a fixed one, hopefully 8).  I was thinking for kernel
developers, and general robustness (eg. an oops inside a module leaves
its refcount at 1).

> > http://www.kernel.org/pub/linux/kernel/people/rusty/patches/Module/force-unload.patch.gz
> 
> ERROR 404: Not Found.

Damn my fingers.  Updated (now applies on top of the others) but I
haven't tested this version yet (that's what I'm doing now):

http://www.kernel.org/pub/linux/kernel/people/rusty/patches/Module/forceunload.patch.gz

Cheers!
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2002-10-17  7:41 Rusty Russell
@ 2002-10-17 14:49 ` Roman Zippel
  0 siblings, 0 replies; 414+ messages in thread
From: Roman Zippel @ 2002-10-17 14:49 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Daniel Phillips, S, linux-kernel

Hi,

On Thu, 17 Oct 2002, Rusty Russell wrote:

> Roman dislikes linking in the kernel.  So did I until I wrote it: it's
> really trivial (esp. compared with the code to coordinate with the
> userspace linker properly).  And it exists today.  The linking takes
> around 200 lines.  But, let's say his solution is 500 lines shorter
> than mine.

I believe you that linking in the kernel is simpler, but so would be a lot
of other things and the part I really dislike is to remove the ability to
keep it in user space.

bye, Roman


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2002-10-11  0:11 sridhar vaidyanathan
  2002-10-11  0:21 ` Steven Dake
  0 siblings, 1 reply; 414+ messages in thread
From: sridhar vaidyanathan @ 2002-10-11  0:11 UTC (permalink / raw)
  To: linux-kernel

I am trying to debug a kernel over a remote serial console. I get 
Ignoring packet error ..
kgdb page suggests that it might be due to the speed mismatch. i tried 
stty ispeed 9600 ospeed 9600 < /dev/ttyS0 
on the development machine and have passed serial=0,9600n8 option and 
gdbbaud=9600 via lilo to the debug kernel. 

when i run 
%stty speed 
on the development machine it still reports 38400.
so i changed the gdbbaud and serial= values to 38400 on the test machine. even 
this doesn't work.
any ideas?also on the development machine when i invoke 
%gdb bzImage 
gdb reports that bzImage is not an Executable file format and it is unable to 
recognize the format. what is the problem?
-sridhar
ps: i have tried redirecting the kernel messages( without patching it with 
kgdb) over the serial line and read it with minicom . that works fine.

please email as i am not subscribed.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2002-10-11  0:11 sridhar vaidyanathan
@ 2002-10-11  0:21 ` Steven Dake
  0 siblings, 0 replies; 414+ messages in thread
From: Steven Dake @ 2002-10-11  0:21 UTC (permalink / raw)
  To: sridhar vaidyanathan; +Cc: linux-kernel



sridhar vaidyanathan wrote:

>I am trying to debug a kernel over a remote serial console. I get 
>Ignoring packet error ..
>
I have seen this and in my case, it had to do with printks coming over 
the serial link with console redirection.

>kgdb page suggests that it might be due to the speed mismatch. i tried 
>stty ispeed 9600 ospeed 9600 < /dev/ttyS0 
>on the development machine and have passed serial=0,9600n8 option and 
>gdbbaud=9600 via lilo to the debug kernel. 
>
>when i run 
>%stty speed 
>on the development machine it still reports 38400.
>so i changed the gdbbaud and serial= values to 38400 on the test machine. even 
>this doesn't work.
>any ideas?also on the development machine when i invoke 
>%gdb bzImage 
>
try gdb vmlinux.  This is the uncompressed image that gdb knows how to 
read.  bzImage is the compressed kernel that you should boot.  The 
vmlinux file should match the bzImage file.

>gdb reports that bzImage is not an Executable file format and it is unable to 
>recognize the format. what is the problem?
>-sridhar
>ps: i have tried redirecting the kernel messages( without patching it with 
>kgdb) over the serial line and read it with minicom . that works fine.
>
>please email as i am not subscribed.
>-
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at  http://www.tux.org/lkml/
>
>
>
>  
>


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2002-09-29 10:11 Richard Cooper
  2002-09-29 17:49 ` David Lloyd
  0 siblings, 1 reply; 414+ messages in thread
From: Richard Cooper @ 2002-09-29 10:11 UTC (permalink / raw)
  To: linux-kernel

unsubscribe linux-kernel


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2002-09-29 10:11 Richard Cooper
@ 2002-09-29 17:49 ` David Lloyd
  0 siblings, 0 replies; 414+ messages in thread
From: David Lloyd @ 2002-09-29 17:49 UTC (permalink / raw)
  To: Richard Cooper; +Cc: linux-kernel


I don't suppose you've read this message on the bottom of every message:

> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

Spamassassin kills my main computer whilst processing Kernel list
messages. This one just makes you look like a Spammer :-(

DSL
-- 
Qualcuno no mi basta.
  Vivere cercando il grande amore.
  Vivere come se mai dovessimo morire.
(Anastasio, Valli e Travato)

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2002-06-08 21:35 tushar  korde
  2002-08-21 16:30 ` Daniel Phillips
  0 siblings, 1 reply; 414+ messages in thread
From: tushar  korde @ 2002-06-08 21:35 UTC (permalink / raw)
  To: linux-kernel

hi folks,
 	as kmalloc allocates memory in power of 2 ( starting from 32 )
instead of the size requested. there are following problems :

  1) we are allocating at least 32 bytes in all cases ( most of 
the times it is not
required ).

  2) if we allocate large memory, internal fragmentation also 
increases.

  3) allocating more memory than the request often leads to 
programming errors
esp. when we store some data and read it back or try to get size 
of data stored
  ( though it can be handled but we have to take special care of 
it at every point ).

the solution to above problems may be that we dont allocate 
objects from the 13
general purpose caches, instead we make a new cache keep its 
address either in
cache_sizes or declare it global. now as the kmalloc is invoked 
check the memory size
requested if predefined sizes are not suitable then make a new 
object of the size
requested ( now here the definition of c_offset flag of cache 
descriptor may be
violated ) and allot it to our new cache and return it .

 	i know that there may be subtle problems in it's 
implementation.
i need your suggestions. is it worth to make efforts in this 
field.

keenly waiting for your reply
tushar korde
_________________________________________________________
Click below to visit monsterindia.com and review jobs in India or 
Abroad
http://monsterindia.rediff.com/jobs

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2002-06-08 21:35 tushar  korde
@ 2002-08-21 16:30 ` Daniel Phillips
  0 siblings, 0 replies; 414+ messages in thread
From: Daniel Phillips @ 2002-08-21 16:30 UTC (permalink / raw)
  To: tushar korde, linux-kernel

On Saturday 08 June 2002 23:35, tushar  korde wrote:
> hi folks,
>  	as kmalloc allocates memory in power of 2 ( starting from 32 )
> instead of the size requested. there are following problems :
> 
>   1) we are allocating at least 32 bytes in all cases ( most of 
> the times it is not
> required ).
> 
>   2) if we allocate large memory, internal fragmentation also 
> increases.
> 
>   3) allocating more memory than the request often leads to 
> programming errors
> esp. when we store some data and read it back or try to get size 
> of data stored
>   ( though it can be handled but we have to take special care of 
> it at every point ).
> 
> the solution to above problems may be that we dont allocate 
> objects from the 13
> general purpose caches, instead we make a new cache keep its 
> address either in
> cache_sizes or declare it global. now as the kmalloc is invoked 
> check the memory size
> requested if predefined sizes are not suitable then make a new 
> object of the size
> requested ( now here the definition of c_offset flag of cache 
> descriptor may be
> violated ) and allot it to our new cache and return it .
> 
>  	i know that there may be subtle problems in it's 
> implementation.
> i need your suggestions. is it worth to make efforts in this 
> field.

You probably want kmem_cache_alloc, see slab.c.  Kmalloc is just an
interface to kmem_cache_alloc.

-- 
Daniel

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2002-05-31  8:04 Oliver Pitzeier
  2002-05-31 14:37 ` Alan Cox
  0 siblings, 1 reply; 414+ messages in thread
From: Oliver Pitzeier @ 2002-05-31  8:04 UTC (permalink / raw)
  To: 'linux-kernel'
  Cc: axp-kernel-list, alan, torvalds, 'Ivan Kokshaysky'

Hi volks/Linus/Alan/Ivan! :o)

I tried to compile kernel 2.5.19 on an alpha.

Can someone help me? I had the same problem already with kernel 2.5.18.
Kernel 2.5.15 works well. Everything above _not_.

Please help!!! Thanks!

While trying to compile 2.5.19, this happens:
<snip>
make[2]: Entering directory `/root/linux-2.5.19/drivers/base'
gcc -D__KERNEL__ -I/root/linux-2.5.19/include -Wall -Wstrict-prototypes
-Wno-trigraphs -O2 -fomit-frame-pointer -fno-strict-aliasing -fno-common
-pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev6
-DKBUILD_BASENAME=bus -DEXPORT_SYMTAB -c -o bus.o bus.c
In file included from /root/linux-2.5.19/include/linux/thread_info.h:10,
                 from /root/linux-2.5.19/include/linux/spinlock.h:7,
                 from /root/linux-2.5.19/include/linux/tqueue.h:16,
                 from /root/linux-2.5.19/include/linux/sched.h:10,
                 from /root/linux-2.5.19/include/linux/device.h:30,
                 from bus.c:12:
/root/linux-2.5.19/include/linux/bitops.h: In function
`get_bitmask_order':
/root/linux-2.5.19/include/linux/bitops.h:77: warning: implicit
declaration of function `fls'
bus.c: At top level:
bus.c:114: parse error before `bus_init'
bus.c:115: warning: return type defaults to `int'
bus.c:120: warning: type defaults to `int' in declaration of
`core_initcall'
bus.c:120: warning: parameter names (without types) in function
declaration
bus.c:120: warning: data definition has no type or storage class
make[2]: *** [bus.o] Error 1
make[2]: Leaving directory `/root/linux-2.5.19/drivers/base'
make[1]: *** [_subdir_base] Error 2
make[1]: Leaving directory `/root/linux-2.5.19/drivers'
make: *** [drivers] Error 2
<snip>

Best regards,
  Greetz to the community,
    Oliver



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2002-05-31  8:04 Oliver Pitzeier
@ 2002-05-31 14:37 ` Alan Cox
  0 siblings, 0 replies; 414+ messages in thread
From: Alan Cox @ 2002-05-31 14:37 UTC (permalink / raw)
  To: o.pitzeier
  Cc: 'linux-kernel',
	axp-kernel-list, torvalds, 'Ivan Kokshaysky'

On Fri, 2002-05-31 at 09:04, Oliver Pitzeier wrote:
> Hi volks/Linus/Alan/Ivan! :o)
> 
> I tried to compile kernel 2.5.19 on an alpha.
> 
> Can someone help me? I had the same problem already with kernel 2.5.18.
> Kernel 2.5.15 works well. Everything above _not_.
> 

There are continual 2.5 changes and at times they are going to break the
Alpha port since x86 is the main item of focus. I don't follow 2.5 much
(I've yet to find a 2.5 kernel that doesn't crash and/or eat my disks)
so I can't really help you.

Alan


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2002-04-18 11:23 Satish Mohan
  2002-04-18 11:35 ` François Cami
  0 siblings, 1 reply; 414+ messages in thread
From: Satish Mohan @ 2002-04-18 11:23 UTC (permalink / raw)
  To: linux-kernel

subscribe linux-kernel

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2002-04-18 11:23 Satish Mohan
@ 2002-04-18 11:35 ` François Cami
  0 siblings, 0 replies; 414+ messages in thread
From: François Cami @ 2002-04-18 11:35 UTC (permalink / raw)
  To: Satish Mohan; +Cc: linux-kernel

Satish Mohan wrote:
> subscribe linux-kernel
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
> 

please send a message to

majordomo@vger.kernel.org

with the line "subscribe linux-kernel" in the body of your
message

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

François Cami


^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2002-04-09 13:25 Kuppuswamy, Priyadarshini
  0 siblings, 0 replies; 414+ messages in thread
From: Kuppuswamy, Priyadarshini @ 2002-04-09 13:25 UTC (permalink / raw)
  To: linux-kernel

Please ignore my earlier email. I didn't intend to send it to the list.

> -----Original Message-----
> From:	Kuppuswamy, Priyadarshini 
> Sent:	Tuesday, April 09, 2002 9:21 AM
> To:	linux-kernel@vger.kernel.org
> Subject:	
> 
> unsubscribe linux-kernel

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2002-03-31 19:17 mpaa3d
  2002-04-01  9:43 ` Vance Lankhaar
  0 siblings, 1 reply; 414+ messages in thread
From: mpaa3d @ 2002-03-31 19:17 UTC (permalink / raw)
  To: linux-kernel

Subject: Unauthorized Circumvention Device

MOTION PICTURE ASSOCIATION OF AMERICA, INC.
15503 VENTURA BOULEVARD
ENCINO, CALIFORNIA 91436

UNITED STATES
Anti-Piracy Operations
PHONE: (818) 728 - 8127
Email: MPAA3d@pacbell.net

April 1, 2003

Via Fax/Email

RE:         Illegal Provision of Circumvention Device
Site/URL:   http://www.kernel.org [and mirrors, with unknown IP addresses]
Reference#: 343313

Dear linux-kernel@vger.rutrgers.edu:

The Motion Picture Association (MPA) represents the following motion picture
production and distribution companies:

Columbia Pictures Industries, Inc.
Disney Enterprises, Inc.
Metro-Goldwyn-Mayer Studios Inc.
Paramount Pictures Corporation
TriStar Pictures, Inc.
Twentieth Century Fox Film Corporation
United Artists Pictures, Inc.
United Artists Corporation
Universal City Studios, Inc.
Warner Bros., a Division of Time Warner Entertainment Company, L.P.

We have received information that the above referenced Internet site is
providing a circumvention device commonly known as Linux.  Linux is a
software utility that circumvents the protection afforded by the Microsoft
Windows Operating Systems DRM implementation, therefore circumventing the
schemes designed for consumer content protection and permitting the copy of
protected contents in whole or partially.  As such, Linux is an unlawful
circumvention device within the meaning of the Digital Millennium Copyright
Act, Title 17 United States Code Section 1201(a)(2)(3).  Providing or
offering Linux to the public on your system or  network violates the
provisions of Section 1201(a)(2) which prohibit the manufacturing, importing
or offering to the public, providing, or otherwise trafficking in an
unlawful circumvention device.   (Title 17 United States Code 
Section 1201 et seq. hereafter is referred to as the DMCA).

We therefore demand that you take appropriate steps to cause the immediate
removal of Linux from the above identified Internet site, along with such
other actions as may be necessary or appropriate to suspend this illegal
activity. Failure to comply with this measure will subject you to liability
as described above.
We also request that you:

1. maintain and take whatever steps are necessary to prevent the destruction
of all records, including electronic records, in your possession or control
related to this Internet site, account holder or subscriber, and 

2. provide appropriate notice to the subscriber or account holder
responsible for the presence of Linux on your system or network, advising
him/her of the contents of this notice and directing that person to contact
the undersigned immediately at the email address provided above.

By copy of this letter, the owner of the above referenced Internet site
and/or email account is hereby directed to cease and desist from the conduct
complained of herein.

On behalf of the respective owners of the exclusive rights to the copyrighted
material at issue in this notice, we hereby state, pursuant to the DMCA that we
have a good faith belief that the acts complained of are not authorized by the
copyright owners, their respective agents, or the law.

Also pursuant to DMCA, we hereby state, under penalty of perjury under the
law of California and under the laws of the United States, that the
information in this notification is accurate and that we are authorized to
act on behalf of the owners of the exclusive rights being infringed as set
forth in this notification. 

Please contact us at the above listed address or by replying to this email
if you should have any questions.

Thank you for your cooperation in this matter.  Your prompt response is
requested.

Respectfully,

Haminshu Nigam
Director
Worldwide Internet Enforcement

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2002-03-31 19:17 mpaa3d
@ 2002-04-01  9:43 ` Vance Lankhaar
  0 siblings, 0 replies; 414+ messages in thread
From: Vance Lankhaar @ 2002-04-01  9:43 UTC (permalink / raw)
  To: linux-kernel

On Monday 01 April 2002 02:17, mpaa3d@pacbell.net wrote:
[snip]
>April 1, 2003
[snip]

I guess we don't have to read this for a year ;)

Vance Lankhaar 

------------------------------------------------------------
 Vance Lankhaar                        vance@pcsscreston.ca
 PCSS Yearbook                      yearbook@pcsscreston.ca
 PCSS Computers                    sysadmins@pcsscreston.ca
 http://www.crestonbc.com/pcss/   http://www.pcsscreston.ca
------------------------------------------------------------

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
@ 2002-02-20 17:55 Torrey Hoffman
  0 siblings, 0 replies; 414+ messages in thread
From: Torrey Hoffman @ 2002-02-20 17:55 UTC (permalink / raw)
  To: nimeesh, linux-kernel

nimeesh wrote:

> I'm new to linux.I'm trying network booting of linux
> system.System Conf.(intel 815 chipset ,PXE
> bios,3com905c-ethernet card)
> 
> In that i'm facing problem with kernel image and it's
> file system.
> 
> In that when i get the kernel image and filesystem.
> It gives me error as
> kernel panic : unable to mount root fs on 01:00
> 
> Is it necessary to use NFS or is it possible without
> that? Any special specification while creating kernel
> image and filesystem?

It sounds like you have PXE working well enough to have the 
BIOS download a PXE bootloader from a TFTP server, which 
then downloads a kernel image.  I assume you are using Peter 
Anvin's "pxelinux" boot loader?  
It is part of the "syslinux" package.

Anyway, you have at least two options for the root
file system.  You can use an NFS root, or you can use
an initial ramdisk.    I use an initial ramdisk.  My server 
has the standard ISC DHCP server, and Peter Anvin's TFTP server.

The DHCP server is configured in /etc/dhcpd.conf and
includes:

subnet 10.134.0.0 netmask 255.255.254.0
{
	# Not shown: options for DNS, router, domain name, 
	# default lease time, range, etc.

	# this is the important bit for PXE:
	filename "pxelinux.0"
}

The TFTP server contains the files:

pxelinux.0        (the pxelinux boot loader)
bzImage		(the kernel)
initrd.gz		(the initial ramdisk)
pxelinux.cfg/0A86 (the pxelinux config directory and file)

The Pxelinux.cfg configuration file I use is named "0A86" 
because the DHCP server assigns addresses "10.134.xxx.yyy", 
(10=0x0A, 134=0x86) and looks something like this:

DEFAULT MYNETBOOT
TIMEOUT 0
LABEL MYNETBOOT
  KERNEL bzImage
  APPEND initrd=initrd.gz root=/dev/ram

There is quite a bit of information on the network about how 
to create an initrd image.  Check the "HOWTO" documentation 
files that may have come with your Linux distribution.  
Also try "man initrd".

So here's how the pxe boot works from start to finish:

1. Client boots and enters PXE BIOS.
2. PXE BIOS does DHCP, gets IP address and filename option
3. PXE BIOS uses TFTP to download pxelinux.0 and boot from it
4. pxelinux.0 bootloader determines IP address, asks TFTP
   server for config file, finds "0A86" file and downloads it
5. pxelinux.0 reads config file, uses TFTP to download
   bzImage and initrd.gz
6. pxelinux boots bzImage kernel together with initrd.gz 
   ramdisk, kernel command line "root=/dev/ram" tells 
   kernel to use ramdisk as root filesystem
7. Kernel boots, mounts ramdisk, and then runs /linuxrc if it
   exists, otherwise runs /sbin/init just like booting from
   any other device.
8. linuxrc or init starts up the system or does whatever you
   want them to do.  Read the initrd man page for details.

Best wishes,

Torrey Hoffman

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2001-12-25 16:17 Manfred Spraul
  2001-12-25 19:14 ` Re: Legacy Fishtank
  0 siblings, 1 reply; 414+ messages in thread
From: Manfred Spraul @ 2001-12-25 16:17 UTC (permalink / raw)
  To: Colonel; +Cc: linux-kernel

> When I went to build 2.4.17 on a dinky box (486, 16M RAM), the
> config option was missing.  The box is a wall mount and is not very
> capable of multiple kernel experimentation alas.  Can someone
> supply some background as to what has happened?

It seems that RTNETLINK is now unconditionally enabled, I don't know
why.

--
    Manfred




^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-12-25 16:17 Manfred Spraul
@ 2001-12-25 19:14 ` Legacy Fishtank
  2001-12-25 21:23   ` Re: Kurt Roeckx
                     ` (2 more replies)
  0 siblings, 3 replies; 414+ messages in thread
From: Legacy Fishtank @ 2001-12-25 19:14 UTC (permalink / raw)
  To: Manfred Spraul; +Cc: Colonel, linux-kernel

On Tue, Dec 25, 2001 at 05:17:01PM +0100, Manfred Spraul wrote:
> > When I went to build 2.4.17 on a dinky box (486, 16M RAM), the
> > config option was missing.  The box is a wall mount and is not very
> > capable of multiple kernel experimentation alas.  Can someone
> > supply some background as to what has happened?
> 
> It seems that RTNETLINK is now unconditionally enabled, I don't know
> why.

It's required by newer RedHat and MDK initscripts, perhaps others.
ip, iproute and similar utilities use it, and so since it's commonly
required DaveM made it unconditional...  I think the checkin comment was
something along the lines of "make it unconditional unless Alan
complains about kernel bloat" :)

	Jeff



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-12-25 19:14 ` Re: Legacy Fishtank
@ 2001-12-25 21:23   ` Kurt Roeckx
  2001-12-25 22:03   ` Re: Alan Cox
  2002-01-03  0:06   ` Re: David S. Miller
  2 siblings, 0 replies; 414+ messages in thread
From: Kurt Roeckx @ 2001-12-25 21:23 UTC (permalink / raw)
  To: Legacy Fishtank; +Cc: Manfred Spraul, Colonel, linux-kernel

On Tue, Dec 25, 2001 at 02:14:41PM -0500, Legacy Fishtank wrote:
> On Tue, Dec 25, 2001 at 05:17:01PM +0100, Manfred Spraul wrote:
> > It seems that RTNETLINK is now unconditionally enabled, I don't know
> > why.
> 
> It's required by newer RedHat and MDK initscripts, perhaps others.
> ip, iproute and similar utilities use it, and so since it's commonly
> required DaveM made it unconditional...  I think the checkin comment was
> something along the lines of "make it unconditional unless Alan
> complains about kernel bloat" :)

But ifconfig and route don't use it, and now you can't do certain
things you could before.

One thing that comes to mind is showing the ipv6 routing cache,
because it only made that proc entry when it's not enabled.

Should I mention my kernel got bigger?

(I also was under the impression that this was a stable series.)


Kurt


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-12-25 19:14 ` Re: Legacy Fishtank
  2001-12-25 21:23   ` Re: Kurt Roeckx
@ 2001-12-25 22:03   ` Alan Cox
  2002-01-03  0:06   ` Re: David S. Miller
  2 siblings, 0 replies; 414+ messages in thread
From: Alan Cox @ 2001-12-25 22:03 UTC (permalink / raw)
  To: Legacy Fishtank; +Cc: Manfred Spraul, Colonel, linux-kernel

> It's required by newer RedHat and MDK initscripts, perhaps others.
> ip, iproute and similar utilities use it, and so since it's commonly

Basically because Dave refused to recognize lots of embedded setups don't
need the netlink crap and couldn't just accept defaulting it to Y we all
get lumbered with it

> required DaveM made it unconditional...  I think the checkin comment was
> something along the lines of "make it unconditional unless Alan
> complains about kernel bloat" :)

And I did complain. "Red Hat needs XYZ so we make it mandatory" is not an
appropriate approach to a problem.

Alan

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-12-25 19:14 ` Re: Legacy Fishtank
  2001-12-25 21:23   ` Re: Kurt Roeckx
  2001-12-25 22:03   ` Re: Alan Cox
@ 2002-01-03  0:06   ` David S. Miller
  2002-01-03  0:23     ` Re: Alan Cox
  2 siblings, 1 reply; 414+ messages in thread
From: David S. Miller @ 2002-01-03  0:06 UTC (permalink / raw)
  To: alan; +Cc: garzik, manfred, klink, linux-kernel

   From: Alan Cox <alan@lxorguk.ukuu.org.uk>
   Date: Tue, 25 Dec 2001 22:03:59 +0000 (GMT)

   > required DaveM made it unconditional...  I think the checkin comment was
   > something along the lines of "make it unconditional unless Alan
   > complains about kernel bloat" :)

   And I did complain. "Red Hat needs XYZ so we make it mandatory" is not an
   appropriate approach to a problem.

[ Just got back from British Columbia... ]

No you did not complain.  I asked you specifically if it was ok, and
your response was that turning netlink/rtnetlink on by default was
fine with you.

It has zilch to do with redhat anything, in fact I had to ask vendors
first if they could still fit the kernel on their boot disks if I
added ~5K of object code to kernels with networking enabled.

It has everything to do with iproute2 and tcp_diag using it.

Franks a lot,
David S. Miller
davem@redhat.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2002-01-03  0:06   ` Re: David S. Miller
@ 2002-01-03  0:23     ` Alan Cox
  0 siblings, 0 replies; 414+ messages in thread
From: Alan Cox @ 2002-01-03  0:23 UTC (permalink / raw)
  To: David S. Miller; +Cc: alan, garzik, manfred, klink, linux-kernel

> No you did not complain.  I asked you specifically if it was ok, and
> your response was that turning netlink/rtnetlink on by default was
> fine with you.

But not forcing it always on - thats not what I said.

|   From: Alan Cox <alan@redhat.com>
|   Date: Wed, 31 Oct 2001 13:09:16 -0500 (EST)
|
|   Ask Arjan. I'm certainly of the opinion it should default to Y in Linus
|   config.in

Alan

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2001-12-05 16:05 Romain Giry
  2001-12-05 21:25 ` Dipak
  2001-12-06 10:43 ` Re: Romain Giry
  0 siblings, 2 replies; 414+ messages in thread
From: Romain Giry @ 2001-12-05 16:05 UTC (permalink / raw)
  To: mlist-linux-kernel

Hi

i would like to know how the network layer does to know what is the upper 
layer protocol in order to fill in correctly the protocol field in the 
header it adds to the packet before sending it. I'm doing a ethernet device 
that doesn't add any header to the packet but change the output device, 
then i should say the network device that the packet is like if it has been 
sent by the ip protocol.

Thanks,

Romain Giry

_________________________________________________________
Do You Yahoo!?
Get your free @yahoo.com address at http://mail.yahoo.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-12-05 16:05 Romain Giry
@ 2001-12-05 21:25 ` Dipak
  2001-12-06 10:43 ` Re: Romain Giry
  1 sibling, 0 replies; 414+ messages in thread
From: Dipak @ 2001-12-05 21:25 UTC (permalink / raw)
  To: Romain Giry; +Cc: mlist-linux-kernel

Romain Giry wrote:

Hi,

> Hi
>
> i would like to know how the network layer does to know what is the upper
> layer protocol in order to fill in correctly the protocol field in the
> header it adds to the packet before sending it.

See, there might be either an API provided by the DLL (ethernet, ATM, FR etc) to
network layer (IP, IPX etc) to sent packets over a physical device. Now, the
Network protocol field can be passed through the API to be filled in by
DLL header by DLL. Another case may be, DLL can easily know what's the ifIndex
the packet is coming from. Network layer might have registration policy by
ifIndex to DLL, which can be used now to infer Network layer protocol id.

> I'm doing a ethernet device
> that doesn't add any header to the packet but change the output device,
> then i should say the network device that the packet is like if it has been
> sent by the ip protocol.

I didn't understand what did you mean by "ethernet device doesn't add any
header but change the output device"? May be after you explain a bit more I
can suggest something more.

Thanks,
Dipak

>
>
> Thanks,
>
> Romain Giry
>
> _________________________________________________________
> Do You Yahoo!?
> Get your free @yahoo.com address at http://mail.yahoo.com
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-12-05 16:05 Romain Giry
  2001-12-05 21:25 ` Dipak
@ 2001-12-06 10:43 ` Romain Giry
  2001-12-06 11:28   ` Re: Alan Cox
  1 sibling, 1 reply; 414+ messages in thread
From: Romain Giry @ 2001-12-06 10:43 UTC (permalink / raw)
  To: Dipak; +Cc: Linux-Kernel mailing list

Hi,

> > I'm doing a ethernet device
> > that doesn't add any header to the packet but change the output device,
> > then i should say the network device that the packet is like if it has been
> > sent by the ip protocol.
>
>I didn't understand what did you mean by "ethernet device doesn't add any
>header but change the output device"? May be after you explain a bit more I
>can suggest something more.

For my thesis, I should do a module for linux that allows the user to 
switch the
physical device at run-time. In a first time this should be done by user 
commands
and later the module should decide himself to switch the physical device.

That's why it seems to me natural to create a dummy ethernet device driver 
which
does nothing else apart from forwarding the packets received from the IP stack
to a real (= physical) network device. Therefore i need to fake that the 
packet was
sent by the IP stack so as the physical device fill in correctly the 
protocol field in
the header it adds.

Otherwise i thought i can do a transparent firewall that decides to which real
interface to switch the packets after beeing sure that the physical device is
running otherwise it should change.

One thing that may be difficult to implement is that i want to keep a TCP
connection running when i change the physical device. That's why maybe the
firewall solution may be better because when receiving packets i could fake
that they all come from the same physical device and have therefore the same
IP.

Thanks

Romain

_________________________________________________________
Do You Yahoo!?
Get your free @yahoo.com address at http://mail.yahoo.com

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-12-06 10:43 ` Re: Romain Giry
@ 2001-12-06 11:28   ` Alan Cox
  0 siblings, 0 replies; 414+ messages in thread
From: Alan Cox @ 2001-12-06 11:28 UTC (permalink / raw)
  To: Romain Giry; +Cc: Dipak, Linux-Kernel mailing list

> One thing that may be difficult to implement is that i want to keep a TCP
> connection running when i change the physical device. That's why maybe the
> firewall solution may be better because when receiving packets i could fake
> that they all come from the same physical device and have therefore the same
> IP.

You set up multiple physical devices with the same IP and use the "route"
command. Thats worked with TCP/IP protocols since day 1

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2001-10-15  6:25 Dinesh  Gandhewar
  2001-10-15  6:56 ` David Ford
  0 siblings, 1 reply; 414+ messages in thread
From: Dinesh  Gandhewar @ 2001-10-15  6:25 UTC (permalink / raw)
  To: mlist-linux-kernel


Hello,
What is the effect of following statement at the end of function definition?
*(int *)0 = 0;	
Thanking you,
Dinesh 


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-10-15  6:25 Dinesh  Gandhewar
@ 2001-10-15  6:56 ` David Ford
  2001-10-15 16:02   ` Re: Timur Tabi
  0 siblings, 1 reply; 414+ messages in thread
From: David Ford @ 2001-10-15  6:56 UTC (permalink / raw)
  To: Dinesh Gandhewar; +Cc: mlist-linux-kernel

That should throw a segmentation fault, in the kernel an OOPS,  in this 
statement the code is trying to dereference a NULL pointer and store a 
value at 0x0.

David

Dinesh Gandhewar wrote:

>Hello,
>What is the effect of following statement at the end of function definition?
>*(int *)0 = 0;	
>Thanking you,
>Dinesh 
>
>-
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at  http://www.tux.org/lkml/
>



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-10-15  6:56 ` David Ford
@ 2001-10-15 16:02   ` Timur Tabi
  0 siblings, 0 replies; 414+ messages in thread
From: Timur Tabi @ 2001-10-15 16:02 UTC (permalink / raw)
  To: mlist-linux-kernel

David Ford wrote:

> That should throw a segmentation fault, in the kernel an OOPS,  in this 
> statement the code is trying to dereference a NULL pointer and store a 
> value at 0x0.

A much smarter way to do this would be to use this code:

/*
 * Plant a software breakpoint at the call site by emitting the raw byte
 * 0xCC (the one-byte x86 INT3 opcode) into the instruction stream.
 * x86-specific.  __volatile__ keeps the compiler from dropping the
 * otherwise output-less asm statement.
 */
static inline void int3(void)
{
	__asm__ __volatile__ (".byte 0xCC\n");
}

Granted, it's x86-specific, but it works better, since gdb will halt the code 
right at that spot rather than inside some trap handler.  And it's just more 
elegant.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2001-10-02 15:30 Dinesh  Gandhewar
  2001-10-09 10:25 ` VDA
  0 siblings, 1 reply; 414+ messages in thread
From: Dinesh  Gandhewar @ 2001-10-02 15:30 UTC (permalink / raw)
  To: mlist-linux-kernel

Hello,
I have written a linux kernel module. The linux version is 2.2.14. 
In this module I have declared an array of size 2048. If I use this array, the execution of this module function causes kernel to reboot. If I kmalloc() this array then execution of this module function doesnot cause any problem.
Can you explain this behaviour?
Thnaks,
Dinesh 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-10-02 15:30 Dinesh  Gandhewar
@ 2001-10-09 10:25 ` VDA
  0 siblings, 0 replies; 414+ messages in thread
From: VDA @ 2001-10-09 10:25 UTC (permalink / raw)
  To: linux-kernel

Hello Dinesh,

Tuesday, October 02, 2001, 5:30:02 PM, you wrote:

DG> Hello,
DG> I have written a linux kernel module. The linux version is 2.2.14. 
DG> In this module I have declared an array of size 2048. If I use this array, the execution of this module function causes kernel to reboot. If I kmalloc() this array then execution of this module
DG> function doesnot cause any problem.
DG> Can you explain this behaviour?
DG> Thnaks,
DG> Dinesh 

DG> -
DG> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
DG> the body of a message to majordomo@vger.kernel.org
DG> More majordomo info at  http://vger.kernel.org/majordomo-info.html
DG> Please read the FAQ at  http://www.tux.org/lkml/

stack overflow

-- 
Best regards, VDA
mailto:VDA@port.imtp.ilyichevsk.odessa.ua

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2001-10-02 15:29 Dinesh  Gandhewar
  2001-10-02 15:23 ` Tommy Reynolds
  2001-10-02 15:32 ` Re: Alex Bligh - linux-kernel
  0 siblings, 2 replies; 414+ messages in thread
From: Dinesh  Gandhewar @ 2001-10-02 15:29 UTC (permalink / raw)
  To: mlist-linux-kernel

Hello,
I have written a linux kernel module. The linux version is 2.2.14. 
In this module I have declared an array of size 2048. If I use this array, the execution of this module function causes kernel to reboot. If I kmalloc() this array then execution of this module function doesnot cause any problem.
Can you explain this behaviour?
Thnaks,
Dinesh 

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-10-02 15:29 Dinesh  Gandhewar
@ 2001-10-02 15:23 ` Tommy Reynolds
  2001-10-02 15:32 ` Re: Alex Bligh - linux-kernel
  1 sibling, 0 replies; 414+ messages in thread
From: Tommy Reynolds @ 2001-10-02 15:23 UTC (permalink / raw)
  To: Dinesh  Gandhewar; +Cc: mlist-linux-kernel

"Dinesh  Gandhewar" <dinesh_gandhewar@rediffmail.com> was pleased to say:

> I have written a linux kernel module. The linux version is 2.2.14. 
> In this module I have declared an array of size 2048. If I use this array, the
> execution of this module function causes kernel to reboot. If I kmalloc() this
> array then execution of this module function doesnot cause any problem.
> Can you explain this behaviour?

Unlike userland application programming, the kernel stack does not grow: it has
a fixed size.  You are using too much stack space and corrupting your system.
The kernel stack is quite small (less than 8K is available for ALL nested
modules and interrupt handlers), so driver functions should use an absolute
minimum of local variables, such as a pointer to a per-instance data area. 
Kernel-level kmalloc() is efficient enough to use frequently.

---------------------------------------------+-----------------------------
Tommy Reynolds                               | mailto:	<reynolds@redhat.com>
Red Hat, Inc., Embedded Development Services | Phone:  +1.256.704.9286
307 Wynn Drive NW, Huntsville, AL 35805 USA  | FAX:    +1.236.837.3839
Senior Software Developer                    | Mobile: +1.919.641.2923

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-10-02 15:29 Dinesh  Gandhewar
  2001-10-02 15:23 ` Tommy Reynolds
@ 2001-10-02 15:32 ` Alex Bligh - linux-kernel
  1 sibling, 0 replies; 414+ messages in thread
From: Alex Bligh - linux-kernel @ 2001-10-02 15:32 UTC (permalink / raw)
  To: Dinesh Gandhewar, mlist-linux-kernel; +Cc: Alex Bligh - linux-kernel



--On Tuesday, October 02, 2001 3:29 PM +0000 Dinesh  Gandhewar 
<dinesh_gandhewar@rediffmail.com> wrote:

> In this module I have declared an array of size 2048. If I use this
> array, the execution of this module function causes kernel to reboot. If
> I kmalloc() this array then execution of this module function doesnot
> cause any problem.

If you are allocating it on the stack (i.e. as a local variable)
you are probably running out of kernel stack space (depending
what it's an array of).

If you are declaring it non-local, it's possible you are
overwriting the end of it, and, kmalloc() being what it
is, there happens to be some wasted space next to it.

--
Alex Bligh

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2001-08-16 12:18 Saravana
  0 siblings, 0 replies; 414+ messages in thread
From: Saravana @ 2001-08-16 12:18 UTC (permalink / raw)
  To: linux-kernel

> hi!
>    I am beginning learn to write a driver follow the
> Book "Beginnng Linux programming(Second
> Edition)".There is a example about char driver,I write
> as that,but I can open device,but can't read from it.I
> just copy some static data to user buffer as follow:
>    copy_to_user(buf, schar_buffer, count)

just check out if u got any perms(rw) on the device.. ?
sarvana

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2001-08-14  3:08 Parag Warudkar
  2001-08-14  3:17 ` Re: Keith Owens
  0 siblings, 1 reply; 414+ messages in thread
From: Parag Warudkar @ 2001-08-14  3:08 UTC (permalink / raw)
  To: Colonel; +Cc: linux-kernel

Additionally include/linux/modules/ksyms.ver 
requires following  two lines to be added for modules to work

#define __ver_no_llseek 8d4d42a6 
#define no_llseek       _set_ver(no_llseek)

Parag

On Mon, 13 Aug 2001 08:51:03 -0700 (PDT), Colonel wrote:

>  From: Colonel <klink@clouddancer.com>
>  To: paragw@excite.com
>  In-reply-to: <6558420.997690787827.JavaMail.imail@mayall.excite.com>
(message
>  	from Parag Warudkar on Mon, 13 Aug 2001 01:19:46 -0700 (PDT))
>  Subject: Re: Unresolved symbol: no_llseek
>  Reply-to: klink@clouddancer.com
>  References:  <6558420.997690787827.JavaMail.imail@mayall.excite.com>
>  
>     Date: Mon, 13 Aug 2001 01:19:46 -0700 (PDT)
>     From: Parag Warudkar <paragw@excite.com>
>     Reply-To: paragw@excite.com
>     Content-Type: text/plain; charset=us-ascii
>     X-Sender-Ip: 164.164.130.13
>  
>     Hi,
>  	 May be now you can answer my query :)?
>  
>     TIA,
>  
>     Parag
>  
>  
>  There was a msg posted from me replying to Linus about 15 minutes
>  prior to your query.  There are two missing symbols in kernel/ksyms
>  that need exporting.
>  
>  diff ksyms.c ksyms.c.~1~ 
>  245,246d244
>  < EXPORT_SYMBOL(generic_file_llseek);
>  < EXPORT_SYMBOL(no_llseek);
>  
>  
>  
>  
>  
>     In clouddancer.list.kernel, you wrote:
>  
>     >
>     >
>     >This is a multi-part message in MIME format.
>     >
>     >--------------InterScan_NT_MIME_Boundary
>     >Content-Type: text/plain; charset=us-ascii; format=flowed
>     >Content-Transfer-Encoding: 7bit
>     >
>     >
>     >In i810_audio: Unresolved symbol: no_llseek
>     >
>     >In agpgart : Unresolved symbol: no_llseek
>     >
>     >
>  
>  
>     I think I could tell you the answer, but it might violation this
disclaimer.
>  
>  
>  
>  
>     Parag Warudkar
>     Senior Systems Engineer
>     Wipro Technologies,
>     E-Commerce Division,
>     Lavelle Road,
>     Bangalore - 560001.
>     Ph: 2215010 Ext: 124.
>  
>  
>  
>  
>  
>     _______________________________________________________
>     http://inbox.excite.com
>  
>  


Parag Warudkar
Senior Systems Engineer
Wipro Technologies,
E-Commerce Division,
Lavelle Road,
Bangalore - 560001.
Ph: 2215010 Ext: 124.





_______________________________________________________
Send a cool gift with your E-Card
http://www.bluemountain.com/giftcenter/



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-08-14  3:08 Re: Parag Warudkar
@ 2001-08-14  3:17 ` Keith Owens
  0 siblings, 0 replies; 414+ messages in thread
From: Keith Owens @ 2001-08-14  3:17 UTC (permalink / raw)
  To: paragw; +Cc: Colonel, linux-kernel

On Mon, 13 Aug 2001 20:08:01 -0700 (PDT), 
Parag Warudkar <paragw@excite.com> wrote:
>Additionally include/linux/modules/ksyms.ver 
>requires following  two lines to be added for modules to work
>
>#define __ver_no_llseek 8d4d42a6 
>#define no_llseek       _set_ver(no_llseek)

Don't do that.  If you have module symbol versions turned on
(CONFIG_MODVERSIONS) then after any change that affects exported
symbols, you must make mrproper and rebuild from scratch.  See
http://www.tux.org/lkml/#s8-8.


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2001-07-25 18:44 Sumit Bhardwaj
  2001-07-25 19:18 ` Matthew M
  0 siblings, 1 reply; 414+ messages in thread
From: Sumit Bhardwaj @ 2001-07-25 18:44 UTC (permalink / raw)
  To: linux-kernel

I tried to compile linux 2.4.5 with gcc-3.0. It gave
the following error

kernel/sched.c : 'xtime' definition clash
In file include/linux/timer.h: line no. 540

after making the declaration

extern volatile struct xtime;

things worked fine.

__________________________________________________
Do You Yahoo!?
Make international calls for as low as $.04/minute with Yahoo! Messenger
http://phonecard.yahoo.com/

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-07-25 18:44 Sumit Bhardwaj
@ 2001-07-25 19:18 ` Matthew M
  0 siblings, 0 replies; 414+ messages in thread
From: Matthew M @ 2001-07-25 19:18 UTC (permalink / raw)
  To: linux-kernel

>I tried to compile linux 2.4.5 with gcc-3.0. It gave
>the following error
>kernel/sched.c : 'xtime' definition clash
>In file include/linux/timer.h: line no. 540

This did appear in 2.4.5, but is no longer an issue in 2.4.7.

-- 
*matt* 

Don't Worry, Be Happy.
		-- Meher Baba

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2001-06-11  4:58 kiran.thirumalai
  2001-06-11  6:54 ` Anil Kumar
  0 siblings, 1 reply; 414+ messages in thread
From: kiran.thirumalai @ 2001-06-11  4:58 UTC (permalink / raw)
  To: linux-kernel

Hi,
Is there some kernel api to validate memory allocated using kmalloc.
Suppose, I allocate some memory using kmalloc and at a later point of
execution
I would like to validate if the memory allocated is not possibly freed by
some other thread.

Pls suggest a patch/pointers if any.
I also noticed a commented 'CONFIG_DEBUG_MALLOC' config option  (2.4.3
source),
It doesn't seem to be functional.  Any pointers towards the history behind
it would also be helpful.

Thanks in advance,
Kiran

^ permalink raw reply	[flat|nested] 414+ messages in thread

* RE:
  2001-06-11  4:58 kiran.thirumalai
@ 2001-06-11  6:54 ` Anil Kumar
  0 siblings, 0 replies; 414+ messages in thread
From: Anil Kumar @ 2001-06-11  6:54 UTC (permalink / raw)
  To: kiran.thirumalai, linux-kernel

you can use "sys_mprotect" call which is kernel space equ. of "mprotect" .
The implementation for the same can be found in "mm\mprotect.c".

anil

-----Original Message-----
From: linux-kernel-owner@vger.kernel.org
[mailto:linux-kernel-owner@vger.kernel.org]On Behalf Of
kiran.thirumalai@in.ibm.com
Sent: Monday, June 11, 2001 10:29 AM
To: linux-kernel@vger.kernel.org
Subject:

Hi,
Is there some kernel api to validate memory allocated using kmalloc.
Suppose, I allocate some memory using kmalloc and at a later point of
execution
I would like to validate if the memory allocated is not possibly freed by
some other thread.

Pls suggest a patch/pointers if any.
I also noticed a commented 'CONFIG_DEBUG_MALLOC' config option  (2.4.3
source),
It doesn't seem to be functional.  Any pointers towards the history behind
it would also be helpful.

Thanks in advance,
Kiran

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2001-05-22  4:25 Rajiv Majumdar
  0 siblings, 0 replies; 414+ messages in thread
From: Rajiv Majumdar @ 2001-05-22  4:25 UTC (permalink / raw)
  To: Anita Sinha; +Cc: linux-kernel, linux-kernel-owner


send a mail to majordomo@vger.kernel.org with 'help' in the body of the
mail

cheers
rajiv


^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2001-05-08 19:48 Richard B. Johnson
  2001-05-08 21:33 ` george anzinger
  2001-05-09  0:36 ` Re: Andrew Morton
  0 siblings, 2 replies; 414+ messages in thread
From: Richard B. Johnson @ 2001-05-08 19:48 UTC (permalink / raw)
  To: Linux kernel

To driver wizards:

I have a driver which needs to wait for some hardware.
Basically, it needs to have some code added to the run-queue
so it can get some CPU time even though it's not being called.

It needs to get some CPU time which can be "turned on" or
"turned off" as a result of an interrupt or some external
input from  an ioctl().

So I thought that the "tasklet" would be ideal. However, the
scheduler "thinks" that a tasklet is an interrupt, so any
attempt to sleep in the tasklet results in a kernel panic,
"ieee scheduling in an interrupt..., BUG sched.c line 688".

Next, I added code to try queue_task(). This has the same problem.

Basically the procedure needs to do:

/*
 * Sketch (pseudo-code) of the polling routine the driver wants to run:
 * on each pass it either sleeps for n ticks or does some work, and then
 * puts itself back on the tq_immediate task queue so it runs again.
 * some_event, something_else, n and do_something() are placeholders.
 */
procedure()
{
    if(some_event)
        schedule_timeout(n);               /* Needs to sleep */
    else if(something_else)
        do_something();
   queue_task(procedure, &tq_immediate);   /* Needs to queue itself again */
}

Since I'm running against a time-line, I temporarily  gave the module
some CPU time through an ioctl(), i.e., a separate task that does nothing
except repeatedly execute ioctl(GIVE_CPU, NULL); This shows that the
driver actually works. It's a GPIB driver so it needs to get the
CPU to find out if it's addressed to listen, etc. These events don't
produce interrupts.

So, what am I supposed to do to add a piece of driver code to the
run queue so it gets scheduled occasionally?

Cheers,
Dick Johnson

Penguin : Linux version 2.4.1 on an i686 machine (799.53 BogoMips).

"Memory is like gasoline. You use it up when you are running. Of
course you get it all back when you reboot..."; Actual explanation
obtained from the Micro$oft help desk.

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-05-08 19:48 Richard B. Johnson
@ 2001-05-08 21:33 ` george anzinger
  2001-05-09 13:04   ` Re: Richard B. Johnson
  2001-05-09  0:36 ` Re: Andrew Morton
  1 sibling, 1 reply; 414+ messages in thread
From: george anzinger @ 2001-05-08 21:33 UTC (permalink / raw)
  To: root; +Cc: Linux kernel

"Richard B. Johnson" wrote:
> 
> To driver wizards:
> 
> I have a driver which needs to wait for some hardware.
> Basically, it needs to have some code added to the run-queue
> so it can get some CPU time even though it's not being called.
> 
> It needs to get some CPU time which can be "turned on" or
> "turned off" as a result of an interrupt or some external
> input from  an ioctl().
> 
> So I thought that the "tasklet" would be ideal. However, the
> scheduler "thinks" that a tasklet is an interrupt, so any
> attempt to sleep in the tasklet results in a kernel panic,
> "ieee scheduling in an interrupt..., BUG sched.c line 688".
> 
> Next, I added code to try queue_task(). This has the same problem.
> 
> Basically the procedure needs to do:
> 
> procedure()
> {
>     if(some_event)
>         schedule_timeout(n);               /* Needs to sleep */
>     else if(something_else)
>         do_something();
>    queue_task(procedure, &tq_immediate);   /* Needs to queue itself again */
> }
> 
> Since I'm running against a time-line, I temporarily  gave the module
> some CPU time through an ioctl(), i.e., a separate task that does nothing
> except repeatably execute ioctl(GIVE_CPU, NULL); This shows that the
> driver actually works. It's a GPIB driver so it needs to get the
> CPU to find out if it's addressed to listen, etc. These events don't
> produce interrupts.
> 
> So, what am I supposed to do to add a piece of driver code to the
> run queue so it gets scheduled occasionally?
> 
> Cheers,
> Dick Johnson

How about something like:

#include <linux/timer.h>

/*
 * Arm a one-shot kernel timer.
 *
 * process_timeout: function stored in timer->function
 * timeout:         delay in jiffies, relative to now
 * timer:           caller-owned timer_list; it is (re)initialised here,
 *                  so it must not already be pending
 * data:            value stored in timer->data
 *
 * 'timer' is already a pointer, so it is handed to init_timer() and
 * add_timer() as-is; taking its address again would pass a
 * 'struct timer_list **' and operate on the wrong object.
 *
 * NOTE(review): process_timeout is declared as taking no arguments, but
 * it is assigned to timer->function, which is presumably expected to
 * receive 'data' -- confirm the intended callback prototype.
 */
void queue_task(void process_timeout(void), unsigned long timeout,
struct timer_list *timer, unsigned long data)
{
	unsigned long expire = timeout + jiffies;

	init_timer(timer);
	timer->expires = expire;
	timer->data = data;
	timer->function = process_timeout;

	add_timer(timer);
}


You will have to define the "struct timer_list timer".  This should
cause the function passed to be called after "timeout" jiffies (1/HZ,
not to be confused with 10 ms).  If you want to stop the timer early do:

	del_timer_sync(&timer);

"data" was not used in you example, but process_timeout will be passed
"data" when it is called.  This routine is called as part of the timer
interrupt, so it must be fast and should not do schedule() calls.  It
could queue a tasklet, however, to relax constraints a bit.

George

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-05-08 21:33 ` george anzinger
@ 2001-05-09 13:04   ` Richard B. Johnson
  2001-05-09 14:10     ` Re: Alan Cox
  0 siblings, 1 reply; 414+ messages in thread
From: Richard B. Johnson @ 2001-05-09 13:04 UTC (permalink / raw)
  To: george anzinger; +Cc: Linux kernel

On Tue, 8 May 2001, george anzinger wrote:

> "Richard B. Johnson" wrote:
> > 
> > To driver wizards:
> > 
> > I have a driver which needs to wait for some hardware.
> > Basically, it needs to have some code added to the run-queue
> > so it can get some CPU time even though it's not being called.
> > 
[SNIPPED...]

> How about something like:
> 
> #include <linux/timer.h>
> 
> void queue_task(void process_timeout(void), unsigned long timeout,
> struct timer_list *timer, unsigned long data)
> {
> 	unsigned long expire = timeout + jiffies;
> 
> 	init_timer(&timer);
> 	timer->expires = expire;
> 	timer->data = data;
> 	timer->function = process_timeout;
> 
> 	add_timer(&timer);
> }
> 
> 
> You will have to define the "struct timer_list timer".  This should
> cause the function passed to be called after "timeout" jiffies (1/HZ,
> not to be confused with 10 ms).  If you want to stop the timer early do:
> 
> 	del_timer_sync(&timer);
> 
> "data" was not used in you example, but process_timeout will be passed
> "data" when it is called.  This routine is called as part of the timer
> interrupt, so it must be fast and should not do schedule() calls.  It
> could queue a tasklet, however, to relax constraints a bit.
> 
> George

This is all very nice. This is basically what the 'tasklet' does.
The problem is that I have in my code something like this:


/*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
/*
 *  Wait until any bit selected by 'mask' is set in the event-flag word
 *  that 'event' points to, for at most 'ticks' timer-ticks (jiffies).
 *
 *  Returns 0 as soon as (*event & mask) is non-zero, or -ETIME if the
 *  deadline passes first.
 *
 *  The flag is tested before the deadline on every pass.  It is thus
 *  always examined at least once (even when ticks <= 0) and once more
 *  after the last schedule(), so an event that arrives just as the time
 *  runs out is reported as success rather than as a timeout.
 *
 *  NOTE(review): schedule() is called without changing the task state,
 *  so the task presumably stays runnable and this loop may simply spin
 *  when nothing else wants the CPU -- consider a real timed sleep.
 */
static int waitfor(volatile int *event, int mask, int ticks)
{
    unsigned long deadline;

    DEB(printk("%s waitfor\n", info->dev));
    deadline = jiffies + (unsigned long) ticks;
    for(;;)
    {
        if(*event & mask)
            return 0;                       /* event seen */
        if(!time_before(jiffies, deadline))
            return -ETIME;                  /* deadline reached */
        schedule();                         /* let something else run */
    }
}
/*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
/*
 *  Wait until any bit selected by 'mask' is set in the port read with
 *  READ_TNT(offset), for at most 'ticks' timer-ticks (jiffies).
 *
 *  Returns 0 as soon as (READ_TNT(offset) & mask) is non-zero, or
 *  -ETIME if the deadline passes first.
 *
 *  The port is sampled before the deadline on every pass.  It is thus
 *  always read at least once (even when ticks <= 0) and once more after
 *  the last schedule(), so a bit that comes up just as the time runs
 *  out is reported as success rather than as a timeout.
 *
 *  NOTE(review): schedule() is called without changing the task state,
 *  so the task presumably stays runnable and this loop may simply spin
 *  when nothing else wants the CPU -- consider a real timed sleep.
 */
static int waitport(int offset, int mask, int ticks)
{
    unsigned long deadline;

    DEB(printk("%s waitport\n", info->dev));
    deadline = jiffies + (unsigned long) ticks;
    for(;;)
    {
        if(READ_TNT(offset) & mask)
            return 0;                       /* bit seen */
        if(!time_before(jiffies, deadline))
            return -ETIME;                  /* deadline reached */
        schedule();                         /* let something else run */
    }
}


Both of these procedures schedule() while waiting for something to
happen. The wait can be very long (1 second) so I don't want to
just spin eating CPU cycles. I have to give the CPU to somebody.





Cheers,
Dick Johnson

Penguin : Linux version 2.4.1 on an i686 machine (799.53 BogoMips).

"Memory is like gasoline. You use it up when you are running. Of
course you get it all back when you reboot..."; Actual explanation
obtained from the Micro$oft help desk.



^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-05-09 13:04   ` Re: Richard B. Johnson
@ 2001-05-09 14:10     ` Alan Cox
  2001-05-09 16:59       ` Re: george anzinger
  0 siblings, 1 reply; 414+ messages in thread
From: Alan Cox @ 2001-05-09 14:10 UTC (permalink / raw)
  To: root; +Cc: george anzinger, Linux kernel

>     while(!!time_before(jiffies, timer))
>     {
>         if(!!(*event & mask))
>         {
>             stat = 0;
>             break;
>         }
>         schedule();

You want to yield as well otherwise you may just spin anyway

> Both of these procedures schedule() while waiting for something to
> happen. The wait can be very long (1 second) so I don't want to
> just spin eating CPU cycles. I have to give the CPU to somebody.

So use a timer


/*
 * Timer callback that polls a device once per tick.
 *
 * l: the device, passed as an unsigned long and cast back to
 *    'struct my_device *'.
 *
 * While its_still_busy(d) is true, timer_count is decremented and the
 * timer is re-armed for the next jiffy.  When the device is no longer
 * busy (event_status = OK) or timer_count reaches zero
 * (event_status = TIMEOUT), whoever sleeps on d->timer_wait is woken.
 *
 * NOTE(review): assumes the timer being re-armed is the 'timer' member
 * of *d and that it is a struct timer_list -- confirm against the
 * definition of struct my_device.
 */
void tick_tick_boom(unsigned long l)
{
	struct my_device *d = (struct my_device *)l;

	if(its_still_busy(d))
	{
		d->timer_count--;
		if(d->timer_count)
		{
			/* Try again until timer_count hits zero */
			d->timer.expires = jiffies + 1;
			add_timer(&d->timer);
			return;
		}
		else
		{
			/* Lose some .. */
			d->event_status = TIMEOUT;
		}
	}
	else
	{
		/* Win some .. */
		d->event_status = OK;
	}
	/* Wake up the invoker */
	wake_up(&d->timer_wait);
}


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-05-09 14:10     ` Re: Alan Cox
@ 2001-05-09 16:59       ` george anzinger
  2001-05-09 17:15         ` Re: Alan Cox
  0 siblings, 1 reply; 414+ messages in thread
From: george anzinger @ 2001-05-09 16:59 UTC (permalink / raw)
  To: Alan Cox; +Cc: root, Linux kernel

Alan Cox wrote:
> 
> >     while(!!time_before(jiffies, timer))
> >     {
> >         if(!!(*event & mask))
> >         {
> >             stat = 0;
> >             break;
> >         }
> >         schedule();
> 
> You want to yield as well otherwise you may just spin anyway
> 
> > Both of these procedures schedule() while waiting for something to
> > happen. The wait can be very long (1 second) so I don't want to
> > just spin eating CPU cycles. I have to give the CPU to somebody.
> 
> So use a timer
> 
> void tick_tick_boom(unsigned long l)
> {
>         struct my_device *d = (struct my_device *)l;
> 
>         if(its_still_busy(d))
>         {
>                 d->timer_count--;
>                 if(d->timer_count)
>                 {
>                         /* Try again until timer_count hits zero */
>                         add_timer(&t->timer, jiffies+1);
>                         return;
>                 }
>                 else
>                 {
>                         /* Lose some .. */
>                         d->event_status = TIMEOUT;
>                 }
>         }
>         else
>         {
>                 /* Win some .. */
>                 d->event_status = OK;
>         }
>         /* Wake up the invoker */
>         wake_up(&d->timer_wait);
> }

To clarify this a bit, the above code invokes itself with the timer and
thus runs under the timer interrupt.  The first call to it would be made
from your driver which would then sleep waiting for the wake_up, which
will come either on success or when the timer_count has expired.  This
code will poll each jiffie.

The key here is to use the wake_up/ sleep combination to pass control
from the interrupt back to the driver.  This is not unlike what you must
already be doing for interrupt completion.

Do pay attention to getting the timer (&t->timer above) properly set up
(see my first response or most any usage in the kernel).

Have I got this right Alan?

George

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-05-09 16:59       ` Re: george anzinger
@ 2001-05-09 17:15         ` Alan Cox
  0 siblings, 0 replies; 414+ messages in thread
From: Alan Cox @ 2001-05-09 17:15 UTC (permalink / raw)
  To: george anzinger; +Cc: Alan Cox, root, Linux kernel

> from the interrupt back to the driver.  This is not unlike what you must
> already be doing for interrupt completion.
> 
> Do pay attention to getting the timer (&t->timer above) properly set up
> (see my first response or most any usage in the kernel).
> 
> Have I got this right Alan?

The other thing to watch is that you need to delete the timer before you unload.
As you can safely del_timer() an initialised but already deleted timer, that
isn't too onerous.

Waiting for a thread in the module unload is trickier. You cannot simply kill
the thread as it may run after cleanup_module() returns. Instead you do

static void cleanup_module(void)
{
	kill_thread();
	down(&thread_sem);
	printk("Thread dead\n");
}

and in the thread exit path do

	up_and_exit(&thread_sem, error_code);

This ensures that the thread of execution has left the module code space and
will not return.

Alan

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-05-08 19:48 Richard B. Johnson
  2001-05-08 21:33 ` george anzinger
@ 2001-05-09  0:36 ` Andrew Morton
  1 sibling, 0 replies; 414+ messages in thread
From: Andrew Morton @ 2001-05-09  0:36 UTC (permalink / raw)
  To: root; +Cc: Linux kernel

"Richard B. Johnson" wrote:
> 
> To driver wizards:
> 
> I have a driver which needs to wait for some hardware.
> Basically, it needs to have some code added to the run-queue
> so it can get some CPU time even though it's not being called.
> 
> It needs to get some CPU time which can be "turned on" or
> "turned off" as a result of an interrupt or some external
> input from  an ioctl().

schedule_task()?

^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2001-04-18  0:15 Vibol Hou
  2001-04-18  0:26 ` Jaquemet Loic
  2001-04-20  2:47 ` Re: Francois Cami
  0 siblings, 2 replies; 414+ messages in thread
From: Vibol Hou @ 2001-04-18  0:15 UTC (permalink / raw)
  To: Linux-Kernel

Hi,

I'm using 2.4.4-pre3 and get this message occasionally when the system is
loaded:

Apr 17 16:10:12 omega kernel: eth0: Too much work in interrupt, status e401.
Apr 17 16:10:12 omega kernel: eth0: Too much work in interrupt, status e401.

The nic is a 3Com 3c905B. Is this a bad thing?

/proc/interrupts:
           CPU0       CPU1
  0:   13167527   12036422    IO-APIC-edge  timer
  1:          0          2    IO-APIC-edge  keyboard
  2:          0          0          XT-PIC  cascade
  4:      22773      19820    IO-APIC-edge
  8:          1          0    IO-APIC-edge  rtc
 15:          1          4    IO-APIC-edge  ide1
 17:   50001929   49606064   IO-APIC-level  eth0
 18:    2459038    2364252   IO-APIC-level  aic7xxx
NMI:          0          0
LOC:   25202946   25202942
ERR:          0

--
Vibol Hou
KhmerConnection
http://khmer.cc


^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-04-18  0:15 Vibol Hou
@ 2001-04-18  0:26 ` Jaquemet Loic
  2001-04-18  0:32   ` Re: Jeff Garzik
  2001-04-20  2:47 ` Re: Francois Cami
  1 sibling, 1 reply; 414+ messages in thread
From: Jaquemet Loic @ 2001-04-18  0:26 UTC (permalink / raw)
  To: Vibol Hou; +Cc: Linux-Kernel

Vibol Hou a écrit :

> Hi,
>
> I'm using 2.4.4-pre3 and get this message occasionally when the system is
> loaded:
>
> Apr 17 16:10:12 omega kernel: eth0: Too much work in interrupt, status e401.
> Apr 17 16:10:12 omega kernel: eth0: Too much work in interrupt, status e401.
>
> The nic is a 3Com 3c905B. Is this a bad thing?
>
> /proc/interrupts:
>            CPU0       CPU1
>   0:   13167527   12036422    IO-APIC-edge  timer
>   1:          0          2    IO-APIC-edge  keyboard
>   2:          0          0          XT-PIC  cascade
>   4:      22773      19820    IO-APIC-edge
>   8:          1          0    IO-APIC-edge  rtc
>  15:          1          4    IO-APIC-edge  ide1
>  17:   50001929   49606064   IO-APIC-level  eth0
>  18:    2459038    2364252   IO-APIC-level  aic7xxx
> NMI:          0          0
> LOC:   25202946   25202942
> ERR:          0
>
> --
> Vibol Hou
> KhmerConnection
> http://khmer.cc
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

I've got a similar problem with a  RTL-8139 (rev 10) ( 8139too.c )
Apr 17 22:53:12 skippy kernel: eth1: Too much work at interrupt,
IntrStatus=0x0040.

The maintainer of this module writes that it's an RxFIFO overflow that has
probably no other solution than buying a new processor :)
But .. I didn't have these messages on pre - 2.4.3 kernels .. ( neither on
2.4.3ac7 )




^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-04-18  0:26 ` Jaquemet Loic
@ 2001-04-18  0:32   ` Jeff Garzik
  0 siblings, 0 replies; 414+ messages in thread
From: Jeff Garzik @ 2001-04-18  0:32 UTC (permalink / raw)
  To: Jaquemet Loic; +Cc: Vibol Hou, Linux-Kernel

Jaquemet Loic wrote:
> I've got a similar problem with a  RTL-8139 (rev 10) ( 8139too.c )
> Apr 17 22:53:12 skippy kernel: eth1: Too much work at interrupt,
> IntrStatus=0x0040.
> 
> The maintenair of this module writes that's a RxFIIFO Overflow that have
> probably no other issue than buying a new processor :)
> But .. I didn't have this messages on pre - 2.4.3 kernels .. ( neither on
> 2.4.3ac7 )

That's a different issue than the poster is having, it's two totally
different network cards with different characteristics.  I don't
remember telling you that status code is a RxFIFO overflow, though :)

The RxFIFO overflow code definitely needs changing -- that's the next
item on the list.

-- 
Jeff Garzik       | "Give a man a fish, and he eats for a day. Teach a
Building 1024     |  man to fish, and a US Navy submarine will make sure
MandrakeSoft      |  he's never hungry again." -- Chris Neufeld

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-04-18  0:15 Vibol Hou
  2001-04-18  0:26 ` Jaquemet Loic
@ 2001-04-20  2:47 ` Francois Cami
  2001-04-21  1:26   ` Re: Andrew Morton
  1 sibling, 1 reply; 414+ messages in thread
From: Francois Cami @ 2001-04-20  2:47 UTC (permalink / raw)
  To: Vibol Hou; +Cc: Linux-Kernel

Vibol Hou wrote:
> 
> Hi,
> 
> I'm using 2.4.4-pre3 and get this message occasionally when the system is
> loaded:
> 
> Apr 17 16:10:12 omega kernel: eth0: Too much work in interrupt, status e401.
> Apr 17 16:10:12 omega kernel: eth0: Too much work in interrupt, status e401.

I got that one too, PC is ASUS P2B-DS with two PII-350, 384MB RAM,
3C905B.
I've tried 3C905C to no avail.
The e401 status seems to be that there is too much load on the card to
be treated in the 20 (2.2.17) or 32 (2.2.19, 2.4.x) loops of the
interruption
check routine (stop/hit me if i'm wrong please). 
I think we should try (MM. Donald Becker or Andrew Morton, 
is this a Bad Thing ?) to change max_interrupt_work (3c59x.c, row 171)
to 64
or maybe even higher. Haven't had the guts to try on the production
machine
right now =)

> The nic is a 3Com 3c905B. Is this a bad thing?

I heard they work fine... 

François Cami
There And Back Again

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-04-20  2:47 ` Re: Francois Cami
@ 2001-04-21  1:26   ` Andrew Morton
  0 siblings, 0 replies; 414+ messages in thread
From: Andrew Morton @ 2001-04-21  1:26 UTC (permalink / raw)
  To: Francois Cami; +Cc: Vibol Hou, Linux-Kernel

Francois Cami wrote:
> 
> Vibol Hou wrote:
> ...
>
> > Apr 17 16:10:12 omega kernel: eth0: Too much work in interrupt, status e401.
> 
> I got that one too, PC is ASUS P2B-DS with two PII-350, 384MB RAM,
> 3C905B.

If you were getting this message occasionally, and if increasing the
max_interrupt_work module parm makes it stop, and everything
is always working fine, then it's an OK thing to do.

Question is: why is it happening?  We're failing to get out
of the interrupt loop after 32 loops.  Each loop can reap
up to 16 transmitted packets and 32 received packets.
That's a lot.

My suspicion is that something else in the system is
causing the NIC interrupt routine to get held up for long
periods of time.  It has to be another interrupt.

All reporters of this problem (ie: both of them) were using
aic7xx SCSI.  I wonder if that driver can sometimes spend a
long time in its interrupt routine.  Many times.  Rapidly.

Very odd.

Ah.  SMP.  Perhaps the other CPU is generating the transmit
load, some other interrupt source is slowing down *this*
CPU.

Could you test something for me?  Try *decreasing* the
value of max_interrupt_work.  See if that increases
the frequency of the message.  Then, if it does, try to
correlate the occurrence of the message with some other
form of system activity (especially disk I/O).

Thanks.

-

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
@ 2001-03-22 18:02 Gunnar Ahlberg
  0 siblings, 0 replies; 414+ messages in thread
From: Gunnar Ahlberg @ 2001-03-22 18:02 UTC (permalink / raw)
  To: linux-kernel


I'm sorry for the empty message. As you can see it was 
delivered to the wrong address. 
Please disregard this posting.
Again, my apologies.

> list
> 
> -
> To unsubscribe from this list: send the line "unsubscribe 
linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-
info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 



^ permalink raw reply	[flat|nested] 414+ messages in thread

* (no subject)
@ 2001-01-19 13:37 Robert Kaiser
  2001-01-19 14:33 ` Michael Rothwell
  0 siblings, 1 reply; 414+ messages in thread
From: Robert Kaiser @ 2001-01-19 13:37 UTC (permalink / raw)
  To: Steve Hill; +Cc: linux-kernel


On Thu Jan 18 16:30:30 2001 steve@navaho.co.uk wrote
> Has anyone had any luck getting a 2.4 kernel to run on Cobalt x86
> hardware?  It doesn't even seem to start (I get nothing on the screen from
> the kernel, it just sits there and does nothing). :(

What processor does it use ? (386 or 486 perchance?)



----------------------------------------------------------------
Robert Kaiser                         email: rkaiser@sysgo.de
SYSGO RTS GmbH
Am Pfaffenstein 14                    phone: (49) 6136 9948-762
D-55270 Klein-Winternheim / Germany   fax:   (49) 6136 9948-10
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
Please read the FAQ at http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 414+ messages in thread

* Re:
  2001-01-19 13:37 Robert Kaiser
@ 2001-01-19 14:33 ` Michael Rothwell
  0 siblings, 0 replies; 414+ messages in thread
From: Michael Rothwell @ 2001-01-19 14:33 UTC (permalink / raw)
  To: rob; +Cc: Steve Hill, linux-kernel

Robert Kaiser wrote:
> 
> On Thu Jan 18 16:30:30 2001 steve@navaho.co.uk wrote
> > Has anyone had any luck getting a 2.4 kernel to run on Cobalt x86
> > hardware?  It doesn't even seem to start (I get nothing on the screen from
> > the kernel, it just sits there and does nothing). :(
> 
> What processor does it use ? (386 or 486 perchance?)

AMD K6. New ones will use Athlon.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
Please read the FAQ at http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 414+ messages in thread

end of thread, other threads:[~2024-03-29  4:41 UTC | newest]

Thread overview: 414+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-10-26 20:51 bfeely
  -- strict thread matches above, loose matches on Subject: below --
2024-03-07  6:07 KR Kim
2024-03-07  8:01 ` Miquel Raynal
2024-03-08  1:27   ` Re: Kyeongrho.Kim
     [not found]   ` <SE2P216MB210205B301549661575720CC833A2@SE2P216MB2102.KORP216.PROD.OUTLOOK.COM>
2024-03-29  4:41     ` Re: Kyeongrho.Kim
2023-11-27 13:37 [PATCH 2/3] net: microchip_t1s: add support for LAN867x Rev.C1 Andrew Lunn
2023-12-05 10:20 ` Félix Piédallu
2023-12-06 20:58   ` Ramón Nordin Rodriguez
2023-11-11  4:21 Andrew Worsley
2023-11-11  8:22 ` Javier Martinez Canillas
     [not found] <DB3PR10MB6835AF75D60D9A96465F35C2E8AAA@DB3PR10MB6835.EURPRD10.PROD.OUTLOOK.COM>
2023-11-06 12:55 ` Re: syzbot
2023-10-18 18:50 PIC probing code from e179f6914152 failing Mario Limonciello
2023-10-18 22:50 ` Thomas Gleixner
2023-10-19 21:20   ` Mario Limonciello
2023-10-23 15:59     ` Thomas Gleixner
2023-10-25  9:23       ` Thomas Gleixner
2023-10-25 14:41         ` Mario Limonciello
2023-10-25 15:25           ` David Lazar
2023-10-25 17:31             ` Thomas Gleixner
2023-10-25 21:04               ` [PATCH] x86/i8259: Skip probing when ACPI/MADT advertises PCAT compatibility, Thomas Gleixner
2023-10-25 22:11                 ` Mario Limonciello
2023-10-26  9:27                   ` Re: Thomas Gleixner
     [not found] <64b09dbb.630a0220.e80b9.e2ed@mx.google.com>
2023-07-14  8:05 ` Re: Andy Shevchenko
2023-05-11 12:58 Ryan Roberts
2023-05-11 13:13 ` Ryan Roberts
2023-03-12  6:52 [PATCH v2] uas: Add US_FL_NO_REPORT_OPCODES for JMicron JMS583Gen 2 Greg Kroah-Hartman
2023-03-27 13:54 ` Yaroslav Furman
2023-03-27 14:19   ` Greg Kroah-Hartman
2023-01-18 20:59 [PATCH v5 0/5] CXL Poison List Retrieval & Tracing alison.schofield
2023-01-27  1:59 ` Dan Williams
2023-01-27 16:10   ` Alison Schofield
2023-01-27 19:16     ` Re: Dan Williams
2023-01-27 21:36       ` Re: Alison Schofield
2023-01-27 22:04         ` Re: Dan Williams
2022-11-21 11:11 Denis Arefev
2022-11-21 14:28 ` Jason Yan
2022-09-14 13:12 Amjad Ouled-Ameur
2022-09-14 13:18 ` Amjad Ouled-Ameur
2022-05-15 20:36 [PATCH bpf-next 1/2] cpuidle/rcu: Making arch_cpu_idle and rcu_idle_exit noinstr Jiri Olsa
2023-05-20  9:47 ` Ze Gao
2023-05-21  3:58   ` Yonghong Song
2023-05-21 15:10     ` Re: Ze Gao
2023-05-21 20:26       ` Re: Jiri Olsa
2023-05-22  1:36         ` Re: Masami Hiramatsu
2023-05-22  2:07         ` Re: Ze Gao
2023-05-23  4:38           ` Re: Yonghong Song
2023-05-23  5:30           ` Re: Masami Hiramatsu
2023-05-23  6:59             ` Re: Paul E. McKenney
2023-05-25  0:13               ` Re: Masami Hiramatsu
2023-05-21  8:08   ` Re: Jiri Olsa
2023-05-21 10:09     ` Re: Masami Hiramatsu
2023-05-21 14:19       ` Re: Ze Gao
     [not found] <CANiq72k+5Rdj7i3Df2dcE6_OPYPXK3z5EWLKnY56sSMz4G3OvA@mail.gmail.com>
     [not found] ` <CAABZP2z64aYWfVSdXHaQopWc+BAbJJUGqtrju2iWER3DDTDFWg@mail.gmail.com>
     [not found]   ` <20220406170012.GO4285@paulmck-ThinkPad-P17-Gen-1>
     [not found]     ` <87pmls6nt7.fsf@mpe.ellerman.id.au>
     [not found]       ` <87k0bz7i1s.fsf@mpe.ellerman.id.au>
2022-04-13  5:11         ` Nicholas Piggin
2022-04-22 15:53           ` Thomas Gleixner
2022-04-23  2:29             ` Re: Nicholas Piggin
2022-04-21 16:41 Yury Norov
2022-04-21 23:04 ` John Hubbard
2022-04-21 23:09   ` Re: John Hubbard
2022-04-21 23:17   ` Re: Yury Norov
2022-04-21 23:21     ` Re: John Hubbard
2022-03-25  6:30 Michael S. Tsirkin
2022-03-25  7:52 ` Jason Wang
2022-03-25  9:10   ` Re: Michael S. Tsirkin
2022-03-25  9:20     ` Re: Jason Wang
2022-03-25 10:09       ` Re: Michael S. Tsirkin
2022-03-28  4:56         ` Re: Jason Wang
2022-03-28  5:59           ` Re: Michael S. Tsirkin
2022-03-28  6:18             ` Re: Jason Wang
2022-03-28 10:40               ` Re: Michael S. Tsirkin
2022-03-29  7:12                 ` Re: Jason Wang
2022-03-29 14:08                   ` Re: Michael S. Tsirkin
2022-03-30  2:40                     ` Re: Jason Wang
2022-03-30  5:14                       ` Re: Michael S. Tsirkin
2022-03-30  5:53                         ` Re: Jason Wang
2022-03-29  8:35                 ` Re: Thomas Gleixner
2022-03-29 14:37                   ` Re: Michael S. Tsirkin
2022-03-29 18:13                     ` Re: Thomas Gleixner
2022-03-29 22:04                       ` Re: Michael S. Tsirkin
2022-03-30  2:38                         ` Re: Jason Wang
2022-03-30  5:09                           ` Re: Michael S. Tsirkin
2022-03-30  5:53                             ` Re: Jason Wang
2022-04-12  6:55                   ` Re: Michael S. Tsirkin
2022-01-20 15:28 Myrtle Shah
2022-01-20 15:37 ` Vitaly Wool
2022-01-20 23:29   ` Re: Damien Le Moal
2022-02-04 21:45   ` Re: Palmer Dabbelt
     [not found] <20211126221034.21331-1-lukasz.bartosik@semihalf.com--annotate>
2021-11-29 21:59 ` Re: sean.wang
2021-11-02  9:48 [PATCH v5 00/11] Add support for X86/ACPI camera sensor/PMIC setup with clk and regulator platform data Hans de Goede
2021-11-02  9:49 ` [PATCH v5 05/11] clk: Introduce clk-tps68470 driver Hans de Goede
     [not found]   ` <163588780885.2993099.2088131017920983969@swboyd.mtv.corp.google.com>
2021-11-25 15:01     ` Hans de Goede
     [not found] <CAP7CzPcLhtXDyLudfmR2pWR5fzSQ_jhJSoRheH=cytoDnb_ujg@mail.gmail.com>
2021-09-14 15:37 ` Re: Nick Desaulniers
2021-08-12  9:21 Valdis Klētnieks
2021-08-12  9:42 ` SeongJae Park
2021-08-12 20:19   ` Re: Andrew Morton
2021-08-13  8:14     ` Re: SeongJae Park
2021-07-27  2:59 [PATCH v9] iomap: Support file tail packing Gao Xiang
2021-07-27 15:10 ` Darrick J. Wong
2021-07-27 15:23   ` Andreas Grünbacher
2021-07-27 15:30   ` Re: Gao Xiang
2021-06-06 19:19 Davidlohr Bueso
2021-06-07 16:02 ` André Almeida
2021-04-05  0:01 Mitali Borkar
2021-04-06  7:03 ` Arnd Bergmann
     [not found] <CAPncsNOFoUt7uEDEdihDTZY4pJsuPxt146W-L+Ju53SgZ6ezYw@mail.gmail.com>
     [not found] ` <CAPncsNMWCim1kozMyJaT7_suEnWyGadf1Kg1fzjyWfdGDVMZ3A@mail.gmail.com>
     [not found]   ` <CAPncsNOpMhn=N+9+uC8hx0shRE-5uhvHCmZKJ8X3=aAeja1sag@mail.gmail.com>
2021-03-18  6:51     ` Re: Jarvis Jiang
2021-01-19  0:10 David Howells
2021-01-20 14:46 ` Jarkko Sakkinen
     [not found] <CAGMNF6W8baS_zLYL8DwVsbfPWTP2ohzRB7xutW0X=MUzv93pbA@mail.gmail.com>
2020-12-02 17:09 ` Re: Kun Yi
2020-12-02  1:10 [PATCH] lib/find_bit: Add find_prev_*_bit functions Yun Levi
2020-12-02  9:47 ` Andy Shevchenko
2020-12-02 10:04   ` Rasmus Villemoes
2020-12-02 11:50     ` Yun Levi
     [not found]       ` <CAAH8bW-jUeFVU-0OrJzK-MuGgKJgZv38RZugEQzFRJHSXFRRDA@mail.gmail.com>
2020-12-02 18:22         ` Yun Levi
2020-12-02 21:26           ` Yury Norov
2020-12-02 22:51             ` Yun Levi
2020-12-03  1:23               ` Yun Levi
2020-12-03  8:33                 ` Rasmus Villemoes
2020-12-03  9:47                   ` Re: Yun Levi
2020-12-03 18:46                     ` Re: Yury Norov
2020-12-03 18:52                       ` Re: Willy Tarreau
2020-12-04  1:36                         ` Re: Yun Levi
2020-12-04 18:14                           ` Re: Yury Norov
2020-12-05  0:45                             ` Re: Yun Levi
2020-12-05 11:10                       ` Re: Rasmus Villemoes
2020-12-05 18:20                         ` Re: Yury Norov
2020-08-05 11:02 [PATCH v4] arm64: dts: qcom: Add support for Xiaomi Poco F1 (Beryllium) Amit Pundir
2020-08-06 22:31 ` Konrad Dybcio
2020-08-12 13:37   ` Amit Pundir
2020-06-30 17:56 Vasiliy Kupriakov
2020-07-10 20:36 ` Andy Shevchenko
2020-05-06  5:52 Jiaxun Yang
2020-05-06 17:17 ` Nick Desaulniers
     [not found] <5e7dc543.vYG3wru8B/me1sOV%chenanqing@oppo.com>
2020-03-27 15:53 ` Re: Lee Duncan
     [not found] <5e7dbb10.ulraq/ljeOm297+z%chenanqing@oppo.com>
2020-03-27  8:59 ` Re: Ilya Dryomov
2020-03-03 15:27 Gene Chen
2020-03-04 14:56 ` Matthias Brugger
2020-03-04 15:15   ` Re: Lee Jones
2020-03-04 18:00     ` Re: Matthias Brugger
2020-02-11 22:34 Rajat Jain
2020-02-12  9:30 ` Jarkko Nikula
2020-02-12 10:24   ` Re: Andy Shevchenko
     [not found] <f618ed4d-05ce-75cd-8cd9-24d8fe5a2551@samsung.com>
     [not found] ` <CGME20191105044921epcas1p2869157cceaf45351adf9dd2e59161db7@epcas1p2.samsung.com>
2019-11-05  4:54   ` Re: Chanwoo Choi
2019-10-27 21:36 Re: Margaret Kwan Wing Han
2019-09-24 19:49 Venkat Subbiah
     [not found] <CAK8P3a16=ktJm5B3c5-XS7SqVuHBY5+E2FwVUqbdOdWK-AUgSA@mail.gmail.com>
     [not found] ` <20190830202959.3539-1-msuchanek@suse.de>
2019-08-30 20:32   ` Arnd Bergmann
     [not found] <E1hUrZM-0007qA-Q8@sslproxy01.your-server.de>
2019-05-29 19:54 ` Re: Alex Williamson
2019-05-21  0:06 [PATCH v6 0/3] add new ima hook ima_kexec_cmdline to measure kexec boot cmdline args Prakhar Srivastava
2019-05-21  0:06 ` [PATCH v6 2/3] add a new ima template field buf Prakhar Srivastava
2019-05-24 15:12   ` Mimi Zohar
2019-05-24 15:42     ` Roberto Sassu
2019-05-24 15:47       ` Re: Roberto Sassu
2019-05-24 18:09         ` Re: Mimi Zohar
2019-05-24 19:00           ` Re: prakhar srivastava
2019-05-24 19:15             ` Re: Mimi Zohar
2019-04-12 23:06 RE, Sharifah Ahmad Mustahfa
     [not found] <20190319144116.400-1-mlevitsk@redhat.com>
2019-03-20 11:03 ` Felipe Franciosi
2019-03-20 19:08   ` Re: Maxim Levitsky
2019-03-21 16:12     ` Re: Stefan Hajnoczi
2019-03-21 16:21       ` Re: Keith Busch
2019-03-21 16:41         ` Re: Felipe Franciosi
2019-03-21 17:04           ` Re: Maxim Levitsky
2019-03-22  7:54             ` Re: Felipe Franciosi
2019-03-22 10:32               ` Re: Maxim Levitsky
2019-03-22 15:30               ` Re: Keith Busch
2019-03-25 15:44                 ` Re: Felipe Franciosi
2019-03-13 23:49 RE, LUIS EDUARDO CEPEDA CABRERA
2019-01-07 17:28 [PATCH] arch/arm/mm: Remove duplicate header Souptick Joarder
2019-01-17 11:23 ` Souptick Joarder
2019-01-17 11:28   ` Mike Rapoport
2019-01-31  5:54     ` Souptick Joarder
2019-01-31 12:58       ` Vladimir Murzin
2019-02-01 12:32         ` Re: Souptick Joarder
2019-02-01 12:36           ` Re: Vladimir Murzin
2019-02-01 12:41             ` Re: Souptick Joarder
2019-02-01 13:02               ` Re: Vladimir Murzin
2019-02-01 15:15               ` Re: Russell King - ARM Linux admin
2019-02-01 15:22                 ` Re: Russell King - ARM Linux admin
     [not found] <CAMkWEXP4Mm5x9rdrKn9xRNVm7vxqoL62ftxb+UcJFAiJ+U9X3A@mail.gmail.com>
2018-10-22  0:26 ` Re: Dave Airlie
2018-10-21 20:23   ` Re: Michael Tirado
2018-10-22  1:50     ` Re: Dave Airlie
2018-10-21 22:20       ` Re: Michael Tirado
2018-10-23  1:47       ` Re: Michael Tirado
2018-10-23  6:23         ` Re: Dave Airlie
     [not found] <1530911788-7033-1-git-send-email-santosh.shilimkar@oracle.com>
     [not found] ` <1530911788-7033-3-git-send-email-santosh.shilimkar@oracle.com>
2018-07-06 21:18   ` Re: Santosh Shilimkar
2018-01-11 17:16 Fabian Huegel
2018-01-11 17:25 ` Ben Evans
2017-11-13 14:55 Re: Amos Kalonzo
2017-09-07  8:50 Re: Quick Loan
2017-08-18 19:47 Re: Jessy
2017-07-31 23:46 TD CREDIT
2017-07-15  3:29 Saif Al-Islam
2017-07-07 17:04 Mrs Alice Walton
2017-05-28 13:39 RE: Lasek László
2017-05-03  6:23 H.A
2017-04-28  8:20 Anatolij Gustschin
2017-04-28  8:43 ` Linus Walleij
2017-04-28  9:26   ` Re: Anatolij Gustschin
2017-02-23 15:09 Qin's Yanjun
2017-01-07 14:47 Information
2016-11-15  4:40 Apply
2016-11-08 13:46 vaserman
2016-11-02  2:36 U
2016-07-21 21:50 Amit Jain
2016-07-04 15:47 Re: Mr. Bun Sam
2016-07-02 11:30 Re: Mr. Bun Sam
2016-06-27  8:24 Re: Fidelity Loans
2016-02-10 14:36 Petr Mladek
2016-02-10 14:44 ` Steven Rostedt
2016-02-08  3:11 Qatar Foundation
2016-01-26 20:52 Ms Nadia Mohammed
2016-01-15  2:39 Re: Trust Guarantee
2015-12-18 11:50 Re: 
2015-12-11  9:30 Re: Матвеева Руслана
     [not found] <D0613EBE33E8FD439137DAA95CCF59555B7A5A4D@MGCCCMAIL2010-5.mgccc.cc.ms.us>
2015-11-24 13:21 ` Amis, Ryann
     [not found] <CA+47Ykimr0d9cR35aWoCtm8JoXUYjKFXL0HJ-c=EE_suTAPR8w@mail.gmail.com>
2015-11-07 17:33 ` bbmbbm1
2015-11-07 16:48 Re: Mohammed
2015-10-29  2:40 
2015-10-23 14:46 RE: MajorAlan
2015-10-21  2:26 Mohammed
2015-10-08  8:30 Re BRGF
2015-09-01 16:06 Zariya
2015-09-01 14:14 Mika Penttilä
2015-09-01 15:22 ` Fabio Estevam
2015-09-01 12:01 Re: Zariya
2015-08-19 13:01 Re: christain147
2015-07-24 10:34 Re: Mrs Nadia  Mohammed 
     [not found] <CACy=+DtdZOUT4soNZ=zz+_qhCfM=C8Oa0D5gjRC7QM3nYi4oEw@mail.gmail.com>
2015-07-11 18:37 ` Re: Mustapha Abiola
     [not found] <CAHxZcryF7pNoENh8vpo-uvcEo5HYA5XgkZFWrLEHM5Hhf5ay+Q@mail.gmail.com>
2015-07-05 16:38 ` Re: t0021
     [not found] <E1Yz4NQ-0000Cw-B5@feisty.vs19.net>
2015-05-31 15:37 ` Re: Roman Volkov
2015-05-31 15:53   ` Re: Hans de Goede
2015-05-22  0:17 Re: kontakt
     [not found] <90BA5B564A2E4B4782C6F4398C32EE104E54369A@NHS-PCLI-MBC003.AD1.NHS.NET>
2015-05-21 10:49 ` Ratnakumar Sagana (KING'S COLLEGE HOSPITAL NHS FOUNDATION TRUST)
     [not found] <9E5F73AAFC185F49B0D37FE62E65D6C20724A9D8@XSERVER23A.campus.tue.nl>
2015-05-10 13:03 ` RE: Singer, W.P.
2015-04-21  7:43 Galaxy Darlehen Firma
     [not found] <CAONCqDfSP9DSWwPSDqz4NS6YHmzwMo=6VnRURRAJZLeGE_QKYA@mail.gmail.com>
2015-04-07 18:47 ` Re: Wilson Aggard
2015-04-01 21:56 Re: Globale Trust Company
2015-03-04 10:29 Quentin Lambert
2015-03-04 10:32 ` Quentin Lambert
2014-12-18 18:08 Re: Peter Page
2014-12-01 13:02 Re: Quan Han
2014-11-14 20:49 Re: salim
2014-11-14 18:56 milke
     [not found] <E1XgbTy-00072R-N3@feisty.vs19.net>
2014-10-21 15:48 ` Patrik Lundquist
     [not found] <E1Xf0HT-0005ZQ-OP@feisty.vs19.net>
2014-10-17  5:49 ` Re: Hillf Danton
2014-10-13  6:18 Re: geohughes
     [not found] <5633293EA8BBC640804038866F5D329F0B3A17@mail00.baptist.local>
2014-09-30 17:20 ` Sonya Wright
2014-09-20 19:45 Richard Wong
2014-09-16 14:54 Re: promocion_derechos.isna
     [not found] <AB37FB01B00BF44E85C75F6CFEC35E7D47324643@LPPTCPMXMBX01.LPCH.NET>
2014-09-15 23:42 ` Mandic, Andrew
2014-09-16  0:44 ` RE: Mandic, Andrew
     [not found] <6A286AB51AD8EC4180C4B2E9EF1D0A027AAD7EFF1E@exmb01.wrschool.net>
2014-09-08 16:58 ` RE: Deborah Mayher
2014-08-18 15:38 Mrs. Hajar Vaserman.
     [not found] <E1XFOD5-00007y-8L@feisty.vs19.net>
2014-08-07 14:23 ` Re: Pranith Kumar
2014-07-29  7:17 Re: eye2eye
     [not found] <blk-mq updates>
2014-04-14  8:30 ` Christoph Hellwig
2014-04-15 20:16   ` Jens Axboe
2014-03-10  3:04 Re: inforbonus
2014-03-10  3:01 Re: inforbonus
2014-01-11  2:11 Re: Mr. Jerry Natai
2013-12-30 10:43 st2
2013-12-30  9:06 RE: funds2
2013-12-20 11:49 RE: Unify Loan Company
2013-11-30  3:46 Bin Sumari
2013-11-07 12:09 Re: mypersonalmailbox1
2013-09-03 23:50 Matthew Garrett
2013-09-04 15:53 ` Kees Cook
2013-09-04 16:05   ` Re: Josh Boyer
2013-08-23 18:04 Andreas Werner
2013-08-23 21:10 ` Andy Lutomirski
     [not found] <B719EF0A9FB7A247B5147CD67A83E60E011FEB76D1@EXCH10-MB3.paterson.k12.nj.us>
2013-08-23 10:47 ` Ruiz, Irma
2013-08-07 20:43 Western Union
2013-07-08  4:52 Re: Wesstern Union money Transfer
2013-06-28 10:14 Re: emirates
2013-06-28 10:12 Re: emirates
2013-06-20 12:28 tingwei liu
2013-06-20 12:51 ` Jiri Slaby
2013-06-24  1:43   ` Re: tingwei liu
2013-06-24  8:24     ` Re: Jiri Slaby
     [not found] ` <CA+qZnSSPxO3h0v7An3R7e-HHs+bi4Ua-LE9coJtQL8CFWOHNBA@mail.gmail.com>
2013-06-27  5:12   ` Re: tingwei liu
2013-05-14 13:07 Re: info
2013-04-27 13:20 PRIVATE BUSINESS
2013-04-02 13:29 Mrs Akilah Saeedi
2013-03-26  2:26 Re: Mrs Akilah Saeedi
2013-02-04  0:47 Re: JUMBO PROMO
2013-01-27 21:59 Re: Congjun Yang
2013-01-13 19:58 Re: Michael A. Purwoadi
2012-11-21 14:04 roman
2012-11-21 14:50 ` Alan Cox
2012-10-30  9:19 Re: wumin_tsinghua
2012-10-06 23:15 David Howells
2012-10-07  6:36 ` Geert Uytterhoeven
2012-10-11  9:57   ` Re: Will Deacon
2012-09-04 14:40 [GIT PULL] sound fixes for 3.6-rc5 Takashi Iwai
2012-09-06  6:02 ` Markus Trippelsdorf
2012-09-06  6:33   ` Re: Daniel Mack
2012-09-06  6:45     ` Re: Markus Trippelsdorf
2012-09-06  6:48     ` Re: Takashi Iwai
2012-09-06  6:53       ` Re: Markus Trippelsdorf
2012-08-10  5:32 devendra.aaru
2012-08-10  8:45 ` Linus Walleij
2012-08-10 10:47 ` Re: Bernd Petrovitsch
2012-08-09 13:54 Fengguang Wu
2012-08-09 17:29 ` Mauro Carvalho Chehab
2012-08-06 16:59 anish kumar
2012-08-06 17:05 ` Maarten Lankhorst
2012-07-12 11:43 Re: macckone
2012-06-18  9:44 sakthiperumal karuthasamy
2012-06-18 11:52 `  
2012-05-20 22:20 Re: Mr. Peter Wong
2011-12-13  3:49 Re: Ryan Black
2011-11-22 12:06 Re: Balbir Singh
2011-11-09 11:58 Re: pradeep Annavarapu
2011-11-08  1:58 linux-next: manual merge of the bluetooth tree with Linus tree Stephen Rothwell
2011-11-08  2:26 ` Wu Fengguang
2011-11-08  4:40   ` Stephen Rothwell
2011-10-28 16:03 Re: Young Chang
2011-10-28 15:55 Re: Young Chang
2011-08-21 19:22 Re: jeffrice
2011-08-18 22:07 San Mehat
2011-08-18 22:08 ` San Mehat
2011-08-13 10:59 Mr. Kenneth Williams
2011-08-06 13:23 RE: John Coker
2011-07-22  0:32 Jason Baron
2011-07-22  0:57 ` Paul Turner
2011-05-23  9:11 Re: Young Chang
2011-05-18 15:57 Re: alex zaim
2011-05-06 18:52 Nat Gurumoorthy
2011-05-06 19:13 ` Guenter Roeck
2011-05-06 20:00   ` Re: Natarajan Gurumoorthy
2011-05-01 13:35 Re: lotto
2011-04-10  1:20 Re: Young Chang
2011-04-07 21:00 Re: Tim Peters
2011-02-23  9:18 Irish Online News Center
2011-02-01 16:39 young chang
2010-12-04 21:06 FreeLotto Online Promo
     [not found] <3E0D78C2-CEAF-42C3-9840-20B01AA4EFC7@vsecurity.com>
2010-11-21 18:33 ` Dan J. Rosenberg
2010-11-22 17:02   ` Re: Vasiliy Kulikov
2010-10-14 11:47 Re : World Bank
2010-10-09 17:52 Mr.Young Chang
2010-07-20  0:22 Re: wins
2010-07-17  3:37 Re: SINOPEC OIL AND GAS COMPANY
2010-07-11 21:42 Western Union
2010-07-11 22:23 ` Noah McNallie
     [not found] <7a07eea248913e9f.4c3919f6@access.k12.wv.us>
2010-07-11  0:49 ` Re: tkprice
2010-07-02 20:13 Re: ($10,500,000.00) Donation for Charitable Goals
2010-07-02 19:29 Re: ($10,500,000.00) Donation for Charitable Goals
2010-07-01 16:09 Re ! BRITISH COLUMBIA
2010-07-01 10:49 FUJITA Tomonori
2010-07-01 12:29 ` Jens Axboe
2010-06-14 20:26 [PATCH 0/8] Fix gcc 4.6.0 set but not used warning messages Justin P. Mattock
2010-06-14 20:26 ` [PATCH 7/8]ieee1394/sdp2 Fix warning: variable 'unit_characteristics' set but not used Justin P. Mattock
2010-06-14 21:44   ` [PATCH] ieee1394: sbp2: remove unused code Stefan Richter
2010-06-14 22:35     ` Justin P. Mattock
2010-06-14 23:22       ` Stefan Richter
2010-06-14 23:58         ` Justin P. Mattock
2010-06-13  6:16 Mike Gilks
2010-06-13  8:58 ` Tejun Heo
2010-06-08  4:27 FRL
2010-06-08  4:05 RE: FRL
2010-05-11 22:28 RE: Euro-Millions
     [not found] <20100510223054.luv5qlqdlp28g08o@webmail.wcsd.k12.oh.us>
     [not found] ` <20100510223506.77ylw39bns84c80c@webmail.wcsd.k12.oh.us>
     [not found]   ` <20100510223656.m8nzy8mwqf44g8g8@webmail.wcsd.k12.oh.us>
2010-05-11  4:19     ` Mr. Vincent Hong
2010-05-08  2:56 Promo
2010-05-08  0:01 IRISH NEWS CENTRE
2010-05-07 11:39 Re: William Wilcox
2010-05-07 11:37 Re: William Wilcox
2010-04-14 12:54 Alan Cox
2010-04-14 13:35 ` Jean Delvare
2010-04-02 23:17 RE; Mrs Claire page
2010-03-23  7:50 RE, FROM CENTRAL BANK
2010-03-11 16:40 Monica D.
2010-02-25 13:39 Re; William Wilcox
2010-01-16  1:54 Capt Chris P. Mark
2010-01-13  0:48 Jeff Mahoney
2010-01-13  8:24 ` David Woodhouse
2010-01-09 17:03 Ustin Gavrie
2009-12-19 17:38 OFFICE OF THE SENATE
2009-12-12 16:04 T Dent
2009-12-13  5:55 ` andrew hendry
2009-12-08  6:23 Irish News Center
2009-11-26  1:03 [PATCH 1/2] hw_random: core updates to allow more efficient drivers Matt Mackall
2009-11-26 10:49 ` Ian Molton
2009-11-26 11:38   ` Matt Mackall
2009-11-26 11:48     ` Re: Ian Molton
2009-11-27 22:54       ` Re: Matt Mackall
2009-11-20 13:29 Jerome Glisse
2009-12-01 23:53 ` Dave Airlie
2009-12-02  7:17   ` Re: Thomas Hellstrom
     [not found] <cover.1257602781.git.andre.goddard@gmail.com>
     [not found] ` <7206ef594e67a240a842339f520284de6569b1fc.1257602781.git.andre.goddard@gmail.com>
     [not found]   ` <31525.1257770343@redhat.com>
2009-11-09 15:31     ` Re: André Goddard Rosa
2009-11-05  3:24 Re: Irish News Centre
2009-11-01 17:00 Re: Irish News Centre
2009-10-10 19:13 Irish News Center
2009-09-26 15:22 RE: Irish News Center
2009-09-25 23:13 RE: Irish News Center
2009-06-20 19:45 Kay Sievers
2009-06-21  9:04 ` Takashi Iwai
2009-06-22 12:56 ` Re: David Woodhouse
2009-01-11  3:41 Jose Luis Marchetti
2009-01-11  5:44 ` Cooper Yuan
2008-11-30 11:23 Re: Frank
2008-10-11  7:30 Yudha Harimantoro T
2008-10-11 15:12 ` Bill Davidsen
2008-10-13  6:18   ` Re: Yudha Harimantoro T
2008-10-13  8:29     ` Re: Yudha Harimantoro T
2008-10-13 12:03       ` Re: Alan Jenkins
     [not found] <0K6B0005EN54GNO0@l-daemon>
2008-08-29  0:14 ` Re: Robert Hancock
     [not found] <alpine.LFD.1.10.0807271037190.3486@nehalem.linux-foundation.org>
2008-07-27 22:37 ` Trond Myklebust
2008-07-09 15:47 Mathieu Desnoyers
2008-07-09 16:07 ` Eduard - Gabriel Munteanu
2008-07-09 16:35   ` Re: Mathieu Desnoyers
2008-05-20 12:34 Lukas Hejtmanek
2008-05-20 12:40 ` Oliver Neukum
2008-04-09  8:45 Andreas Grimm
2008-04-10  1:14 ` Lee Revell
2008-02-03 11:13 am kara
2008-02-03 18:23 ` Benny Halevy
2007-11-10  1:18 Luck, Tony
2007-11-10  1:42 ` Eric Dumazet
2007-11-11  5:18   ` Re: David Miller
2007-08-14 23:04 [PATCH 0/24] make atomic_read() behave consistently across all architectures Chris Snook
2007-08-15  6:49 ` Herbert Xu
2007-08-15  8:18   ` Heiko Carstens
2007-08-15 13:53     ` Stefan Richter
2007-08-15 14:35       ` Satyam Sharma
2007-08-15 14:52         ` Herbert Xu
2007-08-15 16:09           ` Stefan Richter
2007-08-15 16:27             ` Paul E. McKenney
2007-08-15 18:31               ` Segher Boessenkool
2007-08-15 18:57                 ` Paul E. McKenney
2007-08-15 19:54                   ` Satyam Sharma
2007-08-15 20:47                     ` Segher Boessenkool
2007-08-16  0:36                       ` Satyam Sharma
2007-08-16  1:38                         ` Segher Boessenkool
2007-08-07 16:34 Brian J. Murrell
2007-08-09 20:33 ` Mark Lord
2007-08-09 21:04   ` Re: Brian J. Murrell
     [not found] <FC1D1B23302A22499C60C967336B2AE00186B15C@pdsmsx411.ccr.corp.intel.com>
2007-07-24 13:40 ` Re: Shaohua Li
2007-02-09  6:29 Priyanka Sharma
2007-02-10  2:41 ` hackmiester (Hunter Fuller)
2006-08-16  9:30 Re: shane
2006-05-16 10:34 Chris Boot
2006-05-16 12:34 ` Arnaldo Carvalho de Melo
2006-03-11  1:00 Re: Alec
2006-03-03 14:54 Re: Kennedy
2006-02-23 12:16 Re: Norberto
2006-02-18 16:04 Re: Donne
2006-02-04 14:33 Re: Ira Jackson 
2006-01-27 10:05 sarat
2006-01-27 10:09 ` Arjan van de Ven
2005-12-31  0:27 system keeps freezing once every 24 hours / random apps crashing Alistair John Strachan
2005-12-31  0:42 ` Mark v Wolher
2005-12-31  0:51   ` Alistair John Strachan
2005-12-31  0:54     ` Mark v Wolher
2005-12-31 10:31       ` Mark v Wolher
2005-12-31 11:08         ` Jesper Juhl
2005-12-31 11:40           ` Mark v Wolher
2005-12-31 11:49             ` Jesper Juhl
2005-12-31 12:46               ` Mark v Wolher
2005-12-31 15:18                 ` Mark v Wolher
2005-12-31 16:34                   ` Sami Farin
2005-12-31 16:48                     ` Mark v Wolher
2006-01-01  2:26                       ` Mark v Wolher
2006-01-01 13:06                         ` Mark v Wolher
2006-01-01 14:47                           ` Mark v Wolher
2006-01-01 18:38                             ` Jiri Slaby
2006-01-01 18:49                               ` Mark v Wolher
2006-01-01 19:12                                 ` Jiri Slaby
2006-01-01 19:37                                   ` Mark v Wolher
2005-12-02 16:03 Yu, Luming
2005-12-02 16:46 ` Dmitry Torokhov
2005-12-02 20:11 ` Re: Miloslav Trmac
2005-11-09 16:13 Nestor Velazquez
2005-11-09 16:17 ` Alejandro Bonilla
2005-09-21 13:20 Robert.Boermans
2005-09-21 13:27 ` Denis Vlasenko
2005-06-28  9:18 d binderman
2005-06-28 11:03 ` Andrew Morton
2005-06-28  9:15 d binderman
2005-06-28 11:00 ` Andrew Morton
     [not found] <360D47F92A8ACCH7@vger.kernel.org>
2005-05-30  2:49 ` Re: radej
2005-05-06 20:23 Edison Giovanny Mendoza
2005-05-06 20:32 ` Alejandro Bonilla
2005-03-25  7:03 Søren Lott
2005-03-25  7:18 ` Jeff Garzik
2005-03-20  5:24 Re: info
2005-03-08 16:32 Peter W. Morreale
2005-03-08 19:32 ` Ross Biro
2005-03-05 10:11 Raffaele Ianniello
2005-03-05 18:14 ` Randy.Dunlap
2005-02-26 14:57 Yong Haynes
2005-02-17 17:14 Deepti Patel
2005-02-17 17:46 ` Matthias-Christian Ott
2005-01-19 14:25 Gmail
2005-01-19 15:22 ` Paolo Ornati
2004-11-08  7:39 Marcelo Tosatti
2004-11-08 11:08 ` Paolo Ciarrocchi
2004-11-08  8:34   ` Re: Marcelo Tosatti
2004-11-08 22:08     ` Re: Guennadi Liakhovetski
2004-09-19 12:29 plt
     [not found] ` <200409191508.33537.Norbert@edusupport.nl>
     [not found]   ` <1095607945.414da6891fc94@webmail.taylorassociate.com>
2004-09-19 16:31     ` Norbert van Nobelen
2004-06-27 14:18 Vinu Moses
2004-06-27 20:14 ` Vinu Moses
2004-03-17 22:03 Kendrick Logan
2004-03-07 20:08 Michael Frank
2004-03-07 20:26 ` John Bradford
2004-02-22 17:51 redzic fadil
2004-02-22 18:48 ` Larry Reaves
2004-02-14 23:17 Re: Alexandr Chernyy
     [not found] <7A25937D23A1E64C8E93CB4A50509C2A0310F099@stca204a.bus.sc.rolm.com>
2004-02-05 17:02 ` Re: Tommy Reynolds
2003-12-11 23:37 Hettinger Tamas
2003-12-12  1:29 ` Jonathan Corbet
2003-12-05 17:36 gmack
2003-12-05 23:16 ` Oliver Hunt
2003-12-03 15:08 Bloch, Jack
2003-12-04  4:56 ` Raj
2003-09-10  2:20 John Yau
2003-09-10  2:31 ` Nick Piggin
2003-08-25 13:53 Marcelo Tosatti
2003-08-25 14:12 ` Nick Piggin
2003-08-12 13:55 Catalin BOIE
2003-08-12 17:05 ` Ian Hastie
2003-07-16 18:36 Sir Ace
2003-07-16 23:10 ` jiho
2003-06-30  3:16 usenet
2003-06-30  8:09 ` Bruce Harada
2003-06-30  8:23   ` Re: Matti Aarnio
2003-06-03 23:51 Justin T. Gibbs
2003-06-03 23:58 ` Marc-Christian Petersen
2003-04-30 21:39 Mauricio Oliveira Carneiro
2003-05-01  0:03 ` Eyal Lebedinsky
     [not found] <001e01c2d9ef$01cdc970$0200a8c0@wsl3>
2003-02-21 21:34 ` Re: b_adlakha
2003-02-08 10:40 Re: Manfred Spraul
2003-01-12 13:28 Philip K.F. Hölzenspies
2003-01-12 17:57 ` Shawn Starr
2002-10-17  7:41 Rusty Russell
2002-10-17 14:49 ` Roman Zippel
2002-10-11  0:11 sridhar vaidyanathan
2002-10-11  0:21 ` Steven Dake
2002-09-29 10:11 Richard Cooper
2002-09-29 17:49 ` David Lloyd
2002-06-08 21:35 tushar  korde
2002-08-21 16:30 ` Daniel Phillips
2002-05-31  8:04 Oliver Pitzeier
2002-05-31 14:37 ` Alan Cox
2002-04-18 11:23 Satish Mohan
2002-04-18 11:35 ` François Cami
2002-04-09 13:25 Kuppuswamy, Priyadarshini
2002-03-31 19:17 mpaa3d
2002-04-01  9:43 ` Vance Lankhaar
2002-02-20 17:55 Torrey Hoffman
2001-12-25 16:17 Manfred Spraul
2001-12-25 19:14 ` Re: Legacy Fishtank
2001-12-25 21:23   ` Re: Kurt Roeckx
2001-12-25 22:03   ` Re: Alan Cox
2002-01-03  0:06   ` Re: David S. Miller
2002-01-03  0:23     ` Re: Alan Cox
2001-12-05 16:05 Romain Giry
2001-12-05 21:25 ` Dipak
2001-12-06 10:43 ` Re: Romain Giry
2001-12-06 11:28   ` Re: Alan Cox
2001-10-15  6:25 Dinesh  Gandhewar
2001-10-15  6:56 ` David Ford
2001-10-15 16:02   ` Re: Timur Tabi
2001-10-02 15:30 Dinesh  Gandhewar
2001-10-09 10:25 ` VDA
2001-10-02 15:29 Dinesh  Gandhewar
2001-10-02 15:23 ` Tommy Reynolds
2001-10-02 15:32 ` Re: Alex Bligh - linux-kernel
2001-08-16 12:18 Re: Saravana
2001-08-14  3:08 Re: Parag Warudkar
2001-08-14  3:17 ` Re: Keith Owens
2001-07-25 18:44 Sumit Bhardwaj
2001-07-25 19:18 ` Matthew M
2001-06-11  4:58 kiran.thirumalai
2001-06-11  6:54 ` Anil Kumar
2001-05-22  4:25 Rajiv Majumdar
2001-05-08 19:48 Richard B. Johnson
2001-05-08 21:33 ` george anzinger
2001-05-09 13:04   ` Re: Richard B. Johnson
2001-05-09 14:10     ` Re: Alan Cox
2001-05-09 16:59       ` Re: george anzinger
2001-05-09 17:15         ` Re: Alan Cox
2001-05-09  0:36 ` Re: Andrew Morton
2001-04-18  0:15 Vibol Hou
2001-04-18  0:26 ` Jaquemet Loic
2001-04-18  0:32   ` Re: Jeff Garzik
2001-04-20  2:47 ` Re: Francois Cami
2001-04-21  1:26   ` Re: Andrew Morton
2001-03-22 18:02 Re: Gunnar Ahlberg
2001-01-19 13:37 Robert Kaiser
2001-01-19 14:33 ` Michael Rothwell

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).