netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH ethtool-next] sff-8636: report LOL / LOS / Tx Fault
@ 2023-06-09  0:44 Jakub Kicinski
  2023-06-09 15:31 ` Andrew Lunn
  2023-06-11  7:39 ` Ido Schimmel
  0 siblings, 2 replies; 7+ messages in thread
From: Jakub Kicinski @ 2023-06-09  0:44 UTC (permalink / raw)
  To: mkubecek
  Cc: danieller, idosch, netdev, vladyslavt, linux, andrew, Jakub Kicinski

Report whether Loss of Lock, of Signal and Tx Faults were detected.
Print "None" in case no lane has the problem, and per-lane "Yes" /
"No"  if at least one of the lanes reports true.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
I was asked if "Linux reports" this information.
I'm not very confident on the merits, I'm going by the spec.
If it makes sense I'll send a similar patch for CMIS.
---
 qsfp.c       | 30 ++++++++++++++++++++++++++++++
 qsfp.h       |  8 ++++++++
 sff-common.c | 17 +++++++++++++++++
 sff-common.h |  2 ++
 4 files changed, 57 insertions(+)

diff --git a/qsfp.c b/qsfp.c
index 1fe5de1a863f..5a535c5c092b 100644
--- a/qsfp.c
+++ b/qsfp.c
@@ -872,6 +872,35 @@ static void sff8636_show_dom(const struct sff8636_memory_map *map)
 	}
 }
 
+static void sff8636_show_signals(const struct sff8636_memory_map *map)
+{
+	unsigned int v;
+
+	/* There appears to be no Rx LOS support bit, use Tx for both */
+	if (map->page_00h[SFF8636_OPTION_4_OFFSET] & SFF8636_O4_TX_LOS) {
+		v = map->lower_memory[SFF8636_LOS_AW_OFFSET] & 0xf;
+		sff_show_lane_status("Rx loss of signal", 4, "Yes", "No", v);
+		v = map->lower_memory[SFF8636_LOS_AW_OFFSET] >> 4;
+		sff_show_lane_status("Tx loss of signal", 4, "Yes", "No", v);
+	}
+
+	v = map->lower_memory[SFF8636_LOL_AW_OFFSET] & 0xf;
+	if (map->page_00h[SFF8636_OPTION_3_OFFSET] & SFF8636_O3_RX_LOL)
+		sff_show_lane_status("Rx loss of lock", 4, "Yes", "No", v);
+
+	v = map->lower_memory[SFF8636_LOL_AW_OFFSET] >> 4;
+	if (map->page_00h[SFF8636_OPTION_3_OFFSET] & SFF8636_O3_TX_LOL)
+		sff_show_lane_status("Tx loss of lock", 4, "Yes", "No", v);
+
+	v = map->lower_memory[SFF8636_FAULT_AW_OFFSET] & 0xf;
+	if (map->page_00h[SFF8636_OPTION_4_OFFSET] & SFF8636_O4_TX_FAULT)
+		sff_show_lane_status("Tx fault", 4, "Yes", "No", v);
+
+	v = map->lower_memory[SFF8636_FAULT_AW_OFFSET] >> 4;
+	if (map->page_00h[SFF8636_OPTION_2_OFFSET] & SFF8636_O2_TX_EQ_AUTO)
+		sff_show_lane_status("Tx adaptive eq fault", 4, "Yes", "No", v);
+}
+
 static void sff8636_show_page_zero(const struct sff8636_memory_map *map)
 {
 	sff8636_show_ext_identifier(map);
@@ -905,6 +934,7 @@ static void sff8636_show_page_zero(const struct sff8636_memory_map *map)
 		       SFF8636_DATE_VENDOR_LOT_OFFSET + 1, "Date code");
 	sff_show_revision_compliance(map->lower_memory,
 				     SFF8636_REV_COMPLIANCE_OFFSET);
+	sff8636_show_signals(map);
 }
 
 static void sff8636_show_all_common(const struct sff8636_memory_map *map)
diff --git a/qsfp.h b/qsfp.h
index aabf09fdc623..b4a0ffe06da1 100644
--- a/qsfp.h
+++ b/qsfp.h
@@ -55,6 +55,8 @@
 #define	 SFF8636_TX2_FAULT_AW	(1 << 1)
 #define	 SFF8636_TX1_FAULT_AW	(1 << 0)
 
+#define	SFF8636_LOL_AW_OFFSET	0x05
+
 /* Module Monitor Interrupt Flags - 6-8 */
 #define	SFF8636_TEMP_AW_OFFSET	0x06
 #define	 SFF8636_TEMP_HALARM_STATUS		(1 << 7)
@@ -525,9 +527,15 @@
 /*  56h-5Fh reserved */
 
 #define	 SFF8636_OPTION_2_OFFSET	0xC1
+/* Tx input equalizers auto-adaptive */
+#define	  SFF8636_O2_TX_EQ_AUTO		(1 << 3)
 /* Rx output amplitude */
 #define	  SFF8636_O2_RX_OUTPUT_AMP	(1 << 0)
 #define	 SFF8636_OPTION_3_OFFSET	0xC2
+/* Rx CDR Loss of Lock */
+#define	  SFF8636_O3_RX_LOL		(1 << 5)
+/* Tx CDR Loss of Lock */
+#define	  SFF8636_O3_TX_LOL		(1 << 4)
 /* Rx Squelch Disable */
 #define	  SFF8636_O3_RX_SQL_DSBL	(1 << 3)
 /* Rx Output Disable capable */
diff --git a/sff-common.c b/sff-common.c
index e951cf15c1d6..a5c1510302a6 100644
--- a/sff-common.c
+++ b/sff-common.c
@@ -53,6 +53,23 @@ void sff_show_ascii(const __u8 *id, unsigned int first_reg,
 	printf("\n");
 }
 
+void sff_show_lane_status(const char *name, unsigned int lane_cnt,
+			  const char *yes, const char *no, unsigned int value)
+{
+	printf("\t%-41s : ", name);
+	if (!value) {
+		printf("None\n");
+		return;
+	}
+
+	printf("[");
+	while (lane_cnt--) {
+		printf(" %s%c", value & 1 ? yes : no, lane_cnt ? ',': ' ');
+		value >>= 1;
+	}
+	printf("]\n");
+}
+
 void sff8024_show_oui(const __u8 *id, int id_offset)
 {
 	printf("\t%-41s : %02x:%02x:%02x\n", "Vendor OUI",
diff --git a/sff-common.h b/sff-common.h
index dd12dda7bbce..57bcc4a415fe 100644
--- a/sff-common.h
+++ b/sff-common.h
@@ -198,6 +198,8 @@ void sff_show_value_with_unit(const __u8 *id, unsigned int reg,
 			      const char *unit);
 void sff_show_ascii(const __u8 *id, unsigned int first_reg,
 		    unsigned int last_reg, const char *name);
+void sff_show_lane_status(const char *name, unsigned int lane_cnt,
+			  const char *yes, const char *no, unsigned int value);
 void sff_show_thresholds(struct sff_diags sd);
 
 void sff8024_show_oui(const __u8 *id, int id_offset);
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH ethtool-next] sff-8636: report LOL / LOS / Tx Fault
  2023-06-09  0:44 [PATCH ethtool-next] sff-8636: report LOL / LOS / Tx Fault Jakub Kicinski
@ 2023-06-09 15:31 ` Andrew Lunn
  2023-06-09 16:18   ` Russell King (Oracle)
  2023-06-11  7:39 ` Ido Schimmel
  1 sibling, 1 reply; 7+ messages in thread
From: Andrew Lunn @ 2023-06-09 15:31 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: mkubecek, danieller, idosch, netdev, vladyslavt, linux

> I was asked if "Linux reports" this information.

Linux does, in /sys/kernel/debug/sfpX/status. It gives your the GPIOs
value.

But this assumes you are actually using Linux to drive the SFP.

	Andrew

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH ethtool-next] sff-8636: report LOL / LOS / Tx Fault
  2023-06-09 15:31 ` Andrew Lunn
@ 2023-06-09 16:18   ` Russell King (Oracle)
  2023-06-09 16:36     ` Jakub Kicinski
  0 siblings, 1 reply; 7+ messages in thread
From: Russell King (Oracle) @ 2023-06-09 16:18 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Jakub Kicinski, mkubecek, danieller, idosch, netdev, vladyslavt

On Fri, Jun 09, 2023 at 05:31:42PM +0200, Andrew Lunn wrote:
> > I was asked if "Linux reports" this information.
> 
> Linux does, in /sys/kernel/debug/sfpX/status. It gives your the GPIOs
> value.

That's for SFPs, but I didn't work out a sane way to drive QSFPs. I did
make a start on the basic bare bones, and it would be nice to put more
time into it, but I need a greater understanding of how setups with
quad interfaces are modelled in the kernel - and that's something I
very little experience of.

My mental stumbling block is that quad interfaces seem to act as either
four separate network interfaces, or I think the lanes can be combined
to double or quadruple the data bandwidth. How this looks from the
firmware description perspective (in either DT or elsewhere) I don't
know. Can they be dynamically changed too?

It's relevant because QSFPs give status per individual lanes, so
anything bridging between the QSFP and a set of MACs needs to know
which lanes are associated with which MACs.

One of the SolidRun platforms I have does have a QSFP cage, and that's
where I made a start, but I very much feel that the canvas is blank
about how this should be modelled.

Now, as for SFPs, there's... the hardware signal state and software
signal state. The debugfs file you point at is merely for debug, it's
not an API (it's in debugfs!) We do report there the hardware and
software state. Whether we should have a proper API for that
information, and whether we should be able to reset the tx fault
state once it's happened too many times and we've locked out...
so far I don't think anyone has got to that point though.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH ethtool-next] sff-8636: report LOL / LOS / Tx Fault
  2023-06-09 16:18   ` Russell King (Oracle)
@ 2023-06-09 16:36     ` Jakub Kicinski
  0 siblings, 0 replies; 7+ messages in thread
From: Jakub Kicinski @ 2023-06-09 16:36 UTC (permalink / raw)
  To: Russell King (Oracle)
  Cc: Andrew Lunn, mkubecek, danieller, idosch, netdev, vladyslavt

On Fri, 9 Jun 2023 17:18:27 +0100 Russell King (Oracle) wrote:
> My mental stumbling block is that quad interfaces seem to act as either
> four separate network interfaces, or I think the lanes can be combined
> to double or quadruple the data bandwidth. How this looks from the
> firmware description perspective (in either DT or elsewhere) I don't
> know. Can they be dynamically changed too?

Last question I think I can answer at least from the uAPI perspective -
yes, via the devlink port split API. There's also an ethtool API for
configuring how many lanes are used within a single MAC (e.g. 2x25G vs
1x50G).

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH ethtool-next] sff-8636: report LOL / LOS / Tx Fault
  2023-06-09  0:44 [PATCH ethtool-next] sff-8636: report LOL / LOS / Tx Fault Jakub Kicinski
  2023-06-09 15:31 ` Andrew Lunn
@ 2023-06-11  7:39 ` Ido Schimmel
  2023-06-12 15:48   ` Jakub Kicinski
  1 sibling, 1 reply; 7+ messages in thread
From: Ido Schimmel @ 2023-06-11  7:39 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: mkubecek, danieller, netdev, vladyslavt, linux, andrew

On Thu, Jun 08, 2023 at 05:44:00PM -0700, Jakub Kicinski wrote:
> Report whether Loss of Lock, of Signal and Tx Faults were detected.
> Print "None" in case no lane has the problem, and per-lane "Yes" /
> "No"  if at least one of the lanes reports true.
      ^ double space

> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
> I was asked if "Linux reports" this information.
> I'm not very confident on the merits, I'm going by the spec.
> If it makes sense I'll send a similar patch for CMIS.

I have two AOC connected to each other. When both are up:

# ethtool -m swp13 | grep "Rx loss of signal" -A 4
        Rx loss of signal                         : None
        Tx loss of signal                         : None
        Rx loss of lock                           : None
        Tx loss of lock                           : None
        Tx adaptive eq fault                      : None

When I bring the other side down:

# ip link set dev swp14 down
# ethtool -m swp13 | grep "Rx loss of signal" -A 4
        Rx loss of signal                         : [ Yes, Yes, Yes, Yes ]
        Tx loss of signal                         : None
        Rx loss of lock                           : [ Yes, Yes, Yes, Yes ]
        Tx loss of lock                           : None
        Tx adaptive eq fault                      : None

When I bring the interface itself down:

# ip link set dev swp13 down
# ethtool -m swp13 | grep "Rx loss of signal" -A 4
        Rx loss of signal                         : [ Yes, Yes, Yes, Yes ]
        Tx loss of signal                         : [ Yes, Yes, Yes, Yes ]
        Rx loss of lock                           : [ Yes, Yes, Yes, Yes ]
        Tx loss of lock                           : [ Yes, Yes, Yes, Yes ]
        Tx adaptive eq fault                      : [ Yes, Yes, Yes, Yes ]

And I don't see these fields on PC:

# ethtool -m swp3 | grep "Rx loss of signal" -A 4

See one comment below.

[...]

> diff --git a/qsfp.h b/qsfp.h
> index aabf09fdc623..b4a0ffe06da1 100644
> --- a/qsfp.h
> +++ b/qsfp.h
> @@ -55,6 +55,8 @@
>  #define	 SFF8636_TX2_FAULT_AW	(1 << 1)
>  #define	 SFF8636_TX1_FAULT_AW	(1 << 0)
>  
> +#define	SFF8636_LOL_AW_OFFSET	0x05
> +
>  /* Module Monitor Interrupt Flags - 6-8 */
>  #define	SFF8636_TEMP_AW_OFFSET	0x06
>  #define	 SFF8636_TEMP_HALARM_STATUS		(1 << 7)
> @@ -525,9 +527,15 @@
>  /*  56h-5Fh reserved */
>  
>  #define	 SFF8636_OPTION_2_OFFSET	0xC1
> +/* Tx input equalizers auto-adaptive */
> +#define	  SFF8636_O2_TX_EQ_AUTO		(1 << 3)
>  /* Rx output amplitude */
>  #define	  SFF8636_O2_RX_OUTPUT_AMP	(1 << 0)
>  #define	 SFF8636_OPTION_3_OFFSET	0xC2
> +/* Rx CDR Loss of Lock */
> +#define	  SFF8636_O3_RX_LOL		(1 << 5)
> +/* Tx CDR Loss of Lock */
> +#define	  SFF8636_O3_TX_LOL		(1 << 4)

I'm looking at revision 2.10a and bit 4 is Rx while bit 5 is Tx.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH ethtool-next] sff-8636: report LOL / LOS / Tx Fault
  2023-06-11  7:39 ` Ido Schimmel
@ 2023-06-12 15:48   ` Jakub Kicinski
  2023-06-12 18:37     ` Ido Schimmel
  0 siblings, 1 reply; 7+ messages in thread
From: Jakub Kicinski @ 2023-06-12 15:48 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: mkubecek, danieller, netdev, vladyslavt, linux, andrew

On Sun, 11 Jun 2023 10:39:27 +0300 Ido Schimmel wrote:
> > I was asked if "Linux reports" this information.
> > I'm not very confident on the merits, I'm going by the spec.
> > If it makes sense I'll send a similar patch for CMIS.  
> 
> I have two AOC connected to each other. When both are up:
> 
> # ethtool -m swp13 | grep "Rx loss of signal" -A 4
>         Rx loss of signal                         : None
>         Tx loss of signal                         : None
>         Rx loss of lock                           : None
>         Tx loss of lock                           : None
>         Tx adaptive eq fault                      : None
> 
> When I bring the other side down:
> 
> # ip link set dev swp14 down
> # ethtool -m swp13 | grep "Rx loss of signal" -A 4
>         Rx loss of signal                         : [ Yes, Yes, Yes, Yes ]
>         Tx loss of signal                         : None
>         Rx loss of lock                           : [ Yes, Yes, Yes, Yes ]
>         Tx loss of lock                           : None
>         Tx adaptive eq fault                      : None
> 
> When I bring the interface itself down:
> 
> # ip link set dev swp13 down
> # ethtool -m swp13 | grep "Rx loss of signal" -A 4
>         Rx loss of signal                         : [ Yes, Yes, Yes, Yes ]
>         Tx loss of signal                         : [ Yes, Yes, Yes, Yes ]
>         Rx loss of lock                           : [ Yes, Yes, Yes, Yes ]
>         Tx loss of lock                           : [ Yes, Yes, Yes, Yes ]
>         Tx adaptive eq fault                      : [ Yes, Yes, Yes, Yes ]
> 
> And I don't see these fields on PC:
> 
> # ethtool -m swp3 | grep "Rx loss of signal" -A 4

IOW works as expected, am I interpreting this correctly?

> > diff --git a/qsfp.h b/qsfp.h
> > index aabf09fdc623..b4a0ffe06da1 100644
> > --- a/qsfp.h
> > +++ b/qsfp.h
> > @@ -55,6 +55,8 @@
> >  #define	 SFF8636_TX2_FAULT_AW	(1 << 1)
> >  #define	 SFF8636_TX1_FAULT_AW	(1 << 0)
> >  
> > +#define	SFF8636_LOL_AW_OFFSET	0x05
> > +
> >  /* Module Monitor Interrupt Flags - 6-8 */
> >  #define	SFF8636_TEMP_AW_OFFSET	0x06
> >  #define	 SFF8636_TEMP_HALARM_STATUS		(1 << 7)
> > @@ -525,9 +527,15 @@
> >  /*  56h-5Fh reserved */
> >  
> >  #define	 SFF8636_OPTION_2_OFFSET	0xC1
> > +/* Tx input equalizers auto-adaptive */
> > +#define	  SFF8636_O2_TX_EQ_AUTO		(1 << 3)
> >  /* Rx output amplitude */
> >  #define	  SFF8636_O2_RX_OUTPUT_AMP	(1 << 0)
> >  #define	 SFF8636_OPTION_3_OFFSET	0xC2
> > +/* Rx CDR Loss of Lock */
> > +#define	  SFF8636_O3_RX_LOL		(1 << 5)
> > +/* Tx CDR Loss of Lock */
> > +#define	  SFF8636_O3_TX_LOL		(1 << 4)  
> 
> I'm looking at revision 2.10a and bit 4 is Rx while bit 5 is Tx.

Ugh, you're right, thanks.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH ethtool-next] sff-8636: report LOL / LOS / Tx Fault
  2023-06-12 15:48   ` Jakub Kicinski
@ 2023-06-12 18:37     ` Ido Schimmel
  0 siblings, 0 replies; 7+ messages in thread
From: Ido Schimmel @ 2023-06-12 18:37 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: mkubecek, danieller, netdev, vladyslavt, linux, andrew

On Mon, Jun 12, 2023 at 08:48:37AM -0700, Jakub Kicinski wrote:
> IOW works as expected, am I interpreting this correctly?

Yes, LGTM. I should be able to test CMIS patches as well. I have both PC
and AOC QSFP-DDs.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-06-12 18:38 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-09  0:44 [PATCH ethtool-next] sff-8636: report LOL / LOS / Tx Fault Jakub Kicinski
2023-06-09 15:31 ` Andrew Lunn
2023-06-09 16:18   ` Russell King (Oracle)
2023-06-09 16:36     ` Jakub Kicinski
2023-06-11  7:39 ` Ido Schimmel
2023-06-12 15:48   ` Jakub Kicinski
2023-06-12 18:37     ` Ido Schimmel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).