All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
To: Ira Weiny <ira.weiny@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>,
	Alison Schofield <alison.schofield@intel.com>,
	Vishal Verma <vishal.l.verma@intel.com>,
	"Ben Widawsky" <bwidawsk@kernel.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	Davidlohr Bueso <dave@stgolabs.net>,
	<linux-kernel@vger.kernel.org>, <linux-cxl@vger.kernel.org>
Subject: Re: [RFC PATCH 1/9] cxl/mem: Implement Get Event Records command
Date: Thu, 8 Sep 2022 13:52:40 +0100	[thread overview]
Message-ID: <20220908135240.00001217@huawei.com> (raw)
In-Reply-To: <Yxgd51zdrk9pEXE6@iweiny-mobl>


> > > +static int cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
> > > +				   enum cxl_event_log_type type)
> > > +{
> > > +	struct cxl_get_event_payload payload;
> > > +
> > > +	do {
> > > +		u8 log_type = type;
> > > +		u16 record_count;
> > > +		int rc;
> > > +
> > > +		rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_EVENT_RECORD,
> > > +				       &log_type, sizeof(log_type),
> > > +				       &payload, sizeof(payload));
> > > +		if (rc)
> > > +			return rc;
> > > +
> > > +		record_count = le16_to_cpu(payload.record_count);
> > > +		if (record_count > 0)  
> > 
> > If it is anything other than 1 you have a problem..  So fornow
> > I would check for that.  
> 
> I assume you mean if there are any records at all?  For the next version I've
> checked for 1 here but 0 is also valid if there are no records to return.  So
> != 1 is not an error.

Yes, I must meant if (record_count == 1)
for this case..

> 
> (Currently all logs are checked when the event records are queried and some may
> be empty.  I don't plan on trying to distinguish the various interrupts.)
> 
> >   
> > > +			trace_cxl_event(dev_name(cxlds->dev), type,
> > > +					&payload.record);
> > > +
> > > +		if (payload.flags & CXL_GET_EVENT_FLAG_OVERFLOW)
> > > +			trace_cxl_event_overflow(dev_name(cxlds->dev), type,
> > > +						 &payload);
> > > +
> > > +	} while (payload.flags & CXL_GET_EVENT_FLAG_MORE_RECORDS);
> > > +
> > > +	return 0;
> > > +}
> > > +

> > >   * cxl_mem_get_partition_info - Get partition info
> > >   * @cxlds: The device data for the operation
> > > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> > > index 88e3a8e54b6a..f83634f3bc8d 100644
> > > --- a/drivers/cxl/cxlmem.h
> > > +++ b/drivers/cxl/cxlmem.h
> > > @@ -4,6 +4,7 @@
> > >  #define __CXL_MEM_H__
> > >  #include <uapi/linux/cxl_mem.h>
> > >  #include <linux/cdev.h>
> > > +#include <linux/uuid.h>
> > >  #include "cxl.h"
> > >  
> > >  /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
> > > @@ -253,6 +254,7 @@ struct cxl_dev_state {
> > >  enum cxl_opcode {
> > >  	CXL_MBOX_OP_INVALID		= 0x0000,
> > >  	CXL_MBOX_OP_RAW			= CXL_MBOX_OP_INVALID,
> > > +	CXL_MBOX_OP_GET_EVENT_RECORD	= 0x0100,
> > >  	CXL_MBOX_OP_GET_FW_INFO		= 0x0200,
> > >  	CXL_MBOX_OP_ACTIVATE_FW		= 0x0202,
> > >  	CXL_MBOX_OP_GET_SUPPORTED_LOGS	= 0x0400,
> > > @@ -322,6 +324,69 @@ struct cxl_mbox_identify {
> > >  	u8 qos_telemetry_caps;
> > >  } __packed;
> > >  
> > > +/*
> > > + * Common Event Record Format
> > > + * CXL v3.0 section 8.2.9.2.1; Table 8-42
> > > + */
> > > +struct cxl_event_record_hdr {
> > > +	uuid_t id;
> > > +	__le32 flags_length;  
> > 
> > Can you split this into a u8 and
> > u8[3] then use the get_unaligned_le24 accessor
> > where appropriate? Oh for 24bit types ;)  
> 
> Sure!  Another function I did not know about.
> 
> So the following should be correct ordering?
> 
> ...
> 	uuid_t id;
> 	u8 length;
> 	u8 flags[3];
> 	__le16 handle;
> ...
> 
Looks good.

> There are other records which may work better this way too.
> 
> >   
> > > +	__le16 handle;
> > > +	__le16 related_handle;
> > > +	__le64 timestamp;
> > > +	__le64 reserved1;  
> > 
> > As below. Maintenance op from CXL 3.0?  Seems easy
> > to add now rather than needing a change later.  
> 
> Yes I see it now.  Added.
> 
> >   
> > > +	__le64 reserved2;
> > > +} __packed;
> > > +
> > > +#define EVENT_RECORD_DATA_LENGTH 0x50
> > > +struct cxl_event_record_raw {
> > > +	struct cxl_event_record_hdr hdr;
> > > +	u8 data[EVENT_RECORD_DATA_LENGTH];
> > > +} __packed;
> > > +
> > > +/*
> > > + * Get Event Records output payload
> > > + * CXL v3.0 section 8.2.9.2.2; Table 8-50  
> > 
> > r3.0 :) (just drop the v and go with 3.0 would be my preference).  
> 
> Can do.
> 
> >   
> > > + *
> > > + * Space given for 1 record
> > > + */
> > > +#define CXL_GET_EVENT_FLAG_OVERFLOW		BIT(0)
> > > +#define CXL_GET_EVENT_FLAG_MORE_RECORDS	BIT(1)
> > > +struct cxl_get_event_payload {
> > > +	u8 flags;
> > > +	u8 reserved1;
> > > +	__le16 overflow_err_count;
> > > +	__le64 first_overflow_timestamp;
> > > +	__le64 last_overflow_timestamp;
> > > +	__le16 record_count;
> > > +	u8 reserved2[0xa];
> > > +	struct cxl_event_record_raw record;
> > > +} __packed;
> > > +
> > > +enum cxl_event_log_type {
> > > +	CXL_EVENT_TYPE_INFO = 0x00,
> > > +	CXL_EVENT_TYPE_WARN,
> > > +	CXL_EVENT_TYPE_FAIL,
> > > +	CXL_EVENT_TYPE_FATAL,  
> > 
> > Worth putting Dynamic capacity in now? Up to you.  
> 
> Might as well.
> 
> >   
> > > +	CXL_EVENT_TYPE_MAX
> > > +};  
> > 
> > Blank line for readability.  
> 
> Done.
> 
> >   
> > > +static inline char *cxl_event_log_type_str(enum cxl_event_log_type type)
> > > +{
> > > +	switch (type) {
> > > +	case CXL_EVENT_TYPE_INFO:
> > > +		return "Informational";
> > > +	case CXL_EVENT_TYPE_WARN:
> > > +		return "Warning";
> > > +	case CXL_EVENT_TYPE_FAIL:
> > > +		return "Failure";
> > > +	case CXL_EVENT_TYPE_FATAL:
> > > +		return "Fatal";
> > > +	default:
> > > +		break;
> > > +	}
> > > +	return "<unknown>";
> > > +}
> > > +
> > >  struct cxl_mbox_get_partition_info {
> > >  	__le64 active_volatile_cap;
> > >  	__le64 active_persistent_cap;
> > > @@ -381,6 +446,7 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
> > >  struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
> > >  void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
> > >  void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
> > > +void cxl_mem_get_event_records(struct cxl_dev_state *cxlds);
> > >  #ifdef CONFIG_CXL_SUSPEND
> > >  void cxl_mem_active_inc(void);
> > >  void cxl_mem_active_dec(void);
> > > diff --git a/include/trace/events/cxl-events.h b/include/trace/events/cxl-events.h
> > > new file mode 100644
> > > index 000000000000..f4baeae66cf3
> > > --- /dev/null
> > > +++ b/include/trace/events/cxl-events.h
> > > @@ -0,0 +1,127 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#undef TRACE_SYSTEM
> > > +#define TRACE_SYSTEM cxl_events
> > > +
> > > +#if !defined(_CXL_TRACE_EVENTS_H) ||  defined(TRACE_HEADER_MULTI_READ)
> > > +#define _CXL_TRACE_EVENTS_H
> > > +
> > > +#include <linux/tracepoint.h>
> > > +
> > > +#define EVENT_LOGS					\
> > > +	EM(CXL_EVENT_TYPE_INFO,		"Info")		\
> > > +	EM(CXL_EVENT_TYPE_WARN,		"Warning")	\
> > > +	EM(CXL_EVENT_TYPE_FAIL,		"Failure")	\
> > > +	EM(CXL_EVENT_TYPE_FATAL,	"Fatal")	\
> > > +	EMe(CXL_EVENT_TYPE_MAX,		"<undefined>")  
> > 
> > Hmm. 4 is defined in CXL 3.0, but I'd assume we won't use tracepoints for
> > dynamic capacity events so I guess it doesn't matter.  
> 
> I'm not sure why you would say that.  I anticipate some user space daemon
> requiring these events to set things up.

Certainly a possible solution. I'd kind of expect a more hand shake based approach
than a tracepoint.  Guess we'll see :)


> >   
> > > +	{ CXL_EVENT_RECORD_FLAG_PERF_DEGRADED,	"Performance Degraded"		}, \
> > > +	{ CXL_EVENT_RECORD_FLAG_HW_REPLACE,	"Hardware Replacement Needed"	}  \
> > > +)
> > > +
> > > +TRACE_EVENT(cxl_event,
> > > +
> > > +	TP_PROTO(const char *dev_name, enum cxl_event_log_type log,
> > > +		 struct cxl_event_record_raw *rec),
> > > +
> > > +	TP_ARGS(dev_name, log, rec),
> > > +
> > > +	TP_STRUCT__entry(
> > > +		__string(dev_name, dev_name)
> > > +		__field(int, log)
> > > +		__array(u8, id, UUID_SIZE)
> > > +		__field(u32, flags)
> > > +		__field(u16, handle)
> > > +		__field(u16, related_handle)
> > > +		__field(u64, timestamp)
> > > +		__array(u8, data, EVENT_RECORD_DATA_LENGTH)
> > > +		__field(u8, length)  
> > 
> > Do we want the maintenance operation class added in Table 8-42 from CXL 3.0?
> > (only noticed because I happen to have that spec revision open rather than 2.0).  
> 
> Yes done.
> 
> There is some discussion with Dan regarding not decoding anything and letting
> user space take care of it all.  I think this shows a valid reason Dan
> suggested this.

I like being able to print tracepoints with out userspace tools.
This also enforces structure and stability of interface which I like.

Maybe a raw tracepoint or variable length trailing buffer to pass
on what we don't understand?

Jonathan



  reply	other threads:[~2022-09-08 12:52 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-13  5:32 [RFC PATCH 0/9] CXL: Read and clear event logs ira.weiny
2022-08-13  5:32 ` [RFC PATCH 1/9] cxl/mem: Implement Get Event Records command ira.weiny
2022-08-16 16:39   ` Steven Rostedt
2022-08-16 16:41     ` Steven Rostedt
2022-08-16 23:11       ` Ira Weiny
2022-08-16 23:35     ` Ira Weiny
2022-08-17 22:54   ` Dave Jiang
2022-09-07  4:53     ` Ira Weiny
2022-08-24 15:50   ` Jonathan Cameron
2022-09-07  4:28     ` Ira Weiny
2022-09-08 12:52       ` Jonathan Cameron [this message]
2022-09-09 20:53         ` Ira Weiny
2022-09-20 15:49           ` Jonathan Cameron
2022-09-20 20:23             ` Dave Jiang
2022-09-20 22:10               ` Ira Weiny
2022-09-21 16:36                 ` Jonathan Cameron
2022-09-22  4:16                   ` Ira Weiny
2022-08-13  5:32 ` [RFC PATCH 2/9] cxl/mem: Implement Clear " ira.weiny
2022-08-24 15:55   ` Jonathan Cameron
2022-09-09 21:35     ` Ira Weiny
2022-08-13  5:32 ` [RFC PATCH 3/9] cxl/mem: Clear events on driver load ira.weiny
2022-08-24 15:57   ` Jonathan Cameron
2022-08-13  5:32 ` [RFC PATCH 4/9] cxl/mem: Trace General Media Event Record ira.weiny
2022-08-24 16:11   ` Jonathan Cameron
2022-09-12 22:38     ` Ira Weiny
2022-09-20 15:52       ` Jonathan Cameron
2022-08-13  5:32 ` [RFC PATCH 5/9] cxl/mem: Trace DRAM " ira.weiny
2022-08-25 10:46   ` Jonathan Cameron
2022-09-12 23:04     ` Ira Weiny
2022-09-20 16:02       ` Jonathan Cameron
2022-08-13  5:32 ` [RFC PATCH 6/9] cxl/mem: Trace Memory Module " ira.weiny
2022-08-25 10:58   ` Jonathan Cameron
2022-09-14 21:17     ` Ira Weiny
2022-09-20 16:11       ` Jonathan Cameron
2022-08-13  5:32 ` [RFC PATCH 7/9] cxl/test: Add generic mock events ira.weiny
2022-08-25 11:31   ` Jonathan Cameron
2022-09-15 18:53     ` Ira Weiny
2022-09-20 16:17       ` Jonathan Cameron
2022-09-26 21:39         ` Ira Weiny
2022-09-27 13:56           ` Jonathan Cameron
2022-09-27 16:13             ` Ira Weiny
2022-09-28  9:49               ` Jonathan Cameron
2022-08-13  5:32 ` [RFC PATCH 8/9] cxl/test: Add specific events ira.weiny
2022-08-25 11:37   ` Jonathan Cameron
2022-08-13  5:32 ` [RFC PATCH 9/9] cxl/test: Simulate event log overflow ira.weiny
2022-08-16 16:44   ` Steven Rostedt
2022-08-22 16:18 ` [RFC PATCH 0/9] CXL: Read and clear event logs Davidlohr Bueso
2022-08-22 22:53   ` Ira Weiny
2022-08-23 16:12     ` Davidlohr Bueso
2022-08-24 10:07     ` Jonathan Cameron
2022-09-01 18:10       ` Dave Jiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220908135240.00001217@huawei.com \
    --to=jonathan.cameron@huawei.com \
    --cc=alison.schofield@intel.com \
    --cc=bwidawsk@kernel.org \
    --cc=dan.j.williams@intel.com \
    --cc=dave@stgolabs.net \
    --cc=ira.weiny@intel.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=vishal.l.verma@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.