All of lore.kernel.org
 help / color / mirror / Atom feed
* [Accel-config] Re: [PATCH 1/2] accel-config: Add APIs to get details of last error and associated devices
@ 2021-07-12 20:26 Dave Jiang
  0 siblings, 0 replies; 4+ messages in thread
From: Dave Jiang @ 2021-07-12 20:26 UTC (permalink / raw)
  To: accel-config

[-- Attachment #1: Type: text/plain, Size: 15081 bytes --]


On 7/9/2021 10:47 AM, ramesh.thomas(a)intel.com wrote:
> From: Ramesh Thomas <ramesh.thomas(a)intel.com>
>
> Capture a snapshot of error condition containing cmd_status and the
> associated devices. Provide APIs to return the captured details. Handles
> both hw and sw error codes stored in sysfs cmd_status field of devices.
> Following APIs are added:
> - accfg_ctx_get_last_error;
> - accfg_ctx_get_last_error_str;
> - accfg_ctx_get_last_error_device;
> - accfg_ctx_get_last_error_wq;
> - accfg_ctx_get_last_error_group;
> - accfg_ctx_get_last_error_engine;
>
> Signed-off-by: Ramesh Thomas <ramesh.thomas(a)intel.com>
> ---
>   accfg/lib/libaccel-config.sym |  10 +++
>   accfg/lib/libaccfg.c          | 156 ++++++++++++++++++++++++++++++----
>   accfg/lib/private.h           |   9 ++
>   accfg/libaccel_config.h       |   9 +-
>   4 files changed, 164 insertions(+), 20 deletions(-)
>
> diff --git a/accfg/lib/libaccel-config.sym b/accfg/lib/libaccel-config.sym
> index aecc98b..679c597 100644
> --- a/accfg/lib/libaccel-config.sym
> +++ b/accfg/lib/libaccel-config.sym
> @@ -149,3 +149,13 @@ LIBACCFG_9 {
>   global:
>   	accfg_device_get_op_cap;
>   } LIBACCFG_8;
> +
> +LIBACCFG_10 {
> +global:
> +	accfg_ctx_get_last_error;
> +	accfg_ctx_get_last_error_str;
> +	accfg_ctx_get_last_error_device;
> +	accfg_ctx_get_last_error_wq;
> +	accfg_ctx_get_last_error_group;
> +	accfg_ctx_get_last_error_engine;
> +} LIBACCFG_9;
> diff --git a/accfg/lib/libaccfg.c b/accfg/lib/libaccfg.c
> index f53751e..00314b0 100644
> --- a/accfg/lib/libaccfg.c
> +++ b/accfg/lib/libaccfg.c
> @@ -70,6 +70,7 @@ ACCFG_EXPORT char *accfg_mdev_basenames[] = {
>   
>   enum {
>   	ACCFG_CMD_STATUS_MAX = 0x45,
> +	ACCFG_CMD_STATUS_ERROR = 0x80010000,
>   };
>   
>   const char *accfg_device_cmd_status[] = {
> @@ -105,9 +106,43 @@ const char *accfg_device_cmd_status[] = {
>   	[0x42]	= "No interrupt handle available",
>   	[0x43]	= "No interrupt handles associated with the index",
>   	[0x44]	= "No revoked handles associted with the index",
> -	[ACCFG_CMD_STATUS_MAX]	= "",
> +	[ACCFG_CMD_STATUS_MAX]	= "Unknown error",
>   };
>   
> +const char *accfg_sw_cmd_status[] = {
> +	[0x01] = "Error reading cmd_status",
> +	[0x02] = "DMA device registration error",
> +	[0x03] = "wq error - group not set",
> +	[0x04] = "wq error - name not set",
> +	[0x05] = "wq error - no SVM support and needed",
> +	[0x06] = "wq error - threshold not set",
> +	[0x07] = "wq portal mapping error",
> +	[0x08] = "wq resource allocation error",
> +	[0x09] = "wq percpu counter allocation error",
> +	[0x0a] = "wq DMA channel registration error",
> +	[0x0b] = "wq char dev registration error",
> +	[0x0c] = "wq error - no shared wq support",
> +	[0x0d] = "wq error - no wqs configured",
> +	[0x0e] = "wq error - size not set",
> +};
> +
> +static void save_last_error(struct accfg_device *device, struct accfg_wq *wq,
> +		struct accfg_group *group, struct accfg_engine *engine)
> +{
> +	struct accfg_ctx *ctx = device->ctx;
> +
> +	if (!ctx->error_ctx)
> +		ctx->error_ctx = calloc(1, sizeof(struct accfg_error_ctx));
> +
> +	if (ctx->error_ctx) {
> +		ctx->error_ctx->cmd_status = accfg_device_get_cmd_status(device);
> +		ctx->error_ctx->device = device;
> +		ctx->error_ctx->wq = wq;
> +		ctx->error_ctx->group = group;
> +		ctx->error_ctx->engine = engine;
> +	}
> +}
> +
>   static inline bool is_mdev_registered(struct accfg_device *device)
>   {
>   	return device->mdev_path && !access(device->mdev_path, R_OK);
> @@ -251,6 +286,7 @@ static void free_context(struct accfg_ctx *ctx)
>   
>   	list_for_each_safe(&ctx->devices, device, _b, list)
>   		free_device(device, &ctx->devices);
> +	free(ctx->error_ctx);
>   	free(ctx);
>   }
>   
> @@ -1082,6 +1118,7 @@ ACCFG_EXPORT int accfg_create_mdev(struct accfg_device *device,
>   	rc = sysfs_write_attr(ctx, mdev_path, uuid_str);
>   	if (rc < 0) {
>   		err(ctx, "create mdev failed %d\n", rc);
> +		save_last_error(device, NULL, NULL, NULL);
>   		goto create_err;
>   	}
>   
> @@ -1105,8 +1142,10 @@ static int accfg_device_mdev_remove(struct accfg_device *device,
>   	uuid_unparse(mdev->uuid, uuid_str);
>   	sprintf(mdev_path, "%s/%s/remove", device->mdev_path, uuid_str);
>   	rc = sysfs_write_attr(ctx, mdev_path, "1");
> -	if (rc < 0)
> +	if (rc < 0) {
> +		save_last_error(device, NULL, NULL, NULL);
>   		return rc;
> +	}
>   
>   	list_del(&mdev->list);
>   	free(mdev);
> @@ -1355,6 +1394,7 @@ ACCFG_EXPORT enum accfg_device_state accfg_device_get_state(
>   		err(ctx, "%s: sysfs_read_attr failed '%s': %s\n",
>   				__func__, device->device_path,
>   				strerror(errno));
> +		save_last_error(device, NULL, NULL, NULL);
>   		return ACCFG_DEVICE_UNKNOWN;
>   	}
>   
> @@ -1414,6 +1454,7 @@ ACCFG_EXPORT int accfg_device_get_clients(struct accfg_device *device)
>   	if (sysfs_read_attr(ctx, path, buf) < 0) {
>   		err(ctx, "%s: retrieve clients failed '%s': %s\n",
>   				__func__, path, strerror(errno));
> +		save_last_error(device, NULL, NULL, NULL);
>   		return -errno;
>   	}
>   
> @@ -1448,6 +1489,7 @@ ACCFG_EXPORT int accfg_device_set_token_limit(struct accfg_device *dev, int val)
>   	if (sysfs_write_attr(ctx, path, buf) < 0) {
>   		err(ctx, "%s: write failed: %s\n",
>   				accfg_device_get_devname(dev), strerror(errno));
> +		save_last_error(dev, NULL, NULL, NULL);
>   		return -errno;
>   	}
>   
> @@ -1476,8 +1518,10 @@ ACCFG_EXPORT int accfg_device_is_active(struct accfg_device *device)
>   		return 0;
>   	}
>   
> -	if (sysfs_read_attr(ctx, path, buf) < 0)
> +	if (sysfs_read_attr(ctx, path, buf) < 0) {
> +		save_last_error(device, NULL, NULL, NULL);
>   		return 0;
> +	}
>   
>   	if (strcmp(buf, "enabled") == 0)
>   		return 1;
> @@ -1485,17 +1529,16 @@ ACCFG_EXPORT int accfg_device_is_active(struct accfg_device *device)
>   	return 0;
>   }
>   
> -ACCFG_EXPORT int accfg_device_get_cmd_status(struct accfg_device *device)
> +ACCFG_EXPORT unsigned int accfg_device_get_cmd_status(struct accfg_device *device)
>   {
>   	struct accfg_ctx *ctx;
>   	long status;
>   	char *path;
>   	int len;
>   	char buf[SYSFS_ATTR_SIZE], *end_ptr;
> -	int rc;
>   
>   	if (!device)
> -		return -EINVAL;
> +		return ACCFG_CMD_STATUS_ERROR;
>   
>   	ctx = accfg_device_get_ctx(device);
>   	path = device->device_buf;
> @@ -1504,31 +1547,96 @@ ACCFG_EXPORT int accfg_device_get_cmd_status(struct accfg_device *device)
>   	if (snprintf(path, len, "%s/cmd_status", device->device_path) >= len) {
>   		err(ctx, "%s: buffer too small!\n",
>   				accfg_device_get_devname(device));
> -		return -ENOMEM;
> +		return ACCFG_CMD_STATUS_ERROR;
>   	}
>   
> -	rc = sysfs_read_attr(ctx, path, buf);
> -	if (rc < 0)
> -		return rc;
> +	if (sysfs_read_attr(ctx, path, buf))
> +		return ACCFG_CMD_STATUS_ERROR;
>   
>   	status = strtol(buf, &end_ptr, 0);
>   	if (errno == ERANGE || end_ptr == buf)
> -		return -EIO;
> +		return ACCFG_CMD_STATUS_ERROR;
>   
> -	return (int)status;
> +	return (unsigned int)status;
>   }
>   
> -ACCFG_EXPORT const char * accfg_device_get_cmd_status_str(struct accfg_device *device)
> +static const char *get_cmd_status_str(unsigned int status)
>   {
> -	int status;
> +	unsigned int sw_status;
>   
> -	status = accfg_device_get_cmd_status(device);
> -	if (status < 0 || status >= ACCFG_CMD_STATUS_MAX)
> -		return NULL;
> +	if (status & 0x80000000) {
> +		sw_status = (status & ~0x80000000) >> 16;


Magic number. Do I need to add a mask define to the header?

> +		if (sw_status) {
> +			if (sw_status >= (sizeof(accfg_sw_cmd_status) /
> +						sizeof(char *))) {
> +				status = ACCFG_CMD_STATUS_MAX;
> +				goto ext;
> +			}
> +			return accfg_sw_cmd_status[sw_status];
> +		}
> +		status &= ~0x80000000;
> +	}
> +
> +	if (status > ACCFG_CMD_STATUS_MAX)
> +		status = ACCFG_CMD_STATUS_MAX;
>   
> +ext:
>   	return accfg_device_cmd_status[status];
>   }
>   
> +ACCFG_EXPORT const char *accfg_device_get_cmd_status_str(struct accfg_device *device)
> +{
> +	return get_cmd_status_str(accfg_device_get_cmd_status(device));
> +}
> +
> +ACCFG_EXPORT unsigned int accfg_ctx_get_last_error(struct accfg_ctx *ctx)
> +{
> +	if (ctx->error_ctx)
> +		return ctx->error_ctx->cmd_status;
> +
> +	return 0;
> +}
> +
> +ACCFG_EXPORT const char *accfg_ctx_get_last_error_str(struct accfg_ctx *ctx)
> +{
> +	return get_cmd_status_str(accfg_ctx_get_last_error(ctx));
> +}
> +
> +ACCFG_EXPORT struct accfg_device *accfg_ctx_get_last_error_device(
> +		struct accfg_ctx *ctx)
> +{
> +	if (ctx->error_ctx)
> +		return ctx->error_ctx->device;
> +
> +	return NULL;
> +}
> +
> +ACCFG_EXPORT struct accfg_wq *accfg_ctx_get_last_error_wq(struct accfg_ctx *ctx)
> +{
> +	if (ctx->error_ctx)
> +		return ctx->error_ctx->wq;
> +
> +	return NULL;
> +}
> +
> +ACCFG_EXPORT struct accfg_group *accfg_ctx_get_last_error_group(
> +		struct accfg_ctx *ctx)
> +{
> +	if (ctx->error_ctx)
> +		return ctx->error_ctx->group;
> +
> +	return NULL;
> +}
> +
> +ACCFG_EXPORT struct accfg_engine *accfg_ctx_get_last_error_engine(
> +		struct accfg_ctx *ctx)
> +{
> +	if (ctx->error_ctx)
> +		return ctx->error_ctx->engine;
> +
> +	return NULL;
> +}
> +
>   /* Helper function to validate device type in the defined device array based on
>    * device name */
>   ACCFG_EXPORT int accfg_device_type_validate(const char *dev_name)
> @@ -1632,6 +1740,7 @@ static int accfg_device_control(struct accfg_device *device,
>   		rc = sysfs_write_attr(ctx, path, accfg_device_get_devname(device));
>   		free(path);
>   		if (rc < 0) {
> +			save_last_error(device, NULL, NULL, NULL);
>   			return rc;
>   		}
>   	}
> @@ -1721,8 +1830,10 @@ ACCFG_EXPORT uint64_t accfg_group_get_available_size(
>   		return ULLONG_MAX;
>   	}
>   
> -	if (sysfs_read_attr(ctx, path, buf) < 0)
> +	if (sysfs_read_attr(ctx, path, buf) < 0) {
> +		save_last_error(group->device, NULL, group, NULL);
>   		return ULLONG_MAX;
> +	}
>   
>   	return strtoull(buf, NULL, 0);
>   }
> @@ -1748,6 +1859,7 @@ ACCFG_EXPORT int accfg_group_set_##field( \
>   		err(ctx, "%s: write failed: %s\n", \
>   				accfg_group_get_devname(group), \
>   				strerror(errno)); \
> +		save_last_error(group->device, NULL, group, NULL); \
>   		return -errno; \
>   	} \
>   	group->field = val; \
> @@ -1906,6 +2018,7 @@ ACCFG_EXPORT int accfg_wq_get_clients(struct accfg_wq *wq)
>   	if (sysfs_read_attr(ctx, path, buf) < 0) {
>   		err(ctx, "%s: retrieve clients failed: '%s': %s\n",
>   				__func__, wq->wq_path, strerror(errno));
> +		save_last_error(wq->device, wq, NULL, NULL);
>   		return -errno;
>   	}
>   
> @@ -1993,6 +2106,7 @@ static int accfg_wq_control(struct accfg_wq *wq, enum accfg_control_flag flag,
>   	if (path) {
>   		rc = sysfs_write_attr(ctx, path, wq_name);
>   		if (rc < 0) {
> +			save_last_error(wq->device, wq, NULL, NULL);
>   			free(path);
>   			return rc;
>   		}
> @@ -2037,6 +2151,7 @@ ACCFG_EXPORT enum accfg_wq_state accfg_wq_get_state(struct accfg_wq *wq)
>   	if (sysfs_read_attr(ctx, path, read_state) < 0) {
>   		err(ctx, "%s: sysfs_read_attr failed '%s': %s\n",
>   				__func__, wq->wq_path, strerror(errno));
> +		save_last_error(wq->device, wq, NULL, NULL);
>   		return ACCFG_WQ_UNKNOWN;
>   	}
>   
> @@ -2162,6 +2277,7 @@ ACCFG_EXPORT int accfg_wq_set_##field( \
>   		err(ctx, "%s: write failed: %s\n", \
>   				accfg_wq_get_devname(wq), \
>   				strerror(errno)); \
> +		save_last_error(wq->device, wq, NULL, NULL); \
>   		return -errno; \
>   	} \
>   	wq->field = val; \
> @@ -2200,6 +2316,7 @@ ACCFG_EXPORT int accfg_wq_set_##field( \
>   		err(ctx, "%s: write failed: %s\n", \
>   				accfg_wq_get_devname(wq), \
>   				strerror(errno)); \
> +		save_last_error(wq->device, wq, NULL, NULL); \
>   		return -errno; \
>   	} \
>   	wq->field = val; \
> @@ -2235,6 +2352,7 @@ ACCFG_EXPORT int accfg_wq_set_str_##field( \
>   		err(ctx, "%s: write failed: %s\n", \
>   				accfg_wq_get_devname(wq), \
>   				strerror(errno)); \
> +		save_last_error(wq->device, wq, NULL, NULL); \
>   		return -errno; \
>   	} \
>   	if (wq->field) \
> @@ -2273,6 +2391,7 @@ ACCFG_EXPORT int accfg_wq_set_str_type(struct accfg_wq *wq, const char *val)
>   		err(ctx, "%s: write failed: %s\n",
>   				accfg_wq_get_devname(wq),
>   				strerror(errno));
> +		save_last_error(wq->device, wq, NULL, NULL);
>   		return -errno;
>   	}
>   
> @@ -2381,6 +2500,7 @@ ACCFG_EXPORT int accfg_engine_set_##field( \
>   		err(ctx, "%s: write failed: %s\n", \
>   				accfg_engine_get_devname(engine), \
>   				strerror(errno)); \
> +		save_last_error(engine->device, NULL, NULL, engine); \
>   		return -errno; \
>   	} \
>   	engine->field = val; \
> diff --git a/accfg/lib/private.h b/accfg/lib/private.h
> index 7f3162e..4a9843e 100644
> --- a/accfg/lib/private.h
> +++ b/accfg/lib/private.h
> @@ -130,6 +130,14 @@ struct accfg_wq {
>   
>   #define ACCFG_EXPORT __attribute__ ((visibility("default")))
>   
> +struct accfg_error_ctx {
> +	unsigned int cmd_status;
> +	struct accfg_device *device;
> +	struct accfg_group *group;
> +	struct accfg_wq *wq;
> +	struct accfg_engine *engine;
> +};
> +
>   /**
>    * struct accfg_ctx - library user context to find device instances
>    *
> @@ -148,6 +156,7 @@ struct accfg_ctx {
>   	uint64_t timeout;
>   	void *private_data;
>   	bool compat;
> +	struct accfg_error_ctx *error_ctx;
>   };
>   
>   static inline int check_udev(struct udev *udev)
> diff --git a/accfg/libaccel_config.h b/accfg/libaccel_config.h
> index 5b37790..27d10b6 100644
> --- a/accfg/libaccel_config.h
> +++ b/accfg/libaccel_config.h
> @@ -188,9 +188,14 @@ unsigned int accfg_device_get_version(struct accfg_device *device);
>   int accfg_device_get_clients(struct accfg_device *device);
>   int accfg_device_set_token_limit(struct accfg_device *dev, int val);
>   int accfg_device_is_active(struct accfg_device *device);
> -int accfg_device_get_cmd_status(struct accfg_device *device);
> +unsigned int accfg_device_get_cmd_status(struct accfg_device *device);
>   const char *accfg_device_get_cmd_status_str(struct accfg_device *device);
> -
> +unsigned int accfg_ctx_get_last_error(struct accfg_ctx *ctx);
> +const char *accfg_ctx_get_last_error_str(struct accfg_ctx *ctx);
> +struct accfg_device *accfg_ctx_get_last_error_device(struct accfg_ctx *ctx);
> +struct accfg_wq *accfg_ctx_get_last_error_wq(struct accfg_ctx *ctx);
> +struct accfg_group *accfg_ctx_get_last_error_group(struct accfg_ctx *ctx);
> +struct accfg_engine *accfg_ctx_get_last_error_engine(struct accfg_ctx *ctx);
>   struct accfg_device_mdev;
>   struct accfg_device_mdev *accfg_device_first_mdev(struct accfg_device *device);
>   struct accfg_device_mdev *accfg_device_next_mdev(struct accfg_device_mdev *mdev);

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Accel-config] Re: [PATCH 1/2] accel-config: Add APIs to get details of last error and associated devices
@ 2021-07-12 21:15 Thomas, Ramesh
  0 siblings, 0 replies; 4+ messages in thread
From: Thomas, Ramesh @ 2021-07-12 21:15 UTC (permalink / raw)
  To: accel-config

[-- Attachment #1: Type: text/plain, Size: 9679 bytes --]

On Mon, Jul 12, 2021 at 01:58:45PM -0700, Dave Jiang wrote:
> 
> On 7/12/2021 1:40 PM, Thomas, Ramesh wrote:
> > On Mon, Jul 12, 2021 at 01:26:52PM -0700, Dave Jiang wrote:
> >> On 7/9/2021 10:47 AM, ramesh.thomas(a)intel.com wrote:
> >>> From: Ramesh Thomas <ramesh.thomas(a)intel.com>
> >>>
> >>> Capture a snapshot of error condition containing cmd_status and the
> >>> associated devices. Provide APIs to return the captured details. Handles
> >>> both hw and sw error codes stored in sysfs cmd_status field of devices.
> >>> Following APIs are added:
> >>> - accfg_ctx_get_last_error;
> >>> - accfg_ctx_get_last_error_str;
> >>> - accfg_ctx_get_last_error_device;
> >>> - accfg_ctx_get_last_error_wq;
> >>> - accfg_ctx_get_last_error_group;
> >>> - accfg_ctx_get_last_error_engine;
> >>>
> >>> Signed-off-by: Ramesh Thomas <ramesh.thomas(a)intel.com>
> >>> ---
> >>>    accfg/lib/libaccel-config.sym |  10 +++
> >>>    accfg/lib/libaccfg.c          | 156 ++++++++++++++++++++++++++++++----
> >>>    accfg/lib/private.h           |   9 ++
> >>>    accfg/libaccel_config.h       |   9 +-
> >>>    4 files changed, 164 insertions(+), 20 deletions(-)
> >>>
> >>> diff --git a/accfg/lib/libaccel-config.sym b/accfg/lib/libaccel-config.sym
> >>> index aecc98b..679c597 100644
> >>> --- a/accfg/lib/libaccel-config.sym
> >>> +++ b/accfg/lib/libaccel-config.sym
> >>> @@ -149,3 +149,13 @@ LIBACCFG_9 {
> >>>    global:
> >>>      accfg_device_get_op_cap;
> >>>    } LIBACCFG_8;
> >>> +
> >>> +LIBACCFG_10 {
> >>> +global:
> >>> +   accfg_ctx_get_last_error;
> >>> +   accfg_ctx_get_last_error_str;
> >>> +   accfg_ctx_get_last_error_device;
> >>> +   accfg_ctx_get_last_error_wq;
> >>> +   accfg_ctx_get_last_error_group;
> >>> +   accfg_ctx_get_last_error_engine;
> >>> +} LIBACCFG_9;
> >>> diff --git a/accfg/lib/libaccfg.c b/accfg/lib/libaccfg.c
> >>> index f53751e..00314b0 100644
> >>> --- a/accfg/lib/libaccfg.c
> >>> +++ b/accfg/lib/libaccfg.c
> >>> @@ -70,6 +70,7 @@ ACCFG_EXPORT char *accfg_mdev_basenames[] = {
> >>>
> >>>    enum {
> >>>      ACCFG_CMD_STATUS_MAX = 0x45,
> >>> +   ACCFG_CMD_STATUS_ERROR = 0x80010000,
> >>>    };
> >>>
> >>>    const char *accfg_device_cmd_status[] = {
> >>> @@ -105,9 +106,43 @@ const char *accfg_device_cmd_status[] = {
> >>>      [0x42]  = "No interrupt handle available",
> >>>      [0x43]  = "No interrupt handles associated with the index",
> >>>      [0x44]  = "No revoked handles associted with the index",
> >>> -   [ACCFG_CMD_STATUS_MAX]  = "",
> >>> +   [ACCFG_CMD_STATUS_MAX]  = "Unknown error",
> >>>    };
> >>>
> >>> +const char *accfg_sw_cmd_status[] = {
> >>> +   [0x01] = "Error reading cmd_status",
> >>> +   [0x02] = "DMA device registration error",
> >>> +   [0x03] = "wq error - group not set",
> >>> +   [0x04] = "wq error - name not set",
> >>> +   [0x05] = "wq error - no SVM support and needed",
> >>> +   [0x06] = "wq error - threshold not set",
> >>> +   [0x07] = "wq portal mapping error",
> >>> +   [0x08] = "wq resource allocation error",
> >>> +   [0x09] = "wq percpu counter allocation error",
> >>> +   [0x0a] = "wq DMA channel registration error",
> >>> +   [0x0b] = "wq char dev registration error",
> >>> +   [0x0c] = "wq error - no shared wq support",
> >>> +   [0x0d] = "wq error - no wqs configured",
> >>> +   [0x0e] = "wq error - size not set",
> >>> +};
> >>> +
> >>> +static void save_last_error(struct accfg_device *device, struct accfg_wq *wq,
> >>> +           struct accfg_group *group, struct accfg_engine *engine)
> >>> +{
> >>> +   struct accfg_ctx *ctx = device->ctx;
> >>> +
> >>> +   if (!ctx->error_ctx)
> >>> +           ctx->error_ctx = calloc(1, sizeof(struct accfg_error_ctx));
> >>> +
> >>> +   if (ctx->error_ctx) {
> >>> +           ctx->error_ctx->cmd_status = accfg_device_get_cmd_status(device);
> >>> +           ctx->error_ctx->device = device;
> >>> +           ctx->error_ctx->wq = wq;
> >>> +           ctx->error_ctx->group = group;
> >>> +           ctx->error_ctx->engine = engine;
> >>> +   }
> >>> +}
> >>> +
> >>>    static inline bool is_mdev_registered(struct accfg_device *device)
> >>>    {
> >>>      return device->mdev_path && !access(device->mdev_path, R_OK);
> >>> @@ -251,6 +286,7 @@ static void free_context(struct accfg_ctx *ctx)
> >>>
> >>>      list_for_each_safe(&ctx->devices, device, _b, list)
> >>>              free_device(device, &ctx->devices);
> >>> +   free(ctx->error_ctx);
> >>>      free(ctx);
> >>>    }
> >>>
> >>> @@ -1082,6 +1118,7 @@ ACCFG_EXPORT int accfg_create_mdev(struct accfg_device *device,
> >>>      rc = sysfs_write_attr(ctx, mdev_path, uuid_str);
> >>>      if (rc < 0) {
> >>>              err(ctx, "create mdev failed %d\n", rc);
> >>> +           save_last_error(device, NULL, NULL, NULL);
> >>>              goto create_err;
> >>>      }
> >>>
> >>> @@ -1105,8 +1142,10 @@ static int accfg_device_mdev_remove(struct accfg_device *device,
> >>>      uuid_unparse(mdev->uuid, uuid_str);
> >>>      sprintf(mdev_path, "%s/%s/remove", device->mdev_path, uuid_str);
> >>>      rc = sysfs_write_attr(ctx, mdev_path, "1");
> >>> -   if (rc < 0)
> >>> +   if (rc < 0) {
> >>> +           save_last_error(device, NULL, NULL, NULL);
> >>>              return rc;
> >>> +   }
> >>>
> >>>      list_del(&mdev->list);
> >>>      free(mdev);
> >>> @@ -1355,6 +1394,7 @@ ACCFG_EXPORT enum accfg_device_state accfg_device_get_state(
> >>>              err(ctx, "%s: sysfs_read_attr failed '%s': %s\n",
> >>>                              __func__, device->device_path,
> >>>                              strerror(errno));
> >>> +           save_last_error(device, NULL, NULL, NULL);
> >>>              return ACCFG_DEVICE_UNKNOWN;
> >>>      }
> >>>
> >>> @@ -1414,6 +1454,7 @@ ACCFG_EXPORT int accfg_device_get_clients(struct accfg_device
> *device)
> >>>      if (sysfs_read_attr(ctx, path, buf) < 0) {
> >>>              err(ctx, "%s: retrieve clients failed '%s': %s\n",
> >>>                              __func__, path, strerror(errno));
> >>> +           save_last_error(device, NULL, NULL, NULL);
> >>>              return -errno;
> >>>      }
> >>>
> >>> @@ -1448,6 +1489,7 @@ ACCFG_EXPORT int accfg_device_set_token_limit(struct accfg_device
> >> *dev, int val)
> >>>      if (sysfs_write_attr(ctx, path, buf) < 0) {
> >>>              err(ctx, "%s: write failed: %s\n",
> >>>                              accfg_device_get_devname(dev), strerror(errno));
> >>> +           save_last_error(dev, NULL, NULL, NULL);
> >>>              return -errno;
> >>>      }
> >>>
> >>> @@ -1476,8 +1518,10 @@ ACCFG_EXPORT int accfg_device_is_active(struct accfg_device
> *device)
> >>>              return 0;
> >>>      }
> >>>
> >>> -   if (sysfs_read_attr(ctx, path, buf) < 0)
> >>> +   if (sysfs_read_attr(ctx, path, buf) < 0) {
> >>> +           save_last_error(device, NULL, NULL, NULL);
> >>>              return 0;
> >>> +   }
> >>>
> >>>      if (strcmp(buf, "enabled") == 0)
> >>>              return 1;
> >>> @@ -1485,17 +1529,16 @@ ACCFG_EXPORT int accfg_device_is_active(struct accfg_device
> *device)
> >>>      return 0;
> >>>    }
> >>>
> >>> -ACCFG_EXPORT int accfg_device_get_cmd_status(struct accfg_device *device)
> >>> +ACCFG_EXPORT unsigned int accfg_device_get_cmd_status(struct accfg_device *device)
> >>>    {
> >>>      struct accfg_ctx *ctx;
> >>>      long status;
> >>>      char *path;
> >>>      int len;
> >>>      char buf[SYSFS_ATTR_SIZE], *end_ptr;
> >>> -   int rc;
> >>>
> >>>      if (!device)
> >>> -           return -EINVAL;
> >>> +           return ACCFG_CMD_STATUS_ERROR;
> >>>
> >>>      ctx = accfg_device_get_ctx(device);
> >>>      path = device->device_buf;
> >>> @@ -1504,31 +1547,96 @@ ACCFG_EXPORT int accfg_device_get_cmd_status(struct accfg_device
> >> *device)
> >>>      if (snprintf(path, len, "%s/cmd_status", device->device_path) >= len) {
> >>>              err(ctx, "%s: buffer too small!\n",
> >>>                              accfg_device_get_devname(device));
> >>> -           return -ENOMEM;
> >>> +           return ACCFG_CMD_STATUS_ERROR;
> >>>      }
> >>>
> >>> -   rc = sysfs_read_attr(ctx, path, buf);
> >>> -   if (rc < 0)
> >>> -           return rc;
> >>> +   if (sysfs_read_attr(ctx, path, buf))
> >>> +           return ACCFG_CMD_STATUS_ERROR;
> >>>
> >>>      status = strtol(buf, &end_ptr, 0);
> >>>      if (errno == ERANGE || end_ptr == buf)
> >>> -           return -EIO;
> >>> +           return ACCFG_CMD_STATUS_ERROR;
> >>>
> >>> -   return (int)status;
> >>> +   return (unsigned int)status;
> >>>    }
> >>>
> >>> -ACCFG_EXPORT const char * accfg_device_get_cmd_status_str(struct accfg_device *device)
> >>> +static const char *get_cmd_status_str(unsigned int status)
> >>>    {
> >>> -   int status;
> >>> +   unsigned int sw_status;
> >>>
> >>> -   status = accfg_device_get_cmd_status(device);
> >>> -   if (status < 0 || status >= ACCFG_CMD_STATUS_MAX)
> >>> -           return NULL;
> >>> +   if (status & 0x80000000) {
> >>> +           sw_status = (status & ~0x80000000) >> 16;
> >>
> >> Magic number. Do I need to add a mask define to the header?
> > Or add a comment stating device error codes are in the lower 16 bits and
> > sw error codes are in the higher bits with the highest bit set.
> 
> Looks like I already defined the mask in the user header:
> 
> IDXD_SCMD_SOFTERR_MASK
> 
> I've also added a shift define. Just fetch djiang5/idxd-upstream-next again.
> 

I will use that and send an update.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Accel-config] Re: [PATCH 1/2] accel-config: Add APIs to get details of last error and associated devices
@ 2021-07-12 20:58 Dave Jiang
  0 siblings, 0 replies; 4+ messages in thread
From: Dave Jiang @ 2021-07-12 20:58 UTC (permalink / raw)
  To: accel-config

[-- Attachment #1: Type: text/plain, Size: 9108 bytes --]


On 7/12/2021 1:40 PM, Thomas, Ramesh wrote:
> On Mon, Jul 12, 2021 at 01:26:52PM -0700, Dave Jiang wrote:
>> On 7/9/2021 10:47 AM, ramesh.thomas(a)intel.com wrote:
>>> From: Ramesh Thomas <ramesh.thomas(a)intel.com>
>>>
>>> Capture a snapshot of error condition containing cmd_status and the
>>> associated devices. Provide APIs to return the captured details. Handles
>>> both hw and sw error codes stored in sysfs cmd_status field of devices.
>>> Following APIs are added:
>>> - accfg_ctx_get_last_error;
>>> - accfg_ctx_get_last_error_str;
>>> - accfg_ctx_get_last_error_device;
>>> - accfg_ctx_get_last_error_wq;
>>> - accfg_ctx_get_last_error_group;
>>> - accfg_ctx_get_last_error_engine;
>>>
>>> Signed-off-by: Ramesh Thomas <ramesh.thomas(a)intel.com>
>>> ---
>>>    accfg/lib/libaccel-config.sym |  10 +++
>>>    accfg/lib/libaccfg.c          | 156 ++++++++++++++++++++++++++++++----
>>>    accfg/lib/private.h           |   9 ++
>>>    accfg/libaccel_config.h       |   9 +-
>>>    4 files changed, 164 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/accfg/lib/libaccel-config.sym b/accfg/lib/libaccel-config.sym
>>> index aecc98b..679c597 100644
>>> --- a/accfg/lib/libaccel-config.sym
>>> +++ b/accfg/lib/libaccel-config.sym
>>> @@ -149,3 +149,13 @@ LIBACCFG_9 {
>>>    global:
>>>      accfg_device_get_op_cap;
>>>    } LIBACCFG_8;
>>> +
>>> +LIBACCFG_10 {
>>> +global:
>>> +   accfg_ctx_get_last_error;
>>> +   accfg_ctx_get_last_error_str;
>>> +   accfg_ctx_get_last_error_device;
>>> +   accfg_ctx_get_last_error_wq;
>>> +   accfg_ctx_get_last_error_group;
>>> +   accfg_ctx_get_last_error_engine;
>>> +} LIBACCFG_9;
>>> diff --git a/accfg/lib/libaccfg.c b/accfg/lib/libaccfg.c
>>> index f53751e..00314b0 100644
>>> --- a/accfg/lib/libaccfg.c
>>> +++ b/accfg/lib/libaccfg.c
>>> @@ -70,6 +70,7 @@ ACCFG_EXPORT char *accfg_mdev_basenames[] = {
>>>
>>>    enum {
>>>      ACCFG_CMD_STATUS_MAX = 0x45,
>>> +   ACCFG_CMD_STATUS_ERROR = 0x80010000,
>>>    };
>>>
>>>    const char *accfg_device_cmd_status[] = {
>>> @@ -105,9 +106,43 @@ const char *accfg_device_cmd_status[] = {
>>>      [0x42]  = "No interrupt handle available",
>>>      [0x43]  = "No interrupt handles associated with the index",
>>>      [0x44]  = "No revoked handles associted with the index",
>>> -   [ACCFG_CMD_STATUS_MAX]  = "",
>>> +   [ACCFG_CMD_STATUS_MAX]  = "Unknown error",
>>>    };
>>>
>>> +const char *accfg_sw_cmd_status[] = {
>>> +   [0x01] = "Error reading cmd_status",
>>> +   [0x02] = "DMA device registration error",
>>> +   [0x03] = "wq error - group not set",
>>> +   [0x04] = "wq error - name not set",
>>> +   [0x05] = "wq error - no SVM support and needed",
>>> +   [0x06] = "wq error - threshold not set",
>>> +   [0x07] = "wq portal mapping error",
>>> +   [0x08] = "wq resource allocation error",
>>> +   [0x09] = "wq percpu counter allocation error",
>>> +   [0x0a] = "wq DMA channel registration error",
>>> +   [0x0b] = "wq char dev registration error",
>>> +   [0x0c] = "wq error - no shared wq support",
>>> +   [0x0d] = "wq error - no wqs configured",
>>> +   [0x0e] = "wq error - size not set",
>>> +};
>>> +
>>> +static void save_last_error(struct accfg_device *device, struct accfg_wq *wq,
>>> +           struct accfg_group *group, struct accfg_engine *engine)
>>> +{
>>> +   struct accfg_ctx *ctx = device->ctx;
>>> +
>>> +   if (!ctx->error_ctx)
>>> +           ctx->error_ctx = calloc(1, sizeof(struct accfg_error_ctx));
>>> +
>>> +   if (ctx->error_ctx) {
>>> +           ctx->error_ctx->cmd_status = accfg_device_get_cmd_status(device);
>>> +           ctx->error_ctx->device = device;
>>> +           ctx->error_ctx->wq = wq;
>>> +           ctx->error_ctx->group = group;
>>> +           ctx->error_ctx->engine = engine;
>>> +   }
>>> +}
>>> +
>>>    static inline bool is_mdev_registered(struct accfg_device *device)
>>>    {
>>>      return device->mdev_path && !access(device->mdev_path, R_OK);
>>> @@ -251,6 +286,7 @@ static void free_context(struct accfg_ctx *ctx)
>>>
>>>      list_for_each_safe(&ctx->devices, device, _b, list)
>>>              free_device(device, &ctx->devices);
>>> +   free(ctx->error_ctx);
>>>      free(ctx);
>>>    }
>>>
>>> @@ -1082,6 +1118,7 @@ ACCFG_EXPORT int accfg_create_mdev(struct accfg_device *device,
>>>      rc = sysfs_write_attr(ctx, mdev_path, uuid_str);
>>>      if (rc < 0) {
>>>              err(ctx, "create mdev failed %d\n", rc);
>>> +           save_last_error(device, NULL, NULL, NULL);
>>>              goto create_err;
>>>      }
>>>
>>> @@ -1105,8 +1142,10 @@ static int accfg_device_mdev_remove(struct accfg_device *device,
>>>      uuid_unparse(mdev->uuid, uuid_str);
>>>      sprintf(mdev_path, "%s/%s/remove", device->mdev_path, uuid_str);
>>>      rc = sysfs_write_attr(ctx, mdev_path, "1");
>>> -   if (rc < 0)
>>> +   if (rc < 0) {
>>> +           save_last_error(device, NULL, NULL, NULL);
>>>              return rc;
>>> +   }
>>>
>>>      list_del(&mdev->list);
>>>      free(mdev);
>>> @@ -1355,6 +1394,7 @@ ACCFG_EXPORT enum accfg_device_state accfg_device_get_state(
>>>              err(ctx, "%s: sysfs_read_attr failed '%s': %s\n",
>>>                              __func__, device->device_path,
>>>                              strerror(errno));
>>> +           save_last_error(device, NULL, NULL, NULL);
>>>              return ACCFG_DEVICE_UNKNOWN;
>>>      }
>>>
>>> @@ -1414,6 +1454,7 @@ ACCFG_EXPORT int accfg_device_get_clients(struct accfg_device *device)
>>>      if (sysfs_read_attr(ctx, path, buf) < 0) {
>>>              err(ctx, "%s: retrieve clients failed '%s': %s\n",
>>>                              __func__, path, strerror(errno));
>>> +           save_last_error(device, NULL, NULL, NULL);
>>>              return -errno;
>>>      }
>>>
>>> @@ -1448,6 +1489,7 @@ ACCFG_EXPORT int accfg_device_set_token_limit(struct accfg_device
>> *dev, int val)
>>>      if (sysfs_write_attr(ctx, path, buf) < 0) {
>>>              err(ctx, "%s: write failed: %s\n",
>>>                              accfg_device_get_devname(dev), strerror(errno));
>>> +           save_last_error(dev, NULL, NULL, NULL);
>>>              return -errno;
>>>      }
>>>
>>> @@ -1476,8 +1518,10 @@ ACCFG_EXPORT int accfg_device_is_active(struct accfg_device *device)
>>>              return 0;
>>>      }
>>>
>>> -   if (sysfs_read_attr(ctx, path, buf) < 0)
>>> +   if (sysfs_read_attr(ctx, path, buf) < 0) {
>>> +           save_last_error(device, NULL, NULL, NULL);
>>>              return 0;
>>> +   }
>>>
>>>      if (strcmp(buf, "enabled") == 0)
>>>              return 1;
>>> @@ -1485,17 +1529,16 @@ ACCFG_EXPORT int accfg_device_is_active(struct accfg_device *device)
>>>      return 0;
>>>    }
>>>
>>> -ACCFG_EXPORT int accfg_device_get_cmd_status(struct accfg_device *device)
>>> +ACCFG_EXPORT unsigned int accfg_device_get_cmd_status(struct accfg_device *device)
>>>    {
>>>      struct accfg_ctx *ctx;
>>>      long status;
>>>      char *path;
>>>      int len;
>>>      char buf[SYSFS_ATTR_SIZE], *end_ptr;
>>> -   int rc;
>>>
>>>      if (!device)
>>> -           return -EINVAL;
>>> +           return ACCFG_CMD_STATUS_ERROR;
>>>
>>>      ctx = accfg_device_get_ctx(device);
>>>      path = device->device_buf;
>>> @@ -1504,31 +1547,96 @@ ACCFG_EXPORT int accfg_device_get_cmd_status(struct accfg_device
>> *device)
>>>      if (snprintf(path, len, "%s/cmd_status", device->device_path) >= len) {
>>>              err(ctx, "%s: buffer too small!\n",
>>>                              accfg_device_get_devname(device));
>>> -           return -ENOMEM;
>>> +           return ACCFG_CMD_STATUS_ERROR;
>>>      }
>>>
>>> -   rc = sysfs_read_attr(ctx, path, buf);
>>> -   if (rc < 0)
>>> -           return rc;
>>> +   if (sysfs_read_attr(ctx, path, buf))
>>> +           return ACCFG_CMD_STATUS_ERROR;
>>>
>>>      status = strtol(buf, &end_ptr, 0);
>>>      if (errno == ERANGE || end_ptr == buf)
>>> -           return -EIO;
>>> +           return ACCFG_CMD_STATUS_ERROR;
>>>
>>> -   return (int)status;
>>> +   return (unsigned int)status;
>>>    }
>>>
>>> -ACCFG_EXPORT const char * accfg_device_get_cmd_status_str(struct accfg_device *device)
>>> +static const char *get_cmd_status_str(unsigned int status)
>>>    {
>>> -   int status;
>>> +   unsigned int sw_status;
>>>
>>> -   status = accfg_device_get_cmd_status(device);
>>> -   if (status < 0 || status >= ACCFG_CMD_STATUS_MAX)
>>> -           return NULL;
>>> +   if (status & 0x80000000) {
>>> +           sw_status = (status & ~0x80000000) >> 16;
>>
>> Magic number. Do I need to add a mask define to the header?
> Or add a comment stating device error codes are in the lower 16 bits and
> sw error codes are in the higher bits with the highest bit set.

Looks like I already defined the mask in the user header:

IDXD_SCMD_SOFTERR_MASK

I've also added a shift define. Just fetch djiang5/idxd-upstream-next again.


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Accel-config] Re: [PATCH 1/2] accel-config: Add APIs to get details of last error and associated devices
@ 2021-07-12 20:40 Thomas, Ramesh
  0 siblings, 0 replies; 4+ messages in thread
From: Thomas, Ramesh @ 2021-07-12 20:40 UTC (permalink / raw)
  To: accel-config

[-- Attachment #1: Type: text/plain, Size: 8259 bytes --]

On Mon, Jul 12, 2021 at 01:26:52PM -0700, Dave Jiang wrote:
> 
> On 7/9/2021 10:47 AM, ramesh.thomas(a)intel.com wrote:
> > From: Ramesh Thomas <ramesh.thomas(a)intel.com>
> >
> > Capture a snapshot of error condition containing cmd_status and the
> > associated devices. Provide APIs to return the captured details. Handles
> > both hw and sw error codes stored in sysfs cmd_status field of devices.
> > Following APIs are added:
> > - accfg_ctx_get_last_error;
> > - accfg_ctx_get_last_error_str;
> > - accfg_ctx_get_last_error_device;
> > - accfg_ctx_get_last_error_wq;
> > - accfg_ctx_get_last_error_group;
> > - accfg_ctx_get_last_error_engine;
> >
> > Signed-off-by: Ramesh Thomas <ramesh.thomas(a)intel.com>
> > ---
> >   accfg/lib/libaccel-config.sym |  10 +++
> >   accfg/lib/libaccfg.c          | 156 ++++++++++++++++++++++++++++++----
> >   accfg/lib/private.h           |   9 ++
> >   accfg/libaccel_config.h       |   9 +-
> >   4 files changed, 164 insertions(+), 20 deletions(-)
> >
> > diff --git a/accfg/lib/libaccel-config.sym b/accfg/lib/libaccel-config.sym
> > index aecc98b..679c597 100644
> > --- a/accfg/lib/libaccel-config.sym
> > +++ b/accfg/lib/libaccel-config.sym
> > @@ -149,3 +149,13 @@ LIBACCFG_9 {
> >   global:
> >   	accfg_device_get_op_cap;
> >   } LIBACCFG_8;
> > +
> > +LIBACCFG_10 {
> > +global:
> > +	accfg_ctx_get_last_error;
> > +	accfg_ctx_get_last_error_str;
> > +	accfg_ctx_get_last_error_device;
> > +	accfg_ctx_get_last_error_wq;
> > +	accfg_ctx_get_last_error_group;
> > +	accfg_ctx_get_last_error_engine;
> > +} LIBACCFG_9;
> > diff --git a/accfg/lib/libaccfg.c b/accfg/lib/libaccfg.c
> > index f53751e..00314b0 100644
> > --- a/accfg/lib/libaccfg.c
> > +++ b/accfg/lib/libaccfg.c
> > @@ -70,6 +70,7 @@ ACCFG_EXPORT char *accfg_mdev_basenames[] = {
> >
> >   enum {
> >   	ACCFG_CMD_STATUS_MAX = 0x45,
> > +	ACCFG_CMD_STATUS_ERROR = 0x80010000,
> >   };
> >
> >   const char *accfg_device_cmd_status[] = {
> > @@ -105,9 +106,43 @@ const char *accfg_device_cmd_status[] = {
> >   	[0x42]	= "No interrupt handle available",
> >   	[0x43]	= "No interrupt handles associated with the index",
> >   	[0x44]	= "No revoked handles associted with the index",
> > -	[ACCFG_CMD_STATUS_MAX]	= "",
> > +	[ACCFG_CMD_STATUS_MAX]	= "Unknown error",
> >   };
> >
> > +const char *accfg_sw_cmd_status[] = {
> > +	[0x01] = "Error reading cmd_status",
> > +	[0x02] = "DMA device registration error",
> > +	[0x03] = "wq error - group not set",
> > +	[0x04] = "wq error - name not set",
> > +	[0x05] = "wq error - no SVM support and needed",
> > +	[0x06] = "wq error - threshold not set",
> > +	[0x07] = "wq portal mapping error",
> > +	[0x08] = "wq resource allocation error",
> > +	[0x09] = "wq percpu counter allocation error",
> > +	[0x0a] = "wq DMA channel registration error",
> > +	[0x0b] = "wq char dev registration error",
> > +	[0x0c] = "wq error - no shared wq support",
> > +	[0x0d] = "wq error - no wqs configured",
> > +	[0x0e] = "wq error - size not set",
> > +};
> > +
> > +static void save_last_error(struct accfg_device *device, struct accfg_wq *wq,
> > +		struct accfg_group *group, struct accfg_engine *engine)
> > +{
> > +	struct accfg_ctx *ctx = device->ctx;
> > +
> > +	if (!ctx->error_ctx)
> > +		ctx->error_ctx = calloc(1, sizeof(struct accfg_error_ctx));
> > +
> > +	if (ctx->error_ctx) {
> > +		ctx->error_ctx->cmd_status = accfg_device_get_cmd_status(device);
> > +		ctx->error_ctx->device = device;
> > +		ctx->error_ctx->wq = wq;
> > +		ctx->error_ctx->group = group;
> > +		ctx->error_ctx->engine = engine;
> > +	}
> > +}
> > +
> >   static inline bool is_mdev_registered(struct accfg_device *device)
> >   {
> >   	return device->mdev_path && !access(device->mdev_path, R_OK);
> > @@ -251,6 +286,7 @@ static void free_context(struct accfg_ctx *ctx)
> >
> >   	list_for_each_safe(&ctx->devices, device, _b, list)
> >   		free_device(device, &ctx->devices);
> > +	free(ctx->error_ctx);
> >   	free(ctx);
> >   }
> >
> > @@ -1082,6 +1118,7 @@ ACCFG_EXPORT int accfg_create_mdev(struct accfg_device *device,
> >   	rc = sysfs_write_attr(ctx, mdev_path, uuid_str);
> >   	if (rc < 0) {
> >   		err(ctx, "create mdev failed %d\n", rc);
> > +		save_last_error(device, NULL, NULL, NULL);
> >   		goto create_err;
> >   	}
> >
> > @@ -1105,8 +1142,10 @@ static int accfg_device_mdev_remove(struct accfg_device *device,
> >   	uuid_unparse(mdev->uuid, uuid_str);
> >   	sprintf(mdev_path, "%s/%s/remove", device->mdev_path, uuid_str);
> >   	rc = sysfs_write_attr(ctx, mdev_path, "1");
> > -	if (rc < 0)
> > +	if (rc < 0) {
> > +		save_last_error(device, NULL, NULL, NULL);
> >   		return rc;
> > +	}
> >
> >   	list_del(&mdev->list);
> >   	free(mdev);
> > @@ -1355,6 +1394,7 @@ ACCFG_EXPORT enum accfg_device_state accfg_device_get_state(
> >   		err(ctx, "%s: sysfs_read_attr failed '%s': %s\n",
> >   				__func__, device->device_path,
> >   				strerror(errno));
> > +		save_last_error(device, NULL, NULL, NULL);
> >   		return ACCFG_DEVICE_UNKNOWN;
> >   	}
> >
> > @@ -1414,6 +1454,7 @@ ACCFG_EXPORT int accfg_device_get_clients(struct accfg_device *device)
> >   	if (sysfs_read_attr(ctx, path, buf) < 0) {
> >   		err(ctx, "%s: retrieve clients failed '%s': %s\n",
> >   				__func__, path, strerror(errno));
> > +		save_last_error(device, NULL, NULL, NULL);
> >   		return -errno;
> >   	}
> >
> > @@ -1448,6 +1489,7 @@ ACCFG_EXPORT int accfg_device_set_token_limit(struct accfg_device
> *dev, int val)
> >   	if (sysfs_write_attr(ctx, path, buf) < 0) {
> >   		err(ctx, "%s: write failed: %s\n",
> >   				accfg_device_get_devname(dev), strerror(errno));
> > +		save_last_error(dev, NULL, NULL, NULL);
> >   		return -errno;
> >   	}
> >
> > @@ -1476,8 +1518,10 @@ ACCFG_EXPORT int accfg_device_is_active(struct accfg_device *device)
> >   		return 0;
> >   	}
> >
> > -	if (sysfs_read_attr(ctx, path, buf) < 0)
> > +	if (sysfs_read_attr(ctx, path, buf) < 0) {
> > +		save_last_error(device, NULL, NULL, NULL);
> >   		return 0;
> > +	}
> >
> >   	if (strcmp(buf, "enabled") == 0)
> >   		return 1;
> > @@ -1485,17 +1529,16 @@ ACCFG_EXPORT int accfg_device_is_active(struct accfg_device *device)
> >   	return 0;
> >   }
> >
> > -ACCFG_EXPORT int accfg_device_get_cmd_status(struct accfg_device *device)
> > +ACCFG_EXPORT unsigned int accfg_device_get_cmd_status(struct accfg_device *device)
> >   {
> >   	struct accfg_ctx *ctx;
> >   	long status;
> >   	char *path;
> >   	int len;
> >   	char buf[SYSFS_ATTR_SIZE], *end_ptr;
> > -	int rc;
> >
> >   	if (!device)
> > -		return -EINVAL;
> > +		return ACCFG_CMD_STATUS_ERROR;
> >
> >   	ctx = accfg_device_get_ctx(device);
> >   	path = device->device_buf;
> > @@ -1504,31 +1547,96 @@ ACCFG_EXPORT int accfg_device_get_cmd_status(struct accfg_device
> *device)
> >   	if (snprintf(path, len, "%s/cmd_status", device->device_path) >= len) {
> >   		err(ctx, "%s: buffer too small!\n",
> >   				accfg_device_get_devname(device));
> > -		return -ENOMEM;
> > +		return ACCFG_CMD_STATUS_ERROR;
> >   	}
> >
> > -	rc = sysfs_read_attr(ctx, path, buf);
> > -	if (rc < 0)
> > -		return rc;
> > +	if (sysfs_read_attr(ctx, path, buf))
> > +		return ACCFG_CMD_STATUS_ERROR;
> >
> >   	status = strtol(buf, &end_ptr, 0);
> >   	if (errno == ERANGE || end_ptr == buf)
> > -		return -EIO;
> > +		return ACCFG_CMD_STATUS_ERROR;
> >
> > -	return (int)status;
> > +	return (unsigned int)status;
> >   }
> >
> > -ACCFG_EXPORT const char * accfg_device_get_cmd_status_str(struct accfg_device *device)
> > +static const char *get_cmd_status_str(unsigned int status)
> >   {
> > -	int status;
> > +	unsigned int sw_status;
> >
> > -	status = accfg_device_get_cmd_status(device);
> > -	if (status < 0 || status >= ACCFG_CMD_STATUS_MAX)
> > -		return NULL;
> > +	if (status & 0x80000000) {
> > +		sw_status = (status & ~0x80000000) >> 16;
> 
> 
> Magic number. Do I need to add a mask define to the header?

Or add a comment stating device error codes are in the lower 16 bits and
sw error codes are in the higher bits with the highest bit set.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-07-12 21:15 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-12 20:26 [Accel-config] Re: [PATCH 1/2] accel-config: Add APIs to get details of last error and associated devices Dave Jiang
2021-07-12 20:40 Thomas, Ramesh
2021-07-12 20:58 Dave Jiang
2021-07-12 21:15 Thomas, Ramesh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.