* [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port.
@ 2013-02-21 21:33 Ira Weiny
[not found] ` <20130221133331.593f30977427848f4373b57a-i2BcT+NCU+M@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Ira Weiny @ 2013-02-21 21:33 UTC (permalink / raw)
To: Hal Rosenstock; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org>
---
include/opensm/osm_perfmgr.h | 4 +
opensm/osm_perfmgr.c | 224 +++++++++++++++++++++++++++++++++---------
2 files changed, 183 insertions(+), 45 deletions(-)
diff --git a/include/opensm/osm_perfmgr.h b/include/opensm/osm_perfmgr.h
index 26b1ae6..3fa42d5 100644
--- a/include/opensm/osm_perfmgr.h
+++ b/include/opensm/osm_perfmgr.h
@@ -100,6 +100,9 @@ typedef struct monitored_port {
ib_net16_t lid;
ib_net16_t pkey;
ib_net32_t qp;
+ /* ClassPortInfo fields */
+ boolean_t cpi_valid;
+ ib_net16_t cap_mask;
} monitored_port_t;
/* Node to store information about nodes being monitored */
@@ -107,6 +110,7 @@ typedef struct monitored_node {
cl_map_item_t map_item;
struct monitored_node *next;
uint64_t guid;
+ uint8_t node_type;
boolean_t esp0;
char *name;
uint32_t num_ports;
diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c
index 9bc1154..c71111f 100644
--- a/opensm/osm_perfmgr.c
+++ b/opensm/osm_perfmgr.c
@@ -356,17 +356,20 @@ static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
return get_base_lid(p_node, port);
}
+
/**********************************************************************
- * Form and send the Port Counters MAD for a single port.
+ * Build a Performance Management class MAD
**********************************************************************/
-static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
- ib_net16_t dest_lid,
- ib_net32_t dest_qp, uint16_t pkey_ix,
- uint8_t port, uint8_t mad_method,
- osm_madw_context_t * p_context)
+static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr,
+ ib_net16_t dest_lid,
+ uint8_t sl,
+ ib_net32_t dest_qp,
+ uint16_t pkey_ix,
+ uint8_t mad_method,
+ ib_net16_t attr_id,
+ osm_madw_context_t * p_context,
+ ib_perfmgt_mad_t ** p_pm_mad)
{
- ib_api_status_t status = IB_SUCCESS;
- ib_port_counters_t *port_counter = NULL;
ib_perfmgt_mad_t *pm_mad = NULL;
osm_madw_t *p_madw = NULL;
@@ -375,7 +378,7 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle,
MAD_BLOCK_SIZE, NULL);
if (p_madw == NULL)
- return IB_INSUFFICIENT_MEMORY;
+ return NULL;
pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw);
@@ -393,29 +396,38 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
pm_mad->header.trans_id =
cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) &
(uint64_t) (0xFFFFFFFF));
- pm_mad->header.attr_id = IB_MAD_ATTR_PORT_CNTRS;
+ pm_mad->header.attr_id = attr_id;
pm_mad->header.resv = 0;
pm_mad->header.attr_mod = 0;
- port_counter = (ib_port_counters_t *) & pm_mad->data;
- memset(port_counter, 0, sizeof(*port_counter));
- port_counter->port_select = port;
- port_counter->counter_select = 0xFFFF;
-
p_madw->mad_addr.dest_lid = dest_lid;
p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
p_madw->mad_addr.addr_type.gsi.remote_qkey =
cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
- p_madw->mad_addr.addr_type.gsi.service_level = 0;
+ p_madw->mad_addr.addr_type.gsi.service_level = sl;
p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
p_madw->resp_expected = TRUE;
if (p_context)
p_madw->context = *p_context;
- status = osm_vendor_send(perfmgr->bind_handle, p_madw, TRUE);
+ if (p_pm_mad)
+ *p_pm_mad = pm_mad;
+
+ OSM_LOG_EXIT(perfmgr->log);
+
+ return (p_madw);
+}
+/**********************************************************************
+ * Send a Performance Management class MAD
+ **********************************************************************/
+static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr,
+ osm_madw_t * const p_madw)
+{
+ ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw,
+ TRUE);
if (status == IB_SUCCESS) {
/* pause thread if there are too many outstanding requests */
cl_atomic_inc(&(perfmgr->outstanding_queries));
@@ -427,6 +439,39 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
}
perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE;
}
+ return (status);
+}
+
+
+/**********************************************************************
+ * Form and send the PortCounters MAD for a single port.
+ **********************************************************************/
+static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
+ ib_net16_t dest_lid,
+ ib_net32_t dest_qp, uint16_t pkey_ix,
+ uint8_t port, uint8_t mad_method,
+ osm_madw_context_t * p_context,
+ uint8_t sl)
+{
+ ib_api_status_t status = IB_SUCCESS;
+ ib_port_counters_t *port_counter = NULL;
+ ib_perfmgt_mad_t *pm_mad = NULL;
+ osm_madw_t *p_madw = NULL;
+
+ OSM_LOG_ENTER(perfmgr->log);
+
+ p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix,
+ mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context,
+ &pm_mad);
+ if (p_madw == NULL)
+ return IB_INSUFFICIENT_MEMORY;
+
+ port_counter = (ib_port_counters_t *) & pm_mad->data;
+ memset(port_counter, 0, sizeof(*port_counter));
+ port_counter->port_select = port;
+ port_counter->counter_select = 0xFFFF;
+
+ status = perfmgr_send_mad(perfmgr, p_madw);
OSM_LOG_EXIT(perfmgr->log);
return status;
@@ -469,6 +514,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
mon_node->guid = node_guid;
mon_node->name = strdup(node->print_desc);
mon_node->num_ports = num_ports;
+ mon_node->node_type = node->node_info.node_type;
/* check for enhanced switch port 0 */
mon_node->esp0 = (node->sw &&
ib_switch_info_is_enhanced_port0(&node->sw->
@@ -491,6 +537,35 @@ Exit:
}
/**********************************************************************
+ * Form and send the ClassPortInfo MAD for a single port.
+ **********************************************************************/
+static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm,
+ ib_net16_t dest_lid,
+ ib_net32_t dest_qp,
+ uint16_t pkey_ix,
+ uint8_t port,
+ osm_madw_context_t * p_context,
+ uint8_t sl)
+{
+ ib_api_status_t status = IB_SUCCESS;
+ osm_madw_t *p_madw = NULL;
+
+ OSM_LOG_ENTER(pm->log);
+
+ p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp,
+ pkey_ix, IB_MAD_METHOD_GET,
+ IB_MAD_ATTR_CLASS_PORT_INFO, p_context,
+ NULL);
+ if (p_madw == NULL)
+ return IB_INSUFFICIENT_MEMORY;
+
+ status = perfmgr_send_mad(pm, p_madw);
+
+ OSM_LOG_EXIT(pm->log);
+ return status;
+}
+
+/**********************************************************************
* query the Port Counters of all the nodes in the subnet.
**********************************************************************/
static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
@@ -557,22 +632,42 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
mad_context.perfmgr_context.node_guid = node_guid;
mad_context.perfmgr_context.port = port;
mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET;
+
+ if (!mon_node->port[port].cpi_valid) {
+ status = perfmgr_send_cpi_mad(pm, lid, remote_qp,
+ mon_node->port[port].pkey_ix,
+ port, &mad_context,
+ 0); /* FIXME SL != 0 */
+ if (status != IB_SUCCESS)
+ OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: "
+ "Failed to issue ClassPortInfo query "
+ "for node 0x%" PRIx64
+ " port %d (%s)\n",
+ node->node_info.node_guid, port,
+ node->print_desc);
+ if (mon_node->node_type == IB_NODE_TYPE_SWITCH)
+ goto Exit; /* only need to issue 1 CPI query
+ for switches */
+ } else {
+
#ifdef ENABLE_OSM_PERF_MGR_PROFILE
- gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
+ gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
#endif
- OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
- PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
- cl_ntoh16(lid), node->print_desc);
- status = perfmgr_send_pc_mad(pm, lid, remote_qp,
- mon_node->port[port].pkey_ix,
- port, IB_MAD_METHOD_GET,
- &mad_context);
- if (status != IB_SUCCESS)
- OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
- "Failed to issue port counter query for node 0x%"
- PRIx64 " port %d (%s)\n",
- node->node_info.node_guid, port,
- node->print_desc);
+ OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
+ PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
+ cl_ntoh16(lid), node->print_desc);
+ status = perfmgr_send_pc_mad(pm, lid, remote_qp,
+ mon_node->port[port].pkey_ix,
+ port, IB_MAD_METHOD_GET,
+ &mad_context,
+ 0); /* FIXME SL != 0 */
+ if (status != IB_SUCCESS)
+ OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
+ "Failed to issue port counter query for node 0x%"
+ PRIx64 " port %d (%s)\n",
+ node->node_info.node_guid, port,
+ node->print_desc);
+ }
}
Exit:
cl_plock_release(&pm->osm->lock);
@@ -1053,7 +1148,8 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
/* clear port counters */
status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix,
port, IB_MAD_METHOD_SET,
- &mad_context);
+ &mad_context,
+ 0); /* FIXME SL != 0 */
if (status != IB_SUCCESS)
OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: "
"Failed to send clear counters MAD for %s (0x%"
@@ -1187,6 +1283,7 @@ static void pc_recv_process(void *context, void *data)
monitored_node_t *p_mon_node;
int16_t pkey_ix = 0;
boolean_t valid = TRUE;
+ ib_class_port_info_t *cpi = NULL;
OSM_LOG_ENTER(pm->log);
@@ -1209,15 +1306,44 @@ static void pc_recv_process(void *context, void *data)
CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS ||
p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
+ /* capture CLASS_PORT_INFO data */
+ if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
+ cpi = (ib_class_port_info_t *) &
+ (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
+
+ cl_plock_acquire(&pm->osm->lock);
+ /* validate port number */
+ if (port >= p_mon_node->num_ports) {
+ cl_plock_release(&pm->osm->lock);
+ OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: "
+ "Invalid port num %d for GUID 0x%016"
+ PRIx64 " num ports %d\n", port, node_guid,
+ p_mon_node->num_ports);
+ goto Exit;
+ }
+ if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) {
+ int i = 0;
+ for (i = p_mon_node->esp0 ? 0 : 1;
+ i < p_mon_node->num_ports;
+ i++) {
+ p_mon_node->port[i].cap_mask = cpi->cap_mask;
+ p_mon_node->port[i].cpi_valid = TRUE;
+ }
+ } else {
+ p_mon_node->port[port].cap_mask = cpi->cap_mask;
+ p_mon_node->port[port].cpi_valid = TRUE;
+ }
+ cl_plock_release(&pm->osm->lock);
+ }
+
/* Response could also be redirection (IBM eHCA PMA does this) */
- if (p_mad->status & IB_MAD_STATUS_REDIRECT &&
- p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
+ if (p_mad->status & IB_MAD_STATUS_REDIRECT) {
char gid_str[INET6_ADDRSTRLEN];
- ib_class_port_info_t *cpi =
- (ib_class_port_info_t *) &
- (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
ib_api_status_t status;
+ CL_ASSERT(cpi); /* Redirect should have returned CPI
+ (processed in previous block) */
+
OSM_LOG(pm->log, OSM_LOG_VERBOSE,
"Redirection to LID %u GID %s QP 0x%x received\n",
cl_ntoh16(cpi->redir_lid),
@@ -1292,20 +1418,28 @@ static void pc_recv_process(void *context, void *data)
if (!valid)
goto Exit;
- /* Finally, reissue the query to the redirected location */
- status = perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp,
- pkey_ix, port,
- mad_context->perfmgr_context.
- mad_method, mad_context);
+ /* Finally, issue a CPI query to the redirected location */
+ p_mon_node->port[port].cpi_valid = FALSE;
+ status = perfmgr_send_cpi_mad(pm, cpi->redir_lid,
+ cpi->redir_qp, pkey_ix,
+ port, mad_context,
+ 0); /* FIXME SL != 0 */
if (status != IB_SUCCESS)
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: "
- "Failed to send redirected MAD with method 0x%x for node 0x%"
- PRIx64 " port %d\n",
+ "Failed to send redirected MAD "
+ "with method 0x%x for node %s "
+ "(NodeGuid 0x%" PRIx64 ") port %d\n",
mad_context->perfmgr_context.mad_method,
- node_guid, port);
+ p_mon_node->name, node_guid, port);
goto Exit;
}
+ /* ClassPortInfo needed to process optional Redirection
+ * now exit normally
+ */
+ if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO)
+ goto Exit;
+
perfmgr_db_fill_err_read(wire_read, &err_reading);
/* FIXME separate query for extended counters if they are supported
* on the port.
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port.
[not found] ` <20130221133331.593f30977427848f4373b57a-i2BcT+NCU+M@public.gmane.org>
@ 2013-02-26 15:03 ` Hal Rosenstock
[not found] ` <512CCEC7.70304-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Hal Rosenstock @ 2013-02-26 15:03 UTC (permalink / raw)
To: Ira Weiny; +Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA
On 2/21/2013 4:33 PM, Ira Weiny wrote:
>
So two round trips are now needed for first-time ports to determine
whether or not extended counters are supported. I don't see a better way
around this.
>
> Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org>
> ---
> include/opensm/osm_perfmgr.h | 4 +
> opensm/osm_perfmgr.c | 224 +++++++++++++++++++++++++++++++++---------
> 2 files changed, 183 insertions(+), 45 deletions(-)
>
> diff --git a/include/opensm/osm_perfmgr.h b/include/opensm/osm_perfmgr.h
> index 26b1ae6..3fa42d5 100644
> --- a/include/opensm/osm_perfmgr.h
> +++ b/include/opensm/osm_perfmgr.h
> @@ -100,6 +100,9 @@ typedef struct monitored_port {
> ib_net16_t lid;
> ib_net16_t pkey;
> ib_net32_t qp;
> + /* ClassPortInfo fields */
> + boolean_t cpi_valid;
> + ib_net16_t cap_mask;
> } monitored_port_t;
>
> /* Node to store information about nodes being monitored */
> @@ -107,6 +110,7 @@ typedef struct monitored_node {
> cl_map_item_t map_item;
> struct monitored_node *next;
> uint64_t guid;
> + uint8_t node_type;
> boolean_t esp0;
> char *name;
> uint32_t num_ports;
> diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c
> index 9bc1154..c71111f 100644
> --- a/opensm/osm_perfmgr.c
> +++ b/opensm/osm_perfmgr.c
> @@ -356,17 +356,20 @@ static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
> return get_base_lid(p_node, port);
> }
>
> +
> /**********************************************************************
> - * Form and send the Port Counters MAD for a single port.
> + * Build a Performance Management class MAD
> **********************************************************************/
> -static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
> - ib_net16_t dest_lid,
> - ib_net32_t dest_qp, uint16_t pkey_ix,
> - uint8_t port, uint8_t mad_method,
> - osm_madw_context_t * p_context)
> +static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr,
> + ib_net16_t dest_lid,
> + uint8_t sl,
> + ib_net32_t dest_qp,
> + uint16_t pkey_ix,
> + uint8_t mad_method,
> + ib_net16_t attr_id,
> + osm_madw_context_t * p_context,
> + ib_perfmgt_mad_t ** p_pm_mad)
> {
> - ib_api_status_t status = IB_SUCCESS;
> - ib_port_counters_t *port_counter = NULL;
> ib_perfmgt_mad_t *pm_mad = NULL;
> osm_madw_t *p_madw = NULL;
>
> @@ -375,7 +378,7 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
> p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle,
> MAD_BLOCK_SIZE, NULL);
> if (p_madw == NULL)
> - return IB_INSUFFICIENT_MEMORY;
> + return NULL;
>
> pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw);
>
> @@ -393,29 +396,38 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
> pm_mad->header.trans_id =
> cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) &
> (uint64_t) (0xFFFFFFFF));
> - pm_mad->header.attr_id = IB_MAD_ATTR_PORT_CNTRS;
> + pm_mad->header.attr_id = attr_id;
> pm_mad->header.resv = 0;
> pm_mad->header.attr_mod = 0;
>
> - port_counter = (ib_port_counters_t *) & pm_mad->data;
> - memset(port_counter, 0, sizeof(*port_counter));
> - port_counter->port_select = port;
> - port_counter->counter_select = 0xFFFF;
> -
> p_madw->mad_addr.dest_lid = dest_lid;
> p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
> p_madw->mad_addr.addr_type.gsi.remote_qkey =
> cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
> p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
> - p_madw->mad_addr.addr_type.gsi.service_level = 0;
> + p_madw->mad_addr.addr_type.gsi.service_level = sl;
> p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
> p_madw->resp_expected = TRUE;
>
> if (p_context)
> p_madw->context = *p_context;
>
> - status = osm_vendor_send(perfmgr->bind_handle, p_madw, TRUE);
> + if (p_pm_mad)
> + *p_pm_mad = pm_mad;
Nit: formatting (indent with tabs rather than spaces)
> +
> + OSM_LOG_EXIT(perfmgr->log);
> +
> + return (p_madw);
> +}
>
> +/**********************************************************************
> + * Send a Performance Management class MAD
> + **********************************************************************/
> +static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr,
> + osm_madw_t * const p_madw)
> +{
> + ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw,
> + TRUE);
> if (status == IB_SUCCESS) {
> /* pause thread if there are too many outstanding requests */
> cl_atomic_inc(&(perfmgr->outstanding_queries));
> @@ -427,6 +439,39 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
> }
> perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE;
> }
> + return (status);
> +}
> +
> +
> +/**********************************************************************
> + * Form and send the PortCounters MAD for a single port.
> + **********************************************************************/
> +static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
> + ib_net16_t dest_lid,
> + ib_net32_t dest_qp, uint16_t pkey_ix,
> + uint8_t port, uint8_t mad_method,
> + osm_madw_context_t * p_context,
> + uint8_t sl)
> +{
> + ib_api_status_t status = IB_SUCCESS;
> + ib_port_counters_t *port_counter = NULL;
> + ib_perfmgt_mad_t *pm_mad = NULL;
> + osm_madw_t *p_madw = NULL;
> +
> + OSM_LOG_ENTER(perfmgr->log);
> +
> + p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix,
> + mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context,
> + &pm_mad);
> + if (p_madw == NULL)
> + return IB_INSUFFICIENT_MEMORY;
> +
> + port_counter = (ib_port_counters_t *) & pm_mad->data;
> + memset(port_counter, 0, sizeof(*port_counter));
> + port_counter->port_select = port;
> + port_counter->counter_select = 0xFFFF;
> +
> + status = perfmgr_send_mad(perfmgr, p_madw);
>
> OSM_LOG_EXIT(perfmgr->log);
> return status;
> @@ -469,6 +514,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
> mon_node->guid = node_guid;
> mon_node->name = strdup(node->print_desc);
> mon_node->num_ports = num_ports;
> + mon_node->node_type = node->node_info.node_type;
> /* check for enhanced switch port 0 */
> mon_node->esp0 = (node->sw &&
> ib_switch_info_is_enhanced_port0(&node->sw->
> @@ -491,6 +537,35 @@ Exit:
> }
>
> /**********************************************************************
> + * Form and send the ClassPortInfo MAD for a single port.
> + **********************************************************************/
> +static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm,
> + ib_net16_t dest_lid,
> + ib_net32_t dest_qp,
> + uint16_t pkey_ix,
> + uint8_t port,
> + osm_madw_context_t * p_context,
> + uint8_t sl)
> +{
> + ib_api_status_t status = IB_SUCCESS;
> + osm_madw_t *p_madw = NULL;
> +
> + OSM_LOG_ENTER(pm->log);
> +
> + p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp,
> + pkey_ix, IB_MAD_METHOD_GET,
> + IB_MAD_ATTR_CLASS_PORT_INFO, p_context,
> + NULL);
> + if (p_madw == NULL)
> + return IB_INSUFFICIENT_MEMORY;
> +
> + status = perfmgr_send_mad(pm, p_madw);
> +
> + OSM_LOG_EXIT(pm->log);
> + return status;
> +}
> +
> +/**********************************************************************
> * query the Port Counters of all the nodes in the subnet.
> **********************************************************************/
> static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
> @@ -557,22 +632,42 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
> mad_context.perfmgr_context.node_guid = node_guid;
> mad_context.perfmgr_context.port = port;
> mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET;
> +
> + if (!mon_node->port[port].cpi_valid) {
> + status = perfmgr_send_cpi_mad(pm, lid, remote_qp,
> + mon_node->port[port].pkey_ix,
> + port, &mad_context,
> + 0); /* FIXME SL != 0 */
> + if (status != IB_SUCCESS)
> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: "
> + "Failed to issue ClassPortInfo query "
> + "for node 0x%" PRIx64
> + " port %d (%s)\n",
> + node->node_info.node_guid, port,
> + node->print_desc);
> + if (mon_node->node_type == IB_NODE_TYPE_SWITCH)
> + goto Exit; /* only need to issue 1 CPI query
> + for switches */
Have you tried this with switches that have a base SP0 (i.e. not an enhanced switch port 0)?
> + } else {
> +
> #ifdef ENABLE_OSM_PERF_MGR_PROFILE
> - gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
> + gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
> #endif
> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
> - PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
> - cl_ntoh16(lid), node->print_desc);
> - status = perfmgr_send_pc_mad(pm, lid, remote_qp,
> - mon_node->port[port].pkey_ix,
> - port, IB_MAD_METHOD_GET,
> - &mad_context);
> - if (status != IB_SUCCESS)
> - OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
> - "Failed to issue port counter query for node 0x%"
> - PRIx64 " port %d (%s)\n",
> - node->node_info.node_guid, port,
> - node->print_desc);
> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
> + PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
> + cl_ntoh16(lid), node->print_desc);
> + status = perfmgr_send_pc_mad(pm, lid, remote_qp,
> + mon_node->port[port].pkey_ix,
> + port, IB_MAD_METHOD_GET,
> + &mad_context,
> + 0); /* FIXME SL != 0 */
> + if (status != IB_SUCCESS)
> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
> + "Failed to issue port counter query for node 0x%"
> + PRIx64 " port %d (%s)\n",
> + node->node_info.node_guid, port,
> + node->print_desc);
> + }
> }
> Exit:
> cl_plock_release(&pm->osm->lock);
> @@ -1053,7 +1148,8 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
> /* clear port counters */
> status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix,
> port, IB_MAD_METHOD_SET,
> - &mad_context);
> + &mad_context,
> + 0); /* FIXME SL != 0 */
> if (status != IB_SUCCESS)
> OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: "
> "Failed to send clear counters MAD for %s (0x%"
> @@ -1187,6 +1283,7 @@ static void pc_recv_process(void *context, void *data)
> monitored_node_t *p_mon_node;
> int16_t pkey_ix = 0;
> boolean_t valid = TRUE;
> + ib_class_port_info_t *cpi = NULL;
>
> OSM_LOG_ENTER(pm->log);
>
> @@ -1209,15 +1306,44 @@ static void pc_recv_process(void *context, void *data)
> CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS ||
> p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
>
> + /* capture CLASS_PORT_INFO data */
> + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
> + cpi = (ib_class_port_info_t *) &
> + (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
> +
> + cl_plock_acquire(&pm->osm->lock);
> + /* validate port number */
> + if (port >= p_mon_node->num_ports) {
> + cl_plock_release(&pm->osm->lock);
> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: "
> + "Invalid port num %d for GUID 0x%016"
> + PRIx64 " num ports %d\n", port, node_guid,
> + p_mon_node->num_ports);
> + goto Exit;
> + }
> + if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) {
> + int i = 0;
> + for (i = p_mon_node->esp0 ? 0 : 1;
> + i < p_mon_node->num_ports;
> + i++) {
> + p_mon_node->port[i].cap_mask = cpi->cap_mask;
> + p_mon_node->port[i].cpi_valid = TRUE;
> + }
> + } else {
> + p_mon_node->port[port].cap_mask = cpi->cap_mask;
> + p_mon_node->port[port].cpi_valid = TRUE;
> + }
> + cl_plock_release(&pm->osm->lock);
> + }
> +
> /* Response could also be redirection (IBM eHCA PMA does this) */
> - if (p_mad->status & IB_MAD_STATUS_REDIRECT &&
> - p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
> + if (p_mad->status & IB_MAD_STATUS_REDIRECT) {
Shouldn't this be part of the if (p_mad->attr_id ==
IB_MAD_ATTR_CLASS_PORT_INFO) clause?
-- Hal
> char gid_str[INET6_ADDRSTRLEN];
> - ib_class_port_info_t *cpi =
> - (ib_class_port_info_t *) &
> - (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
> ib_api_status_t status;
>
> + CL_ASSERT(cpi); /* Redirect should have returned CPI
> + (processed in previous block) */
> +
> OSM_LOG(pm->log, OSM_LOG_VERBOSE,
> "Redirection to LID %u GID %s QP 0x%x received\n",
> cl_ntoh16(cpi->redir_lid),
> @@ -1292,20 +1418,28 @@ static void pc_recv_process(void *context, void *data)
> if (!valid)
> goto Exit;
>
> - /* Finally, reissue the query to the redirected location */
> - status = perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp,
> - pkey_ix, port,
> - mad_context->perfmgr_context.
> - mad_method, mad_context);
> + /* Finally, issue a CPI query to the redirected location */
> + p_mon_node->port[port].cpi_valid = FALSE;
> + status = perfmgr_send_cpi_mad(pm, cpi->redir_lid,
> + cpi->redir_qp, pkey_ix,
> + port, mad_context,
> + 0); /* FIXME SL != 0 */
> if (status != IB_SUCCESS)
> OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: "
> - "Failed to send redirected MAD with method 0x%x for node 0x%"
> - PRIx64 " port %d\n",
> + "Failed to send redirected MAD "
> + "with method 0x%x for node %s "
> + "(NodeGuid 0x%" PRIx64 ") port %d\n",
> mad_context->perfmgr_context.mad_method,
> - node_guid, port);
> + p_mon_node->name, node_guid, port);
> goto Exit;
> }
>
> + /* ClassPortInfo needed to process optional Redirection
> + * now exit normally
> + */
> + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO)
> + goto Exit;
> +
> perfmgr_db_fill_err_read(wire_read, &err_reading);
> /* FIXME separate query for extended counters if they are supported
> * on the port.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port.
[not found] ` <512CCEC7.70304-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2013-02-26 18:58 ` Ira Weiny
[not found] ` <20130226105803.c3bae4bdd1d2af03bce373db-i2BcT+NCU+M@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Ira Weiny @ 2013-02-26 18:58 UTC (permalink / raw)
To: Hal Rosenstock; +Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA
On Tue, 26 Feb 2013 10:03:35 -0500
Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:
> On 2/21/2013 4:33 PM, Ira Weiny wrote:
> >
>
> So 2 round trips are now needed for first-time ports to determine
> whether or not extended counters are supported. I don't see a better way
> around this.
Nope, this is the best we can do with the current spec.
>
> >
> > Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org>
> > ---
> > include/opensm/osm_perfmgr.h | 4 +
> > opensm/osm_perfmgr.c | 224 +++++++++++++++++++++++++++++++++---------
> > 2 files changed, 183 insertions(+), 45 deletions(-)
> >
> > diff --git a/include/opensm/osm_perfmgr.h b/include/opensm/osm_perfmgr.h
> > index 26b1ae6..3fa42d5 100644
> > --- a/include/opensm/osm_perfmgr.h
> > +++ b/include/opensm/osm_perfmgr.h
> > @@ -100,6 +100,9 @@ typedef struct monitored_port {
> > ib_net16_t lid;
> > ib_net16_t pkey;
> > ib_net32_t qp;
> > + /* ClassPortInfo fields */
> > + boolean_t cpi_valid;
> > + ib_net16_t cap_mask;
> > } monitored_port_t;
> >
> > /* Node to store information about nodes being monitored */
> > @@ -107,6 +110,7 @@ typedef struct monitored_node {
> > cl_map_item_t map_item;
> > struct monitored_node *next;
> > uint64_t guid;
> > + uint8_t node_type;
> > boolean_t esp0;
> > char *name;
> > uint32_t num_ports;
> > diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c
> > index 9bc1154..c71111f 100644
> > --- a/opensm/osm_perfmgr.c
> > +++ b/opensm/osm_perfmgr.c
> > @@ -356,17 +356,20 @@ static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
> > return get_base_lid(p_node, port);
> > }
> >
> > +
> > /**********************************************************************
> > - * Form and send the Port Counters MAD for a single port.
> > + * Build a Performance Management class MAD
> > **********************************************************************/
> > -static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
> > - ib_net16_t dest_lid,
> > - ib_net32_t dest_qp, uint16_t pkey_ix,
> > - uint8_t port, uint8_t mad_method,
> > - osm_madw_context_t * p_context)
> > +static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr,
> > + ib_net16_t dest_lid,
> > + uint8_t sl,
> > + ib_net32_t dest_qp,
> > + uint16_t pkey_ix,
> > + uint8_t mad_method,
> > + ib_net16_t attr_id,
> > + osm_madw_context_t * p_context,
> > + ib_perfmgt_mad_t ** p_pm_mad)
> > {
> > - ib_api_status_t status = IB_SUCCESS;
> > - ib_port_counters_t *port_counter = NULL;
> > ib_perfmgt_mad_t *pm_mad = NULL;
> > osm_madw_t *p_madw = NULL;
> >
> > @@ -375,7 +378,7 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
> > p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle,
> > MAD_BLOCK_SIZE, NULL);
> > if (p_madw == NULL)
> > - return IB_INSUFFICIENT_MEMORY;
> > + return NULL;
> >
> > pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw);
> >
> > @@ -393,29 +396,38 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
> > pm_mad->header.trans_id =
> > cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) &
> > (uint64_t) (0xFFFFFFFF));
> > - pm_mad->header.attr_id = IB_MAD_ATTR_PORT_CNTRS;
> > + pm_mad->header.attr_id = attr_id;
> > pm_mad->header.resv = 0;
> > pm_mad->header.attr_mod = 0;
> >
> > - port_counter = (ib_port_counters_t *) & pm_mad->data;
> > - memset(port_counter, 0, sizeof(*port_counter));
> > - port_counter->port_select = port;
> > - port_counter->counter_select = 0xFFFF;
> > -
> > p_madw->mad_addr.dest_lid = dest_lid;
> > p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
> > p_madw->mad_addr.addr_type.gsi.remote_qkey =
> > cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
> > p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
> > - p_madw->mad_addr.addr_type.gsi.service_level = 0;
> > + p_madw->mad_addr.addr_type.gsi.service_level = sl;
> > p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
> > p_madw->resp_expected = TRUE;
> >
> > if (p_context)
> > p_madw->context = *p_context;
> >
> > - status = osm_vendor_send(perfmgr->bind_handle, p_madw, TRUE);
> > + if (p_pm_mad)
> > + *p_pm_mad = pm_mad;
>
> Nit: formatting (tabs rather than spaces)
Got it in V2
>
> > +
> > + OSM_LOG_EXIT(perfmgr->log);
> > +
> > + return (p_madw);
> > +}
> >
> > +/**********************************************************************
> > + * Send a Performance Management class MAD
> > + **********************************************************************/
> > +static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr,
> > + osm_madw_t * const p_madw)
> > +{
> > + ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw,
> > + TRUE);
> > if (status == IB_SUCCESS) {
> > /* pause thread if there are too many outstanding requests */
> > cl_atomic_inc(&(perfmgr->outstanding_queries));
> > @@ -427,6 +439,39 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
> > }
> > perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE;
> > }
> > + return (status);
> > +}
> > +
> > +
> > +/**********************************************************************
> > + * Form and send the PortCounters MAD for a single port.
> > + **********************************************************************/
> > +static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
> > + ib_net16_t dest_lid,
> > + ib_net32_t dest_qp, uint16_t pkey_ix,
> > + uint8_t port, uint8_t mad_method,
> > + osm_madw_context_t * p_context,
> > + uint8_t sl)
> > +{
> > + ib_api_status_t status = IB_SUCCESS;
> > + ib_port_counters_t *port_counter = NULL;
> > + ib_perfmgt_mad_t *pm_mad = NULL;
> > + osm_madw_t *p_madw = NULL;
> > +
> > + OSM_LOG_ENTER(perfmgr->log);
> > +
> > + p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix,
> > + mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context,
> > + &pm_mad);
> > + if (p_madw == NULL)
> > + return IB_INSUFFICIENT_MEMORY;
> > +
> > + port_counter = (ib_port_counters_t *) & pm_mad->data;
> > + memset(port_counter, 0, sizeof(*port_counter));
> > + port_counter->port_select = port;
> > + port_counter->counter_select = 0xFFFF;
> > +
> > + status = perfmgr_send_mad(perfmgr, p_madw);
> >
> > OSM_LOG_EXIT(perfmgr->log);
> > return status;
> > @@ -469,6 +514,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
> > mon_node->guid = node_guid;
> > mon_node->name = strdup(node->print_desc);
> > mon_node->num_ports = num_ports;
> > + mon_node->node_type = node->node_info.node_type;
> > /* check for enhanced switch port 0 */
> > mon_node->esp0 = (node->sw &&
> > ib_switch_info_is_enhanced_port0(&node->sw->
> > @@ -491,6 +537,35 @@ Exit:
> > }
> >
> > /**********************************************************************
> > + * Form and send the ClassPortInfo MAD for a single port.
> > + **********************************************************************/
> > +static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm,
> > + ib_net16_t dest_lid,
> > + ib_net32_t dest_qp,
> > + uint16_t pkey_ix,
> > + uint8_t port,
> > + osm_madw_context_t * p_context,
> > + uint8_t sl)
> > +{
> > + ib_api_status_t status = IB_SUCCESS;
> > + osm_madw_t *p_madw = NULL;
> > +
> > + OSM_LOG_ENTER(pm->log);
> > +
> > + p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp,
> > + pkey_ix, IB_MAD_METHOD_GET,
> > + IB_MAD_ATTR_CLASS_PORT_INFO, p_context,
> > + NULL);
> > + if (p_madw == NULL)
> > + return IB_INSUFFICIENT_MEMORY;
> > +
> > + status = perfmgr_send_mad(pm, p_madw);
> > +
> > + OSM_LOG_EXIT(pm->log);
> > + return status;
> > +}
> > +
> > +/**********************************************************************
> > * query the Port Counters of all the nodes in the subnet.
> > **********************************************************************/
> > static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
> > @@ -557,22 +632,42 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
> > mad_context.perfmgr_context.node_guid = node_guid;
> > mad_context.perfmgr_context.port = port;
> > mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET;
> > +
> > + if (!mon_node->port[port].cpi_valid) {
> > + status = perfmgr_send_cpi_mad(pm, lid, remote_qp,
> > + mon_node->port[port].pkey_ix,
> > + port, &mad_context,
> > + 0); /* FIXME SL != 0 */
> > + if (status != IB_SUCCESS)
> > + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: "
> > + "Failed to issue ClassPortInfo query "
> > + "for node 0x%" PRIx64
> > + " port %d (%s)\n",
> > + node->node_info.node_guid, port,
> > + node->print_desc);
> > + if (mon_node->node_type == IB_NODE_TYPE_SWITCH)
> > + goto Exit; /* only need to issue 1 CPI query
> > + for switches */
>
> Have you tried switches with base SP0 ?
Yes. Why? I admit I may be confused about when SP0 is different from physical ports.
>
> > + } else {
> > +
> > #ifdef ENABLE_OSM_PERF_MGR_PROFILE
> > - gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
> > + gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
> > #endif
> > - OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
> > - PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
> > - cl_ntoh16(lid), node->print_desc);
> > - status = perfmgr_send_pc_mad(pm, lid, remote_qp,
> > - mon_node->port[port].pkey_ix,
> > - port, IB_MAD_METHOD_GET,
> > - &mad_context);
> > - if (status != IB_SUCCESS)
> > - OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
> > - "Failed to issue port counter query for node 0x%"
> > - PRIx64 " port %d (%s)\n",
> > - node->node_info.node_guid, port,
> > - node->print_desc);
> > + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
> > + PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
> > + cl_ntoh16(lid), node->print_desc);
> > + status = perfmgr_send_pc_mad(pm, lid, remote_qp,
> > + mon_node->port[port].pkey_ix,
> > + port, IB_MAD_METHOD_GET,
> > + &mad_context,
> > + 0); /* FIXME SL != 0 */
> > + if (status != IB_SUCCESS)
> > + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
> > + "Failed to issue port counter query for node 0x%"
> > + PRIx64 " port %d (%s)\n",
> > + node->node_info.node_guid, port,
> > + node->print_desc);
> > + }
> > }
> > Exit:
> > cl_plock_release(&pm->osm->lock);
> > @@ -1053,7 +1148,8 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
> > /* clear port counters */
> > status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix,
> > port, IB_MAD_METHOD_SET,
> > - &mad_context);
> > + &mad_context,
> > + 0); /* FIXME SL != 0 */
> > if (status != IB_SUCCESS)
> > OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: "
> > "Failed to send clear counters MAD for %s (0x%"
> > @@ -1187,6 +1283,7 @@ static void pc_recv_process(void *context, void *data)
> > monitored_node_t *p_mon_node;
> > int16_t pkey_ix = 0;
> > boolean_t valid = TRUE;
> > + ib_class_port_info_t *cpi = NULL;
> >
> > OSM_LOG_ENTER(pm->log);
> >
> > @@ -1209,15 +1306,44 @@ static void pc_recv_process(void *context, void *data)
> > CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS ||
> > p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
> >
> > + /* capture CLASS_PORT_INFO data */
> > + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
> > + cpi = (ib_class_port_info_t *) &
> > + (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
> > +
> > + cl_plock_acquire(&pm->osm->lock);
> > + /* validate port number */
> > + if (port >= p_mon_node->num_ports) {
> > + cl_plock_release(&pm->osm->lock);
> > + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: "
> > + "Invalid port num %d for GUID 0x%016"
> > + PRIx64 " num ports %d\n", port, node_guid,
> > + p_mon_node->num_ports);
> > + goto Exit;
> > + }
> > + if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) {
> > + int i = 0;
> > + for (i = p_mon_node->esp0 ? 0 : 1;
> > + i < p_mon_node->num_ports;
> > + i++) {
> > + p_mon_node->port[i].cap_mask = cpi->cap_mask;
> > + p_mon_node->port[i].cpi_valid = TRUE;
> > + }
> > + } else {
> > + p_mon_node->port[port].cap_mask = cpi->cap_mask;
> > + p_mon_node->port[port].cpi_valid = TRUE;
> > + }
> > + cl_plock_release(&pm->osm->lock);
> > + }
> > +
> > /* Response could also be redirection (IBM eHCA PMA does this) */
> > - if (p_mad->status & IB_MAD_STATUS_REDIRECT &&
> > - p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
> > + if (p_mad->status & IB_MAD_STATUS_REDIRECT) {
>
> Shouldn't this be part of if (p_mad->attr_id ==
> IB_MAD_ATTR_CLASS_PORT_INFO) clause ?
>
Yes, but I took care of that in the next patch, where I cleaned up the code and made that entire block a function, "handle_redirect".
Ira
> -- Hal
>
> > char gid_str[INET6_ADDRSTRLEN];
> > - ib_class_port_info_t *cpi =
> > - (ib_class_port_info_t *) &
> > - (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
> > ib_api_status_t status;
> >
> > + CL_ASSERT(cpi); /* Redirect should have returned CPI
> > + (processed in previous block) */
> > +
> > OSM_LOG(pm->log, OSM_LOG_VERBOSE,
> > "Redirection to LID %u GID %s QP 0x%x received\n",
> > cl_ntoh16(cpi->redir_lid),
> > @@ -1292,20 +1418,28 @@ static void pc_recv_process(void *context, void *data)
> > if (!valid)
> > goto Exit;
> >
> > - /* Finally, reissue the query to the redirected location */
> > - status = perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp,
> > - pkey_ix, port,
> > - mad_context->perfmgr_context.
> > - mad_method, mad_context);
> > + /* Finally, issue a CPI query to the redirected location */
> > + p_mon_node->port[port].cpi_valid = FALSE;
> > + status = perfmgr_send_cpi_mad(pm, cpi->redir_lid,
> > + cpi->redir_qp, pkey_ix,
> > + port, mad_context,
> > + 0); /* FIXME SL != 0 */
> > if (status != IB_SUCCESS)
> > OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: "
> > - "Failed to send redirected MAD with method 0x%x for node 0x%"
> > - PRIx64 " port %d\n",
> > + "Failed to send redirected MAD "
> > + "with method 0x%x for node %s "
> > + "(NodeGuid 0x%" PRIx64 ") port %d\n",
> > mad_context->perfmgr_context.mad_method,
> > - node_guid, port);
> > + p_mon_node->name, node_guid, port);
> > goto Exit;
> > }
> >
> > + /* ClassPortInfo needed to process optional Redirection
> > + * now exit normally
> > + */
> > + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO)
> > + goto Exit;
> > +
> > perfmgr_db_fill_err_read(wire_read, &err_reading);
> > /* FIXME separate query for extended counters if they are supported
> > * on the port.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Ira Weiny
Member of Technical Staff
Lawrence Livermore National Lab
925-423-8008
weiny2-i2BcT+NCU+M@public.gmane.org
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port.
[not found] ` <20130226105803.c3bae4bdd1d2af03bce373db-i2BcT+NCU+M@public.gmane.org>
@ 2013-02-26 19:48 ` Hal Rosenstock
[not found] ` <512D118A.5010503-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Hal Rosenstock @ 2013-02-26 19:48 UTC (permalink / raw)
To: Ira Weiny; +Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA
On 2/26/2013 1:58 PM, Ira Weiny wrote:
> On Tue, 26 Feb 2013 10:03:35 -0500
> Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:
>
>> On 2/21/2013 4:33 PM, Ira Weiny wrote:
>>>
>>
>> So 2 round trips are now needed for first-time ports to determine
>> whether or not extended counters are supported. I don't see a better way
>> around this.
>
> Nope, this is the best we can do with the current spec.
>
>>
>>>
>>> Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org>
>>> ---
>>> include/opensm/osm_perfmgr.h | 4 +
>>> opensm/osm_perfmgr.c | 224 +++++++++++++++++++++++++++++++++---------
>>> 2 files changed, 183 insertions(+), 45 deletions(-)
>>>
>>> diff --git a/include/opensm/osm_perfmgr.h b/include/opensm/osm_perfmgr.h
>>> index 26b1ae6..3fa42d5 100644
>>> --- a/include/opensm/osm_perfmgr.h
>>> +++ b/include/opensm/osm_perfmgr.h
>>> @@ -100,6 +100,9 @@ typedef struct monitored_port {
>>> ib_net16_t lid;
>>> ib_net16_t pkey;
>>> ib_net32_t qp;
>>> + /* ClassPortInfo fields */
>>> + boolean_t cpi_valid;
>>> + ib_net16_t cap_mask;
>>> } monitored_port_t;
>>>
>>> /* Node to store information about nodes being monitored */
>>> @@ -107,6 +110,7 @@ typedef struct monitored_node {
>>> cl_map_item_t map_item;
>>> struct monitored_node *next;
>>> uint64_t guid;
>>> + uint8_t node_type;
>>> boolean_t esp0;
>>> char *name;
>>> uint32_t num_ports;
>>> diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c
>>> index 9bc1154..c71111f 100644
>>> --- a/opensm/osm_perfmgr.c
>>> +++ b/opensm/osm_perfmgr.c
>>> @@ -356,17 +356,20 @@ static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
>>> return get_base_lid(p_node, port);
>>> }
>>>
>>> +
>>> /**********************************************************************
>>> - * Form and send the Port Counters MAD for a single port.
>>> + * Build a Performance Management class MAD
>>> **********************************************************************/
>>> -static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
>>> - ib_net16_t dest_lid,
>>> - ib_net32_t dest_qp, uint16_t pkey_ix,
>>> - uint8_t port, uint8_t mad_method,
>>> - osm_madw_context_t * p_context)
>>> +static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr,
>>> + ib_net16_t dest_lid,
>>> + uint8_t sl,
>>> + ib_net32_t dest_qp,
>>> + uint16_t pkey_ix,
>>> + uint8_t mad_method,
>>> + ib_net16_t attr_id,
>>> + osm_madw_context_t * p_context,
>>> + ib_perfmgt_mad_t ** p_pm_mad)
>>> {
>>> - ib_api_status_t status = IB_SUCCESS;
>>> - ib_port_counters_t *port_counter = NULL;
>>> ib_perfmgt_mad_t *pm_mad = NULL;
>>> osm_madw_t *p_madw = NULL;
>>>
>>> @@ -375,7 +378,7 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
>>> p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle,
>>> MAD_BLOCK_SIZE, NULL);
>>> if (p_madw == NULL)
>>> - return IB_INSUFFICIENT_MEMORY;
>>> + return NULL;
>>>
>>> pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw);
>>>
>>> @@ -393,29 +396,38 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
>>> pm_mad->header.trans_id =
>>> cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) &
>>> (uint64_t) (0xFFFFFFFF));
>>> - pm_mad->header.attr_id = IB_MAD_ATTR_PORT_CNTRS;
>>> + pm_mad->header.attr_id = attr_id;
>>> pm_mad->header.resv = 0;
>>> pm_mad->header.attr_mod = 0;
>>>
>>> - port_counter = (ib_port_counters_t *) & pm_mad->data;
>>> - memset(port_counter, 0, sizeof(*port_counter));
>>> - port_counter->port_select = port;
>>> - port_counter->counter_select = 0xFFFF;
>>> -
>>> p_madw->mad_addr.dest_lid = dest_lid;
>>> p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
>>> p_madw->mad_addr.addr_type.gsi.remote_qkey =
>>> cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
>>> p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
>>> - p_madw->mad_addr.addr_type.gsi.service_level = 0;
>>> + p_madw->mad_addr.addr_type.gsi.service_level = sl;
>>> p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
>>> p_madw->resp_expected = TRUE;
>>>
>>> if (p_context)
>>> p_madw->context = *p_context;
>>>
>>> - status = osm_vendor_send(perfmgr->bind_handle, p_madw, TRUE);
>>> + if (p_pm_mad)
>>> + *p_pm_mad = pm_mad;
>>
>> Nit: formatting (tabs rather than spaces)
>
> Got it in V2
Thanks.
>
>>
>>> +
>>> + OSM_LOG_EXIT(perfmgr->log);
>>> +
>>> + return (p_madw);
>>> +}
>>>
>>> +/**********************************************************************
>>> + * Send a Performance Management class MAD
>>> + **********************************************************************/
>>> +static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr,
>>> + osm_madw_t * const p_madw)
>>> +{
>>> + ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw,
>>> + TRUE);
>>> if (status == IB_SUCCESS) {
>>> /* pause thread if there are too many outstanding requests */
>>> cl_atomic_inc(&(perfmgr->outstanding_queries));
>>> @@ -427,6 +439,39 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
>>> }
>>> perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE;
>>> }
>>> + return (status);
>>> +}
>>> +
>>> +
>>> +/**********************************************************************
>>> + * Form and send the PortCounters MAD for a single port.
>>> + **********************************************************************/
>>> +static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
>>> + ib_net16_t dest_lid,
>>> + ib_net32_t dest_qp, uint16_t pkey_ix,
>>> + uint8_t port, uint8_t mad_method,
>>> + osm_madw_context_t * p_context,
>>> + uint8_t sl)
>>> +{
>>> + ib_api_status_t status = IB_SUCCESS;
>>> + ib_port_counters_t *port_counter = NULL;
>>> + ib_perfmgt_mad_t *pm_mad = NULL;
>>> + osm_madw_t *p_madw = NULL;
>>> +
>>> + OSM_LOG_ENTER(perfmgr->log);
>>> +
>>> + p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix,
>>> + mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context,
>>> + &pm_mad);
>>> + if (p_madw == NULL)
>>> + return IB_INSUFFICIENT_MEMORY;
>>> +
>>> + port_counter = (ib_port_counters_t *) & pm_mad->data;
>>> + memset(port_counter, 0, sizeof(*port_counter));
>>> + port_counter->port_select = port;
>>> + port_counter->counter_select = 0xFFFF;
>>> +
>>> + status = perfmgr_send_mad(perfmgr, p_madw);
>>>
>>> OSM_LOG_EXIT(perfmgr->log);
>>> return status;
>>> @@ -469,6 +514,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
>>> mon_node->guid = node_guid;
>>> mon_node->name = strdup(node->print_desc);
>>> mon_node->num_ports = num_ports;
>>> + mon_node->node_type = node->node_info.node_type;
>>> /* check for enhanced switch port 0 */
>>> mon_node->esp0 = (node->sw &&
>>> ib_switch_info_is_enhanced_port0(&node->sw->
>>> @@ -491,6 +537,35 @@ Exit:
>>> }
>>>
>>> /**********************************************************************
>>> + * Form and send the ClassPortInfo MAD for a single port.
>>> + **********************************************************************/
>>> +static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm,
>>> + ib_net16_t dest_lid,
>>> + ib_net32_t dest_qp,
>>> + uint16_t pkey_ix,
>>> + uint8_t port,
>>> + osm_madw_context_t * p_context,
>>> + uint8_t sl)
>>> +{
>>> + ib_api_status_t status = IB_SUCCESS;
>>> + osm_madw_t *p_madw = NULL;
>>> +
>>> + OSM_LOG_ENTER(pm->log);
>>> +
>>> + p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp,
>>> + pkey_ix, IB_MAD_METHOD_GET,
>>> + IB_MAD_ATTR_CLASS_PORT_INFO, p_context,
>>> + NULL);
>>> + if (p_madw == NULL)
>>> + return IB_INSUFFICIENT_MEMORY;
>>> +
>>> + status = perfmgr_send_mad(pm, p_madw);
>>> +
>>> + OSM_LOG_EXIT(pm->log);
>>> + return status;
>>> +}
>>> +
>>> +/**********************************************************************
>>> * query the Port Counters of all the nodes in the subnet.
>>> **********************************************************************/
>>> static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
>>> @@ -557,22 +632,42 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
>>> mad_context.perfmgr_context.node_guid = node_guid;
>>> mad_context.perfmgr_context.port = port;
>>> mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET;
>>> +
>>> + if (!mon_node->port[port].cpi_valid) {
>>> + status = perfmgr_send_cpi_mad(pm, lid, remote_qp,
>>> + mon_node->port[port].pkey_ix,
>>> + port, &mad_context,
>>> + 0); /* FIXME SL != 0 */
>>> + if (status != IB_SUCCESS)
>>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: "
>>> + "Failed to issue ClassPortInfo query "
>>> + "for node 0x%" PRIx64
>>> + " port %d (%s)\n",
>>> + node->node_info.node_guid, port,
>>> + node->print_desc);
>>> + if (mon_node->node_type == IB_NODE_TYPE_SWITCH)
>>> + goto Exit; /* only need to issue 1 CPI query
>>> + for switches */
>>
>> Have you tried switches with base SP0 ?
>
> Yes. Why? I admit I may be confused about when SP0 is different from physical ports.
The reason I asked is that, with BSP0, the ClassPortInfo is being
requested on port 1, not port 0.
I'm not sure what you're referring to in terms of SP0 being different
from physical ports, so I'll elaborate on what I think you may be asking:
In general, BSP0 differs from ESP0 in terms of PMA attributes in
that it is excluded from PortSelect. PortSelect says "However, 0 is
only valid for the enhanced switch management port; it is
ignored for the base switch management port."
SP0 is different from physical ports in that it's a virtual IB port
(usually across a CPU link or the like) and doesn't have a remote peer.
>>
>>> + } else {
>>> +
>>> #ifdef ENABLE_OSM_PERF_MGR_PROFILE
>>> - gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
>>> + gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
>>> #endif
>>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
>>> - PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
>>> - cl_ntoh16(lid), node->print_desc);
>>> - status = perfmgr_send_pc_mad(pm, lid, remote_qp,
>>> - mon_node->port[port].pkey_ix,
>>> - port, IB_MAD_METHOD_GET,
>>> - &mad_context);
>>> - if (status != IB_SUCCESS)
>>> - OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
>>> - "Failed to issue port counter query for node 0x%"
>>> - PRIx64 " port %d (%s)\n",
>>> - node->node_info.node_guid, port,
>>> - node->print_desc);
>>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
>>> + PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
>>> + cl_ntoh16(lid), node->print_desc);
>>> + status = perfmgr_send_pc_mad(pm, lid, remote_qp,
>>> + mon_node->port[port].pkey_ix,
>>> + port, IB_MAD_METHOD_GET,
>>> + &mad_context,
>>> + 0); /* FIXME SL != 0 */
>>> + if (status != IB_SUCCESS)
>>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
>>> + "Failed to issue port counter query for node 0x%"
>>> + PRIx64 " port %d (%s)\n",
>>> + node->node_info.node_guid, port,
>>> + node->print_desc);
>>> + }
>>> }
>>> Exit:
>>> cl_plock_release(&pm->osm->lock);
>>> @@ -1053,7 +1148,8 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
>>> /* clear port counters */
>>> status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix,
>>> port, IB_MAD_METHOD_SET,
>>> - &mad_context);
>>> + &mad_context,
>>> + 0); /* FIXME SL != 0 */
>>> if (status != IB_SUCCESS)
>>> OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: "
>>> "Failed to send clear counters MAD for %s (0x%"
>>> @@ -1187,6 +1283,7 @@ static void pc_recv_process(void *context, void *data)
>>> monitored_node_t *p_mon_node;
>>> int16_t pkey_ix = 0;
>>> boolean_t valid = TRUE;
>>> + ib_class_port_info_t *cpi = NULL;
>>>
>>> OSM_LOG_ENTER(pm->log);
>>>
>>> @@ -1209,15 +1306,44 @@ static void pc_recv_process(void *context, void *data)
>>> CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS ||
>>> p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
>>>
>>> + /* capture CLASS_PORT_INFO data */
>>> + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
>>> + cpi = (ib_class_port_info_t *) &
>>> + (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
>>> +
>>> + cl_plock_acquire(&pm->osm->lock);
>>> + /* validate port number */
>>> + if (port >= p_mon_node->num_ports) {
>>> + cl_plock_release(&pm->osm->lock);
>>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: "
>>> + "Invalid port num %d for GUID 0x%016"
>>> + PRIx64 " num ports %d\n", port, node_guid,
>>> + p_mon_node->num_ports);
>>> + goto Exit;
>>> + }
>>> + if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) {
>>> + int i = 0;
>>> + for (i = p_mon_node->esp0 ? 0 : 1;
>>> + i < p_mon_node->num_ports;
>>> + i++) {
>>> + p_mon_node->port[i].cap_mask = cpi->cap_mask;
>>> + p_mon_node->port[i].cpi_valid = TRUE;
>>> + }
>>> + } else {
>>> + p_mon_node->port[port].cap_mask = cpi->cap_mask;
>>> + p_mon_node->port[port].cpi_valid = TRUE;
>>> + }
>>> + cl_plock_release(&pm->osm->lock);
>>> + }
>>> +
>>> /* Response could also be redirection (IBM eHCA PMA does this) */
>>> - if (p_mad->status & IB_MAD_STATUS_REDIRECT &&
>>> - p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
>>> + if (p_mad->status & IB_MAD_STATUS_REDIRECT) {
>>
>> Shouldn't this be part of if (p_mad->attr_id ==
>> IB_MAD_ATTR_CLASS_PORT_INFO) clause ?
>>
>
> Yes but I took care of that in the next patch where I cleaned up the code and made that entire block a function "handle_redirect".
I see that now. The semantics here shouldn't have been changed, but it's
probably OK that the next patch fixes that again.
-- Hal
> Ira
<snip...>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port.
[not found] ` <512D118A.5010503-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2013-02-26 21:07 ` Ira Weiny
[not found] ` <20130226130711.8d96c1834b47c55e2abef24e-i2BcT+NCU+M@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Ira Weiny @ 2013-02-26 21:07 UTC (permalink / raw)
To: Hal Rosenstock; +Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA
On Tue, 26 Feb 2013 14:48:26 -0500
Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:
> On 2/26/2013 1:58 PM, Ira Weiny wrote:
> > On Tue, 26 Feb 2013 10:03:35 -0500
> > Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:
> >
> >> On 2/21/2013 4:33 PM, Ira Weiny wrote:
> >>>
[snip]
> >>> +/**********************************************************************
> >>> * query the Port Counters of all the nodes in the subnet.
> >>> **********************************************************************/
> >>> static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
> >>> @@ -557,22 +632,42 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
> >>> mad_context.perfmgr_context.node_guid = node_guid;
> >>> mad_context.perfmgr_context.port = port;
> >>> mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET;
> >>> +
> >>> + if (!mon_node->port[port].cpi_valid) {
> >>> + status = perfmgr_send_cpi_mad(pm, lid, remote_qp,
> >>> + mon_node->port[port].pkey_ix,
> >>> + port, &mad_context,
> >>> + 0); /* FIXME SL != 0 */
> >>> + if (status != IB_SUCCESS)
> >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: "
> >>> + "Failed to issue ClassPortInfo query "
> >>> + "for node 0x%" PRIx64
> >>> + " port %d (%s)\n",
> >>> + node->node_info.node_guid, port,
> >>> + node->print_desc);
> >>> + if (mon_node->node_type == IB_NODE_TYPE_SWITCH)
> >>> + goto Exit; /* only need to issue 1 CPI query
> >>> + for switches */
> >>
> >> Have you tried switches with base SP0 ?
> >
> > Yes. Why? I admit I may be confused about when SP0 is different from physical ports.
>
> The reason I asked here is that, for BSP0, the ClassPortInfo is being
> requested on port 1, not 0.
>
> I'm not sure what you're referring to in terms of SP0 being different
> from physical ports so I'll elaborate on what I think you may be asking:
>
> In general, BSP0 is different than ESP0 in terms of PMA attributes in
> that it is excluded in PortSelect. PortSelect says "However, 0 is
> only valid for the enhanced switch management port; it is
> ignored for the base switch management port."
Right, BSP0 doesn't have PM counters. So why did you ask if I had tried this with base SP0?
To be clear I have tried this with both Enhanced SP0 and Base SP0 and it seems to work correctly.
Ira
>
> SP0 is different from physical ports in that it's a virtual IB port
> (usually across CPU link or the like) and doesn't have a remote peer.
>
> >>
> >>> + } else {
> >>> +
> >>> #ifdef ENABLE_OSM_PERF_MGR_PROFILE
> >>> - gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
> >>> + gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
> >>> #endif
> >>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
> >>> - PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
> >>> - cl_ntoh16(lid), node->print_desc);
> >>> - status = perfmgr_send_pc_mad(pm, lid, remote_qp,
> >>> - mon_node->port[port].pkey_ix,
> >>> - port, IB_MAD_METHOD_GET,
> >>> - &mad_context);
> >>> - if (status != IB_SUCCESS)
> >>> - OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
> >>> - "Failed to issue port counter query for node 0x%"
> >>> - PRIx64 " port %d (%s)\n",
> >>> - node->node_info.node_guid, port,
> >>> - node->print_desc);
> >>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
> >>> + PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
> >>> + cl_ntoh16(lid), node->print_desc);
> >>> + status = perfmgr_send_pc_mad(pm, lid, remote_qp,
> >>> + mon_node->port[port].pkey_ix,
> >>> + port, IB_MAD_METHOD_GET,
> >>> + &mad_context,
> >>> + 0); /* FIXME SL != 0 */
> >>> + if (status != IB_SUCCESS)
> >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
> >>> + "Failed to issue port counter query for node 0x%"
> >>> + PRIx64 " port %d (%s)\n",
> >>> + node->node_info.node_guid, port,
> >>> + node->print_desc);
> >>> + }
> >>> }
> >>> Exit:
> >>> cl_plock_release(&pm->osm->lock);
> >>> @@ -1053,7 +1148,8 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
> >>> /* clear port counters */
> >>> status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix,
> >>> port, IB_MAD_METHOD_SET,
> >>> - &mad_context);
> >>> + &mad_context,
> >>> + 0); /* FIXME SL != 0 */
> >>> if (status != IB_SUCCESS)
> >>> OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: "
> >>> "Failed to send clear counters MAD for %s (0x%"
> >>> @@ -1187,6 +1283,7 @@ static void pc_recv_process(void *context, void *data)
> >>> monitored_node_t *p_mon_node;
> >>> int16_t pkey_ix = 0;
> >>> boolean_t valid = TRUE;
> >>> + ib_class_port_info_t *cpi = NULL;
> >>>
> >>> OSM_LOG_ENTER(pm->log);
> >>>
> >>> @@ -1209,15 +1306,44 @@ static void pc_recv_process(void *context, void *data)
> >>> CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS ||
> >>> p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
> >>>
> >>> + /* capture CLASS_PORT_INFO data */
> >>> + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
> >>> + cpi = (ib_class_port_info_t *) &
> >>> + (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
> >>> +
> >>> + cl_plock_acquire(&pm->osm->lock);
> >>> + /* validate port number */
> >>> + if (port >= p_mon_node->num_ports) {
> >>> + cl_plock_release(&pm->osm->lock);
> >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: "
> >>> + "Invalid port num %d for GUID 0x%016"
> >>> + PRIx64 " num ports %d\n", port, node_guid,
> >>> + p_mon_node->num_ports);
> >>> + goto Exit;
> >>> + }
> >>> + if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) {
> >>> + int i = 0;
> >>> + for (i = p_mon_node->esp0 ? 0 : 1;
> >>> + i < p_mon_node->num_ports;
> >>> + i++) {
> >>> + p_mon_node->port[i].cap_mask = cpi->cap_mask;
> >>> + p_mon_node->port[i].cpi_valid = TRUE;
> >>> + }
> >>> + } else {
> >>> + p_mon_node->port[port].cap_mask = cpi->cap_mask;
> >>> + p_mon_node->port[port].cpi_valid = TRUE;
> >>> + }
> >>> + cl_plock_release(&pm->osm->lock);
> >>> + }
> >>> +
> >>> /* Response could also be redirection (IBM eHCA PMA does this) */
> >>> - if (p_mad->status & IB_MAD_STATUS_REDIRECT &&
> >>> - p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
> >>> + if (p_mad->status & IB_MAD_STATUS_REDIRECT) {
> >>
> >> Shouldn't this be part of if (p_mad->attr_id ==
> >> IB_MAD_ATTR_CLASS_PORT_INFO) clause ?
> >>
> >
> > > Yes, but I took care of that in the next patch, where I cleaned up the code and made that entire block a function, "handle_redirect".
>
> I see that now. The semantics here shouldn't have been changed but it's
> probably OK that the next patch fixes that again.
>
> -- Hal
>
> > Ira
>
> <snip...>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Ira Weiny
Member of Technical Staff
Lawrence Livermore National Lab
925-423-8008
weiny2-i2BcT+NCU+M@public.gmane.org
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port.
[not found] ` <20130226130711.8d96c1834b47c55e2abef24e-i2BcT+NCU+M@public.gmane.org>
@ 2013-02-26 21:14 ` Hal Rosenstock
[not found] ` <512D25CC.3010109-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Hal Rosenstock @ 2013-02-26 21:14 UTC (permalink / raw)
To: Ira Weiny; +Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA
On 2/26/2013 4:07 PM, Ira Weiny wrote:
> Right, BSP0 doesn't have PM counters. So why did you ask if I had tried this with base SP0?
I thought I answered that in the previous response:
"The reason I asked here is that the ClassPortInfo is being requested on
port 1 not 0 for BSP0."
-- Hal
> To be clear I have tried this with both Enhanced SP0 and Base SP0 and it seems to work correctly.
>
> Ira
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port.
[not found] ` <512D25CC.3010109-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2013-02-26 21:24 ` Ira Weiny
[not found] ` <20130226132400.4ec40c660efd0c1eb0b6f69c-i2BcT+NCU+M@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Ira Weiny @ 2013-02-26 21:24 UTC (permalink / raw)
To: Hal Rosenstock; +Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA
On Tue, 26 Feb 2013 16:14:52 -0500
Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:
> On 2/26/2013 4:07 PM, Ira Weiny wrote:
> > Right, BSP0 doesn't have PM counters. So why did you ask if I had tried this with base SP0?
>
> I thought I answered that in the previous response:
> "The reason I asked here is that the ClassPortInfo is being requested on
> port 1 not 0 for BSP0."
But that is the correct thing to do, right?
Ira
>
> -- Hal
>
> > To be clear I have tried this with both Enhanced SP0 and Base SP0 and it seems to work correctly.
> >
> > Ira
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Ira Weiny
Member of Technical Staff
Lawrence Livermore National Lab
925-423-8008
weiny2-i2BcT+NCU+M@public.gmane.org
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port.
[not found] ` <20130226132400.4ec40c660efd0c1eb0b6f69c-i2BcT+NCU+M@public.gmane.org>
@ 2013-02-26 22:43 ` Hal Rosenstock
0 siblings, 0 replies; 8+ messages in thread
From: Hal Rosenstock @ 2013-02-26 22:43 UTC (permalink / raw)
To: Ira Weiny; +Cc: Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA
On 2/26/2013 4:24 PM, Ira Weiny wrote:
> On Tue, 26 Feb 2013 16:14:52 -0500
> Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:
>
>> On 2/26/2013 4:07 PM, Ira Weiny wrote:
>>> Right, BSP0 doesn't have PM counters. So why did you ask if I had tried this with base SP0?
>>
>> I thought I answered that in the previous response:
>> "The reason I asked here is that the ClassPortInfo is being requested on
>> port 1 not 0 for BSP0."
>
> But that is the correct thing to do, right?
I think that's legal per:
C13-29: The ClassPortInfo attribute shall be implemented for every GS
class agent supported by a node; it shall be implemented on every port
through which the GS class agent may be accessed.
-- Hal
>
> Ira
>
>>
>> -- Hal
>>
>>> To be clear I have tried this with both Enhanced SP0 and Base SP0 and it seems to work correctly.
>>>
>>> Ira
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2013-02-26 22:43 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-21 21:33 [PATCH 01/06] opensm/perfmgr: issue ClassPortInfo as first query to each port Ira Weiny
[not found] ` <20130221133331.593f30977427848f4373b57a-i2BcT+NCU+M@public.gmane.org>
2013-02-26 15:03 ` Hal Rosenstock
[not found] ` <512CCEC7.70304-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2013-02-26 18:58 ` Ira Weiny
[not found] ` <20130226105803.c3bae4bdd1d2af03bce373db-i2BcT+NCU+M@public.gmane.org>
2013-02-26 19:48 ` Hal Rosenstock
[not found] ` <512D118A.5010503-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2013-02-26 21:07 ` Ira Weiny
[not found] ` <20130226130711.8d96c1834b47c55e2abef24e-i2BcT+NCU+M@public.gmane.org>
2013-02-26 21:14 ` Hal Rosenstock
[not found] ` <512D25CC.3010109-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2013-02-26 21:24 ` Ira Weiny
[not found] ` <20130226132400.4ec40c660efd0c1eb0b6f69c-i2BcT+NCU+M@public.gmane.org>
2013-02-26 22:43 ` Hal Rosenstock
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.