* [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request
@ 2009-11-02 11:50 Hal Rosenstock
[not found] ` <20091102115051.GA32233-Wuw85uim5zDR7s880joybQ@public.gmane.org>
0 siblings, 1 reply; 3+ messages in thread
From: Hal Rosenstock @ 2009-11-02 11:50 UTC (permalink / raw)
To: sashak-smomgflXvOZWk0Htik3J/w; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
Move some code out into separate handle_babbling_port routine
for readability/maintainability
Signed-off-by: Hal Rosenstock <hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
---
diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
index 5790461..1621fbc 100644
--- a/opensm/opensm/osm_trap_rcv.c
+++ b/opensm/opensm/osm_trap_rcv.c
@@ -312,6 +312,61 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
cl_ntoh16(source_lid), cl_ntoh64(trans_id));
}
+static int handle_babbling_port(osm_sm_t *sm, ib_mad_notice_attr_t *p_ntci,
+ uint32_t num_received,
+ boolean_t physp_change_trap,
+ osm_physp_t **pp_physp,
+ boolean_t *run_heavy_sweep,
+ uint64_t *event_wheel_timeout)
+{
+ if (print_num_received(num_received))
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
+ "Received trap %u times consecutively\n",
+ num_received);
+ /* If the trap provides info about a bad port, mark it as unhealthy. */
+ if (physp_change_trap == TRUE) {
+ /* get the port */
+ *pp_physp = get_physp_by_lid_and_num(sm,
+ cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
+ p_ntci->data_details.ntc_129_131.port_num);
+
+ if (!*pp_physp)
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
+ "Failed to find physical port by lid:%u num:%u\n",
+ cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
+ p_ntci->data_details.ntc_129_131.port_num);
+ else {
+ /* When babbling port policy option is enabled and
+ Threshold for disabling a "babbling" port is exceeded */
+ if (sm->p_subn->opt.babbling_port_policy &&
+ num_received >= 250 &&
+ disable_port(sm, *pp_physp) == 0)
+ return 1;
+
+ OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+ "Marking unhealthy physical port by lid:%u num:%u\n",
+ cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
+ p_ntci->data_details.ntc_129_131.port_num);
+ /* check if the current state of the p_physp is healthy. If
+ it is - then this is a first change of state. Run a heavy sweep.
+ if it is not - no need to mark it again - just restart the timer. */
+ if (osm_physp_is_healthy(*pp_physp)) {
+ osm_physp_set_health(*pp_physp, FALSE);
+ /* Make sure we sweep again - force a heavy sweep. */
+ /* The sweep should be done only after the re-registration, or
+ else we'll be losing track of the timer. */
+ *run_heavy_sweep = TRUE;
+ }
+ /* If we are marking the port as unhealthy - we want to
+ keep this for a longer period of time than the
+ OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
+ OSM_DEFAULT_UNHEALTHY_TIMEOUT */
+ *event_wheel_timeout = OSM_DEFAULT_UNHEALTHY_TIMEOUT;
+ }
+ }
+ return 0;
+}
+
/**********************************************************************
**********************************************************************/
static void
@@ -454,68 +509,11 @@ trap_rcv_process_request(IN osm_sm_t * sm, IN const osm_madw_t * const p_madw)
trap_key);
/* Now we know how many times it provided this trap */
- if (num_received > 10) {
- if (print_num_received(num_received))
- OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
- "Received trap %u times consecutively\n",
- num_received);
- /*
- * If the trap provides info about a bad port
- * we mark it as unhealthy.
- */
- if (physp_change_trap == TRUE) {
- /* get the port */
- p_physp = get_physp_by_lid_and_num(sm,
- cl_ntoh16
- (p_ntci->
- data_details.
- ntc_129_131.
- lid),
- port_num);
-
- if (!p_physp)
- OSM_LOG(sm->p_log, OSM_LOG_ERROR,
- "ERR 3805: "
- "Failed to find physical port by lid:%u num:%u\n",
- cl_ntoh16(p_ntci->data_details.
- ntc_129_131.lid),
- p_ntci->data_details.
- ntc_129_131.port_num);
- else {
- /* When babbling port policy option is enabled and
- Threshold for disabling a "babbling" port is exceeded */
- if (sm->p_subn->opt.
- babbling_port_policy
- && num_received >= 250
- && disable_port(sm, p_physp) == 0)
- goto Exit;
-
- OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
- "Marking unhealthy physical port by lid:%u num:%u\n",
- cl_ntoh16(p_ntci->data_details.
- ntc_129_131.lid),
- p_ntci->data_details.
- ntc_129_131.port_num);
- /* check if the current state of the p_physp is healthy. If
- it is - then this is a first change of state. Run a heavy sweep.
- if it is not - no need to mark it again - just restart the timer. */
- if (osm_physp_is_healthy(p_physp)) {
- osm_physp_set_health(p_physp,
- FALSE);
- /* Make sure we sweep again - force a heavy sweep. */
- /* The sweep should be done only after the re-registration, or
- else we'll be losing track of the timer. */
- run_heavy_sweep = TRUE;
- }
- /* If we are marking the port as unhealthy - we want to
- keep this for a longer period of time than the
- OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
- OSM_DEFAULT_UNHEALTHY_TIMEOUT */
- event_wheel_timeout =
- OSM_DEFAULT_UNHEALTHY_TIMEOUT;
- }
- }
- }
+ if (num_received > 10 &&
+ handle_babbling_port(sm, p_ntci, num_received,
+ physp_change_trap, &p_physp,
+ &run_heavy_sweep, &event_wheel_timeout))
+ goto Exit;
/* restart the aging anyway */
/* If physp_change_trap is TRUE - then use a callback to unset
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request
[not found] ` <20091102115051.GA32233-Wuw85uim5zDR7s880joybQ@public.gmane.org>
@ 2009-11-03 2:00 ` Sasha Khapyorsky
2009-11-03 13:50 ` Hal Rosenstock
0 siblings, 1 reply; 3+ messages in thread
From: Sasha Khapyorsky @ 2009-11-03 2:00 UTC (permalink / raw)
To: Hal Rosenstock; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
On 06:50 Mon 02 Nov , Hal Rosenstock wrote:
>
> Move some code out into separate handle_babbling_port routine
> for readability/maintainability
>
> Signed-off-by: Hal Rosenstock <hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> ---
> diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
> index 5790461..1621fbc 100644
> --- a/opensm/opensm/osm_trap_rcv.c
> +++ b/opensm/opensm/osm_trap_rcv.c
> @@ -312,6 +312,61 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
> cl_ntoh16(source_lid), cl_ntoh64(trans_id));
> }
>
> +static int handle_babbling_port(osm_sm_t *sm, ib_mad_notice_attr_t *p_ntci,
> + uint32_t num_received,
> + boolean_t physp_change_trap,
> + osm_physp_t **pp_physp,
> + boolean_t *run_heavy_sweep,
> + uint64_t *event_wheel_timeout)
> +{
> + if (print_num_received(num_received))
> + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
> + "Received trap %u times consecutively\n",
> + num_received);
> + /* If the trap provides info about a bad port, mark it as unhealthy. */
> + if (physp_change_trap == TRUE) {
> + /* get the port */
> + *pp_physp = get_physp_by_lid_and_num(sm,
> + cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
> + p_ntci->data_details.ntc_129_131.port_num);
> +
> + if (!*pp_physp)
> + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
> + "Failed to find physical port by lid:%u num:%u\n",
> + cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
> + p_ntci->data_details.ntc_129_131.port_num);
> + else {
> + /* When babbling port policy option is enabled and
> + Threshold for disabling a "babbling" port is exceeded */
> + if (sm->p_subn->opt.babbling_port_policy &&
> + num_received >= 250 &&
> + disable_port(sm, *pp_physp) == 0)
> + return 1;
> +
> + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> + "Marking unhealthy physical port by lid:%u num:%u\n",
> + cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
> + p_ntci->data_details.ntc_129_131.port_num);
> + /* check if the current state of the p_physp is healthy. If
> + it is - then this is a first change of state. Run a heavy sweep.
> + if it is not - no need to mark it again - just restart the timer. */
> + if (osm_physp_is_healthy(*pp_physp)) {
> + osm_physp_set_health(*pp_physp, FALSE);
> + /* Make sure we sweep again - force a heavy sweep. */
> + /* The sweep should be done only after the re-registration, or
> + else we'll be losing track of the timer. */
> + *run_heavy_sweep = TRUE;
> + }
> + /* If we are marking the port as unhealthy - we want to
> + keep this for a longer period of time than the
> + OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
> + OSM_DEFAULT_UNHEALTHY_TIMEOUT */
> + *event_wheel_timeout = OSM_DEFAULT_UNHEALTHY_TIMEOUT;
> + }
> + return 0;
> +}
> +
It looks that you are moving the code as it is to a separate function
where many flow switchers (run_heavy_sweep, event_wheel_timeout,
pp_physp) are magically modified.
What about something like below instead?
Sasha
>From 5ed9114a32f9536123af4bd135c7cc603d340f92 Mon Sep 17 00:00:00 2001
From: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org>
Date: Tue, 3 Nov 2009 03:46:54 +0200
Subject: [PATCH] opensm/osm_trap_rcv.c: some consolidation
Consolidate noisy port handling code in shutup_noisy_port() function.
As before it will try to disable port or to mark it unhealthy.
Signed-off-by: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
opensm/opensm/osm_trap_rcv.c | 113 ++++++++++++++++++++----------------------
1 files changed, 53 insertions(+), 60 deletions(-)
diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
index 0ee4e77..dae892e 100644
--- a/opensm/opensm/osm_trap_rcv.c
+++ b/opensm/opensm/osm_trap_rcv.c
@@ -226,7 +226,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
uint8_t payload[IB_SMP_DATA_SIZE];
osm_madw_context_t context;
ib_port_info_t *pi = (ib_port_info_t *)payload;
- int ret;
/* select the nearest port to master opensm */
if (p->p_remote_physp &&
@@ -236,10 +235,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
/* If trap 131, might want to disable peer port if available */
/* but peer port has been observed not to respond to SM requests */
- OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3810: "
- "Disabling physical port 0x%016" PRIx64 " num:%u\n",
- cl_ntoh64(osm_physp_get_port_guid(p)), p->port_num);
-
memcpy(payload, &p->port_info, sizeof(ib_port_info_t));
/* Set port to disabled/down */
@@ -253,15 +248,10 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
context.pi_context.light_sweep = FALSE;
context.pi_context.active_transition = FALSE;
- ret = osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
- payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
- cl_hton32(osm_physp_get_port_num(p)),
- CL_DISP_MSGID_NONE, &context);
- if (ret)
- OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: "
- "Request to set PortInfo failed\n");
-
- return ret;
+ return osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
+ payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
+ cl_hton32(osm_physp_get_port_num(p)),
+ CL_DISP_MSGID_NONE, &context);
}
static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
@@ -301,6 +291,42 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
cl_ntoh16(source_lid), cl_ntoh64(trans_id));
}
+static int shutup_noisy_port(osm_sm_t *sm, uint16_t lid, uint8_t port,
+ unsigned num)
+{
+ osm_physp_t *p = get_physp_by_lid_and_num(sm, lid, port);
+ if (!p) {
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
+ "Failed to find physical port by lid:%u num:%u\n",
+ lid, port);
+ return -1;
+ }
+
+ /* When babbling port policy option is enabled and
+ Threshold for disabling a "babbling" port is exceeded */
+ if (sm->p_subn->opt.babbling_port_policy && num >= 250) {
+ OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+ "Disabling noisy physical port 0x%016" PRIx64
+ ": lid %u, num %u\n",
+ cl_ntoh64(osm_physp_get_port_guid(p)), lid, port);
+ if (disable_port(sm, p))
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: "
+ "Failed to disable.\n");
+ else
+ return 1;
+ }
+
+ /* check if the current state of the p_physp is healthy. If
+ it is - then this is a first change of state. Run a heavy sweep. */
+ if (osm_physp_is_healthy(p)) {
+ OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+ "Marking unhealthy physical port by lid:%u num:%u\n",
+ lid, port);
+ osm_physp_set_health(p, FALSE);
+ return 2;
+ }
+ return 0;
+}
/**********************************************************************
**********************************************************************/
static void trap_rcv_process_request(IN osm_sm_t * sm,
@@ -438,7 +464,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
/* Now we know how many times it provided this trap */
if (num_received > 10) {
if (print_num_received(num_received))
- OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
+ OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
"Received trap %u times consecutively\n",
num_received);
/*
@@ -446,49 +472,17 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
* we mark it as unhealthy.
*/
if (physp_change_trap == TRUE) {
- /* get the port */
- p_physp = get_physp_by_lid_and_num(sm,
- cl_ntoh16
- (source_lid),
- port_num);
-
- if (!p_physp)
- OSM_LOG(sm->p_log, OSM_LOG_ERROR,
- "ERR 3805: "
- "Failed to find physical port by lid:%u num:%u\n",
- cl_ntoh16(source_lid),
- port_num);
- else {
- /* When babbling port policy option is enabled and
- Threshold for disabling a "babbling" port is exceeded */
- if (sm->p_subn->opt.
- babbling_port_policy
- && num_received >= 250
- && disable_port(sm, p_physp) == 0)
- goto Exit;
-
- OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
- "Marking unhealthy physical port by lid:%u num:%u\n",
- cl_ntoh16(source_lid),
- port_num);
- /* check if the current state of the p_physp is healthy. If
- it is - then this is a first change of state. Run a heavy sweep.
- if it is not - no need to mark it again - just restart the timer. */
- if (osm_physp_is_healthy(p_physp)) {
- osm_physp_set_health(p_physp,
- FALSE);
- /* Make sure we sweep again - force a heavy sweep. */
- /* The sweep should be done only after the re-registration, or
- else we'll be losing track of the timer. */
- run_heavy_sweep = TRUE;
- }
- /* If we are marking the port as unhealthy - we want to
- keep this for a longer period of time than the
- OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
- OSM_DEFAULT_UNHEALTHY_TIMEOUT */
- event_wheel_timeout =
- OSM_DEFAULT_UNHEALTHY_TIMEOUT;
- }
+ int ret = shutup_noisy_port(sm,
+ cl_ntoh16(source_lid),
+ port_num,
+ num_received);
+ if (ret == 1) /* port disabled */
+ goto Exit;
+ else if (ret == 2) /* unhealthy - run sweep */
+ run_heavy_sweep = TRUE;
+ /* in any case increase timeout interval */
+ event_wheel_timeout =
+ OSM_DEFAULT_UNHEALTHY_TIMEOUT;
}
}
@@ -508,8 +502,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
if (num_received > 10 && run_heavy_sweep == FALSE) {
if (print_num_received(num_received))
OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
- "Continuously received this trap %u times. Ignoring\n",
- num_received);
+ "Ignoring noisy traps.\n");
goto Exit;
}
}
--
1.6.5.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request
2009-11-03 2:00 ` Sasha Khapyorsky
@ 2009-11-03 13:50 ` Hal Rosenstock
0 siblings, 0 replies; 3+ messages in thread
From: Hal Rosenstock @ 2009-11-03 13:50 UTC (permalink / raw)
To: Sasha Khapyorsky; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
On Mon, Nov 2, 2009 at 9:00 PM, Sasha Khapyorsky <sashak@voltaire.com> wrote:
> On 06:50 Mon 02 Nov , Hal Rosenstock wrote:
>>
>> Move some code out into separate handle_babbling_port routine
>> for readability/maintainability
>>
>> Signed-off-by: Hal Rosenstock <hal.rosenstock@gmail.com>
>> ---
>> diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
>> index 5790461..1621fbc 100644
>> --- a/opensm/opensm/osm_trap_rcv.c
>> +++ b/opensm/opensm/osm_trap_rcv.c
>> @@ -312,6 +312,61 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
>> cl_ntoh16(source_lid), cl_ntoh64(trans_id));
>> }
>>
>> +static int handle_babbling_port(osm_sm_t *sm, ib_mad_notice_attr_t *p_ntci,
>> + uint32_t num_received,
>> + boolean_t physp_change_trap,
>> + osm_physp_t **pp_physp,
>> + boolean_t *run_heavy_sweep,
>> + uint64_t *event_wheel_timeout)
>> +{
>> + if (print_num_received(num_received))
>> + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
>> + "Received trap %u times consecutively\n",
>> + num_received);
>> + /* If the trap provides info about a bad port, mark it as unhealthy. */
>> + if (physp_change_trap == TRUE) {
>> + /* get the port */
>> + *pp_physp = get_physp_by_lid_and_num(sm,
>> + cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
>> + p_ntci->data_details.ntc_129_131.port_num);
>> +
>> + if (!*pp_physp)
>> + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
>> + "Failed to find physical port by lid:%u num:%u\n",
>> + cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
>> + p_ntci->data_details.ntc_129_131.port_num);
>> + else {
>> + /* When babbling port policy option is enabled and
>> + Threshold for disabling a "babbling" port is exceeded */
>> + if (sm->p_subn->opt.babbling_port_policy &&
>> + num_received >= 250 &&
>> + disable_port(sm, *pp_physp) == 0)
>> + return 1;
>> +
>> + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
>> + "Marking unhealthy physical port by lid:%u num:%u\n",
>> + cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
>> + p_ntci->data_details.ntc_129_131.port_num);
>> + /* check if the current state of the p_physp is healthy. If
>> + it is - then this is a first change of state. Run a heavy sweep.
>> + if it is not - no need to mark it again - just restart the timer. */
>> + if (osm_physp_is_healthy(*pp_physp)) {
>> + osm_physp_set_health(*pp_physp, FALSE);
>> + /* Make sure we sweep again - force a heavy sweep. */
>> + /* The sweep should be done only after the re-registration, or
>> + else we'll be losing track of the timer. */
>> + *run_heavy_sweep = TRUE;
>> + }
>> + /* If we are marking the port as unhealthy - we want to
>> + keep this for a longer period of time than the
>> + OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
>> + OSM_DEFAULT_UNHEALTHY_TIMEOUT */
>> + *event_wheel_timeout = OSM_DEFAULT_UNHEALTHY_TIMEOUT;
>> + }
>> + return 0;
>> +}
>> +
>
> It looks that you are moving the code as it is to a separate function
> where many flow switchers (run_heavy_sweep, event_wheel_timeout,
> pp_physp) are magically modified.
>
> What about something like below instead?
Reviewed-by: Hal Rosenstock <hal.rosenstock@gmail.com>
>
> Sasha
>
>
> >From 5ed9114a32f9536123af4bd135c7cc603d340f92 Mon Sep 17 00:00:00 2001
> From: Sasha Khapyorsky <sashak@voltaire.com>
> Date: Tue, 3 Nov 2009 03:46:54 +0200
> Subject: [PATCH] opensm/osm_trap_rcv.c: some consolidation
>
> Consolidate noisy port handling code in shutup_noisy_port() function.
> As before it will try to disable port or to mark it unhealthy.
>
> Signed-off-by: Sasha Khapyorsky <sashak@voltaire.com>
> ---
> opensm/opensm/osm_trap_rcv.c | 113 ++++++++++++++++++++----------------------
> 1 files changed, 53 insertions(+), 60 deletions(-)
>
> diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
> index 0ee4e77..dae892e 100644
> --- a/opensm/opensm/osm_trap_rcv.c
> +++ b/opensm/opensm/osm_trap_rcv.c
> @@ -226,7 +226,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
> uint8_t payload[IB_SMP_DATA_SIZE];
> osm_madw_context_t context;
> ib_port_info_t *pi = (ib_port_info_t *)payload;
> - int ret;
>
> /* select the nearest port to master opensm */
> if (p->p_remote_physp &&
> @@ -236,10 +235,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
> /* If trap 131, might want to disable peer port if available */
> /* but peer port has been observed not to respond to SM requests */
>
> - OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3810: "
> - "Disabling physical port 0x%016" PRIx64 " num:%u\n",
> - cl_ntoh64(osm_physp_get_port_guid(p)), p->port_num);
> -
> memcpy(payload, &p->port_info, sizeof(ib_port_info_t));
>
> /* Set port to disabled/down */
> @@ -253,15 +248,10 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
> context.pi_context.light_sweep = FALSE;
> context.pi_context.active_transition = FALSE;
>
> - ret = osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
> - payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
> - cl_hton32(osm_physp_get_port_num(p)),
> - CL_DISP_MSGID_NONE, &context);
> - if (ret)
> - OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: "
> - "Request to set PortInfo failed\n");
> -
> - return ret;
> + return osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
> + payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
> + cl_hton32(osm_physp_get_port_num(p)),
> + CL_DISP_MSGID_NONE, &context);
> }
>
> static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
> @@ -301,6 +291,42 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
> cl_ntoh16(source_lid), cl_ntoh64(trans_id));
> }
>
> +static int shutup_noisy_port(osm_sm_t *sm, uint16_t lid, uint8_t port,
> + unsigned num)
> +{
> + osm_physp_t *p = get_physp_by_lid_and_num(sm, lid, port);
> + if (!p) {
> + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
> + "Failed to find physical port by lid:%u num:%u\n",
> + lid, port);
> + return -1;
> + }
> +
> + /* When babbling port policy option is enabled and
> + Threshold for disabling a "babbling" port is exceeded */
> + if (sm->p_subn->opt.babbling_port_policy && num >= 250) {
> + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> + "Disabling noisy physical port 0x%016" PRIx64
> + ": lid %u, num %u\n",
> + cl_ntoh64(osm_physp_get_port_guid(p)), lid, port);
> + if (disable_port(sm, p))
> + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: "
> + "Failed to disable.\n");
Might want to include port information in this log message.
-- Hal
> + else
> + return 1;
> + }
> +
> + /* check if the current state of the p_physp is healthy. If
> + it is - then this is a first change of state. Run a heavy sweep. */
> + if (osm_physp_is_healthy(p)) {
> + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> + "Marking unhealthy physical port by lid:%u num:%u\n",
> + lid, port);
> + osm_physp_set_health(p, FALSE);
> + return 2;
> + }
> + return 0;
> +}
> /**********************************************************************
> **********************************************************************/
> static void trap_rcv_process_request(IN osm_sm_t * sm,
> @@ -438,7 +464,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
> /* Now we know how many times it provided this trap */
> if (num_received > 10) {
> if (print_num_received(num_received))
> - OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
> + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> "Received trap %u times consecutively\n",
> num_received);
> /*
> @@ -446,49 +472,17 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
> * we mark it as unhealthy.
> */
> if (physp_change_trap == TRUE) {
> - /* get the port */
> - p_physp = get_physp_by_lid_and_num(sm,
> - cl_ntoh16
> - (source_lid),
> - port_num);
> -
> - if (!p_physp)
> - OSM_LOG(sm->p_log, OSM_LOG_ERROR,
> - "ERR 3805: "
> - "Failed to find physical port by lid:%u num:%u\n",
> - cl_ntoh16(source_lid),
> - port_num);
> - else {
> - /* When babbling port policy option is enabled and
> - Threshold for disabling a "babbling" port is exceeded */
> - if (sm->p_subn->opt.
> - babbling_port_policy
> - && num_received >= 250
> - && disable_port(sm, p_physp) == 0)
> - goto Exit;
> -
> - OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> - "Marking unhealthy physical port by lid:%u num:%u\n",
> - cl_ntoh16(source_lid),
> - port_num);
> - /* check if the current state of the p_physp is healthy. If
> - it is - then this is a first change of state. Run a heavy sweep.
> - if it is not - no need to mark it again - just restart the timer. */
> - if (osm_physp_is_healthy(p_physp)) {
> - osm_physp_set_health(p_physp,
> - FALSE);
> - /* Make sure we sweep again - force a heavy sweep. */
> - /* The sweep should be done only after the re-registration, or
> - else we'll be losing track of the timer. */
> - run_heavy_sweep = TRUE;
> - }
> - /* If we are marking the port as unhealthy - we want to
> - keep this for a longer period of time than the
> - OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
> - OSM_DEFAULT_UNHEALTHY_TIMEOUT */
> - event_wheel_timeout =
> - OSM_DEFAULT_UNHEALTHY_TIMEOUT;
> - }
> + int ret = shutup_noisy_port(sm,
> + cl_ntoh16(source_lid),
> + port_num,
> + num_received);
> + if (ret == 1) /* port disabled */
> + goto Exit;
> + else if (ret == 2) /* unhealthy - run sweep */
> + run_heavy_sweep = TRUE;
> + /* in any case increase timeout interval */
> + event_wheel_timeout =
> + OSM_DEFAULT_UNHEALTHY_TIMEOUT;
> }
> }
>
> @@ -508,8 +502,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
> if (num_received > 10 && run_heavy_sweep == FALSE) {
> if (print_num_received(num_received))
> OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> - "Continuously received this trap %u times. Ignoring\n",
> - num_received);
> + "Ignoring noisy traps.\n");
> goto Exit;
> }
> }
> --
> 1.6.5.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2009-11-03 13:50 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-02 11:50 [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request Hal Rosenstock
[not found] ` <20091102115051.GA32233-Wuw85uim5zDR7s880joybQ@public.gmane.org>
2009-11-03 2:00 ` Sasha Khapyorsky
2009-11-03 13:50 ` Hal Rosenstock
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.