All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request
@ 2009-11-02 11:50 Hal Rosenstock
       [not found] ` <20091102115051.GA32233-Wuw85uim5zDR7s880joybQ@public.gmane.org>
  0 siblings, 1 reply; 3+ messages in thread
From: Hal Rosenstock @ 2009-11-02 11:50 UTC (permalink / raw)
  To: sashak-smomgflXvOZWk0Htik3J/w; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA


Move some code out into separate handle_babbling_port routine
for readability/maintainability 

Signed-off-by: Hal Rosenstock <hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
---
diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
index 5790461..1621fbc 100644
--- a/opensm/opensm/osm_trap_rcv.c
+++ b/opensm/opensm/osm_trap_rcv.c
@@ -312,6 +312,61 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
 			cl_ntoh16(source_lid), cl_ntoh64(trans_id));
 }
 
+static int handle_babbling_port(osm_sm_t *sm, ib_mad_notice_attr_t *p_ntci,
+				uint32_t num_received,
+				boolean_t physp_change_trap,
+				osm_physp_t **pp_physp,
+				boolean_t *run_heavy_sweep,
+				uint64_t *event_wheel_timeout)
+{
+	if (print_num_received(num_received))
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
+			"Received trap %u times consecutively\n",
+			num_received);
+	/* If the trap provides info about a bad port, mark it as unhealthy. */
+	if (physp_change_trap == TRUE) {
+		/* get the port */
+		*pp_physp = get_physp_by_lid_and_num(sm,
+						     cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
+						     p_ntci->data_details.ntc_129_131.port_num);
+
+		if (!*pp_physp)
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
+				"Failed to find physical port by lid:%u num:%u\n",
+				cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
+				p_ntci->data_details.ntc_129_131.port_num);
+		else {
+			/* When babbling port policy option is enabled and
+			   Threshold for disabling a "babbling" port is exceeded */
+			if (sm->p_subn->opt.babbling_port_policy &&
+			    num_received >= 250 &&
+			    disable_port(sm, *pp_physp) == 0)
+				return 1;
+
+			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+				"Marking unhealthy physical port by lid:%u num:%u\n",
+				cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
+				p_ntci->data_details.ntc_129_131.port_num);
+			/* check if the current state of the p_physp is healthy. If
+			   it is - then this is a first change of state. Run a heavy sweep.
+			   if it is not - no need to mark it again - just restart the timer. */
+			if (osm_physp_is_healthy(*pp_physp)) {
+				osm_physp_set_health(*pp_physp, FALSE);
+				/* Make sure we sweep again - force a heavy sweep. */
+				/* The sweep should be done only after the re-registration, or
+				   else we'll be losing track of the timer. */
+				*run_heavy_sweep = TRUE;
+			}
+			/* If we are marking the port as unhealthy - we want to
+			   keep this for a longer period of time than the
+			   OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
+			   OSM_DEFAULT_UNHEALTHY_TIMEOUT */
+			*event_wheel_timeout = OSM_DEFAULT_UNHEALTHY_TIMEOUT;
+		}
+	}
+	return 0;
+}
+
 /**********************************************************************
  **********************************************************************/
 static void
@@ -454,68 +509,11 @@ trap_rcv_process_request(IN osm_sm_t * sm, IN const osm_madw_t * const p_madw)
 					    trap_key);
 
 		/* Now we know how many times it provided this trap */
-		if (num_received > 10) {
-			if (print_num_received(num_received))
-				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
-					"Received trap %u times consecutively\n",
-					num_received);
-			/*
-			 * If the trap provides info about a bad port
-			 * we mark it as unhealthy.
-			 */
-			if (physp_change_trap == TRUE) {
-				/* get the port */
-				p_physp = get_physp_by_lid_and_num(sm,
-								   cl_ntoh16
-								   (p_ntci->
-								    data_details.
-								    ntc_129_131.
-								    lid),
-								   port_num);
-
-				if (!p_physp)
-					OSM_LOG(sm->p_log, OSM_LOG_ERROR,
-						"ERR 3805: "
-						"Failed to find physical port by lid:%u num:%u\n",
-						cl_ntoh16(p_ntci->data_details.
-							  ntc_129_131.lid),
-						p_ntci->data_details.
-						ntc_129_131.port_num);
-				else {
-					/* When babbling port policy option is enabled and
-					   Threshold for disabling a "babbling" port is exceeded */
-					if (sm->p_subn->opt.
-					    babbling_port_policy
-					    && num_received >= 250
-					    && disable_port(sm, p_physp) == 0)
-						goto Exit;
-
-					OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
-						"Marking unhealthy physical port by lid:%u num:%u\n",
-						cl_ntoh16(p_ntci->data_details.
-							  ntc_129_131.lid),
-						p_ntci->data_details.
-						ntc_129_131.port_num);
-					/* check if the current state of the p_physp is healthy. If
-					   it is - then this is a first change of state. Run a heavy sweep.
-					   if it is not - no need to mark it again - just restart the timer. */
-					if (osm_physp_is_healthy(p_physp)) {
-						osm_physp_set_health(p_physp,
-								     FALSE);
-						/* Make sure we sweep again - force a heavy sweep. */
-						/* The sweep should be done only after the re-registration, or
-						   else we'll be losing track of the timer. */
-						run_heavy_sweep = TRUE;
-					}
-					/* If we are marking the port as unhealthy - we want to
-					   keep this for a longer period of time than the
-					   OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
-					   OSM_DEFAULT_UNHEALTHY_TIMEOUT */
-					event_wheel_timeout =
-					    OSM_DEFAULT_UNHEALTHY_TIMEOUT;
-				}
-			}
-		}
+		if (num_received > 10 &&
+		    handle_babbling_port(sm, p_ntci, num_received,
+					 physp_change_trap, &p_physp,
+					 &run_heavy_sweep, &event_wheel_timeout))
+			goto Exit; 
 
 		/* restart the aging anyway */
 		/* If physp_change_trap is TRUE - then use a callback to unset
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request
       [not found] ` <20091102115051.GA32233-Wuw85uim5zDR7s880joybQ@public.gmane.org>
@ 2009-11-03  2:00   ` Sasha Khapyorsky
  2009-11-03 13:50     ` Hal Rosenstock
  0 siblings, 1 reply; 3+ messages in thread
From: Sasha Khapyorsky @ 2009-11-03  2:00 UTC (permalink / raw)
  To: Hal Rosenstock; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 06:50 Mon 02 Nov     , Hal Rosenstock wrote:
> 
> Move some code out into separate handle_babbling_port routine
> for readability/maintainability 
> 
> Signed-off-by: Hal Rosenstock <hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> ---
> diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
> index 5790461..1621fbc 100644
> --- a/opensm/opensm/osm_trap_rcv.c
> +++ b/opensm/opensm/osm_trap_rcv.c
> @@ -312,6 +312,61 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
>  			cl_ntoh16(source_lid), cl_ntoh64(trans_id));
>  }
>  
> +static int handle_babbling_port(osm_sm_t *sm, ib_mad_notice_attr_t *p_ntci,
> +				uint32_t num_received,
> +				boolean_t physp_change_trap,
> +				osm_physp_t **pp_physp,
> +				boolean_t *run_heavy_sweep,
> +				uint64_t *event_wheel_timeout)
> +{
> +	if (print_num_received(num_received))
> +		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
> +			"Received trap %u times consecutively\n",
> +			num_received);
> +	/* If the trap provides info about a bad port, mark it as unhealthy. */
> +	if (physp_change_trap == TRUE) {
> +		/* get the port */
> +		*pp_physp = get_physp_by_lid_and_num(sm,
> +						     cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
> +						     p_ntci->data_details.ntc_129_131.port_num);
> +
> +		if (!*pp_physp)
> +			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
> +				"Failed to find physical port by lid:%u num:%u\n",
> +				cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
> +				p_ntci->data_details.ntc_129_131.port_num);
> +		else {
> +			/* When babbling port policy option is enabled and
> +			   Threshold for disabling a "babbling" port is exceeded */
> +			if (sm->p_subn->opt.babbling_port_policy &&
> +			    num_received >= 250 &&
> +			    disable_port(sm, *pp_physp) == 0)
> +				return 1;
> +
> +			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> +				"Marking unhealthy physical port by lid:%u num:%u\n",
> +				cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
> +				p_ntci->data_details.ntc_129_131.port_num);
> +			/* check if the current state of the p_physp is healthy. If
> +			   it is - then this is a first change of state. Run a heavy sweep.
> +			   if it is not - no need to mark it again - just restart the timer. */
> +			if (osm_physp_is_healthy(*pp_physp)) {
> +				osm_physp_set_health(*pp_physp, FALSE);
> +				/* Make sure we sweep again - force a heavy sweep. */
> +				/* The sweep should be done only after the re-registration, or
> +				   else we'll be losing track of the timer. */
> +				*run_heavy_sweep = TRUE;
> +			}
> +			/* If we are marking the port as unhealthy - we want to
> +			   keep this for a longer period of time than the
> +			   OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
> +			   OSM_DEFAULT_UNHEALTHY_TIMEOUT */
> +			*event_wheel_timeout = OSM_DEFAULT_UNHEALTHY_TIMEOUT;
> +	}
> +	return 0;
> +}
> +

It looks that you are moving the code as it is to a separate function
where many flow switchers (run_heavy_sweep, event_wheel_timeout,
pp_physp) are magically modified.

What about something like below instead?

Sasha


>From 5ed9114a32f9536123af4bd135c7cc603d340f92 Mon Sep 17 00:00:00 2001
From: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org>
Date: Tue, 3 Nov 2009 03:46:54 +0200
Subject: [PATCH] opensm/osm_trap_rcv.c: some consolidation

Consolidate noisy port handling code in shutup_noisy_port() function.
As before it will try to disable port or to mark it unhealthy.

Signed-off-by: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 opensm/opensm/osm_trap_rcv.c |  113 ++++++++++++++++++++----------------------
 1 files changed, 53 insertions(+), 60 deletions(-)

diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
index 0ee4e77..dae892e 100644
--- a/opensm/opensm/osm_trap_rcv.c
+++ b/opensm/opensm/osm_trap_rcv.c
@@ -226,7 +226,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
 	uint8_t payload[IB_SMP_DATA_SIZE];
 	osm_madw_context_t context;
 	ib_port_info_t *pi = (ib_port_info_t *)payload;
-	int ret;
 
 	/* select the nearest port to master opensm */
 	if (p->p_remote_physp &&
@@ -236,10 +235,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
 	/* If trap 131, might want to disable peer port if available */
 	/* but peer port has been observed not to respond to SM requests */
 
-	OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3810: "
-		"Disabling physical port 0x%016" PRIx64 " num:%u\n",
-		cl_ntoh64(osm_physp_get_port_guid(p)), p->port_num);
-
 	memcpy(payload, &p->port_info, sizeof(ib_port_info_t));
 
 	/* Set port to disabled/down */
@@ -253,15 +248,10 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
 	context.pi_context.light_sweep = FALSE;
 	context.pi_context.active_transition = FALSE;
 
-	ret = osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
-			  payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
-			  cl_hton32(osm_physp_get_port_num(p)),
-			  CL_DISP_MSGID_NONE, &context);
-	if (ret)
-		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: "
-			"Request to set PortInfo failed\n");
-
-	return ret;
+	return osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
+			   payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
+			   cl_hton32(osm_physp_get_port_num(p)),
+			   CL_DISP_MSGID_NONE, &context);
 }
 
 static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
@@ -301,6 +291,42 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
 			cl_ntoh16(source_lid), cl_ntoh64(trans_id));
 }
 
+static int shutup_noisy_port(osm_sm_t *sm, uint16_t lid, uint8_t port,
+			     unsigned num)
+{
+	osm_physp_t *p = get_physp_by_lid_and_num(sm, lid, port);
+	if (!p) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
+			"Failed to find physical port by lid:%u num:%u\n",
+			lid, port);
+		return -1;
+	}
+
+	/* When babbling port policy option is enabled and
+	   Threshold for disabling a "babbling" port is exceeded */
+	if (sm->p_subn->opt.babbling_port_policy && num >= 250) {
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Disabling noisy physical port 0x%016" PRIx64
+			": lid %u, num %u\n",
+			cl_ntoh64(osm_physp_get_port_guid(p)), lid, port);
+		if (disable_port(sm, p))
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: "
+				"Failed to disable.\n");
+		else
+			return 1;
+	}
+
+	/* check if the current state of the p_physp is healthy. If
+	   it is - then this is a first change of state. Run a heavy sweep. */
+	if (osm_physp_is_healthy(p)) {
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Marking unhealthy physical port by lid:%u num:%u\n",
+			lid, port);
+		osm_physp_set_health(p, FALSE);
+		return 2;
+	}
+	return 0;
+}
 /**********************************************************************
  **********************************************************************/
 static void trap_rcv_process_request(IN osm_sm_t * sm,
@@ -438,7 +464,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
 		/* Now we know how many times it provided this trap */
 		if (num_received > 10) {
 			if (print_num_received(num_received))
-				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
+				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
 					"Received trap %u times consecutively\n",
 					num_received);
 			/*
@@ -446,49 +472,17 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
 			 * we mark it as unhealthy.
 			 */
 			if (physp_change_trap == TRUE) {
-				/* get the port */
-				p_physp = get_physp_by_lid_and_num(sm,
-								   cl_ntoh16
-								   (source_lid),
-								   port_num);
-
-				if (!p_physp)
-					OSM_LOG(sm->p_log, OSM_LOG_ERROR,
-						"ERR 3805: "
-						"Failed to find physical port by lid:%u num:%u\n",
-						cl_ntoh16(source_lid),
-						port_num);
-				else {
-					/* When babbling port policy option is enabled and
-					   Threshold for disabling a "babbling" port is exceeded */
-					if (sm->p_subn->opt.
-					    babbling_port_policy
-					    && num_received >= 250
-					    && disable_port(sm, p_physp) == 0)
-						goto Exit;
-
-					OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
-						"Marking unhealthy physical port by lid:%u num:%u\n",
-						cl_ntoh16(source_lid),
-						port_num);
-					/* check if the current state of the p_physp is healthy. If
-					   it is - then this is a first change of state. Run a heavy sweep.
-					   if it is not - no need to mark it again - just restart the timer. */
-					if (osm_physp_is_healthy(p_physp)) {
-						osm_physp_set_health(p_physp,
-								     FALSE);
-						/* Make sure we sweep again - force a heavy sweep. */
-						/* The sweep should be done only after the re-registration, or
-						   else we'll be losing track of the timer. */
-						run_heavy_sweep = TRUE;
-					}
-					/* If we are marking the port as unhealthy - we want to
-					   keep this for a longer period of time than the
-					   OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
-					   OSM_DEFAULT_UNHEALTHY_TIMEOUT */
-					event_wheel_timeout =
-					    OSM_DEFAULT_UNHEALTHY_TIMEOUT;
-				}
+				int ret = shutup_noisy_port(sm,
+							    cl_ntoh16(source_lid),
+							    port_num,
+							    num_received);
+				if (ret == 1) /* port disabled */
+					goto Exit;
+				else if (ret == 2) /* unhealthy - run sweep */
+					run_heavy_sweep = TRUE;
+				/* in any case increase timeout interval */
+				event_wheel_timeout =
+				    OSM_DEFAULT_UNHEALTHY_TIMEOUT;
 			}
 		}
 
@@ -508,8 +502,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
 		if (num_received > 10 && run_heavy_sweep == FALSE) {
 			if (print_num_received(num_received))
 				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
-					"Continuously received this trap %u times. Ignoring\n",
-					num_received);
+					"Ignoring noisy traps.\n");
 			goto Exit;
 		}
 	}
-- 
1.6.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request
  2009-11-03  2:00   ` Sasha Khapyorsky
@ 2009-11-03 13:50     ` Hal Rosenstock
  0 siblings, 0 replies; 3+ messages in thread
From: Hal Rosenstock @ 2009-11-03 13:50 UTC (permalink / raw)
  To: Sasha Khapyorsky; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Mon, Nov 2, 2009 at 9:00 PM, Sasha Khapyorsky <sashak@voltaire.com> wrote:
> On 06:50 Mon 02 Nov     , Hal Rosenstock wrote:
>>
>> Move some code out into separate handle_babbling_port routine
>> for readability/maintainability
>>
>> Signed-off-by: Hal Rosenstock <hal.rosenstock@gmail.com>
>> ---
>> diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
>> index 5790461..1621fbc 100644
>> --- a/opensm/opensm/osm_trap_rcv.c
>> +++ b/opensm/opensm/osm_trap_rcv.c
>> @@ -312,6 +312,61 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
>>                       cl_ntoh16(source_lid), cl_ntoh64(trans_id));
>>  }
>>
>> +static int handle_babbling_port(osm_sm_t *sm, ib_mad_notice_attr_t *p_ntci,
>> +                             uint32_t num_received,
>> +                             boolean_t physp_change_trap,
>> +                             osm_physp_t **pp_physp,
>> +                             boolean_t *run_heavy_sweep,
>> +                             uint64_t *event_wheel_timeout)
>> +{
>> +     if (print_num_received(num_received))
>> +             OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
>> +                     "Received trap %u times consecutively\n",
>> +                     num_received);
>> +     /* If the trap provides info about a bad port, mark it as unhealthy. */
>> +     if (physp_change_trap == TRUE) {
>> +             /* get the port */
>> +             *pp_physp = get_physp_by_lid_and_num(sm,
>> +                                                  cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
>> +                                                  p_ntci->data_details.ntc_129_131.port_num);
>> +
>> +             if (!*pp_physp)
>> +                     OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
>> +                             "Failed to find physical port by lid:%u num:%u\n",
>> +                             cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
>> +                             p_ntci->data_details.ntc_129_131.port_num);
>> +             else {
>> +                     /* When babbling port policy option is enabled and
>> +                        Threshold for disabling a "babbling" port is exceeded */
>> +                     if (sm->p_subn->opt.babbling_port_policy &&
>> +                         num_received >= 250 &&
>> +                         disable_port(sm, *pp_physp) == 0)
>> +                             return 1;
>> +
>> +                     OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
>> +                             "Marking unhealthy physical port by lid:%u num:%u\n",
>> +                             cl_ntoh16(p_ntci->data_details.ntc_129_131.lid),
>> +                             p_ntci->data_details.ntc_129_131.port_num);
>> +                     /* check if the current state of the p_physp is healthy. If
>> +                        it is - then this is a first change of state. Run a heavy sweep.
>> +                        if it is not - no need to mark it again - just restart the timer. */
>> +                     if (osm_physp_is_healthy(*pp_physp)) {
>> +                             osm_physp_set_health(*pp_physp, FALSE);
>> +                             /* Make sure we sweep again - force a heavy sweep. */
>> +                             /* The sweep should be done only after the re-registration, or
>> +                                else we'll be losing track of the timer. */
>> +                             *run_heavy_sweep = TRUE;
>> +                     }
>> +                     /* If we are marking the port as unhealthy - we want to
>> +                        keep this for a longer period of time than the
>> +                        OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
>> +                        OSM_DEFAULT_UNHEALTHY_TIMEOUT */
>> +                     *event_wheel_timeout = OSM_DEFAULT_UNHEALTHY_TIMEOUT;
>> +     }
>> +     return 0;
>> +}
>> +
>
> It looks that you are moving the code as it is to a separate function
> where many flow switchers (run_heavy_sweep, event_wheel_timeout,
> pp_physp) are magically modified.
>
> What about something like below instead?

Reviewed-by: Hal Rosenstock <hal.rosenstock@gmail.com>

>
> Sasha
>
>
> >From 5ed9114a32f9536123af4bd135c7cc603d340f92 Mon Sep 17 00:00:00 2001
> From: Sasha Khapyorsky <sashak@voltaire.com>
> Date: Tue, 3 Nov 2009 03:46:54 +0200
> Subject: [PATCH] opensm/osm_trap_rcv.c: some consolidation
>
> Consolidate noisy port handling code in shutup_noisy_port() function.
> As before it will try to disable port or to mark it unhealthy.
>
> Signed-off-by: Sasha Khapyorsky <sashak@voltaire.com>
> ---
>  opensm/opensm/osm_trap_rcv.c |  113 ++++++++++++++++++++----------------------
>  1 files changed, 53 insertions(+), 60 deletions(-)
>
> diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
> index 0ee4e77..dae892e 100644
> --- a/opensm/opensm/osm_trap_rcv.c
> +++ b/opensm/opensm/osm_trap_rcv.c
> @@ -226,7 +226,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
>        uint8_t payload[IB_SMP_DATA_SIZE];
>        osm_madw_context_t context;
>        ib_port_info_t *pi = (ib_port_info_t *)payload;
> -       int ret;
>
>        /* select the nearest port to master opensm */
>        if (p->p_remote_physp &&
> @@ -236,10 +235,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
>        /* If trap 131, might want to disable peer port if available */
>        /* but peer port has been observed not to respond to SM requests */
>
> -       OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3810: "
> -               "Disabling physical port 0x%016" PRIx64 " num:%u\n",
> -               cl_ntoh64(osm_physp_get_port_guid(p)), p->port_num);
> -
>        memcpy(payload, &p->port_info, sizeof(ib_port_info_t));
>
>        /* Set port to disabled/down */
> @@ -253,15 +248,10 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p)
>        context.pi_context.light_sweep = FALSE;
>        context.pi_context.active_transition = FALSE;
>
> -       ret = osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
> -                         payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
> -                         cl_hton32(osm_physp_get_port_num(p)),
> -                         CL_DISP_MSGID_NONE, &context);
> -       if (ret)
> -               OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: "
> -                       "Request to set PortInfo failed\n");
> -
> -       return ret;
> +       return osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
> +                          payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
> +                          cl_hton32(osm_physp_get_port_num(p)),
> +                          CL_DISP_MSGID_NONE, &context);
>  }
>
>  static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
> @@ -301,6 +291,42 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci,
>                        cl_ntoh16(source_lid), cl_ntoh64(trans_id));
>  }
>
> +static int shutup_noisy_port(osm_sm_t *sm, uint16_t lid, uint8_t port,
> +                            unsigned num)
> +{
> +       osm_physp_t *p = get_physp_by_lid_and_num(sm, lid, port);
> +       if (!p) {
> +               OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: "
> +                       "Failed to find physical port by lid:%u num:%u\n",
> +                       lid, port);
> +               return -1;
> +       }
> +
> +       /* When babbling port policy option is enabled and
> +          Threshold for disabling a "babbling" port is exceeded */
> +       if (sm->p_subn->opt.babbling_port_policy && num >= 250) {
> +               OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> +                       "Disabling noisy physical port 0x%016" PRIx64
> +                       ": lid %u, num %u\n",
> +                       cl_ntoh64(osm_physp_get_port_guid(p)), lid, port);
> +               if (disable_port(sm, p))
> +                       OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: "
> +                               "Failed to disable.\n");

Might want to include port information in this log message.

-- Hal

> +               else
> +                       return 1;
> +       }
> +
> +       /* check if the current state of the p_physp is healthy. If
> +          it is - then this is a first change of state. Run a heavy sweep. */
> +       if (osm_physp_is_healthy(p)) {
> +               OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> +                       "Marking unhealthy physical port by lid:%u num:%u\n",
> +                       lid, port);
> +               osm_physp_set_health(p, FALSE);
> +               return 2;
> +       }
> +       return 0;
> +}
>  /**********************************************************************
>  **********************************************************************/
>  static void trap_rcv_process_request(IN osm_sm_t * sm,
> @@ -438,7 +464,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
>                /* Now we know how many times it provided this trap */
>                if (num_received > 10) {
>                        if (print_num_received(num_received))
> -                               OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
> +                               OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
>                                        "Received trap %u times consecutively\n",
>                                        num_received);
>                        /*
> @@ -446,49 +472,17 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
>                         * we mark it as unhealthy.
>                         */
>                        if (physp_change_trap == TRUE) {
> -                               /* get the port */
> -                               p_physp = get_physp_by_lid_and_num(sm,
> -                                                                  cl_ntoh16
> -                                                                  (source_lid),
> -                                                                  port_num);
> -
> -                               if (!p_physp)
> -                                       OSM_LOG(sm->p_log, OSM_LOG_ERROR,
> -                                               "ERR 3805: "
> -                                               "Failed to find physical port by lid:%u num:%u\n",
> -                                               cl_ntoh16(source_lid),
> -                                               port_num);
> -                               else {
> -                                       /* When babbling port policy option is enabled and
> -                                          Threshold for disabling a "babbling" port is exceeded */
> -                                       if (sm->p_subn->opt.
> -                                           babbling_port_policy
> -                                           && num_received >= 250
> -                                           && disable_port(sm, p_physp) == 0)
> -                                               goto Exit;
> -
> -                                       OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> -                                               "Marking unhealthy physical port by lid:%u num:%u\n",
> -                                               cl_ntoh16(source_lid),
> -                                               port_num);
> -                                       /* check if the current state of the p_physp is healthy. If
> -                                          it is - then this is a first change of state. Run a heavy sweep.
> -                                          if it is not - no need to mark it again - just restart the timer. */
> -                                       if (osm_physp_is_healthy(p_physp)) {
> -                                               osm_physp_set_health(p_physp,
> -                                                                    FALSE);
> -                                               /* Make sure we sweep again - force a heavy sweep. */
> -                                               /* The sweep should be done only after the re-registration, or
> -                                                  else we'll be losing track of the timer. */
> -                                               run_heavy_sweep = TRUE;
> -                                       }
> -                                       /* If we are marking the port as unhealthy - we want to
> -                                          keep this for a longer period of time than the
> -                                          OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
> -                                          OSM_DEFAULT_UNHEALTHY_TIMEOUT */
> -                                       event_wheel_timeout =
> -                                           OSM_DEFAULT_UNHEALTHY_TIMEOUT;
> -                               }
> +                               int ret = shutup_noisy_port(sm,
> +                                                           cl_ntoh16(source_lid),
> +                                                           port_num,
> +                                                           num_received);
> +                               if (ret == 1) /* port disabled */
> +                                       goto Exit;
> +                               else if (ret == 2) /* unhealthy - run sweep */
> +                                       run_heavy_sweep = TRUE;
> +                               /* in any case increase timeout interval */
> +                               event_wheel_timeout =
> +                                   OSM_DEFAULT_UNHEALTHY_TIMEOUT;
>                        }
>                }
>
> @@ -508,8 +502,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm,
>                if (num_received > 10 && run_heavy_sweep == FALSE) {
>                        if (print_num_received(num_received))
>                                OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
> -                                       "Continuously received this trap %u times. Ignoring\n",
> -                                       num_received);
> +                                       "Ignoring noisy traps.\n");
>                        goto Exit;
>                }
>        }
> --
> 1.6.5.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-11-03 13:50 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-02 11:50 [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request Hal Rosenstock
     [not found] ` <20091102115051.GA32233-Wuw85uim5zDR7s880joybQ@public.gmane.org>
2009-11-03  2:00   ` Sasha Khapyorsky
2009-11-03 13:50     ` Hal Rosenstock

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.