linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2,hyperv-fixes] Drivers: hv: vmbus: Fix duplicate CPU assignments within a device
@ 2021-07-16  1:37 Haiyang Zhang
  2021-07-16 17:45 ` Michael Kelley
  0 siblings, 1 reply; 3+ messages in thread
From: Haiyang Zhang @ 2021-07-16  1:37 UTC (permalink / raw)
  To: linux-hyperv; +Cc: haiyangz, kys, linux-kernel

The vmbus module uses a rotational algorithm to assign target CPUs to
device's channels. Depends on the timing of different device's channel
offers, different channels of a device may be assigned to the same CPU.

For example on a VM with 2 CPUs, if NIC A and B's channels are offered
in the following order, NIC A will have both channels on CPU0, and
NIC B will have both channels on CPU1 -- see below. This kind of
assignment causes RSS load that is spreading across different channels
to end up on the same CPU.

Timing of channel offers:
NIC A channel 0
NIC B channel 0
NIC A channel 1
NIC B channel 1

VMBUS ID 14: Class_ID = {f8615163-df3e-46c5-913f-f2d2f965ed0e} - Synthetic network adapter
        Device_ID = {cab064cd-1f31-47d5-a8b4-9d57e320cccd}
        Sysfs path: /sys/bus/vmbus/devices/cab064cd-1f31-47d5-a8b4-9d57e320cccd
        Rel_ID=14, target_cpu=0
        Rel_ID=17, target_cpu=0

VMBUS ID 16: Class_ID = {f8615163-df3e-46c5-913f-f2d2f965ed0e} - Synthetic network adapter
        Device_ID = {244225ca-743e-4020-a17d-d7baa13d6cea}
        Sysfs path: /sys/bus/vmbus/devices/244225ca-743e-4020-a17d-d7baa13d6cea
        Rel_ID=16, target_cpu=1
        Rel_ID=18, target_cpu=1

Update the vmbus CPU assignment algorithm to avoid duplicate CPU
assignments within a device.

The new algorithm iterates num_online_cpus + 1 times.
The existing rotational algorithm to find "next NUMA & CPU" is still here.
But if the resulting CPU is already used by the same device, it will try
the next CPU.
In the last iteration, it assigns the channel to the next available CPU
like the existing algorithm. This is not normally expected, because
during device probe, we limit the number of channels of a device to
be <= number of online CPUs.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 drivers/hv/channel_mgmt.c | 96 ++++++++++++++++++++++++++-------------
 1 file changed, 64 insertions(+), 32 deletions(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index caf6d0c4bc1b..8584914291e7 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -605,6 +605,17 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 	 */
 	mutex_lock(&vmbus_connection.channel_mutex);
 
+	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+		if (guid_equal(&channel->offermsg.offer.if_type,
+			       &newchannel->offermsg.offer.if_type) &&
+		    guid_equal(&channel->offermsg.offer.if_instance,
+			       &newchannel->offermsg.offer.if_instance)) {
+			fnew = false;
+			newchannel->primary_channel = channel;
+			break;
+		}
+	}
+
 	init_vp_index(newchannel);
 
 	/* Remember the channels that should be cleaned up upon suspend. */
@@ -617,16 +628,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 	 */
 	atomic_dec(&vmbus_connection.offer_in_progress);
 
-	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
-		if (guid_equal(&channel->offermsg.offer.if_type,
-			       &newchannel->offermsg.offer.if_type) &&
-		    guid_equal(&channel->offermsg.offer.if_instance,
-			       &newchannel->offermsg.offer.if_instance)) {
-			fnew = false;
-			break;
-		}
-	}
-
 	if (fnew) {
 		list_add_tail(&newchannel->listentry,
 			      &vmbus_connection.chn_list);
@@ -647,7 +648,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 		/*
 		 * Process the sub-channel.
 		 */
-		newchannel->primary_channel = channel;
 		list_add_tail(&newchannel->sc_list, &channel->sc_list);
 	}
 
@@ -683,6 +683,30 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 	queue_work(wq, &newchannel->add_channel_work);
 }
 
+/*
+ * Check if CPUs used by other channels of the same device.
+ * It's should only be called by init_vp_index().
+ */
+static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
+{
+	struct vmbus_channel *primary = chn->primary_channel;
+	struct vmbus_channel *sc;
+
+	lockdep_assert_held(&vmbus_connection.channel_mutex);
+
+	if (!primary)
+		return false;
+
+	if (primary->target_cpu == cpu)
+		return true;
+
+	list_for_each_entry(sc, &primary->sc_list, sc_list)
+		if (sc != chn && sc->target_cpu == cpu)
+			return true;
+
+	return false;
+}
+
 /*
  * We use this state to statically distribute the channel interrupt load.
  */
@@ -702,6 +726,7 @@ static int next_numa_node_id;
 static void init_vp_index(struct vmbus_channel *channel)
 {
 	bool perf_chn = hv_is_perf_channel(channel);
+	u32 i, ncpu = num_online_cpus();
 	cpumask_var_t available_mask;
 	struct cpumask *alloced_mask;
 	u32 target_cpu;
@@ -724,31 +749,38 @@ static void init_vp_index(struct vmbus_channel *channel)
 		return;
 	}
 
-	while (true) {
-		numa_node = next_numa_node_id++;
-		if (numa_node == nr_node_ids) {
-			next_numa_node_id = 0;
-			continue;
+	for (i = 1; i <= ncpu + 1; i++) {
+		while (true) {
+			numa_node = next_numa_node_id++;
+			if (numa_node == nr_node_ids) {
+				next_numa_node_id = 0;
+				continue;
+			}
+			if (cpumask_empty(cpumask_of_node(numa_node)))
+				continue;
+			break;
+		}
+		alloced_mask = &hv_context.hv_numa_map[numa_node];
+
+		if (cpumask_weight(alloced_mask) ==
+		    cpumask_weight(cpumask_of_node(numa_node))) {
+			/*
+			 * We have cycled through all the CPUs in the node;
+			 * reset the alloced map.
+			 */
+			cpumask_clear(alloced_mask);
 		}
-		if (cpumask_empty(cpumask_of_node(numa_node)))
-			continue;
-		break;
-	}
-	alloced_mask = &hv_context.hv_numa_map[numa_node];
 
-	if (cpumask_weight(alloced_mask) ==
-	    cpumask_weight(cpumask_of_node(numa_node))) {
-		/*
-		 * We have cycled through all the CPUs in the node;
-		 * reset the alloced map.
-		 */
-		cpumask_clear(alloced_mask);
-	}
+		cpumask_xor(available_mask, alloced_mask,
+			    cpumask_of_node(numa_node));
 
-	cpumask_xor(available_mask, alloced_mask, cpumask_of_node(numa_node));
+		target_cpu = cpumask_first(available_mask);
+		cpumask_set_cpu(target_cpu, alloced_mask);
 
-	target_cpu = cpumask_first(available_mask);
-	cpumask_set_cpu(target_cpu, alloced_mask);
+		if (channel->offermsg.offer.sub_channel_index >= ncpu ||
+		    i > ncpu || !hv_cpuself_used(target_cpu, channel))
+			break;
+	}
 
 	channel->target_cpu = target_cpu;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* RE: [PATCH v2,hyperv-fixes] Drivers: hv: vmbus: Fix duplicate CPU assignments within a device
  2021-07-16  1:37 [PATCH v2,hyperv-fixes] Drivers: hv: vmbus: Fix duplicate CPU assignments within a device Haiyang Zhang
@ 2021-07-16 17:45 ` Michael Kelley
  2021-07-16 17:47   ` Haiyang Zhang
  0 siblings, 1 reply; 3+ messages in thread
From: Michael Kelley @ 2021-07-16 17:45 UTC (permalink / raw)
  To: Haiyang Zhang, linux-hyperv; +Cc: Haiyang Zhang, KY Srinivasan, linux-kernel

From: LKML haiyangz <lkmlhyz@microsoft.com> On Behalf Of Haiyang Zhang Sent: Thursday, July 15, 2021 6:38 PM
> 
> The vmbus module uses a rotational algorithm to assign target CPUs to
> device's channels. Depends on the timing of different device's channel

s/device's/a device's/
s/Depends/Depending/

> offers, different channels of a device may be assigned to the same CPU.
> 
> For example on a VM with 2 CPUs, if NIC A and B's channels are offered
> in the following order, NIC A will have both channels on CPU0, and
> NIC B will have both channels on CPU1 -- see below. This kind of
> assignment causes RSS load that is spreading across different channels
> to end up on the same CPU.
> 
> Timing of channel offers:
> NIC A channel 0
> NIC B channel 0
> NIC A channel 1
> NIC B channel 1
> 
> VMBUS ID 14: Class_ID = {f8615163-df3e-46c5-913f-f2d2f965ed0e} - Synthetic network adapter
>         Device_ID = {cab064cd-1f31-47d5-a8b4-9d57e320cccd}
>         Sysfs path: /sys/bus/vmbus/devices/cab064cd-1f31-47d5-a8b4-9d57e320cccd
>         Rel_ID=14, target_cpu=0
>         Rel_ID=17, target_cpu=0
> 
> VMBUS ID 16: Class_ID = {f8615163-df3e-46c5-913f-f2d2f965ed0e} - Synthetic network adapter
>         Device_ID = {244225ca-743e-4020-a17d-d7baa13d6cea}
>         Sysfs path: /sys/bus/vmbus/devices/244225ca-743e-4020-a17d-d7baa13d6cea
>         Rel_ID=16, target_cpu=1
>         Rel_ID=18, target_cpu=1
> 
> Update the vmbus CPU assignment algorithm to avoid duplicate CPU
> assignments within a device.
> 
> The new algorithm iterates num_online_cpus + 1 times.
> The existing rotational algorithm to find "next NUMA & CPU" is still here.
> But if the resulting CPU is already used by the same device, it will try
> the next CPU.
> In the last iteration, it assigns the channel to the next available CPU
> like the existing algorithm. This is not normally expected, because
> during device probe, we limit the number of channels of a device to
> be <= number of online CPUs.
> 
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> ---
>  drivers/hv/channel_mgmt.c | 96 ++++++++++++++++++++++++++-------------
>  1 file changed, 64 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
> index caf6d0c4bc1b..8584914291e7 100644
> --- a/drivers/hv/channel_mgmt.c
> +++ b/drivers/hv/channel_mgmt.c
> @@ -605,6 +605,17 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
>  	 */
>  	mutex_lock(&vmbus_connection.channel_mutex);
> 
> +	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
> +		if (guid_equal(&channel->offermsg.offer.if_type,
> +			       &newchannel->offermsg.offer.if_type) &&
> +		    guid_equal(&channel->offermsg.offer.if_instance,
> +			       &newchannel->offermsg.offer.if_instance)) {
> +			fnew = false;
> +			newchannel->primary_channel = channel;
> +			break;
> +		}
> +	}
> +
>  	init_vp_index(newchannel);
> 
>  	/* Remember the channels that should be cleaned up upon suspend. */
> @@ -617,16 +628,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
>  	 */
>  	atomic_dec(&vmbus_connection.offer_in_progress);
> 
> -	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
> -		if (guid_equal(&channel->offermsg.offer.if_type,
> -			       &newchannel->offermsg.offer.if_type) &&
> -		    guid_equal(&channel->offermsg.offer.if_instance,
> -			       &newchannel->offermsg.offer.if_instance)) {
> -			fnew = false;
> -			break;
> -		}
> -	}
> -
>  	if (fnew) {
>  		list_add_tail(&newchannel->listentry,
>  			      &vmbus_connection.chn_list);
> @@ -647,7 +648,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
>  		/*
>  		 * Process the sub-channel.
>  		 */
> -		newchannel->primary_channel = channel;
>  		list_add_tail(&newchannel->sc_list, &channel->sc_list);
>  	}
> 
> @@ -683,6 +683,30 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
>  	queue_work(wq, &newchannel->add_channel_work);
>  }
> 
> +/*
> + * Check if CPUs used by other channels of the same device.
> + * It's should only be called by init_vp_index().

s/It's/It/

> + */
> +static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
> +{
> +	struct vmbus_channel *primary = chn->primary_channel;
> +	struct vmbus_channel *sc;
> +
> +	lockdep_assert_held(&vmbus_connection.channel_mutex);
> +
> +	if (!primary)
> +		return false;
> +
> +	if (primary->target_cpu == cpu)
> +		return true;
> +
> +	list_for_each_entry(sc, &primary->sc_list, sc_list)
> +		if (sc != chn && sc->target_cpu == cpu)
> +			return true;
> +
> +	return false;
> +}
> +
>  /*
>   * We use this state to statically distribute the channel interrupt load.
>   */
> @@ -702,6 +726,7 @@ static int next_numa_node_id;
>  static void init_vp_index(struct vmbus_channel *channel)
>  {
>  	bool perf_chn = hv_is_perf_channel(channel);
> +	u32 i, ncpu = num_online_cpus();
>  	cpumask_var_t available_mask;
>  	struct cpumask *alloced_mask;
>  	u32 target_cpu;
> @@ -724,31 +749,38 @@ static void init_vp_index(struct vmbus_channel *channel)
>  		return;
>  	}
> 
> -	while (true) {
> -		numa_node = next_numa_node_id++;
> -		if (numa_node == nr_node_ids) {
> -			next_numa_node_id = 0;
> -			continue;
> +	for (i = 1; i <= ncpu + 1; i++) {
> +		while (true) {
> +			numa_node = next_numa_node_id++;
> +			if (numa_node == nr_node_ids) {
> +				next_numa_node_id = 0;
> +				continue;
> +			}
> +			if (cpumask_empty(cpumask_of_node(numa_node)))
> +				continue;
> +			break;
> +		}
> +		alloced_mask = &hv_context.hv_numa_map[numa_node];
> +
> +		if (cpumask_weight(alloced_mask) ==
> +		    cpumask_weight(cpumask_of_node(numa_node))) {
> +			/*
> +			 * We have cycled through all the CPUs in the node;
> +			 * reset the alloced map.
> +			 */
> +			cpumask_clear(alloced_mask);
>  		}
> -		if (cpumask_empty(cpumask_of_node(numa_node)))
> -			continue;
> -		break;
> -	}
> -	alloced_mask = &hv_context.hv_numa_map[numa_node];
> 
> -	if (cpumask_weight(alloced_mask) ==
> -	    cpumask_weight(cpumask_of_node(numa_node))) {
> -		/*
> -		 * We have cycled through all the CPUs in the node;
> -		 * reset the alloced map.
> -		 */
> -		cpumask_clear(alloced_mask);
> -	}
> +		cpumask_xor(available_mask, alloced_mask,
> +			    cpumask_of_node(numa_node));
> 
> -	cpumask_xor(available_mask, alloced_mask, cpumask_of_node(numa_node));
> +		target_cpu = cpumask_first(available_mask);
> +		cpumask_set_cpu(target_cpu, alloced_mask);
> 
> -	target_cpu = cpumask_first(available_mask);
> -	cpumask_set_cpu(target_cpu, alloced_mask);
> +		if (channel->offermsg.offer.sub_channel_index >= ncpu ||
> +		    i > ncpu || !hv_cpuself_used(target_cpu, channel))
> +			break;
> +	}
> 
>  	channel->target_cpu = target_cpu;
> 
> --
> 2.25.1

I like this approach much better.  I did some testing with a variety of
CPU counts (1, 2, 3, 4, 8, and 32), NUMA nodes (1, 3, and 4) and with
4 SCSI controllers and 4 NICs.  Everything worked as expected, and it's
definitely an improvement.

I flagged a couple of typos/wording errors, but maybe they can be
fixed when the patch is pulled into hyperv-fixes.

Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Michael Kelley <mikelley@microsoft.com>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH v2,hyperv-fixes] Drivers: hv: vmbus: Fix duplicate CPU assignments within a device
  2021-07-16 17:45 ` Michael Kelley
@ 2021-07-16 17:47   ` Haiyang Zhang
  0 siblings, 0 replies; 3+ messages in thread
From: Haiyang Zhang @ 2021-07-16 17:47 UTC (permalink / raw)
  To: Michael Kelley, linux-hyperv; +Cc: KY Srinivasan, linux-kernel



> -----Original Message-----
> From: Michael Kelley <mikelley@microsoft.com>
> Sent: Friday, July 16, 2021 1:45 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>; linux-
> hyperv@vger.kernel.org
> Cc: Haiyang Zhang <haiyangz@microsoft.com>; KY Srinivasan
> <kys@microsoft.com>; linux-kernel@vger.kernel.org
> Subject: RE: [PATCH v2,hyperv-fixes] Drivers: hv: vmbus: Fix duplicate CPU
> assignments within a device
> 
> From: LKML haiyangz <lkmlhyz@microsoft.com> On Behalf Of Haiyang Zhang
> Sent: Thursday, July 15, 2021 6:38 PM
> >
> > The vmbus module uses a rotational algorithm to assign target CPUs to
> > device's channels. Depends on the timing of different device's channel
> 
> s/device's/a device's/
> s/Depends/Depending/
> 
> > offers, different channels of a device may be assigned to the same CPU.
> >
> > For example on a VM with 2 CPUs, if NIC A and B's channels are offered
> > in the following order, NIC A will have both channels on CPU0, and NIC
> > B will have both channels on CPU1 -- see below. This kind of
> > assignment causes RSS load that is spreading across different channels
> > to end up on the same CPU.
> >
> > Timing of channel offers:
> > NIC A channel 0
> > NIC B channel 0
> > NIC A channel 1
> > NIC B channel 1
> >
> > VMBUS ID 14: Class_ID = {f8615163-df3e-46c5-913f-f2d2f965ed0e} -
> Synthetic network adapter
> >         Device_ID = {cab064cd-1f31-47d5-a8b4-9d57e320cccd}
> >         Sysfs path: /sys/bus/vmbus/devices/cab064cd-1f31-47d5-a8b4-
> 9d57e320cccd
> >         Rel_ID=14, target_cpu=0
> >         Rel_ID=17, target_cpu=0
> >
> > VMBUS ID 16: Class_ID = {f8615163-df3e-46c5-913f-f2d2f965ed0e} -
> Synthetic network adapter
> >         Device_ID = {244225ca-743e-4020-a17d-d7baa13d6cea}
> >         Sysfs path: /sys/bus/vmbus/devices/244225ca-743e-4020-a17d-
> d7baa13d6cea
> >         Rel_ID=16, target_cpu=1
> >         Rel_ID=18, target_cpu=1
> >
> > Update the vmbus CPU assignment algorithm to avoid duplicate CPU
> > assignments within a device.
> >
> > The new algorithm iterates num_online_cpus + 1 times.
> > The existing rotational algorithm to find "next NUMA & CPU" is still here.
> > But if the resulting CPU is already used by the same device, it will
> > try the next CPU.
> > In the last iteration, it assigns the channel to the next available
> > CPU like the existing algorithm. This is not normally expected,
> > because during device probe, we limit the number of channels of a
> > device to be <= number of online CPUs.
> >
> > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > ---
> >  drivers/hv/channel_mgmt.c | 96
> > ++++++++++++++++++++++++++-------------
> >  1 file changed, 64 insertions(+), 32 deletions(-)
> >
> > diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
> > index caf6d0c4bc1b..8584914291e7 100644
> > --- a/drivers/hv/channel_mgmt.c
> > +++ b/drivers/hv/channel_mgmt.c
> > @@ -605,6 +605,17 @@ static void vmbus_process_offer(struct
> vmbus_channel *newchannel)
> >  	 */
> >  	mutex_lock(&vmbus_connection.channel_mutex);
> >
> > +	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry)
> {
> > +		if (guid_equal(&channel->offermsg.offer.if_type,
> > +			       &newchannel->offermsg.offer.if_type) &&
> > +		    guid_equal(&channel->offermsg.offer.if_instance,
> > +			       &newchannel->offermsg.offer.if_instance)) {
> > +			fnew = false;
> > +			newchannel->primary_channel = channel;
> > +			break;
> > +		}
> > +	}
> > +
> >  	init_vp_index(newchannel);
> >
> >  	/* Remember the channels that should be cleaned up upon suspend.
> */
> > @@ -617,16 +628,6 @@ static void vmbus_process_offer(struct
> vmbus_channel *newchannel)
> >  	 */
> >  	atomic_dec(&vmbus_connection.offer_in_progress);
> >
> > -	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry)
> {
> > -		if (guid_equal(&channel->offermsg.offer.if_type,
> > -			       &newchannel->offermsg.offer.if_type) &&
> > -		    guid_equal(&channel->offermsg.offer.if_instance,
> > -			       &newchannel->offermsg.offer.if_instance)) {
> > -			fnew = false;
> > -			break;
> > -		}
> > -	}
> > -
> >  	if (fnew) {
> >  		list_add_tail(&newchannel->listentry,
> >  			      &vmbus_connection.chn_list); @@ -647,7 +648,6
> @@ static void
> > vmbus_process_offer(struct vmbus_channel *newchannel)
> >  		/*
> >  		 * Process the sub-channel.
> >  		 */
> > -		newchannel->primary_channel = channel;
> >  		list_add_tail(&newchannel->sc_list, &channel->sc_list);
> >  	}
> >
> > @@ -683,6 +683,30 @@ static void vmbus_process_offer(struct
> vmbus_channel *newchannel)
> >  	queue_work(wq, &newchannel->add_channel_work);  }
> >
> > +/*
> > + * Check if CPUs used by other channels of the same device.
> > + * It's should only be called by init_vp_index().
> 
> s/It's/It/
> 
> > + */
> > +static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn) {
> > +	struct vmbus_channel *primary = chn->primary_channel;
> > +	struct vmbus_channel *sc;
> > +
> > +	lockdep_assert_held(&vmbus_connection.channel_mutex);
> > +
> > +	if (!primary)
> > +		return false;
> > +
> > +	if (primary->target_cpu == cpu)
> > +		return true;
> > +
> > +	list_for_each_entry(sc, &primary->sc_list, sc_list)
> > +		if (sc != chn && sc->target_cpu == cpu)
> > +			return true;
> > +
> > +	return false;
> > +}
> > +
> >  /*
> >   * We use this state to statically distribute the channel interrupt load.
> >   */
> > @@ -702,6 +726,7 @@ static int next_numa_node_id;  static void
> > init_vp_index(struct vmbus_channel *channel)  {
> >  	bool perf_chn = hv_is_perf_channel(channel);
> > +	u32 i, ncpu = num_online_cpus();
> >  	cpumask_var_t available_mask;
> >  	struct cpumask *alloced_mask;
> >  	u32 target_cpu;
> > @@ -724,31 +749,38 @@ static void init_vp_index(struct vmbus_channel
> *channel)
> >  		return;
> >  	}
> >
> > -	while (true) {
> > -		numa_node = next_numa_node_id++;
> > -		if (numa_node == nr_node_ids) {
> > -			next_numa_node_id = 0;
> > -			continue;
> > +	for (i = 1; i <= ncpu + 1; i++) {
> > +		while (true) {
> > +			numa_node = next_numa_node_id++;
> > +			if (numa_node == nr_node_ids) {
> > +				next_numa_node_id = 0;
> > +				continue;
> > +			}
> > +			if
> (cpumask_empty(cpumask_of_node(numa_node)))
> > +				continue;
> > +			break;
> > +		}
> > +		alloced_mask = &hv_context.hv_numa_map[numa_node];
> > +
> > +		if (cpumask_weight(alloced_mask) ==
> > +		    cpumask_weight(cpumask_of_node(numa_node))) {
> > +			/*
> > +			 * We have cycled through all the CPUs in the node;
> > +			 * reset the alloced map.
> > +			 */
> > +			cpumask_clear(alloced_mask);
> >  		}
> > -		if (cpumask_empty(cpumask_of_node(numa_node)))
> > -			continue;
> > -		break;
> > -	}
> > -	alloced_mask = &hv_context.hv_numa_map[numa_node];
> >
> > -	if (cpumask_weight(alloced_mask) ==
> > -	    cpumask_weight(cpumask_of_node(numa_node))) {
> > -		/*
> > -		 * We have cycled through all the CPUs in the node;
> > -		 * reset the alloced map.
> > -		 */
> > -		cpumask_clear(alloced_mask);
> > -	}
> > +		cpumask_xor(available_mask, alloced_mask,
> > +			    cpumask_of_node(numa_node));
> >
> > -	cpumask_xor(available_mask, alloced_mask,
> cpumask_of_node(numa_node));
> > +		target_cpu = cpumask_first(available_mask);
> > +		cpumask_set_cpu(target_cpu, alloced_mask);
> >
> > -	target_cpu = cpumask_first(available_mask);
> > -	cpumask_set_cpu(target_cpu, alloced_mask);
> > +		if (channel->offermsg.offer.sub_channel_index >= ncpu ||
> > +		    i > ncpu || !hv_cpuself_used(target_cpu, channel))
> > +			break;
> > +	}
> >
> >  	channel->target_cpu = target_cpu;
> >
> > --
> > 2.25.1
> 
> I like this approach much better.  I did some testing with a variety of CPU
> counts (1, 2, 3, 4, 8, and 32), NUMA nodes (1, 3, and 4) and with
> 4 SCSI controllers and 4 NICs.  Everything worked as expected, and it's
> definitely an improvement.
> 
> I flagged a couple of typos/wording errors, but maybe they can be fixed
> when the patch is pulled into hyperv-fixes.
> 
> Reviewed-by: Michael Kelley <mikelley@microsoft.com>
> Tested-by: Michael Kelley <mikelley@microsoft.com>

Thank you for the review and test! I will just correct the comments and
submit a v3.

- Haiyang

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-07-16 17:48 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-16  1:37 [PATCH v2,hyperv-fixes] Drivers: hv: vmbus: Fix duplicate CPU assignments within a device Haiyang Zhang
2021-07-16 17:45 ` Michael Kelley
2021-07-16 17:47   ` Haiyang Zhang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).