From: "Andrea Parri (Microsoft)" <parri.andrea@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: "K . Y . Srinivasan" <kys@microsoft.com>,
Haiyang Zhang <haiyangz@microsoft.com>,
Stephen Hemminger <sthemmin@microsoft.com>,
Wei Liu <wei.liu@kernel.org>,
linux-hyperv@vger.kernel.org,
Michael Kelley <mikelley@microsoft.com>,
Dexuan Cui <decui@microsoft.com>,
Boqun Feng <boqun.feng@gmail.com>,
Vitaly Kuznetsov <vkuznets@redhat.com>,
"Andrea Parri (Microsoft)" <parri.andrea@gmail.com>,
"James E.J. Bottomley" <jejb@linux.ibm.com>,
"Martin K. Petersen" <martin.petersen@oracle.com>,
linux-scsi@vger.kernel.org
Subject: [RFC PATCH 11/11] scsi: storvsc: Re-init stor_chns when a channel interrupt is re-assigned
Date: Wed, 25 Mar 2020 23:55:05 +0100 [thread overview]
Message-ID: <20200325225505.23998-12-parri.andrea@gmail.com> (raw)
In-Reply-To: <20200325225505.23998-1-parri.andrea@gmail.com>
For each storvsc_device, storvsc keeps track of the channel target CPUs
associated with the device (alloced_cpus) and it uses this information to
fill a "cache" (stor_chns) mapping CPU->channel according to a certain
heuristic. Update the alloced_cpus mask and the stor_chns array when a
channel of the storvsc device is re-assigned to a different CPU.
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: <linux-scsi@vger.kernel.org>
---
drivers/hv/vmbus_drv.c | 4 ++
drivers/scsi/storvsc_drv.c | 95 ++++++++++++++++++++++++++++++++++----
include/linux/hyperv.h | 3 ++
3 files changed, 94 insertions(+), 8 deletions(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 84d2f22c569aa..7199fee2b5869 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1721,6 +1721,10 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
* in on a CPU that is different from the channel target_cpu value.
*/
+ if (channel->change_target_cpu_callback)
+ (*channel->change_target_cpu_callback)(channel,
+ channel->target_cpu, target_cpu);
+
channel->target_cpu = target_cpu;
channel->target_vp = hv_cpu_number_to_vp_number(target_cpu);
channel->numa_node = cpu_to_node(target_cpu);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index fb41636519ee8..a680592b9d32a 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -621,6 +621,63 @@ static inline struct storvsc_device *get_in_stor_device(
}
+/*
+ * storvsc_change_target_cpu - refresh the CPU->channel cache of a storvsc
+ * device when one of its channels is re-assigned from CPU @old to CPU @new.
+ *
+ * @channel: the channel (primary or sub-channel) being re-assigned
+ * @old:     the CPU the channel was targeting so far
+ * @new:     the CPU the channel is going to target
+ *
+ * Installed as the channels' change_target_cpu_callback.  Updates the
+ * alloced_cpus mask and the stor_chns array of the owning storvsc device;
+ * does nothing if no storvsc device is attached to the hv_device.
+ */
+static void storvsc_change_target_cpu(struct vmbus_channel *channel, u32 old,
+				      u32 new)
+{
+	struct storvsc_device *stor_device;
+	struct vmbus_channel *old_chn = NULL;
+	struct vmbus_channel *cur_chn;
+	struct hv_device *device;
+	unsigned long flags;
+	int cpu;
+
+	/* Sub-channels reach the hv_device through their primary channel. */
+	device = channel->primary_channel ?
+			channel->primary_channel->device_obj
+				: channel->device_obj;
+	stor_device = get_out_stor_device(device);
+	if (!stor_device)
+		return;
+
+	/* See storvsc_do_io() -> get_og_chn(). */
+	spin_lock_irqsave(&device->channel->lock, flags);
+
+	/*
+	 * Determines if the storvsc device has other channels assigned to
+	 * the "old" CPU to update the alloced_cpus mask and the stor_chns
+	 * array.  The primary channel is checked first, then the list of
+	 * its sub-channels; "channel" itself is never a candidate.
+	 */
+	if (device->channel != channel && device->channel->target_cpu == old) {
+		old_chn = device->channel;
+	} else {
+		list_for_each_entry(cur_chn, &device->channel->sc_list,
+				    sc_list) {
+			if (cur_chn != channel && cur_chn->target_cpu == old) {
+				old_chn = cur_chn;
+				break;
+			}
+		}
+	}
+
+	/*
+	 * Either hand the "old" slot over to another channel targeting that
+	 * CPU, or mark the CPU as no longer allocated.  The stor_chns slots
+	 * are read with READ_ONCE() in storvsc_do_io(), hence WRITE_ONCE().
+	 */
+	if (old_chn)
+		WRITE_ONCE(stor_device->stor_chns[old], old_chn);
+	else
+		cpumask_clear_cpu(old, &stor_device->alloced_cpus);
+
+	/* "Flush" the stor_chns array. */
+	for_each_possible_cpu(cpu) {
+		if (stor_device->stor_chns[cpu] &&
+		    !cpumask_test_cpu(cpu, &stor_device->alloced_cpus))
+			WRITE_ONCE(stor_device->stor_chns[cpu], NULL);
+	}
+
+	/* Publish the channel under its new target CPU. */
+	WRITE_ONCE(stor_device->stor_chns[new], channel);
+	cpumask_set_cpu(new, &stor_device->alloced_cpus);
+
+	spin_unlock_irqrestore(&device->channel->lock, flags);
+}
+
+
static void handle_sc_creation(struct vmbus_channel *new_sc)
{
struct hv_device *device = new_sc->primary_channel->device_obj;
@@ -648,6 +705,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
return;
}
+ new_sc->change_target_cpu_callback = storvsc_change_target_cpu;
+
/* Add the sub-channel to the array of available channels. */
stor_device->stor_chns[new_sc->target_cpu] = new_sc;
cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
@@ -876,6 +935,8 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc)
if (stor_device->stor_chns == NULL)
return -ENOMEM;
+ device->channel->change_target_cpu_callback = storvsc_change_target_cpu;
+
stor_device->stor_chns[device->channel->target_cpu] = device->channel;
cpumask_set_cpu(device->channel->target_cpu,
&stor_device->alloced_cpus);
@@ -1248,8 +1309,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
const struct cpumask *node_mask;
int num_channels, tgt_cpu;
- if (stor_device->num_sc == 0)
+ if (stor_device->num_sc == 0) {
+ stor_device->stor_chns[q_num] = stor_device->device->channel;
return stor_device->device->channel;
+ }
/*
* Our channel array is sparsley populated and we
@@ -1258,7 +1321,6 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
* The strategy is simple:
* I. Ensure NUMA locality
* II. Distribute evenly (best effort)
- * III. Mapping is persistent.
*/
node_mask = cpumask_of_node(cpu_to_node(q_num));
@@ -1268,8 +1330,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
if (cpumask_test_cpu(tgt_cpu, node_mask))
num_channels++;
}
- if (num_channels == 0)
+ if (num_channels == 0) {
+ stor_device->stor_chns[q_num] = stor_device->device->channel;
return stor_device->device->channel;
+ }
hash_qnum = q_num;
while (hash_qnum >= num_channels)
@@ -1295,6 +1359,7 @@ static int storvsc_do_io(struct hv_device *device,
struct storvsc_device *stor_device;
struct vstor_packet *vstor_packet;
struct vmbus_channel *outgoing_channel, *channel;
+ unsigned long flags;
int ret = 0;
const struct cpumask *node_mask;
int tgt_cpu;
@@ -1308,10 +1373,11 @@ static int storvsc_do_io(struct hv_device *device,
request->device = device;
/*
- * Select an an appropriate channel to send the request out.
+ * Select an appropriate channel to send the request out.
*/
- if (stor_device->stor_chns[q_num] != NULL) {
- outgoing_channel = stor_device->stor_chns[q_num];
+ /* See storvsc_change_target_cpu(). */
+ outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
+ if (outgoing_channel != NULL) {
if (outgoing_channel->target_cpu == q_num) {
/*
* Ideally, we want to pick a different channel if
@@ -1324,7 +1390,10 @@ static int storvsc_do_io(struct hv_device *device,
continue;
if (tgt_cpu == q_num)
continue;
- channel = stor_device->stor_chns[tgt_cpu];
+ channel = READ_ONCE(
+ stor_device->stor_chns[tgt_cpu]);
+ if (channel == NULL)
+ continue;
if (hv_get_avail_to_write_percent(
&channel->outbound)
> ring_avail_percent_lowater) {
@@ -1350,7 +1419,10 @@ static int storvsc_do_io(struct hv_device *device,
for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
if (cpumask_test_cpu(tgt_cpu, node_mask))
continue;
- channel = stor_device->stor_chns[tgt_cpu];
+ channel = READ_ONCE(
+ stor_device->stor_chns[tgt_cpu]);
+ if (channel == NULL)
+ continue;
if (hv_get_avail_to_write_percent(
&channel->outbound)
> ring_avail_percent_lowater) {
@@ -1360,7 +1432,14 @@ static int storvsc_do_io(struct hv_device *device,
}
}
} else {
+ spin_lock_irqsave(&device->channel->lock, flags);
+ outgoing_channel = stor_device->stor_chns[q_num];
+ if (outgoing_channel != NULL) {
+ spin_unlock_irqrestore(&device->channel->lock, flags);
+ goto found_channel;
+ }
outgoing_channel = get_og_chn(stor_device, q_num);
+ spin_unlock_irqrestore(&device->channel->lock, flags);
}
found_channel:
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index edfcd42319ef3..9018b89614b78 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -773,6 +773,9 @@ struct vmbus_channel {
void (*onchannel_callback)(void *context);
void *channel_callback_context;
+ void (*change_target_cpu_callback)(struct vmbus_channel *channel,
+ u32 old, u32 new);
+
/*
* Synchronize channel scheduling and channel removal; see the inline
* comments in vmbus_chan_sched() and vmbus_reset_channel_cb().
--
2.24.0
next prev parent reply other threads:[~2020-03-25 22:56 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-25 22:54 [RFC PATCH 00/11] VMBus channel interrupt reassignment Andrea Parri (Microsoft)
2020-03-25 22:54 ` [RFC PATCH 01/11] Drivers: hv: vmbus: Always handle the VMBus messages on CPU0 Andrea Parri (Microsoft)
2020-03-26 14:05 ` Vitaly Kuznetsov
2020-03-28 18:50 ` Andrea Parri
2020-03-25 22:54 ` [RFC PATCH 02/11] Drivers: hv: vmbus: Don't bind the offer&rescind works to a specific CPU Andrea Parri (Microsoft)
2020-03-26 14:16 ` Vitaly Kuznetsov
2020-03-26 15:47 ` Andrea Parri
2020-03-26 17:26 ` Vitaly Kuznetsov
2020-03-28 17:08 ` Andrea Parri
2020-03-29 3:43 ` Michael Kelley
2020-03-30 12:24 ` Vitaly Kuznetsov
2020-04-03 12:04 ` Andrea Parri
2020-03-25 22:54 ` [RFC PATCH 03/11] Drivers: hv: vmbus: Replace the per-CPU channel lists with a global array of channels Andrea Parri (Microsoft)
2020-03-26 14:31 ` Vitaly Kuznetsov
2020-03-26 17:05 ` Andrea Parri
2020-03-26 17:43 ` Vitaly Kuznetsov
2020-03-28 18:21 ` Andrea Parri
2020-03-29 3:49 ` Michael Kelley
2020-03-30 12:45 ` Vitaly Kuznetsov
2020-04-03 13:38 ` Andrea Parri
2020-04-03 14:56 ` Vitaly Kuznetsov
2020-03-25 22:54 ` [RFC PATCH 04/11] hv_netvsc: Disable NAPI before closing the VMBus channel Andrea Parri (Microsoft)
2020-03-26 15:26 ` Stephen Hemminger
2020-03-26 17:55 ` Andrea Parri
2020-03-25 22:54 ` [RFC PATCH 05/11] hv_utils: Always execute the fcopy and vss callbacks in a tasklet Andrea Parri (Microsoft)
2020-03-25 22:55 ` [RFC PATCH 06/11] Drivers: hv: vmbus: Use a spin lock for synchronizing channel scheduling vs. channel removal Andrea Parri (Microsoft)
2020-03-25 22:55 ` [RFC PATCH 07/11] PCI: hv: Prepare hv_compose_msi_msg() for the VMBus-channel-interrupt-to-vCPU reassignment functionality Andrea Parri (Microsoft)
2020-03-25 22:55 ` [RFC PATCH 08/11] Drivers: hv: vmbus: Remove the unused HV_LOCALIZED channel affinity logic Andrea Parri (Microsoft)
2020-03-25 22:55 ` [RFC PATCH 09/11] Drivers: hv: vmbus: Synchronize init_vp_index() vs. CPU hotplug Andrea Parri (Microsoft)
2020-03-25 22:55 ` [RFC PATCH 10/11] Drivers: hv: vmbus: Introduce the CHANNELMSG_MODIFYCHANNEL message type Andrea Parri (Microsoft)
2020-03-26 14:46 ` Vitaly Kuznetsov
2020-03-28 18:48 ` Andrea Parri
2020-04-03 14:55 ` Andrea Parri
2020-03-25 22:55 ` Andrea Parri (Microsoft) [this message]
2020-03-30 16:42 ` [RFC PATCH 11/11] scsi: storvsc: Re-init stor_chns when a channel interrupt is re-assigned Michael Kelley
2020-03-30 18:55 ` Andrea Parri
2020-03-30 19:49 ` Michael Kelley
2020-04-03 13:41 ` Andrea Parri
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200325225505.23998-12-parri.andrea@gmail.com \
--to=parri.andrea@gmail.com \
--cc=boqun.feng@gmail.com \
--cc=decui@microsoft.com \
--cc=haiyangz@microsoft.com \
--cc=jejb@linux.ibm.com \
--cc=kys@microsoft.com \
--cc=linux-hyperv@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=martin.petersen@oracle.com \
--cc=mikelley@microsoft.com \
--cc=sthemmin@microsoft.com \
--cc=vkuznets@redhat.com \
--cc=wei.liu@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).