linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/7] Drivers: hv: Some miscellaneous fixes
@ 2016-09-02 12:56 kys
  2016-09-02 12:58 ` [PATCH 1/7] Drivers: hv: cleanup vmbus_open() for wrap around mappings kys
  0 siblings, 1 reply; 8+ messages in thread
From: kys @ 2016-09-02 12:56 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, alexng
  Cc: K. Y. Srinivasan

From: K. Y. Srinivasan <kys@microsoft.com>

Some miscellaneous fixes and enhancements. These patches were all
sent earlier but failed to apply clean on Greg's tree. These have
now been rebased.

Alex Ng (2):
  Drivers: hv: utils: Continue to poll VSS channel after handling
    requests.
  Drivers: hv: utils: Check VSS daemon is listening before a hot backup

K. Y. Srinivasan (1):
  Drivers: hv: Introduce a policy for controlling channel affinity

Vitaly Kuznetsov (4):
  Drivers: hv: cleanup vmbus_open() for wrap around mappings
  Drivers: hv: ring_buffer: wrap around mappings for ring buffers
  Drivers: hv: ring_buffer: use wrap around mappings in
    hv_copy{from,to}_ringbuffer()
  Drivers: hv: ring_buffer: count on wrap around mappings in
    get_next_pkt_raw()

 drivers/hv/channel.c      |   66 ++++++++++++++++---------------
 drivers/hv/channel_mgmt.c |   68 +++++++++++++++++++--------------
 drivers/hv/hv_snapshot.c  |   93 ++++++++++++++++++++++-----------------------
 drivers/hv/hyperv_vmbus.h |    4 +-
 drivers/hv/ring_buffer.c  |   61 +++++++++++++++++------------
 include/linux/hyperv.h    |   55 ++++++++++++++++----------
 tools/hv/hv_vss_daemon.c  |    3 +
 7 files changed, 193 insertions(+), 157 deletions(-)

-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/7] Drivers: hv: cleanup vmbus_open() for wrap around mappings
  2016-09-02 12:56 [PATCH 0/7] Drivers: hv: Some miscellaneous fixes kys
@ 2016-09-02 12:58 ` kys
  2016-09-02 12:58   ` [PATCH 2/7] Drivers: hv: ring_buffer: wrap around mappings for ring buffers kys
                     ` (5 more replies)
  0 siblings, 6 replies; 8+ messages in thread
From: kys @ 2016-09-02 12:58 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, alexng
  Cc: K. Y. Srinivasan

From: Vitaly Kuznetsov <vkuznets@redhat.com>

In preparation for doing wrap around mappings for ring buffers cleanup
vmbus_open() function:
- check that ring sizes are PAGE_SIZE aligned (they are for all in-kernel
  drivers now);
- kfree(open_info) on error only after we kzalloc() it (not an issue as it
  is valid to call kfree(NULL);
- rename poorly named labels;
- use alloc_pages() instead of __get_free_pages() as we need struct page
  pointer for future.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Tested-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
---
 drivers/hv/channel.c |   41 ++++++++++++++++++++++-------------------
 1 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 884cebf..15e8330 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -80,6 +80,10 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 	int ret, err = 0;
 	struct page *page;
 
+	if (send_ringbuffer_size % PAGE_SIZE ||
+	    recv_ringbuffer_size % PAGE_SIZE)
+		return -EINVAL;
+
 	spin_lock_irqsave(&newchannel->lock, flags);
 	if (newchannel->state == CHANNEL_OPEN_STATE) {
 		newchannel->state = CHANNEL_OPENING_STATE;
@@ -99,17 +103,16 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 				recv_ringbuffer_size));
 
 	if (!page)
-		out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
-					       get_order(send_ringbuffer_size +
-					       recv_ringbuffer_size));
-	else
-		out = (void *)page_address(page);
+		page = alloc_pages(GFP_KERNEL|__GFP_ZERO,
+				   get_order(send_ringbuffer_size +
+					     recv_ringbuffer_size));
 
-	if (!out) {
+	if (!page) {
 		err = -ENOMEM;
-		goto error0;
+		goto error_set_chnstate;
 	}
 
+	out = page_address(page);
 	in = (void *)((unsigned long)out + send_ringbuffer_size);
 
 	newchannel->ringbuffer_pages = out;
@@ -121,14 +124,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 
 	if (ret != 0) {
 		err = ret;
-		goto error0;
+		goto error_free_pages;
 	}
 
 	ret = hv_ringbuffer_init(
 		&newchannel->inbound, in, recv_ringbuffer_size);
 	if (ret != 0) {
 		err = ret;
-		goto error0;
+		goto error_free_pages;
 	}
 
 
@@ -143,7 +146,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 
 	if (ret != 0) {
 		err = ret;
-		goto error0;
+		goto error_free_pages;
 	}
 
 	/* Create and init the channel open message */
@@ -152,7 +155,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 			   GFP_KERNEL);
 	if (!open_info) {
 		err = -ENOMEM;
-		goto error_gpadl;
+		goto error_free_gpadl;
 	}
 
 	init_completion(&open_info->waitevent);
@@ -168,7 +171,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 
 	if (userdatalen > MAX_USER_DEFINED_BYTES) {
 		err = -EINVAL;
-		goto error_gpadl;
+		goto error_free_gpadl;
 	}
 
 	if (userdatalen)
@@ -184,7 +187,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 
 	if (ret != 0) {
 		err = ret;
-		goto error1;
+		goto error_clean_msglist;
 	}
 
 	wait_for_completion(&open_info->waitevent);
@@ -195,25 +198,25 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 
 	if (open_info->response.open_result.status) {
 		err = -EAGAIN;
-		goto error_gpadl;
+		goto error_free_gpadl;
 	}
 
 	newchannel->state = CHANNEL_OPENED_STATE;
 	kfree(open_info);
 	return 0;
 
-error1:
+error_clean_msglist:
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 	list_del(&open_info->msglistentry);
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 
-error_gpadl:
+error_free_gpadl:
 	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
-
-error0:
+	kfree(open_info);
+error_free_pages:
 	free_pages((unsigned long)out,
 		get_order(send_ringbuffer_size + recv_ringbuffer_size));
-	kfree(open_info);
+error_set_chnstate:
 	newchannel->state = CHANNEL_OPEN_STATE;
 	return err;
 }
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/7] Drivers: hv: ring_buffer: wrap around mappings for ring buffers
  2016-09-02 12:58 ` [PATCH 1/7] Drivers: hv: cleanup vmbus_open() for wrap around mappings kys
@ 2016-09-02 12:58   ` kys
  2016-09-02 12:58   ` [PATCH 3/7] Drivers: hv: ring_buffer: use wrap around mappings in hv_copy{from,to}_ringbuffer() kys
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: kys @ 2016-09-02 12:58 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, alexng
  Cc: K. Y. Srinivasan

From: Vitaly Kuznetsov <vkuznets@redhat.com>

Make it possible to always use a single memcpy() or to provide a direct
link to a packet on the ring buffer by creating virtual mapping for two
copies of the ring buffer with vmap(). Utilize currently empty
hv_ringbuffer_cleanup() to do the unmap.

While on it, replace sizeof(struct hv_ring_buffer) check
in hv_ringbuffer_init() with BUILD_BUG_ON() as it is a compile time check.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Tested-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
---
 drivers/hv/channel.c      |   29 ++++++++++++++---------------
 drivers/hv/hyperv_vmbus.h |    4 ++--
 drivers/hv/ring_buffer.c  |   39 +++++++++++++++++++++++++++++++++------
 3 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 15e8330..16f91c8 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -75,7 +75,6 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 {
 	struct vmbus_channel_open_channel *open_msg;
 	struct vmbus_channel_msginfo *open_info = NULL;
-	void *in, *out;
 	unsigned long flags;
 	int ret, err = 0;
 	struct page *page;
@@ -112,23 +111,21 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 		goto error_set_chnstate;
 	}
 
-	out = page_address(page);
-	in = (void *)((unsigned long)out + send_ringbuffer_size);
-
-	newchannel->ringbuffer_pages = out;
+	newchannel->ringbuffer_pages = page_address(page);
 	newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
 					   recv_ringbuffer_size) >> PAGE_SHIFT;
 
-	ret = hv_ringbuffer_init(
-		&newchannel->outbound, out, send_ringbuffer_size);
+	ret = hv_ringbuffer_init(&newchannel->outbound, page,
+				 send_ringbuffer_size >> PAGE_SHIFT);
 
 	if (ret != 0) {
 		err = ret;
 		goto error_free_pages;
 	}
 
-	ret = hv_ringbuffer_init(
-		&newchannel->inbound, in, recv_ringbuffer_size);
+	ret = hv_ringbuffer_init(&newchannel->inbound,
+				 &page[send_ringbuffer_size >> PAGE_SHIFT],
+				 recv_ringbuffer_size >> PAGE_SHIFT);
 	if (ret != 0) {
 		err = ret;
 		goto error_free_pages;
@@ -139,10 +136,10 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 	newchannel->ringbuffer_gpadlhandle = 0;
 
 	ret = vmbus_establish_gpadl(newchannel,
-					 newchannel->outbound.ring_buffer,
-					 send_ringbuffer_size +
-					 recv_ringbuffer_size,
-					 &newchannel->ringbuffer_gpadlhandle);
+				    page_address(page),
+				    send_ringbuffer_size +
+				    recv_ringbuffer_size,
+				    &newchannel->ringbuffer_gpadlhandle);
 
 	if (ret != 0) {
 		err = ret;
@@ -214,8 +211,10 @@ error_free_gpadl:
 	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
 	kfree(open_info);
 error_free_pages:
-	free_pages((unsigned long)out,
-		get_order(send_ringbuffer_size + recv_ringbuffer_size));
+	hv_ringbuffer_cleanup(&newchannel->outbound);
+	hv_ringbuffer_cleanup(&newchannel->inbound);
+	__free_pages(page,
+		     get_order(send_ringbuffer_size + recv_ringbuffer_size));
 error_set_chnstate:
 	newchannel->state = CHANNEL_OPEN_STATE;
 	return err;
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index ddcc348..a5b4442 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -522,8 +522,8 @@ extern unsigned int host_info_edx;
 /* Interface */
 
 
-int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, void *buffer,
-		   u32 buflen);
+int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
+		       struct page *pages, u32 pagecnt);
 
 void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
 
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index e3edcae..7e21c2c 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -27,6 +27,8 @@
 #include <linux/mm.h>
 #include <linux/hyperv.h>
 #include <linux/uio.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
 
 #include "hyperv_vmbus.h"
 
@@ -243,22 +245,46 @@ void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
 
 /* Initialize the ring buffer. */
 int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
-		   void *buffer, u32 buflen)
+		       struct page *pages, u32 page_cnt)
 {
-	if (sizeof(struct hv_ring_buffer) != PAGE_SIZE)
-		return -EINVAL;
+	int i;
+	struct page **pages_wraparound;
+
+	BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
 
 	memset(ring_info, 0, sizeof(struct hv_ring_buffer_info));
 
-	ring_info->ring_buffer = (struct hv_ring_buffer *)buffer;
+	/*
+	 * First page holds struct hv_ring_buffer, do wraparound mapping for
+	 * the rest.
+	 */
+	pages_wraparound = kzalloc(sizeof(struct page *) * (page_cnt * 2 - 1),
+				   GFP_KERNEL);
+	if (!pages_wraparound)
+		return -ENOMEM;
+
+	pages_wraparound[0] = pages;
+	for (i = 0; i < 2 * (page_cnt - 1); i++)
+		pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];
+
+	ring_info->ring_buffer = (struct hv_ring_buffer *)
+		vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);
+
+	kfree(pages_wraparound);
+
+
+	if (!ring_info->ring_buffer)
+		return -ENOMEM;
+
 	ring_info->ring_buffer->read_index =
 		ring_info->ring_buffer->write_index = 0;
 
 	/* Set the feature bit for enabling flow control. */
 	ring_info->ring_buffer->feature_bits.value = 1;
 
-	ring_info->ring_size = buflen;
-	ring_info->ring_datasize = buflen - sizeof(struct hv_ring_buffer);
+	ring_info->ring_size = page_cnt << PAGE_SHIFT;
+	ring_info->ring_datasize = ring_info->ring_size -
+		sizeof(struct hv_ring_buffer);
 
 	spin_lock_init(&ring_info->ring_lock);
 
@@ -268,6 +294,7 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
 /* Cleanup the ring buffer. */
 void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
 {
+	vunmap(ring_info->ring_buffer);
 }
 
 /* Write to the ring buffer. */
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/7] Drivers: hv: ring_buffer: use wrap around mappings in hv_copy{from,to}_ringbuffer()
  2016-09-02 12:58 ` [PATCH 1/7] Drivers: hv: cleanup vmbus_open() for wrap around mappings kys
  2016-09-02 12:58   ` [PATCH 2/7] Drivers: hv: ring_buffer: wrap around mappings for ring buffers kys
@ 2016-09-02 12:58   ` kys
  2016-09-02 12:58   ` [PATCH 4/7] Drivers: hv: ring_buffer: count on wrap around mappings in get_next_pkt_raw() kys
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: kys @ 2016-09-02 12:58 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, alexng
  Cc: K. Y. Srinivasan

From: Vitaly Kuznetsov <vkuznets@redhat.com>

With wrap around mappings for ring buffers we can always use a single
memcpy() to do the job.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Tested-by: Dexuan Cui <decui@microsoft.com>
---
 drivers/hv/ring_buffer.c |   24 +++---------------------
 1 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 7e21c2c..08043da 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -172,18 +172,7 @@ static u32 hv_copyfrom_ringbuffer(
 	void *ring_buffer = hv_get_ring_buffer(ring_info);
 	u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
 
-	u32 frag_len;
-
-	/* wrap-around detected at the src */
-	if (destlen > ring_buffer_size - start_read_offset) {
-		frag_len = ring_buffer_size - start_read_offset;
-
-		memcpy(dest, ring_buffer + start_read_offset, frag_len);
-		memcpy(dest + frag_len, ring_buffer, destlen - frag_len);
-	} else
-
-		memcpy(dest, ring_buffer + start_read_offset, destlen);
-
+	memcpy(dest, ring_buffer + start_read_offset, destlen);
 
 	start_read_offset += destlen;
 	start_read_offset %= ring_buffer_size;
@@ -204,15 +193,8 @@ static u32 hv_copyto_ringbuffer(
 {
 	void *ring_buffer = hv_get_ring_buffer(ring_info);
 	u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
-	u32 frag_len;
-
-	/* wrap-around detected! */
-	if (srclen > ring_buffer_size - start_write_offset) {
-		frag_len = ring_buffer_size - start_write_offset;
-		memcpy(ring_buffer + start_write_offset, src, frag_len);
-		memcpy(ring_buffer, src + frag_len, srclen - frag_len);
-	} else
-		memcpy(ring_buffer + start_write_offset, src, srclen);
+
+	memcpy(ring_buffer + start_write_offset, src, srclen);
 
 	start_write_offset += srclen;
 	start_write_offset %= ring_buffer_size;
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/7] Drivers: hv: ring_buffer: count on wrap around mappings in get_next_pkt_raw()
  2016-09-02 12:58 ` [PATCH 1/7] Drivers: hv: cleanup vmbus_open() for wrap around mappings kys
  2016-09-02 12:58   ` [PATCH 2/7] Drivers: hv: ring_buffer: wrap around mappings for ring buffers kys
  2016-09-02 12:58   ` [PATCH 3/7] Drivers: hv: ring_buffer: use wrap around mappings in hv_copy{from,to}_ringbuffer() kys
@ 2016-09-02 12:58   ` kys
  2016-09-02 12:58   ` [PATCH 5/7] Drivers: hv: Introduce a policy for controlling channel affinity kys
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: kys @ 2016-09-02 12:58 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, alexng
  Cc: K. Y. Srinivasan

From: Vitaly Kuznetsov <vkuznets@redhat.com>

With wrap around mappings in place we can always provide drivers with
direct links to packets on the ring buffer, even when they wrap around.
Do the required updates to get_next_pkt_raw()/put_pkt_raw()

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Tested-by: Dexuan Cui <decui@microsoft.com>
---
 include/linux/hyperv.h |   32 +++++++++++---------------------
 1 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 755e8f5..e6ef571 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1469,31 +1469,23 @@ static inline struct vmpacket_descriptor *
 get_next_pkt_raw(struct vmbus_channel *channel)
 {
 	struct hv_ring_buffer_info *ring_info = &channel->inbound;
-	u32 read_loc = ring_info->priv_read_index;
+	u32 priv_read_loc = ring_info->priv_read_index;
 	void *ring_buffer = hv_get_ring_buffer(ring_info);
-	struct vmpacket_descriptor *cur_desc;
-	u32 packetlen;
 	u32 dsize = ring_info->ring_datasize;
-	u32 delta = read_loc - ring_info->ring_buffer->read_index;
+	/*
+	 * delta is the difference between what is available to read and
+	 * what was already consumed in place. We commit read index after
+	 * the whole batch is processed.
+	 */
+	u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ?
+		priv_read_loc - ring_info->ring_buffer->read_index :
+		(dsize - ring_info->ring_buffer->read_index) + priv_read_loc;
 	u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta);
 
 	if (bytes_avail_toread < sizeof(struct vmpacket_descriptor))
 		return NULL;
 
-	if ((read_loc + sizeof(*cur_desc)) > dsize)
-		return NULL;
-
-	cur_desc = ring_buffer + read_loc;
-	packetlen = cur_desc->len8 << 3;
-
-	/*
-	 * If the packet under consideration is wrapping around,
-	 * return failure.
-	 */
-	if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1))
-		return NULL;
-
-	return cur_desc;
+	return ring_buffer + priv_read_loc;
 }
 
 /*
@@ -1505,16 +1497,14 @@ static inline void put_pkt_raw(struct vmbus_channel *channel,
 				struct vmpacket_descriptor *desc)
 {
 	struct hv_ring_buffer_info *ring_info = &channel->inbound;
-	u32 read_loc = ring_info->priv_read_index;
 	u32 packetlen = desc->len8 << 3;
 	u32 dsize = ring_info->ring_datasize;
 
-	if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize)
-		BUG();
 	/*
 	 * Include the packet trailer.
 	 */
 	ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
+	ring_info->priv_read_index %= dsize;
 }
 
 /*
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 5/7] Drivers: hv: Introduce a policy for controlling channel affinity
  2016-09-02 12:58 ` [PATCH 1/7] Drivers: hv: cleanup vmbus_open() for wrap around mappings kys
                     ` (2 preceding siblings ...)
  2016-09-02 12:58   ` [PATCH 4/7] Drivers: hv: ring_buffer: count on wrap around mappings in get_next_pkt_raw() kys
@ 2016-09-02 12:58   ` kys
  2016-09-02 12:58   ` [PATCH 6/7] Drivers: hv: utils: Continue to poll VSS channel after handling requests kys
  2016-09-02 12:58   ` [PATCH 7/7] Drivers: hv: utils: Check VSS daemon is listening before a hot backup kys
  5 siblings, 0 replies; 8+ messages in thread
From: kys @ 2016-09-02 12:58 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, alexng
  Cc: K. Y. Srinivasan, K. Y. Srinivasan

From: K. Y. Srinivasan <kys@exchange.microsoft.com>

Introduce a mechanism to control how channels will be affinitized. We will
support two policies:

1. HV_BALANCED: All performance critical channels will be dstributed
evenly amongst all the available NUMA nodes. Once the Node is assigned,
we will assign the CPU based on a simple round robin scheme.

2. HV_LOCALIZED: Only the primary channels are distributed across all
NUMA nodes. Sub-channels will be in the same NUMA node as the primary
channel. This is the current behaviour.

The default policy will be the HV_BALANCED as it can minimize the remote
memory access on NUMA machines with applications that span NUMA nodes.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
---
 drivers/hv/channel_mgmt.c |   68 +++++++++++++++++++++++++-------------------
 include/linux/hyperv.h    |   23 +++++++++++++++
 2 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index d8b64ba..bbd812e 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -356,8 +356,9 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
 	 * We need to free the bit for init_vp_index() to work in the case
 	 * of sub-channel, when we reload drivers like hv_netvsc.
 	 */
-	cpumask_clear_cpu(channel->target_cpu,
-			  &primary_channel->alloced_cpus_in_node);
+	if (channel->affinity_policy == HV_LOCALIZED)
+		cpumask_clear_cpu(channel->target_cpu,
+				  &primary_channel->alloced_cpus_in_node);
 
 	vmbus_release_relid(relid);
 
@@ -548,17 +549,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 	}
 
 	/*
-	 * We distribute primary channels evenly across all the available
-	 * NUMA nodes and within the assigned NUMA node we will assign the
-	 * first available CPU to the primary channel.
-	 * The sub-channels will be assigned to the CPUs available in the
-	 * NUMA node evenly.
+	 * Based on the channel affinity policy, we will assign the NUMA
+	 * nodes.
 	 */
-	if (!primary) {
+
+	if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
 		while (true) {
 			next_node = next_numa_node_id++;
-			if (next_node == nr_node_ids)
+			if (next_node == nr_node_ids) {
 				next_node = next_numa_node_id = 0;
+				continue;
+			}
 			if (cpumask_empty(cpumask_of_node(next_node)))
 				continue;
 			break;
@@ -582,15 +583,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 
 	cur_cpu = -1;
 
-	/*
-	 * Normally Hyper-V host doesn't create more subchannels than there
-	 * are VCPUs on the node but it is possible when not all present VCPUs
-	 * on the node are initialized by guest. Clear the alloced_cpus_in_node
-	 * to start over.
-	 */
-	if (cpumask_equal(&primary->alloced_cpus_in_node,
-			  cpumask_of_node(primary->numa_node)))
-		cpumask_clear(&primary->alloced_cpus_in_node);
+	if (primary->affinity_policy == HV_LOCALIZED) {
+		/*
+		 * Normally Hyper-V host doesn't create more subchannels
+		 * than there are VCPUs on the node but it is possible when not
+		 * all present VCPUs on the node are initialized by guest.
+		 * Clear the alloced_cpus_in_node to start over.
+		 */
+		if (cpumask_equal(&primary->alloced_cpus_in_node,
+				  cpumask_of_node(primary->numa_node)))
+			cpumask_clear(&primary->alloced_cpus_in_node);
+	}
 
 	while (true) {
 		cur_cpu = cpumask_next(cur_cpu, &available_mask);
@@ -601,17 +604,24 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 			continue;
 		}
 
-		/*
-		 * NOTE: in the case of sub-channel, we clear the sub-channel
-		 * related bit(s) in primary->alloced_cpus_in_node in
-		 * hv_process_channel_removal(), so when we reload drivers
-		 * like hv_netvsc in SMP guest, here we're able to re-allocate
-		 * bit from primary->alloced_cpus_in_node.
-		 */
-		if (!cpumask_test_cpu(cur_cpu,
-				&primary->alloced_cpus_in_node)) {
-			cpumask_set_cpu(cur_cpu,
-					&primary->alloced_cpus_in_node);
+		if (primary->affinity_policy == HV_LOCALIZED) {
+			/*
+			 * NOTE: in the case of sub-channel, we clear the
+			 * sub-channel related bit(s) in
+			 * primary->alloced_cpus_in_node in
+			 * hv_process_channel_removal(), so when we
+			 * reload drivers like hv_netvsc in SMP guest, here
+			 * we're able to re-allocate
+			 * bit from primary->alloced_cpus_in_node.
+			 */
+			if (!cpumask_test_cpu(cur_cpu,
+					      &primary->alloced_cpus_in_node)) {
+				cpumask_set_cpu(cur_cpu,
+						&primary->alloced_cpus_in_node);
+				cpumask_set_cpu(cur_cpu, alloced_mask);
+				break;
+			}
+		} else {
 			cpumask_set_cpu(cur_cpu, alloced_mask);
 			break;
 		}
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index e6ef571..c877e79 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -674,6 +674,11 @@ enum hv_signal_policy {
 	HV_SIGNAL_POLICY_EXPLICIT,
 };
 
+enum hv_numa_policy {
+	HV_BALANCED = 0,
+	HV_LOCALIZED,
+};
+
 enum vmbus_device_type {
 	HV_IDE = 0,
 	HV_SCSI,
@@ -876,6 +881,18 @@ struct vmbus_channel {
 	 */
 	bool low_latency;
 
+	/*
+	 * NUMA distribution policy:
+	 * We support teo policies:
+	 * 1) Balanced: Here all performance critical channels are
+	 *    distributed evenly amongst all the NUMA nodes.
+	 *    This policy will be the default policy.
+	 * 2) Localized: All channels of a given instance of a
+	 *    performance critical service will be assigned CPUs
+	 *    within a selected NUMA node.
+	 */
+	enum hv_numa_policy affinity_policy;
+
 };
 
 static inline void set_channel_lock_state(struct vmbus_channel *c, bool state)
@@ -895,6 +912,12 @@ static inline void set_channel_signal_state(struct vmbus_channel *c,
 	c->signal_policy = policy;
 }
 
+static inline void set_channel_affinity_state(struct vmbus_channel *c,
+					      enum hv_numa_policy policy)
+{
+	c->affinity_policy = policy;
+}
+
 static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
 {
 	c->batched_reading = state;
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 6/7] Drivers: hv: utils: Continue to poll VSS channel after handling requests.
  2016-09-02 12:58 ` [PATCH 1/7] Drivers: hv: cleanup vmbus_open() for wrap around mappings kys
                     ` (3 preceding siblings ...)
  2016-09-02 12:58   ` [PATCH 5/7] Drivers: hv: Introduce a policy for controlling channel affinity kys
@ 2016-09-02 12:58   ` kys
  2016-09-02 12:58   ` [PATCH 7/7] Drivers: hv: utils: Check VSS daemon is listening before a hot backup kys
  5 siblings, 0 replies; 8+ messages in thread
From: kys @ 2016-09-02 12:58 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, alexng
  Cc: K. Y. Srinivasan

From: Alex Ng <alexng@microsoft.com>

Multiple VSS_OP_HOT_BACKUP requests may arrive in quick succession, even
though the host only signals once. The driver wass handling the first
request while ignoring the others in the ring buffer. We should poll the
VSS channel after handling a request to continue processing other requests.

Signed-off-by: Alex Ng <alexng@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
---
 drivers/hv/hv_snapshot.c |   90 ++++++++++++++++++++++------------------------
 1 files changed, 43 insertions(+), 47 deletions(-)

diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index fde4586..c4013c8 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -67,11 +67,11 @@ static const char vss_devname[] = "vmbus/hv_vss";
 static __u8 *recv_buffer;
 static struct hvutil_transport *hvt;
 
-static void vss_send_op(struct work_struct *dummy);
 static void vss_timeout_func(struct work_struct *dummy);
+static void vss_handle_request(struct work_struct *dummy);
 
 static DECLARE_DELAYED_WORK(vss_timeout_work, vss_timeout_func);
-static DECLARE_WORK(vss_send_op_work, vss_send_op);
+static DECLARE_WORK(vss_handle_request_work, vss_handle_request);
 
 static void vss_poll_wrapper(void *channel)
 {
@@ -156,8 +156,7 @@ static int vss_on_msg(void *msg, int len)
 	return 0;
 }
 
-
-static void vss_send_op(struct work_struct *dummy)
+static void vss_send_op(void)
 {
 	int op = vss_transaction.msg->vss_hdr.operation;
 	int rc;
@@ -174,6 +173,9 @@ static void vss_send_op(struct work_struct *dummy)
 	vss_msg->vss_hdr.operation = op;
 
 	vss_transaction.state = HVUTIL_USERSPACE_REQ;
+
+	schedule_delayed_work(&vss_timeout_work, VSS_USERSPACE_TIMEOUT);
+
 	rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
 	if (rc) {
 		pr_warn("VSS: failed to communicate to the daemon: %d\n", rc);
@@ -188,6 +190,40 @@ static void vss_send_op(struct work_struct *dummy)
 	return;
 }
 
+static void vss_handle_request(struct work_struct *dummy)
+{
+	switch (vss_transaction.msg->vss_hdr.operation) {
+	/*
+	 * Initiate a "freeze/thaw" operation in the guest.
+	 * We respond to the host once the operation is complete.
+	 *
+	 * We send the message to the user space daemon and the operation is
+	 * performed in the daemon.
+	 */
+	case VSS_OP_THAW:
+	case VSS_OP_FREEZE:
+		if (vss_transaction.state < HVUTIL_READY) {
+			/* Userspace is not registered yet */
+			vss_respond_to_host(HV_E_FAIL);
+			return;
+		}
+		vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
+		vss_send_op();
+		return;
+	case VSS_OP_HOT_BACKUP:
+		vss_transaction.msg->vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY;
+		break;
+	case VSS_OP_GET_DM_INFO:
+		vss_transaction.msg->dm_info.flags = 0;
+		break;
+	default:
+		break;
+	}
+
+	vss_respond_to_host(0);
+	hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
+}
+
 /*
  * Send a response back to the host.
  */
@@ -272,48 +308,8 @@ void hv_vss_onchannelcallback(void *context)
 			vss_transaction.recv_req_id = requestid;
 			vss_transaction.msg = (struct hv_vss_msg *)vss_msg;
 
-			switch (vss_msg->vss_hdr.operation) {
-				/*
-				 * Initiate a "freeze/thaw"
-				 * operation in the guest.
-				 * We respond to the host once
-				 * the operation is complete.
-				 *
-				 * We send the message to the
-				 * user space daemon and the
-				 * operation is performed in
-				 * the daemon.
-				 */
-			case VSS_OP_FREEZE:
-			case VSS_OP_THAW:
-				if (vss_transaction.state < HVUTIL_READY) {
-					/* Userspace is not registered yet */
-					vss_respond_to_host(HV_E_FAIL);
-					return;
-				}
-				vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
-				schedule_work(&vss_send_op_work);
-				schedule_delayed_work(&vss_timeout_work,
-						      VSS_USERSPACE_TIMEOUT);
-				return;
-
-			case VSS_OP_HOT_BACKUP:
-				vss_msg->vss_cf.flags =
-					 VSS_HBU_NO_AUTO_RECOVERY;
-				vss_respond_to_host(0);
-				return;
-
-			case VSS_OP_GET_DM_INFO:
-				vss_msg->dm_info.flags = 0;
-				vss_respond_to_host(0);
-				return;
-
-			default:
-				vss_respond_to_host(0);
-				return;
-
-			}
-
+			schedule_work(&vss_handle_request_work);
+			return;
 		}
 
 		icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
@@ -364,6 +360,6 @@ void hv_vss_deinit(void)
 {
 	vss_transaction.state = HVUTIL_DEVICE_DYING;
 	cancel_delayed_work_sync(&vss_timeout_work);
-	cancel_work_sync(&vss_send_op_work);
+	cancel_work_sync(&vss_handle_request_work);
 	hvutil_transport_destroy(hvt);
 }
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 7/7] Drivers: hv: utils: Check VSS daemon is listening before a hot backup
  2016-09-02 12:58 ` [PATCH 1/7] Drivers: hv: cleanup vmbus_open() for wrap around mappings kys
                     ` (4 preceding siblings ...)
  2016-09-02 12:58   ` [PATCH 6/7] Drivers: hv: utils: Continue to poll VSS channel after handling requests kys
@ 2016-09-02 12:58   ` kys
  5 siblings, 0 replies; 8+ messages in thread
From: kys @ 2016-09-02 12:58 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, alexng
  Cc: Alex Ng, K. Y. Srinivasan

From: Alex Ng <alexng@messages.microsoft.com>

Hyper-V host will send a VSS_OP_HOT_BACKUP request to check if guest is
ready for a live backup/snapshot. The driver should respond to the check
only if the daemon is running and listening to requests. This allows the
host to fallback to standard snapshots in case the VSS daemon is not
running.

Signed-off-by: Alex Ng <alexng@messages.microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
---
 drivers/hv/hv_snapshot.c |    9 ++++++---
 tools/hv/hv_vss_daemon.c |    3 +++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index c4013c8..a670713 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -142,6 +142,11 @@ static int vss_on_msg(void *msg, int len)
 		return vss_handle_handshake(vss_msg);
 	} else if (vss_transaction.state == HVUTIL_USERSPACE_REQ) {
 		vss_transaction.state = HVUTIL_USERSPACE_RECV;
+
+		if (vss_msg->vss_hdr.operation == VSS_OP_HOT_BACKUP)
+			vss_transaction.msg->vss_cf.flags =
+				VSS_HBU_NO_AUTO_RECOVERY;
+
 		if (cancel_delayed_work_sync(&vss_timeout_work)) {
 			vss_respond_to_host(vss_msg->error);
 			/* Transaction is finished, reset the state. */
@@ -202,6 +207,7 @@ static void vss_handle_request(struct work_struct *dummy)
 	 */
 	case VSS_OP_THAW:
 	case VSS_OP_FREEZE:
+	case VSS_OP_HOT_BACKUP:
 		if (vss_transaction.state < HVUTIL_READY) {
 			/* Userspace is not registered yet */
 			vss_respond_to_host(HV_E_FAIL);
@@ -210,9 +216,6 @@ static void vss_handle_request(struct work_struct *dummy)
 		vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
 		vss_send_op();
 		return;
-	case VSS_OP_HOT_BACKUP:
-		vss_transaction.msg->vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY;
-		break;
 	case VSS_OP_GET_DM_INFO:
 		vss_transaction.msg->dm_info.flags = 0;
 		break;
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 5d51d6f..e082980 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -250,6 +250,9 @@ int main(int argc, char *argv[])
 				syslog(LOG_ERR, "/etc/fstab and /proc/mounts");
 			}
 			break;
+		case VSS_OP_HOT_BACKUP:
+			syslog(LOG_INFO, "VSS: op=CHECK HOT BACKUP\n");
+			break;
 		default:
 			syslog(LOG_ERR, "Illegal op:%d\n", op);
 		}
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2016-09-02 11:11 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-02 12:56 [PATCH 0/7] Drivers: hv: Some miscellaneous fixes kys
2016-09-02 12:58 ` [PATCH 1/7] Drivers: hv: cleanup vmbus_open() for wrap around mappings kys
2016-09-02 12:58   ` [PATCH 2/7] Drivers: hv: ring_buffer: wrap around mappings for ring buffers kys
2016-09-02 12:58   ` [PATCH 3/7] Drivers: hv: ring_buffer: use wrap around mappings in hv_copy{from,to}_ringbuffer() kys
2016-09-02 12:58   ` [PATCH 4/7] Drivers: hv: ring_buffer: count on wrap around mappings in get_next_pkt_raw() kys
2016-09-02 12:58   ` [PATCH 5/7] Drivers: hv: Introduce a policy for controlling channel affinity kys
2016-09-02 12:58   ` [PATCH 6/7] Drivers: hv: utils: Continue to poll VSS channel after handling requests kys
2016-09-02 12:58   ` [PATCH 7/7] Drivers: hv: utils: Check VSS daemon is listening before a hot backup kys

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).