Xen-Devel Archive on lore.kernel.org
 help / color / Atom feed
* [Xen-devel] [RFC PATCH v3 05/12] xen-netfront: add callbacks for PM suspend and hibernation support
@ 2020-02-12 22:32 Anchal Agarwal
  0 siblings, 0 replies; 2+ messages in thread
From: Anchal Agarwal @ 2020-02-12 22:32 UTC (permalink / raw)
  To: tglx, mingo, bp, hpa, x86, boris.ostrovsky, jgross, linux-pm,
	linux-mm, kamatam, sstabellini, konrad.wilk, roger.pau, axboe,
	davem, rjw, len.brown, pavel, peterz, eduval, sblbir, anchalag,
	xen-devel, vkuznets, netdev, linux-kernel, dwmw, fllinden, benh

From: Munehisa Kamata <kamatam@amazon.com>

Add freeze, thaw and restore callbacks for PM suspend and hibernation
support. The freeze handler simply disconnects the frotnend from the
backend and frees resources associated with queues after disabling the
net_device from the system. The restore handler just changes the
frontend state and let the xenbus handler to re-allocate the resources
and re-connect to the backend. This can be performed transparently to
the rest of the system. The handlers are used for both PM suspend and
hibernation so that we can keep the existing suspend/resume callbacks
for Xen suspend without modification. Freezing netfront devices is
normally expected to finish within a few hundred milliseconds, but it
can rarely take more than 5 seconds and hit the hard coded timeout,
it would depend on backend state which may be congested and/or have
complex configuration. While it's rare case, longer default timeout
seems a bit more reasonable here to avoid hitting the timeout.
Also, make it configurable via module parameter so that we can cover
broader setups than what we know currently.

[Anchal changelog: Variable name fix and checkpatch.pl fixes]
Signed-off-by: Anchal Agarwal <anchalag@amazon.com>
Signed-off-by: Munehisa Kamata <kamatam@amazon.com>

---
Changes since V2: None
---
 drivers/net/xen-netfront.c | 98 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 97 insertions(+), 1 deletion(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 482c6c8b0fb7..65edcdd6e05f 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -43,6 +43,7 @@
 #include <linux/moduleparam.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/completion.h>
 #include <net/ip.h>
 
 #include <xen/xen.h>
@@ -56,6 +57,12 @@
 #include <xen/interface/memory.h>
 #include <xen/interface/grant_table.h>
 
+enum netif_freeze_state {
+	NETIF_FREEZE_STATE_UNFROZEN,
+	NETIF_FREEZE_STATE_FREEZING,
+	NETIF_FREEZE_STATE_FROZEN,
+};
+
 /* Module parameters */
 #define MAX_QUEUES_DEFAULT 8
 static unsigned int xennet_max_queues;
@@ -63,6 +70,12 @@ module_param_named(max_queues, xennet_max_queues, uint, 0644);
 MODULE_PARM_DESC(max_queues,
 		 "Maximum number of queues per virtual interface");
 
+static unsigned int netfront_freeze_timeout_secs = 10;
+module_param_named(freeze_timeout_secs,
+		   netfront_freeze_timeout_secs, uint, 0644);
+MODULE_PARM_DESC(freeze_timeout_secs,
+		 "timeout when freezing netfront device in seconds");
+
 static const struct ethtool_ops xennet_ethtool_ops;
 
 struct netfront_cb {
@@ -160,6 +173,10 @@ struct netfront_info {
 	struct netfront_stats __percpu *tx_stats;
 
 	atomic_t rx_gso_checksum_fixup;
+
+	int freeze_state;
+
+	struct completion wait_backend_disconnected;
 };
 
 struct netfront_rx_info {
@@ -721,6 +738,21 @@ static int xennet_close(struct net_device *dev)
 	return 0;
 }
 
+static int xennet_disable_interrupts(struct net_device *dev)
+{
+	struct netfront_info *np = netdev_priv(dev);
+	unsigned int num_queues = dev->real_num_tx_queues;
+	unsigned int queue_index;
+	struct netfront_queue *queue;
+
+	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
+		queue = &np->queues[queue_index];
+		disable_irq(queue->tx_irq);
+		disable_irq(queue->rx_irq);
+	}
+	return 0;
+}
+
 static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
 				grant_ref_t ref)
 {
@@ -1301,6 +1333,8 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 
 	np->queues = NULL;
 
+	init_completion(&np->wait_backend_disconnected);
+
 	err = -ENOMEM;
 	np->rx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
 	if (np->rx_stats == NULL)
@@ -1794,6 +1828,50 @@ static int xennet_create_queues(struct netfront_info *info,
 	return 0;
 }
 
+static int netfront_freeze(struct xenbus_device *dev)
+{
+	struct netfront_info *info = dev_get_drvdata(&dev->dev);
+	unsigned long timeout = netfront_freeze_timeout_secs * HZ;
+	int err = 0;
+
+	xennet_disable_interrupts(info->netdev);
+
+	netif_device_detach(info->netdev);
+
+	info->freeze_state = NETIF_FREEZE_STATE_FREEZING;
+
+	/* Kick the backend to disconnect */
+	xenbus_switch_state(dev, XenbusStateClosing);
+
+	/* We don't want to move forward before the frontend is diconnected
+	 * from the backend cleanly.
+	 */
+	timeout = wait_for_completion_timeout(&info->wait_backend_disconnected,
+					      timeout);
+	if (!timeout) {
+		err = -EBUSY;
+		xenbus_dev_error(dev, err, "Freezing timed out;"
+				 "the device may become inconsistent state");
+		return err;
+	}
+
+	/* Tear down queues */
+	xennet_disconnect_backend(info);
+	xennet_destroy_queues(info);
+
+	info->freeze_state = NETIF_FREEZE_STATE_FROZEN;
+
+	return err;
+}
+
+static int netfront_restore(struct xenbus_device *dev)
+{
+	/* Kick the backend to re-connect */
+	xenbus_switch_state(dev, XenbusStateInitialising);
+
+	return 0;
+}
+
 /* Common code used when first setting up, and when resuming. */
 static int talk_to_netback(struct xenbus_device *dev,
 			   struct netfront_info *info)
@@ -1999,6 +2077,8 @@ static int xennet_connect(struct net_device *dev)
 		spin_unlock_bh(&queue->rx_lock);
 	}
 
+	np->freeze_state = NETIF_FREEZE_STATE_UNFROZEN;
+
 	return 0;
 }
 
@@ -2036,10 +2116,23 @@ static void netback_changed(struct xenbus_device *dev,
 		break;
 
 	case XenbusStateClosed:
-		if (dev->state == XenbusStateClosed)
+		if (dev->state == XenbusStateClosed) {
+		     /* dpm context is waiting for the backend */
+			if (np->freeze_state == NETIF_FREEZE_STATE_FREEZING)
+				complete(&np->wait_backend_disconnected);
 			break;
+		}
+
 		/* Fall through - Missed the backend's CLOSING state. */
 	case XenbusStateClosing:
+	       /* We may see unexpected Closed or Closing from the backend.
+		* Just ignore it not to prevent the frontend from being
+		* re-connected in the case of PM suspend or hibernation.
+		*/
+		if (np->freeze_state == NETIF_FREEZE_STATE_FROZEN &&
+		    dev->state == XenbusStateInitialising) {
+			break;
+		}
 		xenbus_frontend_closed(dev);
 		break;
 	}
@@ -2186,6 +2279,9 @@ static struct xenbus_driver netfront_driver = {
 	.probe = netfront_probe,
 	.remove = xennet_remove,
 	.resume = netfront_resume,
+	.freeze = netfront_freeze,
+	.thaw	= netfront_restore,
+	.restore = netfront_restore,
 	.otherend_changed = netback_changed,
 };
 
-- 
2.24.1.AMZN


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 2+ messages in thread
* [Xen-devel] [RFC RESEND PATCH v3 00/12] Enable PM hibernation on guest VMs
@ 2020-02-14 23:21 Anchal Agarwal
  2020-02-14 23:24 ` [Xen-devel] [RFC PATCH v3 05/12] xen-netfront: add callbacks for PM suspend and hibernation support Anchal Agarwal
  0 siblings, 1 reply; 2+ messages in thread
From: Anchal Agarwal @ 2020-02-14 23:21 UTC (permalink / raw)
  To: tglx, mingo, bp, hpa, x86, boris.ostrovsky, jgross, linux-pm,
	linux-mm, kamatam, sstabellini, konrad.wilk, roger.pau, axboe,
	davem, rjw, len.brown, pavel, peterz, eduval, sblbir, anchalag,
	xen-devel, vkuznets, netdev, linux-kernel, dwmw, fllinden, benh

Resending this in a more threaded format.
  
Hello,
I am sending out a v3 version of series of patches that implements guest
PM hibernation.
These guests are running on xen hypervisor. The patches had been tested
against mainstream kernel. EC2 instance hibernation feature is provided
to the AWS EC2 customers. PM hibernation uses swap space carved out within
the guest[or can be a separate partition], where hibernation image is
stored and restored from.

Doing guest hibernation does not involve any support from hypervisor and
this way guest has complete control over its state. Infrastructure
restrictions for saving up guest state can be overcome by guest initiated
hibernation.

This series includes some improvements over RFC series sent last year:
https://lists.xenproject.org/archives/html/xen-devel/2018-06/msg00823.html

Changelog v3:
1. Feedback from V2
2. Introduced 2 new patches for xen sched clock offset fix
3. Fixed pirq shutdown/restore in generic irq subsystem
4. Split save/restore steal clock patches into 2 for better readability

Changelog v2:
1. Removed timeout/request present on the ring in xen-blkfront during blkfront freeze
2. Fixed restoring of PIRQs which was apparently working for 4.9 kernels but not for
newer kernel. [Legacy irqs were no longer restored after hibernation introduced with
this commit "020db9d3c1dc0"]
3. Merged couple of related patches to make the code more coherent and readable
4. Code refactoring
5. Sched clock fix when hibernating guest is under heavy CPU load
Note: Under very rare circumstances we see resume failures with KASLR enabled only
on xen instances.  We are roughly seeing 3% failures [>1000 runs] when testing with
various instance sizes and some workload running on each instance. I am currently
investigating the issue as to confirm if its a xen issue or kernel issue.
However, it should not hold back anyone from reviewing/accepting these patches.

Testing done:
All testing is done for multiple hibernation cycle for 5.4 kernel on EC2.

Testing How to:
---------------
Example:
Set up a file-backed swap space. Swap file size>=Total memory on the system
sudo dd if=/dev/zero of=/swap bs=$(( 1024 * 1024 )) count=4096 # 4096MiB
sudo chmod 600 /swap
sudo mkswap /swap
sudo swapon /swap

Update resume device/resume offset in grub if using swap file:
resume=/dev/xvda1 resume_offset=200704

Execute:
--------
sudo pm-hibernate
OR
echo disk > /sys/power/state && echo reboot > /sys/power/disk

Compute resume offset code:
"
#!/usr/bin/env python
import sys
import array
import fcntl

#swap file
f = open(sys.argv[1], 'r')
buf = array.array('L', [0])

#FIBMAP
ret = fcntl.ioctl(f.fileno(), 0x01, buf)
print buf[0]
"

Aleksei Besogonov (1):
  PM / hibernate: update the resume offset on SNAPSHOT_SET_SWAP_AREA

Anchal Agarwal (4):
  x86/xen: Introduce new function to map HYPERVISOR_shared_info on
    Resume
  genirq: Shutdown irq chips in suspend/resume during hibernation
  xen: Introduce wrapper for save/restore sched clock offset
  xen: Update sched clock offset to avoid system instability in
    hibernation

Munehisa Kamata (7):
  xen/manage: keep track of the on-going suspend mode
  xenbus: add freeze/thaw/restore callbacks support
  x86/xen: add system core suspend and resume callbacks
  xen-netfront: add callbacks for PM suspend and hibernation support
  xen-blkfront: add callbacks for PM suspend and hibernation
  xen/time: introduce xen_{save,restore}_steal_clock
  x86/xen: save and restore steal clock

 arch/x86/xen/enlighten_hvm.c      |   8 ++
 arch/x86/xen/suspend.c            |  72 ++++++++++++++++++
 arch/x86/xen/time.c               |  18 ++++-
 arch/x86/xen/xen-ops.h            |   3 +
 drivers/block/xen-blkfront.c      | 119 ++++++++++++++++++++++++++++--
 drivers/net/xen-netfront.c        |  98 +++++++++++++++++++++++-
 drivers/xen/events/events_base.c  |   1 +
 drivers/xen/manage.c              |  73 ++++++++++++++++++
 drivers/xen/time.c                |  29 +++++++-
 drivers/xen/xenbus/xenbus_probe.c |  99 ++++++++++++++++++++-----
 include/linux/irq.h               |   2 +
 include/xen/xen-ops.h             |   8 ++
 include/xen/xenbus.h              |   3 +
 kernel/irq/chip.c                 |   2 +-
 kernel/irq/internals.h            |   1 +
 kernel/irq/pm.c                   |  31 +++++---
 kernel/power/user.c               |   6 +-
 17 files changed, 533 insertions(+), 40 deletions(-)

-- 
2.24.1.AMZN


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-12 22:32 [Xen-devel] [RFC PATCH v3 05/12] xen-netfront: add callbacks for PM suspend and hibernation support Anchal Agarwal
2020-02-14 23:21 [Xen-devel] [RFC RESEND PATCH v3 00/12] Enable PM hibernation on guest VMs Anchal Agarwal
2020-02-14 23:24 ` [Xen-devel] [RFC PATCH v3 05/12] xen-netfront: add callbacks for PM suspend and hibernation support Anchal Agarwal

Xen-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/xen-devel/0 xen-devel/git/0.git
	git clone --mirror https://lore.kernel.org/xen-devel/1 xen-devel/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 xen-devel xen-devel/ https://lore.kernel.org/xen-devel \
		xen-devel@lists.xenproject.org xen-devel@lists.xen.org
	public-inbox-index xen-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.xenproject.lists.xen-devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git