All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Hildenbrand <david@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: Andrea Arcangeli <aarcange@redhat.com>,
	Pavel Tatashin <pasha.tatashin@soleen.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	virtualization@lists.linux-foundation.org, linux-mm@kvack.org,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Igor Mammedov <imammedo@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Michal Hocko <mhocko@kernel.org>, Dave Young <dyoung@redhat.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	Oscar Salvador <osalvador@suse.de>
Subject: [PATCH RFC v3 7/9] virtio-mem: Allow to offline partially unplugged memory blocks
Date: Thu, 19 Sep 2019 16:22:26 +0200	[thread overview]
Message-ID: <20190919142228.5483-8-david__18012.6650653378$1568903049$gmane$org@redhat.com> (raw)
In-Reply-To: <20190919142228.5483-1-david@redhat.com>

Dropping the reference count of PageOffline() pages allows offlining
code to skip them. However, we also have to convert PG_reserved to
another flag - let's use PG_dirty - so has_unmovable_pages() will
properly handle them. PG_reserved pages get detected as unmovable right
away.

We need the flag to see if we are onlining pages the first time, or if
we allocated them via alloc_contig_range().

Properly take care of offlining code also modifying the stats and
special handling in case the driver gets unloaded.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 drivers/virtio/virtio_mem.c | 102 ++++++++++++++++++++++++++++++++----
 1 file changed, 92 insertions(+), 10 deletions(-)

diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 91052a37d10d..9cb31459b211 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -561,6 +561,30 @@ static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id,
 		virtio_mem_retry(vm);
 }
 
+/*
+ * When we unplug subblocks, we already modify stats (e.g., subtract them
+ * from totalram_pages). Offlining code will modify the stats, too. So
+ * properly fixup the stats when GOING_OFFLINE and revert that when
+ * CANCEL_OFFLINE.
+ */
+static void virtio_mem_mb_going_offline_fixup_stats(struct virtio_mem *vm,
+						    unsigned long mb_id,
+						    bool cancel)
+{
+	const unsigned long nr_pages = PFN_DOWN(vm->subblock_size);
+	int sb_id;
+
+	for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) {
+		if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
+			continue;
+
+		if (cancel)
+			totalram_pages_add(-nr_pages);
+		else
+			totalram_pages_add(nr_pages);
+	}
+}
+
 /*
  * This callback will either be called synchonously from add_memory() or
  * asynchronously (e.g., triggered via user space). We have to be careful
@@ -608,6 +632,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
 			mutex_lock(&vm->hotplug_mutex);
 			vm->hotplug_active = true;
 		}
+		virtio_mem_mb_going_offline_fixup_stats(vm, mb_id, false);
 		break;
 	case MEM_GOING_ONLINE:
 		spin_lock_irq(&vm->removal_lock);
@@ -633,6 +658,8 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
 		mutex_unlock(&vm->hotplug_mutex);
 		break;
 	case MEM_CANCEL_OFFLINE:
+		virtio_mem_mb_going_offline_fixup_stats(vm, mb_id, true);
+		/* fall through */
 	case MEM_CANCEL_ONLINE:
 		/* We might not get a MEM_GOING* if somebody else canceled */
 		if (vm->hotplug_active) {
@@ -648,23 +675,55 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
 }
 
 /*
- * Set a range of pages PG_offline.
+ * Convert PG_reserved to PG_dirty. Needed to allow isolation code to
+ * not immediately consider them as unmovable.
+ */
+static void virtio_mem_reserved_to_dirty(unsigned long pfn,
+					 unsigned int nr_pages)
+{
+	for (; nr_pages--; pfn++) {
+		SetPageDirty(pfn_to_page(pfn));
+		ClearPageReserved(pfn_to_page(pfn));
+	}
+}
+
+/*
+ * Convert PG_dirty to PG_reserved. Needed so generic_online_page()
+ * works correctly.
+ */
+static void virtio_mem_dirty_to_reserved(unsigned long pfn,
+					 unsigned int nr_pages)
+{
+	for (; nr_pages--; pfn++) {
+		SetPageReserved(pfn_to_page(pfn));
+		ClearPageDirty(pfn_to_page(pfn));
+	}
+}
+
+/*
+ * Set a range of pages PG_offline and drop the reference. The dropped
+ * reference (0) and the flag allows isolation code to isolate this range
+ * and offline code to offline it.
  */
 static void virtio_mem_set_fake_offline(unsigned long pfn,
 					unsigned int nr_pages)
 {
-	for (; nr_pages--; pfn++)
+	for (; nr_pages--; pfn++) {
 		__SetPageOffline(pfn_to_page(pfn));
+		page_ref_dec(pfn_to_page(pfn));
+	}
 }
 
 /*
- * Clear PG_offline from a range of pages.
+ * Get a reference and clear PG_offline from a range of pages.
  */
 static void virtio_mem_clear_fake_offline(unsigned long pfn,
 					  unsigned int nr_pages)
 {
-	for (; nr_pages--; pfn++)
+	for (; nr_pages--; pfn++) {
+		page_ref_inc(pfn_to_page(pfn));
 		__ClearPageOffline(pfn_to_page(pfn));
+	}
 }
 
 /*
@@ -679,7 +738,7 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages)
 	/*
 	 * We are always called with subblock granularity, which is at least
 	 * aligned to MAX_ORDER - 1. All pages in a subblock are either
-	 * reserved or not.
+	 * PG_dirty (converted PG_reserved) or not.
 	 */
 	BUG_ON(!IS_ALIGNED(pfn, 1 << order));
 	BUG_ON(!IS_ALIGNED(nr_pages, 1 << order));
@@ -690,13 +749,14 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages)
 		struct page *page = pfn_to_page(pfn + i);
 
 		/*
-		 * If the page is reserved, it was kept fake-offline when
+		 * If the page is PG_dirty, it was kept fake-offline when
 		 * onlining the memory block. Otherwise, it was allocated
 		 * using alloc_contig_range().
 		 */
-		if (PageReserved(page))
+		if (PageDirty(page)) {
+			virtio_mem_dirty_to_reserved(pfn + i, 1 << order);
 			generic_online_page(page, order);
-		else {
+		} else {
 			free_contig_range(pfn + i, 1 << order);
 			totalram_pages_add(1 << order);
 		}
@@ -728,8 +788,10 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
 		 */
 		if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
 			generic_online_page(page, order);
-		else
+		else {
 			virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order);
+			virtio_mem_reserved_to_dirty(PFN_DOWN(addr), 1 << order);
+		}
 		rcu_read_unlock();
 		return;
 	}
@@ -1674,7 +1736,8 @@ static int virtio_mem_probe(struct virtio_device *vdev)
 static void virtio_mem_remove(struct virtio_device *vdev)
 {
 	struct virtio_mem *vm = vdev->priv;
-	unsigned long mb_id;
+	unsigned long nr_pages = PFN_DOWN(vm->subblock_size);
+	unsigned long pfn, mb_id, sb_id;
 	int rc;
 
 	/*
@@ -1701,6 +1764,25 @@ static void virtio_mem_remove(struct virtio_device *vdev)
 		BUG_ON(rc);
 		mutex_lock(&vm->hotplug_mutex);
 	}
+	/*
+	 * After we unregistered our callbacks, user space can offline +
+	 * re-online partially plugged online blocks. Make sure they can't
+	 * get offlined by getting a reference. Also, restore PG_reserved.
+	 */
+	virtio_mem_for_each_mb_state(vm, mb_id,
+				     VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) {
+		for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) {
+			if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
+				continue;
+			pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
+			      sb_id * vm->subblock_size);
+
+			if (PageDirty(pfn_to_page(pfn)))
+				virtio_mem_dirty_to_reserved(pfn, nr_pages);
+			for (; nr_pages--; pfn++)
+				page_ref_inc(pfn_to_page(pfn));
+		}
+	}
 	mutex_unlock(&vm->hotplug_mutex);
 
 	/* unregister callbacks */
-- 
2.21.0

  parent reply	other threads:[~2019-09-19 14:22 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-19 14:22 [PATCH RFC v3 0/9] virtio-mem: paravirtualized memory David Hildenbrand
2019-09-19 14:22 ` [PATCH RFC v3 1/9] ACPI: NUMA: export pxm_to_node David Hildenbrand
2019-09-19 14:22 ` David Hildenbrand
2019-09-23 10:13   ` David Hildenbrand
2019-09-23 10:36     ` Michal Hocko
2019-09-23 10:36       ` Michal Hocko
2019-09-23 10:39       ` David Hildenbrand
2019-09-23 10:39       ` David Hildenbrand
2019-09-23 10:13   ` David Hildenbrand
2019-09-19 14:22 ` [PATCH RFC v3 2/9] virtio-mem: Paravirtualized memory hotplug David Hildenbrand
2019-09-19 14:22 ` David Hildenbrand
2019-09-19 14:22 ` [PATCH RFC v3 3/9] virtio-mem: Paravirtualized memory hotunplug part 1 David Hildenbrand
2019-09-19 14:22 ` David Hildenbrand
2019-09-19 14:22 ` [PATCH RFC v3 4/9] mm: Export alloc_contig_range() / free_contig_range() David Hildenbrand
2019-10-16 11:20   ` Michal Hocko
2019-10-16 12:31     ` David Hildenbrand
2019-10-16 12:31     ` David Hildenbrand
2019-10-16 11:20   ` Michal Hocko
2019-09-19 14:22 ` David Hildenbrand
2019-09-19 14:22 ` [PATCH RFC v3 5/9] virtio-mem: Paravirtualized memory hotunplug part 2 David Hildenbrand
2019-09-19 14:22 ` David Hildenbrand
2019-09-19 14:22 ` [PATCH RFC v3 6/9] mm: Allow to offline PageOffline() pages with a reference count of 0 David Hildenbrand
2019-09-19 14:22   ` David Hildenbrand
2019-10-16 11:43   ` Michal Hocko
2019-10-16 12:50     ` David Hildenbrand
2019-10-16 12:50     ` David Hildenbrand
2019-10-16 13:45       ` Michal Hocko
2019-10-16 13:55         ` David Hildenbrand
2019-10-16 13:55         ` David Hildenbrand
2019-10-16 14:09           ` Michal Hocko
2019-10-16 14:16             ` David Hildenbrand
2019-10-16 14:16             ` David Hildenbrand
2019-10-16 14:09           ` Michal Hocko
2019-10-16 13:59         ` David Hildenbrand
2019-10-16 13:59           ` David Hildenbrand
2019-10-16 13:45       ` Michal Hocko
2019-10-16 13:45     ` David Hildenbrand
2019-10-16 13:45     ` David Hildenbrand
2019-10-16 14:03       ` Michal Hocko
2019-10-16 14:14         ` David Hildenbrand
2019-10-16 14:14           ` David Hildenbrand
2019-10-18  8:15           ` Michal Hocko
2019-10-18  8:50             ` David Hildenbrand
2019-10-18  8:50             ` David Hildenbrand
2019-10-18 11:20               ` Michal Hocko
2019-10-18 11:20               ` Michal Hocko
2019-10-18 12:35                 ` David Hildenbrand
2019-10-18 12:35                   ` David Hildenbrand
2019-10-22 12:23                   ` Michal Hocko
2019-10-22 12:23                     ` Michal Hocko
2019-10-22 14:02                     ` David Hildenbrand
2019-10-22 14:02                       ` David Hildenbrand
2019-10-23  9:43                       ` Michal Hocko
2019-10-23  9:43                         ` Michal Hocko
2019-10-23 10:03                         ` David Hildenbrand
2019-10-23 10:03                         ` David Hildenbrand
2019-10-24  8:42                           ` Michal Hocko
2019-10-24  8:42                           ` Michal Hocko
2019-10-24  8:51                             ` David Hildenbrand
2019-10-24  8:51                               ` David Hildenbrand
2019-10-25 11:28                               ` [PATCH RFC] mm: Allow to offline unmovable PageOffline() pages if the driver agrees David Hildenbrand
2019-10-25 11:28                                 ` David Hildenbrand
2019-10-18  8:15           ` [PATCH RFC v3 6/9] mm: Allow to offline PageOffline() pages with a reference count of 0 Michal Hocko
2019-10-16 14:03       ` Michal Hocko
2019-10-16 11:43   ` Michal Hocko
2019-09-19 14:22 ` [PATCH RFC v3 7/9] virtio-mem: Allow to offline partially unplugged memory blocks David Hildenbrand
2019-09-19 14:22 ` David Hildenbrand [this message]
2019-09-19 14:22 ` [PATCH RFC v3 8/9] mm/memory_hotplug: Introduce offline_and_remove_memory() David Hildenbrand
2019-09-19 14:22 ` David Hildenbrand
2019-10-16 11:47   ` Michal Hocko
2019-10-16 11:47     ` Michal Hocko
2019-10-16 12:57     ` David Hildenbrand
2019-10-16 12:57       ` David Hildenbrand
2019-09-19 14:22 ` [PATCH RFC v3 9/9] virtio-mem: Offline and remove completely unplugged memory blocks David Hildenbrand
2019-09-19 14:22 ` David Hildenbrand
2019-10-16  8:12 ` [PATCH RFC v3 0/9] virtio-mem: paravirtualized memory David Hildenbrand
2019-10-16  8:12 ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='20190919142228.5483-8-david__18012.6650653378$1568903049$gmane$org@redhat.com' \
    --to=david@redhat.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=dyoung@redhat.com \
    --cc=imammedo@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=mst@redhat.com \
    --cc=osalvador@suse.de \
    --cc=pasha.tatashin@soleen.com \
    --cc=stefanha@redhat.com \
    --cc=vbabka@suse.cz \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.