All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: paulus@samba.org, mpe@ellerman.id.au, benh@kernel.crashing.org
Cc: linuxppc-dev@lists.ozlabs.org, aik@ozlabs.ru, thuth@redhat.com,
	lvivier@redhat.com, David Gibson <david@gibson.dropbear.id.au>
Subject: [RFCv2 7/9] pseries: Add support for hash table resizing
Date: Fri, 29 Jan 2016 16:24:01 +1100	[thread overview]
Message-ID: <1454045043-25545-8-git-send-email-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <1454045043-25545-1-git-send-email-david@gibson.dropbear.id.au>

This adds support for using experimental hypercalls to change the size
of the main hash page table while running as a PAPR guest.  For now these
hypercalls are only in experimental qemu versions.

The interface has two parts: first H_RESIZE_HPT_PREPARE is used to allocate
and prepare the new hash table.  This may be slow, but can be done
asynchronously.  Then, H_RESIZE_HPT_COMMIT is used to switch to the new
hash table.  This requires that no CPUs be concurrently updating the HPT,
and so must be run under stop_machine().

This also adds a debugfs file which can be used to manually control
HPT resizing for testing purposes.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 arch/powerpc/include/asm/machdep.h    |   1 +
 arch/powerpc/mm/hash_utils_64.c       |  28 +++++++++
 arch/powerpc/platforms/pseries/lpar.c | 110 ++++++++++++++++++++++++++++++++++
 3 files changed, 139 insertions(+)

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index a7d3f66..532d795 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -61,6 +61,7 @@ struct machdep_calls {
 					       unsigned long addr,
 					       unsigned char *hpte_slot_array,
 					       int psize, int ssize, int local);
+	int		(*resize_hpt)(unsigned long shift);
 	/*
 	 * Special for kexec.
 	 * To be called in real mode with interrupts disabled. No locks are
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index d63f7dc..882e409 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -34,6 +34,7 @@
 #include <linux/signal.h>
 #include <linux/memblock.h>
 #include <linux/context_tracking.h>
+#include <linux/debugfs.h>
 
 #include <asm/processor.h>
 #include <asm/pgtable.h>
@@ -1578,3 +1579,30 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 	/* Finally limit subsequent allocations */
 	memblock_set_current_limit(ppc64_rma_size);
 }
+
+/*
+ * Read handler for the "pft-size" debugfs attribute.
+ *
+ * Stores the current value of ppc64_pft_size in *val.  @data is unused.
+ * Always returns 0.
+ */
+static int ppc64_pft_size_get(void *data, u64 *val)
+{
+	u64 cur_shift = ppc64_pft_size;
+
+	*val = cur_shift;
+	return 0;
+}
+
+/*
+ * Write handler for the "pft-size" debugfs attribute.
+ *
+ * Hands @val to the platform's resize_hpt hook as the requested shift and
+ * returns whatever the hook returns.  If the platform installed no hook,
+ * returns -ENODEV.  @data is unused.
+ */
+static int ppc64_pft_size_set(void *data, u64 val)
+{
+	if (ppc_md.resize_hpt)
+		return ppc_md.resize_hpt(val);
+
+	return -ENODEV;
+}
+
+/* File operations for the "pft-size" debugfs file; values use "%llu\n". */
+DEFINE_SIMPLE_ATTRIBUTE(fops_ppc64_pft_size,
+			ppc64_pft_size_get, ppc64_pft_size_set,	"%llu\n");
+
+/*
+ * Create the "pft-size" debugfs file under powerpc_debugfs_root with mode
+ * 0600 (owner read/write only).
+ *
+ * A failure to create the file is logged but deliberately not propagated:
+ * this function always returns 0.
+ */
+static int __init hash64_debugfs(void)
+{
+	if (!debugfs_create_file("pft-size", 0600, powerpc_debugfs_root,
+				 NULL, &fops_ppc64_pft_size)) {
+		pr_err("lpar: unable to create ppc64_pft_size debugfs file\n");
+	}
+
+	return 0;
+}
+/* Registered through machine_device_initcall() for the pseries machine. */
+machine_device_initcall(pseries, hash64_debugfs);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 92d472d..ebf02e7 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -27,6 +27,8 @@
 #include <linux/console.h>
 #include <linux/export.h>
 #include <linux/jump_label.h>
+#include <linux/delay.h>
+#include <linux/stop_machine.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
@@ -603,6 +605,113 @@ static int __init disable_bulk_remove(char *str)
 
 __setup("bulk_remove=", disable_bulk_remove);
 
+#define HPT_RESIZE_TIMEOUT	10000 /* ms; cap on summed long-busy delays */
+
+struct hpt_resize_state {
+	unsigned long shift;	/* log2 of the requested HPT size in bytes */
+	int commit_rc;		/* status from plpar_resize_hpt_commit() */
+};
+
+/*
+ * Callback handed to stop_machine(): issue the commit hypercall for the
+ * resize described by @data (a struct hpt_resize_state).
+ *
+ * The hypercall status is recorded in ->commit_rc so the caller can tell
+ * failures apart.  On H_SUCCESS the cached hash table globals are brought
+ * in line with the new shift.
+ *
+ * Returns 0 on H_SUCCESS, -EIO for any other status.
+ */
+static int pseries_lpar_resize_hpt_commit(void *data)
+{
+	struct hpt_resize_state *rs = data;
+
+	rs->commit_rc = plpar_resize_hpt_commit(0, rs->shift);
+	if (rs->commit_rc == H_SUCCESS) {
+		/* Hypervisor has transitioned the HTAB, update our globals */
+		ppc64_pft_size = rs->shift;
+		htab_size_bytes = 1UL << ppc64_pft_size;
+		/* NOTE(review): >> 7 presumably means 128 bytes per hash
+		 * group -- confirm against the HPT layout */
+		htab_hash_mask = (htab_size_bytes >> 7) - 1;
+		return 0;
+	}
+
+	return -EIO;
+}
+
+/*
+ * Drive the prepare hypercall for @shift until it stops reporting "long
+ * busy".
+ *
+ * Between attempts this sleeps for the interval hinted by the hypervisor.
+ * Once the sum of the hinted intervals exceeds HPT_RESIZE_TIMEOUT, the
+ * in-progress resize is cancelled and the attempt is abandoned.
+ *
+ * Returns 0 on H_SUCCESS, -ETIMEDOUT when the timeout was exceeded,
+ * -EINVAL for H_PARAMETER, -EPERM for H_RESOURCE and -EIO for any other
+ * status.
+ */
+static int pseries_lpar_resize_hpt_prepare(unsigned long shift)
+{
+	unsigned int wait_ms, waited_ms = 0;
+	int hrc;
+
+	for (hrc = plpar_resize_hpt_prepare(0, shift);
+	     H_IS_LONG_BUSY(hrc);
+	     hrc = plpar_resize_hpt_prepare(0, shift)) {
+		wait_ms = get_longbusy_msecs(hrc);
+		waited_ms += wait_ms;
+		if (waited_ms > HPT_RESIZE_TIMEOUT) {
+			/* prepare call with shift==0 cancels an
+			 * in-progress resize */
+			hrc = plpar_resize_hpt_prepare(0, 0);
+			if (hrc != H_SUCCESS)
+				printk(KERN_WARNING
+				       "lpar: Unexpected error %d cancelling timed out HPT resize\n",
+				       hrc);
+			return -ETIMEDOUT;
+		}
+		msleep(wait_ms);
+	}
+
+	if (hrc == H_SUCCESS)
+		return 0;
+	if (hrc == H_PARAMETER)
+		return -EINVAL;
+	if (hrc == H_RESOURCE)
+		return -EPERM;
+
+	printk(KERN_WARNING
+	       "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n",
+	       hrc);
+	return -EIO;
+}
+
+/*
+ * Resize the hash page table to the size given by @shift.
+ *
+ * Must be called in user context: the prepare phase may sleep.
+ *
+ * The work happens in two phases.  The prepare phase runs first and may be
+ * slow.  The commit phase then runs under stop_machine().  The duration of
+ * each phase is logged on success, prepare first, commit second.
+ *
+ * Returns 0 on success, -ENODEV if the firmware lacks
+ * FW_FEATURE_HPT_RESIZE, any error from the prepare phase (-ETIMEDOUT,
+ * -EINVAL, -EPERM, -EIO), -ENOSPC if the commit reported H_PTEG_FULL, or
+ * -EIO for any other commit failure.
+ */
+static int pseries_lpar_resize_hpt(unsigned long shift)
+{
+	struct hpt_resize_state state = {
+		.shift = shift,
+		.commit_rc = H_FUNCTION,
+	};
+	ktime_t t0, t1, t2;
+	int rc;
+
+	might_sleep();
+
+	if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+		return -ENODEV;
+
+	printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n",
+	       shift);
+
+	t0 = ktime_get();
+
+	rc = pseries_lpar_resize_hpt_prepare(shift);
+	if (rc != 0)
+		return rc;
+
+	t1 = ktime_get();
+
+	rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
+
+	t2 = ktime_get();
+
+	if (rc != 0) {
+		if (state.commit_rc == H_PTEG_FULL) {
+			printk(KERN_WARNING
+			       "lpar: Hash collision while resizing HPT\n");
+			return -ENOSPC;
+		}
+
+		printk(KERN_WARNING
+		       "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
+		       state.commit_rc);
+		return -EIO;
+	}
+
+	printk(KERN_INFO
+	       "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
+	       shift, (long long) ktime_ms_delta(t1, t0),
+	       (long long) ktime_ms_delta(t2, t1));
+
+	return 0;
+}
+
 void __init hpte_init_lpar(void)
 {
 	ppc_md.hpte_invalidate	= pSeries_lpar_hpte_invalidate;
@@ -614,6 +723,7 @@ void __init hpte_init_lpar(void)
 	ppc_md.flush_hash_range	= pSeries_lpar_flush_hash_range;
 	ppc_md.hpte_clear_all   = pSeries_lpar_hptab_clear;
 	ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
+	ppc_md.resize_hpt = pseries_lpar_resize_hpt;
 }
 
 #ifdef CONFIG_PPC_SMLPAR
-- 
2.5.0

  parent reply	other threads:[~2016-01-29  5:23 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-29  5:23 [RFCv2 0/9] PAPR hash page table resizing (guest side) David Gibson
2016-01-29  5:23 ` [RFCv2 1/9] memblock: Don't mark memblock_phys_mem_size() as __init David Gibson
2016-02-01  5:50   ` Anshuman Khandual
2016-02-08  2:46   ` Paul Mackerras
2016-01-29  5:23 ` [RFCv2 2/9] arch/powerpc: Clean up error handling for htab_remove_mapping David Gibson
2016-02-01  5:54   ` Anshuman Khandual
2016-02-08  2:48   ` Paul Mackerras
2016-01-29  5:23 ` [RFCv2 3/9] arch/powerpc: Handle removing maybe-present bolted HPTEs David Gibson
2016-02-01  5:58   ` Anshuman Khandual
2016-02-02  1:08     ` David Gibson
2016-02-02 13:49   ` Denis Kirjanov
2016-02-08  2:54   ` Paul Mackerras
2016-02-09  0:43     ` David Gibson
2016-01-29  5:23 ` [RFCv2 4/9] arch/powerpc: Clean up memory hotplug failure paths David Gibson
2016-02-01  6:29   ` Anshuman Khandual
2016-02-02 15:04   ` Nathan Fontenot
2016-02-03  4:31     ` David Gibson
2016-02-08  5:47   ` Paul Mackerras
2016-01-29  5:23 ` [RFCv2 5/9] arch/powerpc: Split hash page table sizing heuristic into a helper David Gibson
2016-02-01  7:04   ` Anshuman Khandual
2016-02-02  1:04     ` David Gibson
2016-02-04 10:56       ` Anshuman Khandual
2016-02-08  5:57         ` Paul Mackerras
2016-01-29  5:24 ` [RFCv2 6/9] pseries: Add hypercall wrappers for hash page table resizing David Gibson
2016-02-01  7:11   ` Anshuman Khandual
2016-02-02  0:58     ` David Gibson
2016-02-04 11:11       ` Anshuman Khandual
2016-02-07 22:33         ` David Gibson
2016-02-08  5:58   ` Paul Mackerras
2016-01-29  5:24 ` David Gibson [this message]
2016-02-01  8:31   ` [RFCv2 7/9] pseries: Add support for hash " Anshuman Khandual
2016-02-01 11:04     ` David Gibson
2016-02-08  5:59   ` Paul Mackerras
2016-01-29  5:24 ` [RFCv2 8/9] pseries: Advertise HPT resizing support via CAS David Gibson
2016-02-01  8:36   ` Anshuman Khandual
2016-02-08  6:00   ` Paul Mackerras
2016-01-29  5:24 ` [RFCv2 9/9] pseries: Automatically resize HPT for memory hot add/remove David Gibson
2016-02-01  8:51   ` Anshuman Khandual
2016-02-01 10:55     ` David Gibson
2016-02-08  6:01   ` Paul Mackerras
2016-02-01  5:50 ` [RFCv2 0/9] PAPR hash page table resizing (guest side) Anshuman Khandual
2016-02-02  0:57   ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1454045043-25545-8-git-send-email-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=aik@ozlabs.ru \
    --cc=benh@kernel.crashing.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=lvivier@redhat.com \
    --cc=mpe@ellerman.id.au \
    --cc=paulus@samba.org \
    --cc=thuth@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.