All of lore.kernel.org
 help / color / mirror / Atom feed
From: Haozhong Zhang <haozhong.zhang@intel.com>
To: xen-devel@lists.xen.org
Cc: Haozhong Zhang <haozhong.zhang@intel.com>,
	Xiao Guangrong <guangrong.xiao@linux.intel.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Jan Beulich <jbeulich@suse.com>,
	Daniel De Graaf <dgdegra@tycho.nsa.gov>
Subject: [RFC XEN PATCH 03/16] xen/x86: add a hypercall XENPF_pmem_add to report host pmem regions
Date: Mon, 10 Oct 2016 08:32:22 +0800	[thread overview]
Message-ID: <20161010003235.4213-4-haozhong.zhang@intel.com> (raw)
In-Reply-To: <20161010003235.4213-1-haozhong.zhang@intel.com>

Xen hypervisor does not include a pmem driver. Instead, it relies on the
pmem driver in Dom0 to report the PFN ranges of the entire pmem region,
its reserved area and data area via XENPF_pmem_add. The reserved area is
used by Xen hypervisor to place the frame table and M2P table, and is
inaccessible to Dom0 once it has been reported.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 xen/arch/x86/Makefile             |   1 +
 xen/arch/x86/platform_hypercall.c |   7 ++
 xen/arch/x86/pmem.c               | 161 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/x86_64/mm.c          |  54 +++++++++++++
 xen/include/asm-x86/mm.h          |   4 +
 xen/include/public/platform.h     |  14 ++++
 xen/include/xen/pmem.h            |  31 ++++++++
 xen/xsm/flask/hooks.c             |   1 +
 8 files changed, 273 insertions(+)
 create mode 100644 xen/arch/x86/pmem.c
 create mode 100644 xen/include/xen/pmem.h

diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 931917d..9cf2da1 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_TBOOT) += tboot.o
 obj-y += hpet.o
 obj-y += vm_event.o
 obj-y += xstate.o
+obj-y += pmem.o
 
 x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
 
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index 0879e19..c47eea4 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -24,6 +24,7 @@
 #include <xen/pmstat.h>
 #include <xen/irq.h>
 #include <xen/symbols.h>
+#include <xen/pmem.h>
 #include <asm/current.h>
 #include <public/platform.h>
 #include <acpi/cpufreq/processor_perf.h>
@@ -822,6 +823,12 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op)
     }
     break;
 
+    case XENPF_pmem_add:
+        ret = pmem_add(op->u.pmem_add.spfn, op->u.pmem_add.epfn,
+                       op->u.pmem_add.rsv_spfn, op->u.pmem_add.rsv_epfn,
+                       op->u.pmem_add.data_spfn, op->u.pmem_add.data_epfn);
+        break;
+
     default:
         ret = -ENOSYS;
         break;
diff --git a/xen/arch/x86/pmem.c b/xen/arch/x86/pmem.c
new file mode 100644
index 0000000..70358ed
--- /dev/null
+++ b/xen/arch/x86/pmem.c
@@ -0,0 +1,161 @@
+/******************************************************************************
+ * arch/x86/pmem.c
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Haozhong Zhang <haozhong.zhang@intel.com>
+ */
+
+#include <xen/guest_access.h>
+#include <xen/list.h>
+#include <xen/spinlock.h>
+#include <xen/pmem.h>
+#include <xen/iocap.h>
+#include <asm-x86/mm.h>
+
+/*
+ * All pmem regions reported from Dom0 are linked in pmem_list, which
+ * is protected by pmem_list_lock. Its entries are of type struct pmem
+ * and sorted in increasing order of field spfn.
+ */
+static DEFINE_SPINLOCK(pmem_list_lock);
+static LIST_HEAD(pmem_list);
+
+/*
+ * One host pmem region reported by Dom0.  All fields below are half-open
+ * PFN ranges [start, end); the reserved and data areas lie inside
+ * [spfn, epfn) and do not overlap each other (enforced by pmem_add_check()).
+ */
+struct pmem {
+    struct list_head link;   /* link to pmem_list */
+    unsigned long spfn;      /* start PFN of the whole pmem region */
+    unsigned long epfn;      /* end PFN of the whole pmem region */
+    unsigned long rsv_spfn;  /* start PFN of the reserved area */
+    unsigned long rsv_epfn;  /* end PFN of the reserved area */
+    unsigned long data_spfn; /* start PFN of the data area */
+    unsigned long data_epfn; /* end PFN of the data area */
+};
+
+/* Return non-zero iff [s2, e2) is a non-empty sub-range of [s1, e1). */
+static int is_included(unsigned long s1, unsigned long e1,
+                       unsigned long s2, unsigned long e2)
+{
+    return s1 <= s2 && s2 < e2 && e2 <= e1;
+}
+
+/*
+ * Return non-zero iff the half-open ranges [s1, e1) and [s2, e2) overlap.
+ * NOTE(review): "overlaped" is a typo for "overlapped"; renaming would
+ * require updating the caller in pmem_add_check() as well.
+ */
+static int is_overlaped(unsigned long s1, unsigned long e1,
+                        unsigned long s2, unsigned long e2)
+{
+    return (s1 <= s2 && s2 < e1) || (s2 < s1 && s1 < e2);
+}
+
+/*
+ * Check that the reserved area (rsv_mfns pages) is large enough to hold
+ * both the frame table entries and the M2P table entries needed to manage
+ * total_mfns pages.
+ *
+ * Each table's size must be rounded UP to whole pages: the plain
+ * right-shift used previously rounded down, so a reserved area up to two
+ * pages too small could incorrectly pass this check.
+ */
+static int check_reserved_size(unsigned long rsv_mfns, unsigned long total_mfns)
+{
+    return rsv_mfns >=
+        ((sizeof(struct page_info) * total_mfns + PAGE_SIZE - 1) >> PAGE_SHIFT) +
+        ((sizeof(*machine_to_phys_mapping) * total_mfns + PAGE_SIZE - 1) >>
+         PAGE_SHIFT);
+}
+
+/*
+ * Validate the layout of a reported pmem region: all three PFN ranges must
+ * be non-empty, the reserved and data areas must both lie within the whole
+ * region and must not overlap each other, and the reserved area must be
+ * large enough for the frame table and M2P entries of the whole region.
+ *
+ * Return 1 if the layout is acceptable, 0 otherwise.
+ */
+static int pmem_add_check(unsigned long spfn, unsigned long epfn,
+                          unsigned long rsv_spfn, unsigned long rsv_epfn,
+                          unsigned long data_spfn, unsigned long data_epfn)
+{
+    /* All three ranges must be non-empty. */
+    if ( spfn >= epfn || rsv_spfn >= rsv_epfn || data_spfn >= data_epfn )
+        return 0;
+
+    /* Reserved and data areas must both lie inside the whole region. */
+    if ( !is_included(spfn, epfn, rsv_spfn, rsv_epfn) ||
+         !is_included(spfn, epfn, data_spfn, data_epfn) )
+        return 0;
+
+    /* Reserved and data areas must be disjoint. */
+    if ( is_overlaped(rsv_spfn, rsv_epfn, data_spfn, data_epfn) )
+        return 0;
+
+    /* Reserved area must fit the frame table and M2P entries. */
+    if ( !check_reserved_size(rsv_epfn - rsv_spfn, epfn - spfn) )
+        return 0;
+
+    return 1;
+}
+
+/*
+ * Insert a new pmem region into pmem_list, keeping the list sorted in
+ * increasing order of start PFN.
+ *
+ * Return 0 on success, -EINVAL if [spfn, epfn) overlaps a region already
+ * in the list, or -ENOMEM on allocation failure.
+ */
+static int pmem_list_add(unsigned long spfn, unsigned long epfn,
+                         unsigned long rsv_spfn, unsigned long rsv_epfn,
+                         unsigned long data_spfn, unsigned long data_epfn)
+{
+    struct list_head *cur;
+    struct pmem *new_pmem;
+    int ret = 0;
+
+    spin_lock(&pmem_list_lock);
+
+    /*
+     * Walk backwards to find the last existing region that starts below
+     * spfn; the new entry is inserted right after it (or at the list head
+     * if the loop completes without breaking), preserving the sort order.
+     */
+    list_for_each_prev(cur, &pmem_list)
+    {
+        struct pmem *cur_pmem = list_entry(cur, struct pmem, link);
+        unsigned long cur_spfn = cur_pmem->spfn;
+        unsigned long cur_epfn = cur_pmem->epfn;
+
+        /* Reject any overlap between [spfn, epfn) and an existing region. */
+        if ( (cur_spfn <= spfn && spfn < cur_epfn) ||
+             (spfn <= cur_spfn && cur_spfn < epfn) )
+        {
+            ret = -EINVAL;
+            goto out;
+        }
+
+        if ( cur_spfn < spfn )
+            break;
+    }
+
+    /* NOTE(review): allocation is performed with pmem_list_lock held. */
+    new_pmem = xmalloc(struct pmem);
+    if ( !new_pmem )
+    {
+        ret = -ENOMEM;
+        goto out;
+    }
+    new_pmem->spfn      = spfn;
+    new_pmem->epfn      = epfn;
+    new_pmem->rsv_spfn  = rsv_spfn;
+    new_pmem->rsv_epfn  = rsv_epfn;
+    new_pmem->data_spfn = data_spfn;
+    new_pmem->data_epfn = data_epfn;
+    list_add(&new_pmem->link, cur);
+
+ out:
+    spin_unlock(&pmem_list_lock);
+    return ret;
+}
+
+/*
+ * Register a host pmem region reported via XENPF_pmem_add: validate its
+ * layout, set up the frame table and M2P table in its reserved area
+ * (pmem_setup()), revoke the calling domain's access to the reserved
+ * area, and record the region in pmem_list.
+ *
+ * Return 0 on success or a negative errno value on failure.
+ *
+ * NOTE(review): if iomem_deny_access() or pmem_list_add() fails, the
+ * steps already completed are not rolled back -- confirm whether unwind
+ * logic is needed here.
+ */
+int pmem_add(unsigned long spfn, unsigned long epfn,
+             unsigned long rsv_spfn, unsigned long rsv_epfn,
+             unsigned long data_spfn, unsigned long data_epfn)
+{
+    int ret;
+
+    if ( !pmem_add_check(spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn) )
+        return -EINVAL;
+
+    ret = pmem_setup(spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn);
+    if ( ret )
+        goto out;
+
+    /* Dom0 must no longer touch the reserved area once Xen owns it. */
+    ret = iomem_deny_access(current->domain, rsv_spfn, rsv_epfn);
+    if ( ret )
+        goto out;
+
+    ret = pmem_list_add(spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn);
+    if ( ret )
+        goto out;
+
+    printk(XENLOG_INFO
+           "pmem: pfns     0x%lx - 0x%lx\n"
+           "      reserved 0x%lx - 0x%lx\n"
+           "      data     0x%lx - 0x%lx\n",
+           spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn);
+
+ out:
+    return ret;
+}
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index 5c0f527..b1f92f6 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1474,6 +1474,60 @@ destroy_frametable:
     return ret;
 }
 
+/*
+ * Set up Xen's management structures (frame table and M2P table) for the
+ * pmem pages [spfn, epfn), placing those structures inside the region's
+ * reserved area [rsv_spfn, rsv_epfn) rather than in regular RAM.
+ *
+ * Return 0 on success or a negative errno value on failure; on failure
+ * max_page, max_pdx and total_pages are restored to their previous values.
+ */
+int pmem_setup(unsigned long spfn, unsigned long epfn,
+               unsigned long rsv_spfn, unsigned long rsv_epfn,
+               unsigned long data_spfn, unsigned long data_epfn)
+{
+    /*
+     * max_page and total_pages are unsigned long; snapshotting them into
+     * plain 'unsigned' (as a previous version did) truncates them on hosts
+     * with enough memory, corrupting both globals on the error path below.
+     */
+    unsigned long old_max = max_page, old_total = total_pages;
+    struct mem_hotadd_info info =
+        { .spfn = spfn, .epfn = epfn, .cur = spfn };
+    struct mem_hotadd_info rsv_info =
+        { .spfn = rsv_spfn, .epfn = rsv_epfn, .cur = rsv_spfn };
+    int ret;
+    unsigned long i;
+    struct page_info *pg;
+
+    if ( !mem_hotadd_check(spfn, epfn) )
+        return -EINVAL;
+
+    /* Frame table entries for [spfn, epfn) are allocated from rsv_info. */
+    ret = extend_frame_table(&info, &rsv_info);
+    if ( ret )
+        goto destroy_frametable;
+
+    if ( max_page < epfn )
+    {
+        max_page = epfn;
+        max_pdx = pfn_to_pdx(max_page - 1) + 1;
+    }
+    total_pages += epfn - spfn;
+
+    set_pdx_range(spfn, epfn);
+    ret = setup_m2p_table(&info, &rsv_info);
+    if ( ret )
+        goto destroy_m2p;
+
+    share_hotadd_m2p_table(&info);
+
+    /*
+     * Mark the reserved pages actually consumed by the tables
+     * ([rsv_spfn, rsv_info.cur)) as in use; everything else is free.
+     */
+    for ( i = spfn; i < epfn; i++ )
+    {
+        pg = mfn_to_page(i);
+        pg->count_info = (rsv_spfn <= i && i < rsv_info.cur) ?
+                         PGC_state_inuse : PGC_state_free;
+    }
+
+    return 0;
+
+destroy_m2p:
+    destroy_m2p_mapping(&info);
+    max_page = old_max;
+    total_pages = old_total;
+    max_pdx = pfn_to_pdx(max_page - 1) + 1;
+destroy_frametable:
+    cleanup_frame_table(&info);
+
+    return ret;
+}
+
 #include "compat/mm.c"
 
 /*
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index b781495..e31f1c8 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -597,4 +597,8 @@ typedef struct mm_rwlock {
 
 extern const char zero_page[];
 
+int pmem_setup(unsigned long spfn, unsigned long epfn,
+               unsigned long rsv_spfn, unsigned long rsv_epfn,
+               unsigned long data_spfn, unsigned long data_epfn);
+
 #endif /* __ASM_X86_MM_H__ */
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index 1e6a6ce..c7e7cce 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -608,6 +608,19 @@ struct xenpf_symdata {
 typedef struct xenpf_symdata xenpf_symdata_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_symdata_t);
 
+#define XENPF_pmem_add     64
+struct xenpf_pmem_add {
+    /* IN variables */
+    uint64_t spfn;      /* start PFN of the whole pmem region */
+    uint64_t epfn;      /* end PFN of the whole pmem region */
+    uint64_t rsv_spfn;  /* start PFN of the reserved area within the region */
+    uint64_t rsv_epfn;  /* end PFN of the reserved area within the region */
+    uint64_t data_spfn; /* start PFN of the data area within the region */
+    uint64_t data_epfn; /* end PFN of the data area within the region */
+};
+typedef struct xenpf_pmem_add xenpf_pmem_add_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_pmem_add_t);
+
 /*
  * ` enum neg_errnoval
  * ` HYPERVISOR_platform_op(const struct xen_platform_op*);
@@ -638,6 +651,7 @@ struct xen_platform_op {
         struct xenpf_core_parking      core_parking;
         struct xenpf_resource_op       resource_op;
         struct xenpf_symdata           symdata;
+        struct xenpf_pmem_add          pmem_add;
         uint8_t                        pad[128];
     } u;
 };
diff --git a/xen/include/xen/pmem.h b/xen/include/xen/pmem.h
new file mode 100644
index 0000000..a670ab8
--- /dev/null
+++ b/xen/include/xen/pmem.h
@@ -0,0 +1,31 @@
+/*
+ * xen/include/xen/pmem.h
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Haozhong Zhang <haozhong.zhang@intel.com>
+ */
+
+#ifndef __XEN_PMEM_H__
+#define __XEN_PMEM_H__
+
+#include <xen/types.h>
+
+int pmem_add(unsigned long spfn, unsigned long epfn,
+             unsigned long rsv_spfn, unsigned long rsv_epfn,
+             unsigned long data_spfn, unsigned long data_epfn);
+
+#endif /* __XEN_PMEM_H__ */
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 177c11f..948a161 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1360,6 +1360,7 @@ static int flask_platform_op(uint32_t op)
     case XENPF_cpu_offline:
     case XENPF_cpu_hotadd:
     case XENPF_mem_hotadd:
+    case XENPF_pmem_add:
         return 0;
 #endif
 
-- 
2.10.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

  parent reply	other threads:[~2016-10-10  0:32 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-10  0:32 [RFC XEN PATCH 00/16] Add vNVDIMM support to HVM domains Haozhong Zhang
2016-10-10  0:32 ` [RFC XEN PATCH 01/16] x86_64/mm: explicitly specify the location to place the frame table Haozhong Zhang
2016-12-09 21:35   ` Konrad Rzeszutek Wilk
2016-12-12  2:27     ` Haozhong Zhang
2016-12-12  8:25       ` Jan Beulich
2016-10-10  0:32 ` [RFC XEN PATCH 02/16] x86_64/mm: explicitly specify the location to place the M2P table Haozhong Zhang
2016-12-09 21:38   ` Konrad Rzeszutek Wilk
2016-12-12  2:31     ` Haozhong Zhang
2016-12-12  8:26       ` Jan Beulich
2016-12-12  8:35         ` Haozhong Zhang
2016-10-10  0:32 ` Haozhong Zhang [this message]
2016-10-11 19:13   ` [RFC XEN PATCH 03/16] xen/x86: add a hypercall XENPF_pmem_add to report host pmem regions Andrew Cooper
2016-12-09 22:02   ` Konrad Rzeszutek Wilk
2016-12-12  4:16     ` Haozhong Zhang
2016-12-12  8:30       ` Jan Beulich
2016-12-12  8:38         ` Haozhong Zhang
2016-12-12 14:44           ` Konrad Rzeszutek Wilk
2016-12-13  1:08             ` Haozhong Zhang
2016-12-22 11:58   ` Jan Beulich
2016-10-10  0:32 ` [RFC XEN PATCH 04/16] xen/x86: add XENMEM_populate_pmemmap to map host pmem pages to guest Haozhong Zhang
2016-12-09 22:22   ` Konrad Rzeszutek Wilk
2016-12-12  4:38     ` Haozhong Zhang
2016-12-22 12:19   ` Jan Beulich
2016-10-10  0:32 ` [RFC XEN PATCH 05/16] xen/x86: release pmem pages at domain destroy Haozhong Zhang
2016-12-09 22:27   ` Konrad Rzeszutek Wilk
2016-12-12  4:47     ` Haozhong Zhang
2016-12-22 12:22   ` Jan Beulich
2016-10-10  0:32 ` [RFC XEN PATCH 06/16] tools: reserve guest memory for ACPI from device model Haozhong Zhang
2017-01-27 20:44   ` Konrad Rzeszutek Wilk
2017-02-08  1:39     ` Haozhong Zhang
2017-02-08 14:31       ` Konrad Rzeszutek Wilk
2016-10-10  0:32 ` [RFC XEN PATCH 07/16] tools/libacpi: add callback acpi_ctxt.p2v to get a pointer from physical address Haozhong Zhang
2017-01-27 20:46   ` Konrad Rzeszutek Wilk
2017-02-08  1:42     ` Haozhong Zhang
2016-10-10  0:32 ` [RFC XEN PATCH 08/16] tools/libacpi: expose details of memory allocation callback Haozhong Zhang
2017-01-27 20:58   ` Konrad Rzeszutek Wilk
2017-02-08  2:12     ` Haozhong Zhang
2016-10-10  0:32 ` [RFC XEN PATCH 09/16] tools/libacpi: add callbacks to access XenStore Haozhong Zhang
2017-01-27 21:10   ` Konrad Rzeszutek Wilk
2017-02-08  2:19     ` Haozhong Zhang
2016-10-10  0:32 ` [RFC XEN PATCH 10/16] tools/libacpi: add a simple AML builder Haozhong Zhang
2017-01-27 21:19   ` Konrad Rzeszutek Wilk
2017-02-08  2:33     ` Haozhong Zhang
2016-10-10  0:32 ` [RFC XEN PATCH 11/16] tools/libacpi: load ACPI built by the device model Haozhong Zhang
2017-01-27 21:40   ` Konrad Rzeszutek Wilk
2017-02-08  5:38     ` Haozhong Zhang
2017-02-08 14:35       ` Konrad Rzeszutek Wilk
2016-10-10  0:32 ` [RFC XEN PATCH 12/16] tools/libxl: build qemu options from xl vNVDIMM configs Haozhong Zhang
2017-01-27 21:47   ` Konrad Rzeszutek Wilk
2017-02-08  5:42     ` Haozhong Zhang
2017-01-27 21:48   ` Konrad Rzeszutek Wilk
2017-02-08  5:47     ` Haozhong Zhang
2016-10-10  0:32 ` [RFC XEN PATCH 13/16] tools/libxl: add support to map host pmem device to guests Haozhong Zhang
2017-01-27 22:06   ` Konrad Rzeszutek Wilk
2017-01-27 22:09     ` Konrad Rzeszutek Wilk
2017-02-08  5:59     ` Haozhong Zhang
2017-02-08 14:37       ` Konrad Rzeszutek Wilk
2016-10-10  0:32 ` [RFC XEN PATCH 14/16] tools/libxl: add support to map files on pmem devices " Haozhong Zhang
2017-01-27 22:10   ` Konrad Rzeszutek Wilk
2017-02-08  6:03     ` Haozhong Zhang
2016-10-10  0:32 ` [RFC XEN PATCH 15/16] tools/libxl: handle return code of libxl__qmp_initializations() Haozhong Zhang
2017-01-27 22:11   ` Konrad Rzeszutek Wilk
2017-02-08  6:07     ` Haozhong Zhang
2017-02-08 10:31       ` Wei Liu
2017-02-09  2:47         ` Haozhong Zhang
2017-02-09 10:13           ` Wei Liu
2017-02-09 10:16             ` Wei Liu
2017-02-10  2:37             ` Haozhong Zhang
2017-02-10  8:11               ` Wei Liu
2017-02-10  8:23                 ` Wei Liu
2017-02-10  8:24                 ` Haozhong Zhang
2016-10-10  0:32 ` [RFC XEN PATCH 16/16] tools/libxl: initiate pmem mapping via qmp callback Haozhong Zhang
2017-01-27 22:13   ` Konrad Rzeszutek Wilk
2017-02-08  6:08     ` Haozhong Zhang
2016-10-24 16:37 ` [RFC XEN PATCH 00/16] Add vNVDIMM support to HVM domains Wei Liu
2016-10-25  6:55   ` Haozhong Zhang
2016-10-25 11:28     ` Wei Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161010003235.4213-4-haozhong.zhang@intel.com \
    --to=haozhong.zhang@intel.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=dgdegra@tycho.nsa.gov \
    --cc=guangrong.xiao@linux.intel.com \
    --cc=jbeulich@suse.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.