From: Konrad Rzeszutek Wilk <konrad@kernel.org>
To: Haozhong Zhang <haozhong.zhang@intel.com>
Cc: Xiao Guangrong <guangrong.xiao@linux.intel.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	xen-devel@lists.xen.org, Jan Beulich <jbeulich@suse.com>,
	Wei Liu <wei.liu2@citrix.com>
Subject: Re: [RFC XEN PATCH 04/16] xen/x86: add XENMEM_populate_pmemmap to map host pmem pages to guest
Date: Fri, 9 Dec 2016 17:22:12 -0500
Message-ID: <20161209222211.GD21093@localhost.localdomain>
In-Reply-To: <20161010003235.4213-5-haozhong.zhang@intel.com>

On Mon, Oct 10, 2016 at 08:32:23AM +0800, Haozhong Zhang wrote:
> XENMEM_populate_pmemmap is used by toolstack to map given host pmem pages
> to given guest pages. Only pages in the data area of a pmem region are
> allowed to be mapped to guest.
> 
> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> ---
> Cc: Ian Jackson <ian.jackson@eu.citrix.com>
> Cc: Wei Liu <wei.liu2@citrix.com>
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
>  tools/libxc/include/xenctrl.h |   8 +++
>  tools/libxc/xc_domain.c       |  14 +++++
>  xen/arch/x86/pmem.c           | 123 ++++++++++++++++++++++++++++++++++++++++++
>  xen/common/domain.c           |   3 ++
>  xen/common/memory.c           |  31 +++++++++++
>  xen/include/public/memory.h   |  14 ++++-
>  xen/include/xen/pmem.h        |  10 ++++
>  xen/include/xen/sched.h       |   3 ++
>  8 files changed, 205 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
> index 2c83544..46c71fc 100644
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -2710,6 +2710,14 @@ int xc_livepatch_revert(xc_interface *xch, char *name, uint32_t timeout);
>  int xc_livepatch_unload(xc_interface *xch, char *name, uint32_t timeout);
>  int xc_livepatch_replace(xc_interface *xch, char *name, uint32_t timeout);
>  
> +/**
> + * Map host pmem pages at PFNs @mfn ~ (@mfn + @nr_mfns - 1) to
> + * guest physical pages at guest PFNs @gpfn ~ (@gpfn + @nr_mfns - 1)
> + */
> +int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid,
> +                               xen_pfn_t mfn, xen_pfn_t gpfn,
> +                               unsigned int nr_mfns);
> +
>  /* Compat shims */
>  #include "xenctrl_compat.h"
>  
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index 296b852..81a90a1 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -2520,6 +2520,20 @@ int xc_domain_soft_reset(xc_interface *xch,
>      domctl.domain = (domid_t)domid;
>      return do_domctl(xch, &domctl);
>  }
> +
> +int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid,
> +                               xen_pfn_t mfn, xen_pfn_t gpfn,
> +                               unsigned int nr_mfns)
> +{
> +    struct xen_pmemmap pmemmap = {
> +        .domid   = domid,
> +        .mfn     = mfn,
> +        .gpfn    = gpfn,
> +        .nr_mfns = nr_mfns,
> +    };
> +    return do_memory_op(xch, XENMEM_populate_pmemmap, &pmemmap, sizeof(pmemmap));
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/x86/pmem.c b/xen/arch/x86/pmem.c
> index 70358ed..e4dc685 100644
> --- a/xen/arch/x86/pmem.c
> +++ b/xen/arch/x86/pmem.c
> @@ -24,6 +24,9 @@
>  #include <xen/spinlock.h>
>  #include <xen/pmem.h>
>  #include <xen/iocap.h>
> +#include <xen/sched.h>
> +#include <xen/event.h>
> +#include <xen/paging.h>
>  #include <asm-x86/mm.h>
>  
>  /*
> @@ -63,6 +66,48 @@ static int check_reserved_size(unsigned long rsv_mfns, unsigned long total_mfns)
>          ((sizeof(*machine_to_phys_mapping) * total_mfns) >> PAGE_SHIFT);
>  }
>  
> +static int is_data_mfn(unsigned long mfn)

bool, please (and then the 'data' local and the return statements to match).
> +{
> +    struct list_head *cur;
> +    int data = 0;
> +
> +    ASSERT(spin_is_locked(&pmem_list_lock));
> +
> +    list_for_each(cur, &pmem_list)
> +    {
> +        struct pmem *pmem = list_entry(cur, struct pmem, link);
> +
> +        if ( pmem->data_spfn <= mfn && mfn < pmem->data_epfn )

You may want to flip the first conditional so that 'mfn' is on the left-hand
side. And perhaps rename 'mfn' to 'pfn', as that is how the fields of your
structure are named?

But ... maybe the #3 patch that introduces XENPF_pmem_add should use
'data_smfn', 'data_emfn' and so on instead?

Either way, with the bool change folded in, I would expect something along
these lines (untested sketch, keeping your current field names):
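    static bool is_data_mfn(unsigned long mfn)
    {
        struct pmem *pmem;

        ASSERT(spin_is_locked(&pmem_list_lock));

        list_for_each_entry ( pmem, &pmem_list, link )
            if ( mfn >= pmem->data_spfn && mfn < pmem->data_epfn )
                return true;

        return false;
    }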

> +        {
> +            data = 1;
> +            break;
> +        }
> +    }
> +
> +    return data;
> +}
> +
> +static int pmem_page_valid(struct page_info *page, struct domain *d)

bool here as well.
> +{
> +    /* only data area can be mapped to guest */
> +    if ( !is_data_mfn(page_to_mfn(page)) )
> +    {
> +        dprintk(XENLOG_DEBUG, "pmem: mfn 0x%lx is not a pmem data page\n",
> +                page_to_mfn(page));
> +        return 0;
> +    }
> +
> +    /* inuse/offlined/offlining pmem page cannot be mapped to guest */
> +    if ( !page_state_is(page, free) )
> +    {
> +        dprintk(XENLOG_DEBUG, "pmem: invalid page state of mfn 0x%lx: 0x%lx\n",
> +                page_to_mfn(page), page->count_info & PGC_state);
> +        return 0;
> +    }
> +
> +    return 1;
> +}
> +
>  static int pmem_add_check(unsigned long spfn, unsigned long epfn,
>                            unsigned long rsv_spfn, unsigned long rsv_epfn,
>                            unsigned long data_spfn, unsigned long data_epfn)
> @@ -159,3 +204,81 @@ int pmem_add(unsigned long spfn, unsigned long epfn,
>   out:
>      return ret;
>  }
> +
> +static int pmem_assign_pages(struct domain *d,
> +                             struct page_info *pg, unsigned int order)
> +{
> +    int rc = 0;
> +    unsigned long i;
> +
> +    spin_lock(&d->pmem_lock);
> +
> +    if ( unlikely(d->is_dying) )
> +    {
> +        rc = -EINVAL;
> +        goto out;
> +    }
> +
> +    for ( i = 0; i < (1 << order); i++ )
> +    {
> +        ASSERT(page_get_owner(&pg[i]) == NULL);
> +        ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
> +        page_set_owner(&pg[i], d);
> +        smp_wmb();

Why the smp_wmb() here? Why not after count_info is set, i.e. something
along these lines (just a sketch reordering your code):
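        page_set_owner(&pg[i], d);
        pg[i].count_info = PGC_allocated | 1;
        smp_wmb();   /* publish owner and count before the page goes on the list */
        page_list_add_tail(&pg[i], &d->pmem_page_list);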

> +        pg[i].count_info = PGC_allocated | 1;
> +        page_list_add_tail(&pg[i], &d->pmem_page_list);
> +    }
> +
> + out:
> +    spin_unlock(&d->pmem_lock);
> +    return rc;
> +}
> +
> +int pmem_populate(struct xen_pmemmap_args *args)
> +{
> +    struct domain *d = args->domain;
> +    unsigned long i, mfn, gpfn;
> +    struct page_info *page;
> +    int rc = 0;
> +
> +    if ( !has_hvm_container_domain(d) || !paging_mode_translate(d) )
> +        return -EINVAL;
> +
> +    for ( i = args->nr_done, mfn = args->mfn + i, gpfn = args->gpfn + i;
> +          i < args->nr_mfns;
> +          i++, mfn++, gpfn++ )
> +    {
> +        if ( i != args->nr_done && hypercall_preempt_check() )
> +        {
> +            args->preempted = 1;
> +            goto out;
> +        }
> +
> +        page = mfn_to_page(mfn);
> +
> +        spin_lock(&pmem_list_lock);
> +        if ( !pmem_page_valid(page, d) )
> +        {
> +            dprintk(XENLOG_DEBUG, "pmem: MFN 0x%lx not a valid pmem page\n", mfn);
> +            spin_unlock(&pmem_list_lock);
> +            rc = -EINVAL;
> +            goto out;
> +        }
> +        page->count_info = PGC_state_inuse;

No test_and_set_bit (or cmpxchg) to claim the page atomically, rather than
a plain store? E.g. (sketch, assuming a free pmem page has no other
count_info bits set):
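        if ( cmpxchg(&page->count_info,
                     PGC_state_free, PGC_state_inuse) != PGC_state_free )
        {
            spin_unlock(&pmem_list_lock);
            rc = -EBUSY;
            goto out;
        }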

> +        spin_unlock(&pmem_list_lock);
> +
> +        page->u.inuse.type_info = 0;
> +
> +        guest_physmap_add_page(d, _gfn(gpfn), _mfn(mfn), 0);
> +        if ( pmem_assign_pages(d, page, 0) )
> +        {
> +            guest_physmap_remove_page(d, _gfn(gpfn), _mfn(mfn), 0);

Don't you also need to do something about PGC_state_inuse on this error
path? Perhaps (sketch):
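            spin_lock(&pmem_list_lock);
            page->count_info = (page->count_info & ~PGC_state) | PGC_state_free;
            spin_unlock(&pmem_list_lock);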
> +            rc = -EFAULT;
> +            goto out;
> +        }
> +    }
> +
> + out:
> +    args->nr_done = i;
> +    return rc;
> +}
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 3abaca9..8192548 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -288,6 +288,9 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
>      INIT_PAGE_LIST_HEAD(&d->page_list);
>      INIT_PAGE_LIST_HEAD(&d->xenpage_list);
>  
> +    spin_lock_init_prof(d, pmem_lock);
> +    INIT_PAGE_LIST_HEAD(&d->pmem_page_list);
> +
>      spin_lock_init(&d->node_affinity_lock);
>      d->node_affinity = NODE_MASK_ALL;
>      d->auto_node_affinity = 1;
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index 21797ca..09cb1c9 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -24,6 +24,7 @@
>  #include <xen/numa.h>
>  #include <xen/mem_access.h>
>  #include <xen/trace.h>
> +#include <xen/pmem.h>
>  #include <asm/current.h>
>  #include <asm/hardirq.h>
>  #include <asm/p2m.h>
> @@ -1329,6 +1330,36 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>      }
>  #endif
>  
> +    case XENMEM_populate_pmemmap:
> +    {
> +        struct xen_pmemmap pmemmap;
> +        struct xen_pmemmap_args args;
> +
> +        if ( copy_from_guest(&pmemmap, arg, 1) )
> +            return -EFAULT;
> +
> +        d = rcu_lock_domain_by_any_id(pmemmap.domid);
> +        if ( !d )
> +            return -EINVAL;
> +
> +        args.domain = d;
> +        args.mfn = pmemmap.mfn;
> +        args.gpfn = pmemmap.gpfn;
> +        args.nr_mfns = pmemmap.nr_mfns;
> +        args.nr_done = start_extent;
> +        args.preempted = 0;
> +
> +        rc = pmem_populate(&args);
> +        rcu_unlock_domain(d);
> +
> +        if ( !rc && args.preempted )

Nice! Glad to see that preemption is there!

> +            return hypercall_create_continuation(
> +                __HYPERVISOR_memory_op, "lh",
> +                op | (args.nr_done << MEMOP_EXTENT_SHIFT), arg);
> +
> +        break;
> +    }
> +
>      default:
>          rc = arch_memory_op(cmd, arg);
>          break;

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

