All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@suse.com>
To: Roger Pau Monne <roger.pau@citrix.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
	boris.ostrovsky@oracle.com, xen-devel@lists.xenproject.org
Subject: Re: [PATCH v2 15/30] xen/x86: populate PVHv2 Dom0 physical memory map
Date: Fri, 30 Sep 2016 09:52:56 -0600	[thread overview]
Message-ID: <57EEA6780200007800114683@prv-mh.provo.novell.com> (raw)
In-Reply-To: <1474991845-27962-16-git-send-email-roger.pau@citrix.com>

>>> On 27.09.16 at 17:57, <roger.pau@citrix.com> wrote:
> @@ -43,6 +44,11 @@ static long __initdata dom0_nrpages;
>  static long __initdata dom0_min_nrpages;
>  static long __initdata dom0_max_nrpages = LONG_MAX;
>  
> +/* Size of the VM86 TSS for virtual 8086 mode to use. */
> +#define HVM_VM86_TSS_SIZE   128
> +
> +static unsigned int __initdata hvm_mem_stats[MAX_ORDER + 1];

This is for your debugging only, I suppose?

> @@ -336,7 +343,8 @@ static unsigned long __init compute_dom0_nr_pages(
>          avail -= dom0_paging_pages(d, nr_pages);
>      }
>  
> -    if ( (parms->p2m_base == UNSET_ADDR) && (dom0_nrpages <= 0) &&
> +    if ( is_pv_domain(d) &&
> +         (parms->p2m_base == UNSET_ADDR) && (dom0_nrpages <= 0) &&

Perhaps better to simply force parms->p2m_base to UNSET_ADDR
earlier on?

> @@ -579,8 +588,19 @@ static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
>              continue;
>          }
>  
> -        *entry_guest = *entry;
> -        pages = PFN_UP(entry_guest->size);
> +        /*
> +         * Make sure the start and length are aligned to PAGE_SIZE, because
> +         * that's the minimum granularity of the 2nd stage translation.
> +         */
> +        start = ROUNDUP(entry->addr, PAGE_SIZE);
> +        end = (entry->addr + entry->size) & PAGE_MASK;
> +        if ( start >= end )
> +            continue;
> +
> +        entry_guest->type = E820_RAM;
> +        entry_guest->addr = start;
> +        entry_guest->size = end - start;
> +        pages = PFN_DOWN(entry_guest->size);
>          if ( (cur_pages + pages) > nr_pages )
>          {
>              /* Truncate region */
> @@ -591,6 +611,8 @@ static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
>          {
>              cur_pages += pages;
>          }
> +        ASSERT(IS_ALIGNED(entry_guest->addr, PAGE_SIZE) &&
> +               IS_ALIGNED(entry_guest->size, PAGE_SIZE));

What does this guard against? Your addition arranges for things to
be page aligned, and the only adjustment done until we get here is
one that obviously also doesn't violate that requirement. I'm all for
assertions when they check state which is not obviously right, but
here I don't see the need.

> @@ -1657,15 +1679,238 @@ out:
>      return rc;
>  }
>  
> +/* Populate an HVM memory range using the biggest possible order. */
> +static void __init hvm_populate_memory_range(struct domain *d, uint64_t start,
> +                                             uint64_t size)
> +{
> +    static unsigned int __initdata memflags = MEMF_no_dma|MEMF_exact_node;
> +    unsigned int order;
> +    struct page_info *page;
> +    int rc;
> +
> +    ASSERT(IS_ALIGNED(size, PAGE_SIZE) && IS_ALIGNED(start, PAGE_SIZE));
> +
> +    order = MAX_ORDER;
> +    while ( size != 0 )
> +    {
> +        order = min(get_order_from_bytes_floor(size), order);
> +        page = alloc_domheap_pages(d, order, memflags);
> +        if ( page == NULL )
> +        {
> +            if ( order == 0 && memflags )
> +            {
> +                /* Try again without any memflags. */
> +                memflags = 0;
> +                order = MAX_ORDER;
> +                continue;
> +            }
> +            if ( order == 0 )
> +                panic("Unable to allocate memory with order 0!\n");
> +            order--;
> +            continue;
> +        }

Is it not possible to utilize alloc_chunk() here?

> +        hvm_mem_stats[order]++;
> +        rc = guest_physmap_add_page(d, _gfn(PFN_DOWN(start)),
> +                                    _mfn(page_to_mfn(page)), order);
> +        if ( rc != 0 )
> +            panic("Failed to populate memory: [%" PRIx64 " - %" PRIx64 "] %d\n",

[<start>,<end>) please.

> +                  start, start + (((uint64_t)1) << (order + PAGE_SHIFT)), rc);
> +        start += ((uint64_t)1) << (order + PAGE_SHIFT);
> +        size -= ((uint64_t)1) << (order + PAGE_SHIFT);

Please prefer 1ULL over (uint64_t)1.

> +        if ( (size & 0xffffffff) == 0 )
> +            process_pending_softirqs();

That's 4GiB at a time - isn't that a little too much?

> +    }
> +
> +}

Stray blank line.

> +static int __init hvm_setup_vmx_unrestricted_guest(struct domain *d)
> +{
> +    struct e820entry *entry;
> +    p2m_type_t p2mt;
> +    uint32_t rc, *ident_pt;
> +    uint8_t *tss;
> +    mfn_t mfn;
> +    paddr_t gaddr = 0;
> +    int i;

unsigned int

> +    /*
> +     * Stole some space from the last found RAM region. One page will be

Steal

> +     * used for the identify page tables, and the remaining space for the

identity

> +     * VM86 TSS. Note that after this not all e820 regions will be aligned
> +     * to PAGE_SIZE.
> +     */
> +    for ( i = 1; i <= d->arch.nr_e820; i++ )
> +    {
> +        entry = &d->arch.e820[d->arch.nr_e820 - i];
> +        if ( entry->type != E820_RAM ||
> +             entry->size < PAGE_SIZE + HVM_VM86_TSS_SIZE )
> +            continue;
> +
> +        entry->size -= PAGE_SIZE + HVM_VM86_TSS_SIZE;
> +        gaddr = entry->addr + entry->size;
> +        break;
> +    }
> +
> +    if ( gaddr == 0 || gaddr < MB(1) )
> +    {
> +        printk("Unable to find memory to stash the identity map and TSS\n");
> +        return -ENOMEM;

One function up you panic() on error - please be consistent. Also for
one of the other patches I think we figured that the TSS isn't really
required, so please only warn in that case.

> +    }
> +
> +    /*
> +     * Identity-map page table is required for running with CR0.PG=0
> +     * when using Intel EPT. Create a 32-bit non-PAE page directory of
> +     * superpages.
> +     */
> +    tss = map_domain_gfn(p2m_get_hostp2m(d), _gfn(PFN_DOWN(gaddr)),
> +                         &mfn, &p2mt, 0, &rc);

Comment and operation don't really fit together.

> +static int __init hvm_setup_p2m(struct domain *d)
> +{
> +    struct vcpu *saved_current, *v = d->vcpu[0];
> +    unsigned long nr_pages;
> +    int i, rc, preempted;
> +
> +    printk("** Preparing memory map **\n");

Debugging leftover again?

> +    /*
> +     * Subtract one page for the EPT identity page table and two pages
> +     * for the MADT replacement.
> +     */
> +    nr_pages = compute_dom0_nr_pages(d, NULL, 0) - 3;

How do you know the MADT replacement requires two pages? Isn't
that CPU-count dependent? And doesn't the partial page used for
the TSS also need accounting for here?

> +    hvm_setup_e820(d, nr_pages);
> +    do {
> +        preempted = 0;
> +        paging_set_allocation(d, dom0_paging_pages(d, nr_pages),
> +                              &preempted);
> +        process_pending_softirqs();
> +    } while ( preempted );
> +
> +    /*
> +     * Special treatment for memory < 1MB:
> +     *  - Copy the data in e820 regions marked as RAM (BDA, EBDA...).
> +     *  - Map everything else as 1:1.
> +     * NB: all this only makes sense if booted from legacy BIOSes.
> +     */
> +    rc = modify_mmio_11(d, 0, PFN_DOWN(MB(1)), true);
> +    if ( rc )
> +    {
> +        printk("Failed to map low 1MB 1:1: %d\n", rc);
> +        return rc;
> +    }
> +
> +    printk("** Populating memory map **\n");
> +    /* Populate memory map. */
> +    for ( i = 0; i < d->arch.nr_e820; i++ )
> +    {
> +        if ( d->arch.e820[i].type != E820_RAM )
> +            continue;
> +
> +        hvm_populate_memory_range(d, d->arch.e820[i].addr,
> +                                  d->arch.e820[i].size);
> +        if ( d->arch.e820[i].addr < MB(1) )
> +        {
> +            unsigned long end = min_t(unsigned long,
> +                            d->arch.e820[i].addr + d->arch.e820[i].size, MB(1));
> +
> +            saved_current = current;
> +            set_current(v);
> +            rc = hvm_copy_to_guest_phys(d->arch.e820[i].addr,
> +                                        maddr_to_virt(d->arch.e820[i].addr),
> +                                        end - d->arch.e820[i].addr);
> +            set_current(saved_current);
> +            if ( rc != HVMCOPY_okay )
> +            {
> +                printk("Unable to copy RAM region %#lx - %#lx\n",
> +                       d->arch.e820[i].addr, end);
> +                return -EFAULT;
> +            }
> +        }
> +    }
> +
> +    printk("Memory allocation stats:\n");
> +    for ( i = 0; i <= MAX_ORDER; i++ )
> +    {
> +        if ( hvm_mem_stats[MAX_ORDER - i] != 0 )
> +            printk("Order %2u: %pZ\n", MAX_ORDER - i,
> +                   _p(((uint64_t)1 << (MAX_ORDER - i + PAGE_SHIFT)) *
> +                      hvm_mem_stats[MAX_ORDER - i]));
> +    }
> +
> +    if ( cpu_has_vmx && paging_mode_hap(d) && !vmx_unrestricted_guest(v) )
> +    {
> +        /*
> +         * Since Dom0 cannot be migrated, we will only setup the
> +         * unrestricted guest helpers if they are needed by the current
> +         * hardware we are running on.
> +         */
> +        rc = hvm_setup_vmx_unrestricted_guest(d);

Calling a function of this name inside an if() checking for
!vmx_unrestricted_guest() is, well, odd.

>  static int __init construct_dom0_hvm(struct domain *d, const module_t *image,
>                                       unsigned long image_headroom,
>                                       module_t *initrd,
>                                       void *(*bootstrap_map)(const module_t *),
>                                       char *cmdline)
>  {
> +    int rc;
>  
>      printk("** Building a PVH Dom0 **\n");
>  
> +    /* Sanity! */
> +    BUG_ON(d->domain_id != 0);
> +    BUG_ON(d->vcpu[0] == NULL);

May I suggest

    BUG_ON(d->domain_id);
    BUG_ON(!d->vcpu[0]);

in cases like this?

> +    process_pending_softirqs();

Why, outside of any loop?

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

  reply	other threads:[~2016-09-30 15:53 UTC|newest]

Thread overview: 146+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-27 15:56 [PATCH v2 00/30] PVHv2 Dom0 Roger Pau Monne
2016-09-27 15:56 ` [PATCH v2 01/30] xen/x86: move setup of the VM86 TSS to the domain builder Roger Pau Monne
2016-09-28 15:35   ` Jan Beulich
2016-09-29 12:57     ` Roger Pau Monne
2016-09-27 15:56 ` [PATCH v2 02/30] xen/x86: remove XENFEAT_hvm_pirqs for PVHv2 guests Roger Pau Monne
2016-09-28 16:03   ` Jan Beulich
2016-09-29 14:17     ` Roger Pau Monne
2016-09-29 16:07       ` Jan Beulich
2016-09-27 15:56 ` [PATCH v2 03/30] xen/x86: fix parameters and return value of *_set_allocation functions Roger Pau Monne
2016-09-28  9:34   ` Tim Deegan
2016-09-29 10:39   ` Jan Beulich
2016-09-29 14:33     ` Roger Pau Monne
2016-09-29 16:09       ` Jan Beulich
2016-09-30 16:48   ` George Dunlap
2016-10-03  8:05   ` Paul Durrant
2016-10-06 11:33     ` Roger Pau Monne
2016-09-27 15:56 ` [PATCH v2 04/30] xen/x86: allow calling {sh/hap}_set_allocation with the idle domain Roger Pau Monne
2016-09-29 10:43   ` Jan Beulich
2016-09-29 14:37     ` Roger Pau Monne
2016-09-29 16:10       ` Jan Beulich
2016-09-30 16:56   ` George Dunlap
2016-09-30 16:56     ` George Dunlap
2016-09-27 15:57 ` [PATCH v2 05/30] xen/x86: assert that local_events_need_delivery is not called by " Roger Pau Monne
2016-09-29 10:45   ` Jan Beulich
2016-09-30  8:32     ` Roger Pau Monne
2016-09-30  8:59       ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 06/30] x86/paging: introduce paging_set_allocation Roger Pau Monne
2016-09-29 10:51   ` Jan Beulich
2016-09-29 14:51     ` Roger Pau Monne
2016-09-29 16:12       ` Jan Beulich
2016-09-29 16:57         ` Roger Pau Monne
2016-09-30 17:00   ` George Dunlap
2016-09-27 15:57 ` [PATCH v2 07/30] xen/x86: split the setup of Dom0 permissions to a function Roger Pau Monne
2016-09-29 13:47   ` Jan Beulich
2016-09-29 15:53     ` Roger Pau Monne
2016-09-27 15:57 ` [PATCH v2 08/30] xen/x86: do the PCI scan unconditionally Roger Pau Monne
2016-09-29 13:55   ` Jan Beulich
2016-09-29 15:11     ` Roger Pau Monne
2016-09-29 16:14       ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 09/30] x86/vtd: fix and simplify mapping RMRR regions Roger Pau Monne
2016-09-29 14:18   ` Jan Beulich
2016-09-30 11:27     ` Roger Pau Monne
2016-09-30 13:21       ` Jan Beulich
2016-09-30 15:02         ` Roger Pau Monne
2016-09-30 15:09           ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 10/30] xen/x86: allow the emulated APICs to be enbled for the hardware domain Roger Pau Monne
2016-09-29 14:26   ` Jan Beulich
2016-09-30 15:44     ` Roger Pau Monne
2016-09-27 15:57 ` [PATCH v2 11/30] xen/x86: split Dom0 build into PV and PVHv2 Roger Pau Monne
2016-09-30 15:03   ` Jan Beulich
2016-10-03 10:09     ` Roger Pau Monne
2016-10-04  6:54       ` Jan Beulich
2016-10-04  7:09         ` Andrew Cooper
2016-09-27 15:57 ` [PATCH v2 12/30] xen/x86: make print_e820_memory_map global Roger Pau Monne
2016-09-30 15:04   ` Jan Beulich
2016-10-03 16:23     ` Roger Pau Monne
2016-10-04  6:47       ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 13/30] xen: introduce a new format specifier to print sizes in human-readable form Roger Pau Monne
2016-09-28  8:24   ` Juergen Gross
2016-09-28 11:56     ` Roger Pau Monne
2016-09-28 12:01       ` Andrew Cooper
2016-10-03  8:36   ` Paul Durrant
2016-10-11 10:27   ` Roger Pau Monne
2016-09-27 15:57 ` [PATCH v2 14/30] xen/mm: add a ceil sufix to current page calculation routine Roger Pau Monne
2016-09-30 15:20   ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 15/30] xen/x86: populate PVHv2 Dom0 physical memory map Roger Pau Monne
2016-09-30 15:52   ` Jan Beulich [this message]
2016-10-04  9:12     ` Roger Pau Monne
2016-10-04 11:16       ` Jan Beulich
2016-10-11 14:01         ` Roger Pau Monne
2016-10-12 11:51           ` Jan Beulich
2016-10-11 14:06     ` Roger Pau Monne
2016-10-12 11:58       ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 16/30] xen/x86: parse Dom0 kernel for PVHv2 Roger Pau Monne
2016-10-06 15:14   ` Jan Beulich
2016-10-11 15:02     ` Roger Pau Monne
2016-09-27 15:57 ` [PATCH v2 17/30] xen/x86: setup PVHv2 Dom0 CPUs Roger Pau Monne
2016-10-06 15:20   ` Jan Beulich
2016-10-12 11:06     ` Roger Pau Monne
2016-10-12 11:32       ` Andrew Cooper
2016-10-12 12:02       ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 18/30] xen/x86: setup PVHv2 Dom0 ACPI tables Roger Pau Monne
2016-10-06 15:40   ` Jan Beulich
2016-10-06 15:48     ` Andrew Cooper
2016-10-12 15:35     ` Roger Pau Monne
2016-10-12 15:55       ` Jan Beulich
2016-10-26 11:35         ` Roger Pau Monne
2016-10-26 14:10           ` Jan Beulich
2016-10-26 15:08             ` Roger Pau Monne
2016-10-26 15:16               ` Jan Beulich
2016-10-26 16:03                 ` Roger Pau Monne
2016-10-27  7:25                   ` Jan Beulich
2016-10-27 11:08                     ` Roger Pau Monne
2016-10-26 17:14                 ` Boris Ostrovsky
2016-10-27  7:27                   ` Jan Beulich
2016-10-27 11:13                   ` Roger Pau Monne
2016-10-27 11:25                     ` Jan Beulich
2016-10-27 13:51                     ` Boris Ostrovsky
2016-10-27 14:02                       ` Jan Beulich
2016-10-27 14:15                         ` Boris Ostrovsky
2016-10-27 14:30                           ` Jan Beulich
2016-10-27 14:40                             ` Boris Ostrovsky
2016-10-27 15:04                               ` Roger Pau Monne
2016-10-27 15:20                                 ` Jan Beulich
2016-10-27 15:37                                   ` Roger Pau Monne
2016-10-28 13:51                                 ` Boris Ostrovsky
2016-09-27 15:57 ` [PATCH v2 19/30] xen/dcpi: add a dpci passthrough handler for hardware domain Roger Pau Monne
2016-10-03  9:02   ` Paul Durrant
2016-10-06 14:31     ` Roger Pau Monne
2016-10-06 15:44   ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 20/30] xen/x86: add the basic infrastructure to import QEMU passthrough code Roger Pau Monne
2016-10-03  9:54   ` Paul Durrant
2016-10-06 15:08     ` Roger Pau Monne
2016-10-06 15:52       ` Lars Kurth
2016-10-07  9:13       ` Jan Beulich
2016-10-06 15:47   ` Jan Beulich
2016-10-10 12:41   ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 21/30] xen/pci: split code to size BARs from pci_add_device Roger Pau Monne
2016-10-06 16:00   ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 22/30] xen/x86: support PVHv2 Dom0 BAR remapping Roger Pau Monne
2016-10-03 10:10   ` Paul Durrant
2016-10-06 15:25     ` Roger Pau Monne
2016-09-27 15:57 ` [PATCH v2 23/30] xen/x86: route legacy PCI interrupts to Dom0 Roger Pau Monne
2016-10-10 13:37   ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 24/30] x86/vmsi: add MSI emulation for hardware domain Roger Pau Monne
2016-09-27 15:57 ` [PATCH v2 25/30] xen/x86: add all PCI devices to PVHv2 Dom0 Roger Pau Monne
2016-10-10 13:44   ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 26/30] xen/x86: add PCIe emulation Roger Pau Monne
2016-10-03 10:46   ` Paul Durrant
2016-10-06 15:53     ` Roger Pau Monne
2016-10-10 13:57   ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 27/30] x86/msixtbl: disable MSI-X intercepts for domains without an ioreq server Roger Pau Monne
2016-10-10 14:18   ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 28/30] xen/x86: add MSI-X emulation to PVHv2 Dom0 Roger Pau Monne
2016-10-03 10:57   ` Paul Durrant
2016-10-06 15:58     ` Roger Pau Monne
2016-10-10 16:15   ` Jan Beulich
2016-09-27 15:57 ` [PATCH v2 29/30] xen/x86: allow PVHv2 to perform foreign memory mappings Roger Pau Monne
2016-09-30 17:36   ` George Dunlap
2016-10-10 14:21   ` Jan Beulich
2016-10-10 14:27     ` George Dunlap
2016-10-10 14:50       ` Jan Beulich
2016-10-10 14:58         ` George Dunlap
2016-09-27 15:57 ` [PATCH v2 30/30] xen: allow setting the store pfn HVM parameter Roger Pau Monne
2016-10-03 11:01   ` Paul Durrant
2016-09-28 12:22 ` [PATCH v2 00/30] PVHv2 Dom0 Roger Pau Monne

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=57EEA6780200007800114683@prv-mh.provo.novell.com \
    --to=jbeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=roger.pau@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.