All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] xen/tools: Add 64 bits big bar support
@ 2012-08-15  6:54 Xudong Hao
  2012-08-15  6:54 ` [PATCH 3/3] qemu-xen: Add 64 bits big bar support on qemu xen Xudong Hao
                   ` (3 more replies)
  0 siblings, 4 replies; 21+ messages in thread
From: Xudong Hao @ 2012-08-15  6:54 UTC (permalink / raw)
  To: xen-devel; +Cc: Xudong Hao, ian.jackson, Xiantao Zhang

Currently it is assumed that PCI device BARs are mapped below 4G of memory. If there is a device whose BAR size is larger than 4G, it must be mapped at an address above 4G. This patch enables 64-bit big BAR support in hvmloader.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Xudong Hao <xudong.hao@intel.com>

diff -r 663eb766cdde tools/firmware/hvmloader/config.h
--- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012 +0200
+++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012 +0800
@@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
 /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
 #define PCI_MEM_START       0xf0000000
 #define PCI_MEM_END         0xfc000000
+#define PCI_HIGH_MEM_START  0xa000000000ULL
+#define PCI_HIGH_MEM_END    0xf000000000ULL
+#define PCI_MIN_MMIO_ADDR   0x80000000
+
 extern unsigned long pci_mem_start, pci_mem_end;
 
 
diff -r 663eb766cdde tools/firmware/hvmloader/pci.c
--- a/tools/firmware/hvmloader/pci.c	Tue Jul 24 17:02:04 2012 +0200
+++ b/tools/firmware/hvmloader/pci.c	Thu Jul 26 15:40:01 2012 +0800
@@ -31,24 +31,33 @@
 unsigned long pci_mem_start = PCI_MEM_START;
 unsigned long pci_mem_end = PCI_MEM_END;
 
+uint64_t pci_high_mem_start = PCI_HIGH_MEM_START;
+uint64_t pci_high_mem_end = PCI_HIGH_MEM_END;
+
 enum virtual_vga virtual_vga = VGA_none;
 unsigned long igd_opregion_pgbase = 0;
 
 void pci_setup(void)
 {
-    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
+    uint8_t is_64bar, using_64bar, bar64_relocate = 0;
+    uint32_t devfn, bar_reg, cmd, bar_data, bar_data_upper;
+    uint64_t base, bar_sz, bar_sz_upper, mmio_total = 0;
     uint32_t vga_devfn = 256;
     uint16_t class, vendor_id, device_id;
     unsigned int bar, pin, link, isa_irq;
+    int64_t mmio_left;
 
     /* Resources assignable to PCI devices via BARs. */
     struct resource {
-        uint32_t base, max;
-    } *resource, mem_resource, io_resource;
+        uint64_t base, max;
+    } *resource, mem_resource, high_mem_resource, io_resource;
 
     /* Create a list of device BARs in descending order of size. */
     struct bars {
-        uint32_t devfn, bar_reg, bar_sz;
+        uint32_t is_64bar;
+        uint32_t devfn;
+        uint32_t bar_reg;
+        uint64_t bar_sz;
     } *bars = (struct bars *)scratch_start;
     unsigned int i, nr_bars = 0;
 
@@ -133,23 +142,35 @@ void pci_setup(void)
         /* Map the I/O memory and port resources. */
         for ( bar = 0; bar < 7; bar++ )
         {
+            bar_sz_upper = 0;
             bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
             if ( bar == 6 )
                 bar_reg = PCI_ROM_ADDRESS;
 
             bar_data = pci_readl(devfn, bar_reg);
+            is_64bar = !!((bar_data & (PCI_BASE_ADDRESS_SPACE |
+                         PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
+                         (PCI_BASE_ADDRESS_SPACE_MEMORY |
+                         PCI_BASE_ADDRESS_MEM_TYPE_64));
             pci_writel(devfn, bar_reg, ~0);
             bar_sz = pci_readl(devfn, bar_reg);
             pci_writel(devfn, bar_reg, bar_data);
+
+            if (is_64bar) {
+                bar_data_upper = pci_readl(devfn, bar_reg + 4);
+                pci_writel(devfn, bar_reg + 4, ~0);
+                bar_sz_upper = pci_readl(devfn, bar_reg + 4);
+                pci_writel(devfn, bar_reg + 4, bar_data_upper);
+                bar_sz = (bar_sz_upper << 32) | bar_sz;
+            }
+            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
+                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
+                       0xfffffffffffffff0 :
+                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
+            bar_sz &= ~(bar_sz - 1);
             if ( bar_sz == 0 )
                 continue;
 
-            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
-                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
-                       PCI_BASE_ADDRESS_MEM_MASK :
-                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
-            bar_sz &= ~(bar_sz - 1);
-
             for ( i = 0; i < nr_bars; i++ )
                 if ( bars[i].bar_sz < bar_sz )
                     break;
@@ -157,6 +178,7 @@ void pci_setup(void)
             if ( i != nr_bars )
                 memmove(&bars[i+1], &bars[i], (nr_bars-i) * sizeof(*bars));
 
+            bars[i].is_64bar = is_64bar;
             bars[i].devfn   = devfn;
             bars[i].bar_reg = bar_reg;
             bars[i].bar_sz  = bar_sz;
@@ -167,11 +189,8 @@ void pci_setup(void)
 
             nr_bars++;
 
-            /* Skip the upper-half of the address for a 64-bit BAR. */
-            if ( (bar_data & (PCI_BASE_ADDRESS_SPACE |
-                              PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == 
-                 (PCI_BASE_ADDRESS_SPACE_MEMORY | 
-                  PCI_BASE_ADDRESS_MEM_TYPE_64) )
+            /*The upper half is already calculated, skip it! */
+            if (is_64bar)
                 bar++;
         }
 
@@ -193,10 +212,14 @@ void pci_setup(void)
         pci_writew(devfn, PCI_COMMAND, cmd);
     }
 
-    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
-            ((pci_mem_start << 1) != 0) )
+    while ( mmio_total > (pci_mem_end - pci_mem_start) && pci_mem_start )
         pci_mem_start <<= 1;
 
+    if (!pci_mem_start) {
+        bar64_relocate = 1;
+        pci_mem_start = PCI_MIN_MMIO_ADDR;
+    }
+
     /* Relocate RAM that overlaps PCI space (in 64k-page chunks). */
     while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend )
     {
@@ -218,11 +241,15 @@ void pci_setup(void)
         hvm_info->high_mem_pgend += nr_pages;
     }
 
+    high_mem_resource.base = pci_high_mem_start; 
+    high_mem_resource.max = pci_high_mem_end;
     mem_resource.base = pci_mem_start;
     mem_resource.max = pci_mem_end;
     io_resource.base = 0xc000;
     io_resource.max = 0x10000;
 
+    mmio_left = pci_mem_end - pci_mem_end;
+
     /* Assign iomem and ioport resources in descending order of size. */
     for ( i = 0; i < nr_bars; i++ )
     {
@@ -230,13 +257,21 @@ void pci_setup(void)
         bar_reg = bars[i].bar_reg;
         bar_sz  = bars[i].bar_sz;
 
+        using_64bar = bars[i].is_64bar && bar64_relocate && (mmio_left < bar_sz);
         bar_data = pci_readl(devfn, bar_reg);
 
         if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
              PCI_BASE_ADDRESS_SPACE_MEMORY )
         {
-            resource = &mem_resource;
-            bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
+            if (using_64bar) {
+                resource = &high_mem_resource;
+                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
+            } 
+            else {
+                resource = &mem_resource;
+                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
+            }
+            mmio_left -= bar_sz;
         }
         else
         {
@@ -244,13 +279,14 @@ void pci_setup(void)
             bar_data &= ~PCI_BASE_ADDRESS_IO_MASK;
         }
 
-        base = (resource->base + bar_sz - 1) & ~(bar_sz - 1);
-        bar_data |= base;
+        base = (resource->base  + bar_sz - 1) & ~(uint64_t)(bar_sz - 1);
+        bar_data |= (uint32_t)base;
+        bar_data_upper = (uint32_t)(base >> 32);
         base += bar_sz;
 
         if ( (base < resource->base) || (base > resource->max) )
         {
-            printf("pci dev %02x:%x bar %02x size %08x: no space for "
+            printf("pci dev %02x:%x bar %02x size %llx: no space for "
                    "resource!\n", devfn>>3, devfn&7, bar_reg, bar_sz);
             continue;
         }
@@ -258,7 +294,9 @@ void pci_setup(void)
         resource->base = base;
 
         pci_writel(devfn, bar_reg, bar_data);
-        printf("pci dev %02x:%x bar %02x size %08x: %08x\n",
+        if (using_64bar)
+            pci_writel(devfn, bar_reg + 4, bar_data_upper);
+        printf("pci dev %02x:%x bar %02x size %llx: %08x\n",
                devfn>>3, devfn&7, bar_reg, bar_sz, bar_data);
 
         /* Now enable the memory or I/O mapping. */

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 3/3] qemu-xen: Add 64 bits big bar support on qemu xen
  2012-08-15  6:54 [PATCH 1/3] xen/tools: Add 64 bits big bar support Xudong Hao
@ 2012-08-15  6:54 ` Xudong Hao
  2012-08-15  8:18 ` [PATCH 1/3] xen/tools: Add 64 bits big bar support Pasi Kärkkäinen
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 21+ messages in thread
From: Xudong Hao @ 2012-08-15  6:54 UTC (permalink / raw)
  To: xen-devel; +Cc: Xudong Hao, ian.jackson, Xiantao Zhang

Currently it is assumed that PCI device BARs are mapped below 4G of memory. If there is a device whose BAR size is larger than 4G, it must be mapped at an address above 4G. This patch enables 64-bit big BAR support in qemu-xen.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Xudong Hao <xudong.hao@intel.com>

diff --git a/hw/pass-through.c b/hw/pass-through.c
index 6e396e3..9087fa5 100644
--- a/hw/pass-through.c
+++ b/hw/pass-through.c
@@ -1117,13 +1117,13 @@ uint8_t pci_intx(struct pt_dev *ptdev)
 }
 
 static int _pt_iomem_helper(struct pt_dev *assigned_device, int i,
-                            uint32_t e_base, uint32_t e_size, int op)
+                            unsigned long e_base, unsigned long e_size, int op)
 {
     if ( has_msix_mapping(assigned_device, i) )
     {
-        uint32_t msix_last_pfn = (assigned_device->msix->mmio_base_addr - 1 +
+        unsigned long msix_last_pfn = (assigned_device->msix->mmio_base_addr - 1 +
             assigned_device->msix->total_entries * 16) >> XC_PAGE_SHIFT;
-        uint32_t bar_last_pfn = (e_base + e_size - 1) >> XC_PAGE_SHIFT;
+        unsigned long bar_last_pfn = (e_base + e_size - 1) >> XC_PAGE_SHIFT;
         int ret = 0;
 
         if ( assigned_device->msix->table_off )
@@ -1159,26 +1159,33 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
                          int type)
 {
     struct pt_dev *assigned_device  = (struct pt_dev *)d;
-    uint32_t old_ebase = assigned_device->bases[i].e_physbase;
+    uint64_t e_phys64 = e_phys, e_size64 = e_size, old_ebase = assigned_device->bases[i].e_physbase;
     int first_map = ( assigned_device->bases[i].e_size == 0 );
+    PCIIORegion *r = &d->io_regions[i];
     int ret = 0;
 
-    assigned_device->bases[i].e_physbase = e_phys;
-    assigned_device->bases[i].e_size= e_size;
-
-    PT_LOG("e_phys=%08x maddr=%lx type=%d len=%d index=%d first_map=%d\n",
-        e_phys, (unsigned long)assigned_device->bases[i].access.maddr,
-        type, e_size, i, first_map);
-
-    if ( e_size == 0 )
+    if ( assigned_device->bases[i + 1].bar_flag == PT_BAR_FLAG_UPPER) {
+        uint64_t upper_addr = (r + 1)->addr;
+        uint64_t upper_size = (r + 1)->size;
+        e_phys64 += upper_addr << 32;
+        e_size64 += upper_size << 32;
+    } 
+    PT_LOG("e_phys64=%lx maddr=%lx type=%d len=%lx index=%d first_map=%d\n",
+        e_phys64, (unsigned long)assigned_device->bases[i].access.maddr,
+        type, e_size64, i, first_map);
+   
+    if(e_size64== 0 || !valid_addr(e_phys64))
         return;
 
+    assigned_device->bases[i].e_physbase = e_phys64;
+    assigned_device->bases[i].e_size= e_size64;
+
     if ( !first_map && old_ebase != -1 )
     {
         if ( has_msix_mapping(assigned_device, i) )
             unregister_iomem(assigned_device->msix->mmio_base_addr);
 
-        ret = _pt_iomem_helper(assigned_device, i, old_ebase, e_size,
+        ret = _pt_iomem_helper(assigned_device, i, old_ebase, e_size64,
                                DPCI_REMOVE_MAPPING);
         if ( ret != 0 )
         {
@@ -1188,7 +1195,7 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
     }
 
     /* map only valid guest address */
-    if (e_phys != -1)
+    if (e_phys64 != -1)
     {
         if ( has_msix_mapping(assigned_device, i) )
         {
@@ -1202,7 +1209,7 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
                  assigned_device->msix->mmio_index);
         }
 
-        ret = _pt_iomem_helper(assigned_device, i, e_phys, e_size,
+        ret = _pt_iomem_helper(assigned_device, i, e_phys64, e_size64,
                                DPCI_ADD_MAPPING);
         if ( ret != 0 )
         {
@@ -1210,7 +1217,7 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
             return;
         }
 
-        if ( old_ebase != e_phys && old_ebase != -1 )
+        if ( old_ebase != e_phys64 && old_ebase != -1 )
             pt_msix_update_remap(assigned_device, i);
     }
 }
@@ -1853,7 +1860,7 @@ exit:
 
 static void pt_libpci_fixup(struct pci_dev *dev)
 {
-#if !defined(PCI_LIB_VERSION) || PCI_LIB_VERSION < 0x030100
+#if !defined(PCI_LIB_VERSION) || PCI_LIB_VERSION <= 0x030100
     int i;
     FILE *fp;
     char path[PATH_MAX], buf[256];
@@ -1907,7 +1914,7 @@ static int pt_dev_is_virtfn(struct pci_dev *dev)
 
 static int pt_register_regions(struct pt_dev *assigned_device)
 {
-    int i = 0;
+    int i = 0, current_bar, bar_flag;
     uint32_t bar_data = 0;
     struct pci_dev *pci_dev = assigned_device->pci_dev;
     PCIDevice *d = &assigned_device->dev;
@@ -1916,6 +1923,7 @@ static int pt_register_regions(struct pt_dev *assigned_device)
     /* Register PIO/MMIO BARs */
     for ( i = 0; i < PCI_BAR_ENTRIES; i++ )
     {
+        current_bar = i;
         if ( pt_pci_base_addr(pci_dev->base_addr[i]) )
         {
             assigned_device->bases[i].e_physbase =
@@ -1928,18 +1936,26 @@ static int pt_register_regions(struct pt_dev *assigned_device)
                 pci_register_io_region((PCIDevice *)assigned_device, i,
                     (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_IO,
                     pt_ioport_map);
-            else if ( pci_dev->base_addr[i] & PCI_ADDRESS_SPACE_MEM_PREFETCH )
+            else if ( pci_dev->base_addr[i] & PCI_ADDRESS_SPACE_MEM_64BIT) {
+                bar_flag = pci_dev->base_addr[i] & 0xf;
                 pci_register_io_region((PCIDevice *)assigned_device, i,
-                    (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_MEM_PREFETCH,
+                    (uint32_t)pci_dev->size[i], bar_flag,
                     pt_iomem_map);
-            else
-                pci_register_io_region((PCIDevice *)assigned_device, i,
-                    (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_MEM,
+                pci_register_io_region((PCIDevice *)assigned_device, i + 1,
+                    (uint32_t)(pci_dev->size[i] >> 32), PCI_ADDRESS_SPACE_MEM,
                     pt_iomem_map);
-
-            PT_LOG("IO region registered (size=0x%08x base_addr=0x%08x)\n",
-                (uint32_t)(pci_dev->size[i]),
-                (uint32_t)(pci_dev->base_addr[i]));
+                /* skip upper half. */
+                i++;
+            } 
+            else {
+                bar_flag = pci_dev->base_addr[i] & 0xf;
+                pci_register_io_region((PCIDevice *)assigned_device, i,
+                        (uint32_t)(pci_dev->size[i]), bar_flag,
+                        pt_iomem_map);
+            }
+            PT_LOG("IO region registered (bar:%d,size=0x%lx base_addr=0x%lx)\n", current_bar, 
+                    (pci_dev->size[current_bar]),
+                    (pci_dev->base_addr[current_bar]));
         }
     }
 
@@ -1984,7 +2000,7 @@ static void pt_unregister_regions(struct pt_dev *assigned_device)
 
         type = d->io_regions[i].type;
 
-        if ( type == PCI_ADDRESS_SPACE_MEM ||
+        if ( type == PCI_ADDRESS_SPACE_MEM || type == PCI_ADDRESS_SPACE_MEM_64BIT ||
              type == PCI_ADDRESS_SPACE_MEM_PREFETCH )
         {
             ret = _pt_iomem_helper(assigned_device, i,
@@ -2117,6 +2133,7 @@ int pt_pci_host_write(struct pci_dev *pci_dev, u32 addr, u32 val, int len)
     return ret;
 }
 
+static uint64_t pt_get_bar_size(PCIIORegion *r);
 /* parse BAR */
 static int pt_bar_reg_parse(
         struct pt_dev *ptdev, struct pt_reg_info_tbl *reg)
@@ -2145,7 +2162,7 @@ static int pt_bar_reg_parse(
 
     /* check unused BAR */
     r = &d->io_regions[index];
-    if (!r->size)
+    if (!pt_get_bar_size(r))
         goto out;
 
     /* for ExpROM BAR */
@@ -2165,6 +2182,86 @@ out:
     return bar_flag;
 }
 
+static bool is_64bit_bar(PCIIORegion *r)
+{
+    return !!(r->type & PCI_ADDRESS_SPACE_MEM_64BIT);
+}
+
+static uint64_t pt_get_bar_size(PCIIORegion *r)
+{
+    if (is_64bit_bar(r))
+    {
+        uint64_t size64;
+        size64 = (r + 1)->size; 
+        size64 <<= 32; 
+        size64 += r->size;
+        return size64; 
+    }
+    return r->size; 
+}
+
+static uint64_t pt_get_bar_base(PCIIORegion *r)
+{
+    if (is_64bit_bar(r))
+    {
+        uint64_t base64;
+
+        base64 = (r + 1)->addr; 
+        base64 <<= 32; 
+        base64 += r->addr;
+        return base64; 
+    }
+    return r->addr; 
+}
+
+int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint64_t addr,
+                        uint64_t size, uint8_t type)
+{
+    PCIDevice *devices = NULL;
+    PCIIORegion *r;
+    int ret = 0;
+    int i, j;
+
+    /* check Overlapped to Base Address */
+    for (i=0; i<256; i++)
+    {
+        if ( !(devices = bus->devices[i]) )
+            continue;
+
+        /* skip itself */
+        if (devices->devfn == devfn)
+            continue;
+        
+        for (j=0; j<PCI_NUM_REGIONS; j++)
+        {
+            r = &devices->io_regions[j];
+
+            /* skip different resource type, but don't skip when
+             * prefetch and non-prefetch memory are compared.
+             */
+            if (type != r->type)
+            {
+                if (type == PCI_ADDRESS_SPACE_IO ||
+                    r->type == PCI_ADDRESS_SPACE_IO)
+                    continue;
+            }
+
+            if ((addr < (pt_get_bar_base(r) + pt_get_bar_size(r))) && ((addr + size) > pt_get_bar_base(r)))
+            {
+                printf("Overlapped to device[%02x:%02x.%x][Region:%d]"
+                    "[Address:%lxh][Size:%lxh]\n", bus->bus_num,
+                    (devices->devfn >> 3) & 0x1F, (devices->devfn & 0x7),
+                    j, pt_get_bar_base(r), pt_get_bar_size(r));
+                ret = 1;
+                goto out;
+            }
+        }
+    }
+
+out:
+    return ret;
+}
+
 /* mapping BAR */
 static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable,
     int mem_enable)
@@ -2174,13 +2271,13 @@ static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable,
     struct pt_reg_grp_tbl *reg_grp_entry = NULL;
     struct pt_reg_tbl *reg_entry = NULL;
     struct pt_region *base = NULL;
-    uint32_t r_size = 0, r_addr = -1;
+    uint64_t r_size = 0, r_addr = -1;
     int ret = 0;
 
     r = &dev->io_regions[bar];
-
+    
     /* check valid region */
-    if (!r->size)
+    if (!pt_get_bar_size(r))
         return;
 
     base = &ptdev->bases[bar];
@@ -2190,12 +2287,13 @@ static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable,
            return;
 
     /* copy region address to temporary */
-    r_addr = r->addr;
+    r_addr = pt_get_bar_base(r);
 
     /* need unmapping in case I/O Space or Memory Space disable */
     if (((base->bar_flag == PT_BAR_FLAG_IO) && !io_enable ) ||
         ((base->bar_flag == PT_BAR_FLAG_MEM) && !mem_enable ))
         r_addr = -1;
+
     if ( (bar == PCI_ROM_SLOT) && (r_addr != -1) )
     {
         reg_grp_entry = pt_find_reg_grp(ptdev, PCI_ROM_ADDRESS);
@@ -2208,26 +2306,27 @@ static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable,
     }
 
     /* prevent guest software mapping memory resource to 00000000h */
-    if ((base->bar_flag == PT_BAR_FLAG_MEM) && (r_addr == 0))
+    if ((base->bar_flag == PT_BAR_FLAG_MEM) && (pt_get_bar_base(r) == 0))
         r_addr = -1;
 
     /* align resource size (memory type only) */
-    r_size = r->size;
+    r_size = pt_get_bar_size(r);
     PT_GET_EMUL_SIZE(base->bar_flag, r_size);
 
     /* check overlapped address */
     ret = pt_chk_bar_overlap(dev->bus, dev->devfn,
                     r_addr, r_size, r->type);
     if (ret > 0)
-        PT_LOG_DEV(dev, "Warning: [Region:%d][Address:%08xh]"
-            "[Size:%08xh] is overlapped.\n", bar, r_addr, r_size);
+        PT_LOG("Warning: ptdev[%02x:%02x.%x][Region:%d][Address:%lxh]"
+            "[Size:%lxh] is overlapped.\n", pci_bus_num(dev->bus),
+             PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), bar, r_addr, r_size);
 
     /* check whether we need to update the mapping or not */
     if (r_addr != ptdev->bases[bar].e_physbase)
     {
         /* mapping BAR */
-        r->map_func((PCIDevice *)ptdev, bar, r_addr,
-                     r_size, r->type);
+        r->map_func((PCIDevice *)ptdev, bar, (uint32_t)r_addr,
+                     (uint32_t)r_size, r->type);
     }
 }
 
@@ -2823,7 +2922,7 @@ static uint32_t pt_bar_reg_init(struct pt_dev *ptdev,
     }
 
     /* set initial guest physical base address to -1 */
-    ptdev->bases[index].e_physbase = -1;
+    ptdev->bases[index].e_physbase = -1UL;
 
     /* set BAR flag */
     ptdev->bases[index].bar_flag = pt_bar_reg_parse(ptdev, reg);
@@ -3506,7 +3605,10 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
     {
     case PT_BAR_FLAG_MEM:
         bar_emu_mask = PT_BAR_MEM_EMU_MASK;
-        bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1);
+        if (!r_size)
+            bar_ro_mask = PT_BAR_ALLF;
+        else
+            bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1);
         break;
     case PT_BAR_FLAG_IO:
         bar_emu_mask = PT_BAR_IO_EMU_MASK;
@@ -3514,7 +3616,10 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
         break;
     case PT_BAR_FLAG_UPPER:
         bar_emu_mask = PT_BAR_ALLF;
-        bar_ro_mask = 0;    /* all upper 32bit are R/W */
+        if (!r_size)
+            bar_ro_mask = 0; 
+        else
+            bar_ro_mask = r_size - 1;
         break;
     default:
         break;
@@ -3527,6 +3632,7 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
     /* check whether we need to update the virtual region address or not */
     switch (ptdev->bases[index].bar_flag)
     {
+    case PT_BAR_FLAG_UPPER:
     case PT_BAR_FLAG_MEM:
         /* nothing to do */
         break;
@@ -3550,42 +3656,6 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
             goto exit;
         }
         break;
-    case PT_BAR_FLAG_UPPER:
-        if (cfg_entry->data)
-        {
-            if (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask))
-            {
-                PT_LOG_DEV(d, "Warning: Guest attempt to set high MMIO Base Address. "
-                    "Ignore mapping. "
-                    "[Offset:%02xh][High Address:%08xh]\n",
-                    reg->offset, cfg_entry->data);
-            }
-            /* clear lower address */
-            d->io_regions[index-1].addr = -1;
-        }
-        else
-        {
-            /* find lower 32bit BAR */
-            prev_offset = (reg->offset - 4);
-            reg_grp_entry = pt_find_reg_grp(ptdev, prev_offset);
-            if (reg_grp_entry)
-            {
-                reg_entry = pt_find_reg(reg_grp_entry, prev_offset);
-                if (reg_entry)
-                    /* restore lower address */
-                    d->io_regions[index-1].addr = reg_entry->data;
-                else
-                    return -1;
-            }
-            else
-                return -1;
-        }
-
-        /* never mapping the 'empty' upper region,
-         * because we'll do it enough for the lower region.
-         */
-        r->addr = -1;
-        goto exit;
     default:
         break;
     }
@@ -3599,7 +3669,7 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
      * rather than mmio. Remapping this value to mmio should be prevented.
      */
 
-    if ( cfg_entry->data != writable_mask )
+    if ( cfg_entry->data != writable_mask || !cfg_entry->data)
         r->addr = cfg_entry->data;
 
 exit:
diff --git a/hw/pass-through.h b/hw/pass-through.h
index d7d837c..b651192 100644
--- a/hw/pass-through.h
+++ b/hw/pass-through.h
@@ -158,10 +158,13 @@ enum {
 #define PT_MERGE_VALUE(value, data, val_mask) \
     (((value) & (val_mask)) | ((data) & ~(val_mask)))
 
+#define valid_addr(addr) \
+    (addr >= 0x80000000 && !(addr & 0xfff))
+
 struct pt_region {
     /* Virtual phys base & size */
-    uint32_t e_physbase;
-    uint32_t e_size;
+    uint64_t e_physbase;
+    uint64_t e_size;
     /* Index of region in qemu */
     uint32_t memory_index;
     /* BAR flag */
diff --git a/hw/pci.c b/hw/pci.c
index f051de1..839863d 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -39,24 +39,6 @@ extern int igd_passthru;
 
 //#define DEBUG_PCI
 
-struct PCIBus {
-    int bus_num;
-    int devfn_min;
-    pci_set_irq_fn set_irq;
-    pci_map_irq_fn map_irq;
-    uint32_t config_reg; /* XXX: suppress */
-    /* low level pic */
-    SetIRQFunc *low_set_irq;
-    qemu_irq *irq_opaque;
-    PCIDevice *devices[256];
-    PCIDevice *parent_dev;
-    PCIBus *next;
-    /* The bus IRQ state is the logical OR of the connected devices.
-       Keep a count of the number of devices with raised IRQs.  */
-    int nirq;
-    int irq_count[];
-};
-
 static void pci_update_mappings(PCIDevice *d);
 static void pci_set_irq(void *opaque, int irq_num, int level);
 
@@ -938,50 +920,3 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
     return s->bus;
 }
 
-int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint32_t addr,
-                        uint32_t size, uint8_t type)
-{
-    PCIDevice *devices = NULL;
-    PCIIORegion *r;
-    int ret = 0;
-    int i, j;
-
-    /* check Overlapped to Base Address */
-    for (i=0; i<256; i++)
-    {
-        if ( !(devices = bus->devices[i]) )
-            continue;
-
-        /* skip itself */
-        if (devices->devfn == devfn)
-            continue;
-        
-        for (j=0; j<PCI_NUM_REGIONS; j++)
-        {
-            r = &devices->io_regions[j];
-
-            /* skip different resource type, but don't skip when
-             * prefetch and non-prefetch memory are compared.
-             */
-            if (type != r->type)
-            {
-                if (type == PCI_ADDRESS_SPACE_IO ||
-                    r->type == PCI_ADDRESS_SPACE_IO)
-                    continue;
-            }
-
-            if ((addr < (r->addr + r->size)) && ((addr + size) > r->addr))
-            {
-                printf("Overlapped to device[%02x:%02x.%x][Region:%d]"
-                    "[Address:%08xh][Size:%08xh]\n", bus->bus_num,
-                    (devices->devfn >> 3) & 0x1F, (devices->devfn & 0x7),
-                    j, r->addr, r->size);
-                ret = 1;
-                goto out;
-            }
-        }
-    }
-
-out:
-    return ret;
-}
diff --git a/hw/pci.h b/hw/pci.h
index edc58b6..a036cc3 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -137,6 +137,7 @@ typedef int PCIUnregisterFunc(PCIDevice *pci_dev);
 
 #define PCI_ADDRESS_SPACE_MEM		0x00
 #define PCI_ADDRESS_SPACE_IO		0x01
+#define PCI_ADDRESS_SPACE_MEM_64BIT     0x04
 #define PCI_ADDRESS_SPACE_MEM_PREFETCH	0x08
 
 typedef struct PCIIORegion {
@@ -240,8 +241,8 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
-int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint32_t addr,
-                       uint32_t size, uint8_t type);
+int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint64_t addr,
+                       uint64_t size, uint8_t type);
 
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len);
@@ -360,5 +361,23 @@ void pci_bridge_write_config(PCIDevice *d,
                              uint32_t address, uint32_t val, int len);
 PCIBus *pci_register_secondary_bus(PCIDevice *dev, pci_map_irq_fn map_irq);
 
+struct PCIBus {
+    int bus_num;
+    int devfn_min;
+    pci_set_irq_fn set_irq;
+    pci_map_irq_fn map_irq;
+    uint32_t config_reg; /* XXX: suppress */
+    /* low level pic */
+    SetIRQFunc *low_set_irq;
+    qemu_irq *irq_opaque;
+    PCIDevice *devices[256];
+    PCIDevice *parent_dev;
+    PCIBus *next;
+    /* The bus IRQ state is the logical OR of the connected devices.
+       Keep a count of the number of devices with raised IRQs.  */
+    int nirq;
+    int irq_count[];
+};
+
 
 #endif

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-15  6:54 [PATCH 1/3] xen/tools: Add 64 bits big bar support Xudong Hao
  2012-08-15  6:54 ` [PATCH 3/3] qemu-xen: Add 64 bits big bar support on qemu xen Xudong Hao
@ 2012-08-15  8:18 ` Pasi Kärkkäinen
  2012-08-15  9:17   ` Hao, Xudong
  2012-08-15  9:16 ` Jan Beulich
  2012-08-15 10:20 ` Stefano Stabellini
  3 siblings, 1 reply; 21+ messages in thread
From: Pasi Kärkkäinen @ 2012-08-15  8:18 UTC (permalink / raw)
  To: Xudong Hao; +Cc: ian.jackson, Xiantao Zhang, xen-devel

On Wed, Aug 15, 2012 at 02:54:41PM +0800, Xudong Hao wrote:
> Currently it is assumed PCI device BAR access < 4G memory. If there is such a
> device whose BAR size is larger than 4G, it must access > 4G memory address.
> This patch enable the 64bits big BAR support on hvmloader.
> 

Hello,

Do you have an example of such a PCI device with >4G BAR? 

Thanks,

-- Pasi

> Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
> Signed-off-by: Xudong Hao <xudong.hao@intel.com>
> 
> diff -r 663eb766cdde tools/firmware/hvmloader/config.h
> --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012 +0200
> +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012 +0800
> @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
>  /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
>  #define PCI_MEM_START       0xf0000000
>  #define PCI_MEM_END         0xfc000000
> +#define PCI_HIGH_MEM_START  0xa000000000ULL
> +#define PCI_HIGH_MEM_END    0xf000000000ULL
> +#define PCI_MIN_MMIO_ADDR   0x80000000
> +
>  extern unsigned long pci_mem_start, pci_mem_end;
>  
>  
> diff -r 663eb766cdde tools/firmware/hvmloader/pci.c
> --- a/tools/firmware/hvmloader/pci.c	Tue Jul 24 17:02:04 2012 +0200
> +++ b/tools/firmware/hvmloader/pci.c	Thu Jul 26 15:40:01 2012 +0800
> @@ -31,24 +31,33 @@
>  unsigned long pci_mem_start = PCI_MEM_START;
>  unsigned long pci_mem_end = PCI_MEM_END;
>  
> +uint64_t pci_high_mem_start = PCI_HIGH_MEM_START;
> +uint64_t pci_high_mem_end = PCI_HIGH_MEM_END;
> +
>  enum virtual_vga virtual_vga = VGA_none;
>  unsigned long igd_opregion_pgbase = 0;
>  
>  void pci_setup(void)
>  {
> -    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
> +    uint8_t is_64bar, using_64bar, bar64_relocate = 0;
> +    uint32_t devfn, bar_reg, cmd, bar_data, bar_data_upper;
> +    uint64_t base, bar_sz, bar_sz_upper, mmio_total = 0;
>      uint32_t vga_devfn = 256;
>      uint16_t class, vendor_id, device_id;
>      unsigned int bar, pin, link, isa_irq;
> +    int64_t mmio_left;
>  
>      /* Resources assignable to PCI devices via BARs. */
>      struct resource {
> -        uint32_t base, max;
> -    } *resource, mem_resource, io_resource;
> +        uint64_t base, max;
> +    } *resource, mem_resource, high_mem_resource, io_resource;
>  
>      /* Create a list of device BARs in descending order of size. */
>      struct bars {
> -        uint32_t devfn, bar_reg, bar_sz;
> +        uint32_t is_64bar;
> +        uint32_t devfn;
> +        uint32_t bar_reg;
> +        uint64_t bar_sz;
>      } *bars = (struct bars *)scratch_start;
>      unsigned int i, nr_bars = 0;
>  
> @@ -133,23 +142,35 @@ void pci_setup(void)
>          /* Map the I/O memory and port resources. */
>          for ( bar = 0; bar < 7; bar++ )
>          {
> +            bar_sz_upper = 0;
>              bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
>              if ( bar == 6 )
>                  bar_reg = PCI_ROM_ADDRESS;
>  
>              bar_data = pci_readl(devfn, bar_reg);
> +            is_64bar = !!((bar_data & (PCI_BASE_ADDRESS_SPACE |
> +                         PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> +                         (PCI_BASE_ADDRESS_SPACE_MEMORY |
> +                         PCI_BASE_ADDRESS_MEM_TYPE_64));
>              pci_writel(devfn, bar_reg, ~0);
>              bar_sz = pci_readl(devfn, bar_reg);
>              pci_writel(devfn, bar_reg, bar_data);
> +
> +            if (is_64bar) {
> +                bar_data_upper = pci_readl(devfn, bar_reg + 4);
> +                pci_writel(devfn, bar_reg + 4, ~0);
> +                bar_sz_upper = pci_readl(devfn, bar_reg + 4);
> +                pci_writel(devfn, bar_reg + 4, bar_data_upper);
> +                bar_sz = (bar_sz_upper << 32) | bar_sz;
> +            }
> +            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> +                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> +                       0xfffffffffffffff0 :
> +                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> +            bar_sz &= ~(bar_sz - 1);
>              if ( bar_sz == 0 )
>                  continue;
>  
> -            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> -                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> -                       PCI_BASE_ADDRESS_MEM_MASK :
> -                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> -            bar_sz &= ~(bar_sz - 1);
> -
>              for ( i = 0; i < nr_bars; i++ )
>                  if ( bars[i].bar_sz < bar_sz )
>                      break;
> @@ -157,6 +178,7 @@ void pci_setup(void)
>              if ( i != nr_bars )
>                  memmove(&bars[i+1], &bars[i], (nr_bars-i) * sizeof(*bars));
>  
> +            bars[i].is_64bar = is_64bar;
>              bars[i].devfn   = devfn;
>              bars[i].bar_reg = bar_reg;
>              bars[i].bar_sz  = bar_sz;
> @@ -167,11 +189,8 @@ void pci_setup(void)
>  
>              nr_bars++;
>  
> -            /* Skip the upper-half of the address for a 64-bit BAR. */
> -            if ( (bar_data & (PCI_BASE_ADDRESS_SPACE |
> -                              PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == 
> -                 (PCI_BASE_ADDRESS_SPACE_MEMORY | 
> -                  PCI_BASE_ADDRESS_MEM_TYPE_64) )
> +            /*The upper half is already calculated, skip it! */
> +            if (is_64bar)
>                  bar++;
>          }
>  
> @@ -193,10 +212,14 @@ void pci_setup(void)
>          pci_writew(devfn, PCI_COMMAND, cmd);
>      }
>  
> -    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
> -            ((pci_mem_start << 1) != 0) )
> +    while ( mmio_total > (pci_mem_end - pci_mem_start) && pci_mem_start )
>          pci_mem_start <<= 1;
>  
> +    if (!pci_mem_start) {
> +        bar64_relocate = 1;
> +        pci_mem_start = PCI_MIN_MMIO_ADDR;
> +    }
> +
>      /* Relocate RAM that overlaps PCI space (in 64k-page chunks). */
>      while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend )
>      {
> @@ -218,11 +241,15 @@ void pci_setup(void)
>          hvm_info->high_mem_pgend += nr_pages;
>      }
>  
> +    high_mem_resource.base = pci_high_mem_start; 
> +    high_mem_resource.max = pci_high_mem_end;
>      mem_resource.base = pci_mem_start;
>      mem_resource.max = pci_mem_end;
>      io_resource.base = 0xc000;
>      io_resource.max = 0x10000;
>  
> +    mmio_left = pci_mem_end - pci_mem_start;
> +
>      /* Assign iomem and ioport resources in descending order of size. */
>      for ( i = 0; i < nr_bars; i++ )
>      {
> @@ -230,13 +257,21 @@ void pci_setup(void)
>          bar_reg = bars[i].bar_reg;
>          bar_sz  = bars[i].bar_sz;
>  
> +        using_64bar = bars[i].is_64bar && bar64_relocate && (mmio_left < bar_sz);
>          bar_data = pci_readl(devfn, bar_reg);
>  
>          if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
>               PCI_BASE_ADDRESS_SPACE_MEMORY )
>          {
> -            resource = &mem_resource;
> -            bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> +            if (using_64bar) {
> +                resource = &high_mem_resource;
> +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> +            } 
> +            else {
> +                resource = &mem_resource;
> +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> +            }
> +            mmio_left -= bar_sz;
>          }
>          else
>          {
> @@ -244,13 +279,14 @@ void pci_setup(void)
>              bar_data &= ~PCI_BASE_ADDRESS_IO_MASK;
>          }
>  
> -        base = (resource->base + bar_sz - 1) & ~(bar_sz - 1);
> -        bar_data |= base;
> +        base = (resource->base  + bar_sz - 1) & ~(uint64_t)(bar_sz - 1);
> +        bar_data |= (uint32_t)base;
> +        bar_data_upper = (uint32_t)(base >> 32);
>          base += bar_sz;
>  
>          if ( (base < resource->base) || (base > resource->max) )
>          {
> -            printf("pci dev %02x:%x bar %02x size %08x: no space for "
> +            printf("pci dev %02x:%x bar %02x size %llx: no space for "
>                     "resource!\n", devfn>>3, devfn&7, bar_reg, bar_sz);
>              continue;
>          }
> @@ -258,7 +294,9 @@ void pci_setup(void)
>          resource->base = base;
>  
>          pci_writel(devfn, bar_reg, bar_data);
> -        printf("pci dev %02x:%x bar %02x size %08x: %08x\n",
> +        if (using_64bar)
> +            pci_writel(devfn, bar_reg + 4, bar_data_upper);
> +        printf("pci dev %02x:%x bar %02x size %llx: %08x\n",
>                 devfn>>3, devfn&7, bar_reg, bar_sz, bar_data);
>  
>          /* Now enable the memory or I/O mapping. */
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-15  6:54 [PATCH 1/3] xen/tools: Add 64 bits big bar support Xudong Hao
  2012-08-15  6:54 ` [PATCH 3/3] qemu-xen: Add 64 bits big bar support on qemu xen Xudong Hao
  2012-08-15  8:18 ` [PATCH 1/3] xen/tools: Add 64 bits big bar support Pasi Kärkkäinen
@ 2012-08-15  9:16 ` Jan Beulich
  2012-08-16 10:48   ` Hao, Xudong
  2012-08-15 10:20 ` Stefano Stabellini
  3 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2012-08-15  9:16 UTC (permalink / raw)
  To: Xudong Hao; +Cc: ian.jackson, Xiantao Zhang, xen-devel

>>> On 15.08.12 at 08:54, Xudong Hao <xudong.hao@intel.com> wrote:
> --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012 +0200
> +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012 +0800
> @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
>  /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
>  #define PCI_MEM_START       0xf0000000
>  #define PCI_MEM_END         0xfc000000
> +#define PCI_HIGH_MEM_START  0xa000000000ULL
> +#define PCI_HIGH_MEM_END    0xf000000000ULL

With such hard coded values, this is hardly meant to be anything
more than an RFC, is it? These values should not exist in the first
place, and the variables below should be determined from VM
characteristics (best would presumably be to allocate them top
down from the end of physical address space, making sure you
don't run into RAM).

> +#define PCI_MIN_MMIO_ADDR   0x80000000
> +
>  extern unsigned long pci_mem_start, pci_mem_end;
>  
>  
> diff -r 663eb766cdde tools/firmware/hvmloader/pci.c
> --- a/tools/firmware/hvmloader/pci.c	Tue Jul 24 17:02:04 2012 +0200
> +++ b/tools/firmware/hvmloader/pci.c	Thu Jul 26 15:40:01 2012 +0800
> @@ -31,24 +31,33 @@
>  unsigned long pci_mem_start = PCI_MEM_START;
>  unsigned long pci_mem_end = PCI_MEM_END;
>  
> +uint64_t pci_high_mem_start = PCI_HIGH_MEM_START;
> +uint64_t pci_high_mem_end = PCI_HIGH_MEM_END;
> +
>  enum virtual_vga virtual_vga = VGA_none;
>  unsigned long igd_opregion_pgbase = 0;
>  
>  void pci_setup(void)
>  {
> -    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
> +    uint8_t is_64bar, using_64bar, bar64_relocate = 0;
> +    uint32_t devfn, bar_reg, cmd, bar_data, bar_data_upper;
> +    uint64_t base, bar_sz, bar_sz_upper, mmio_total = 0;
>      uint32_t vga_devfn = 256;
>      uint16_t class, vendor_id, device_id;
>      unsigned int bar, pin, link, isa_irq;
> +    int64_t mmio_left;
>  
>      /* Resources assignable to PCI devices via BARs. */
>      struct resource {
> -        uint32_t base, max;
> -    } *resource, mem_resource, io_resource;
> +        uint64_t base, max;
> +    } *resource, mem_resource, high_mem_resource, io_resource;
>  
>      /* Create a list of device BARs in descending order of size. */
>      struct bars {
> -        uint32_t devfn, bar_reg, bar_sz;
> +        uint32_t is_64bar;
> +        uint32_t devfn;
> +        uint32_t bar_reg;
> +        uint64_t bar_sz;
>      } *bars = (struct bars *)scratch_start;
>      unsigned int i, nr_bars = 0;
>  
> @@ -133,23 +142,35 @@ void pci_setup(void)
>          /* Map the I/O memory and port resources. */
>          for ( bar = 0; bar < 7; bar++ )
>          {
> +            bar_sz_upper = 0;
>              bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
>              if ( bar == 6 )
>                  bar_reg = PCI_ROM_ADDRESS;
>  
>              bar_data = pci_readl(devfn, bar_reg);
> +            is_64bar = !!((bar_data & (PCI_BASE_ADDRESS_SPACE |
> +                         PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> +                         (PCI_BASE_ADDRESS_SPACE_MEMORY |
> +                         PCI_BASE_ADDRESS_MEM_TYPE_64));
>              pci_writel(devfn, bar_reg, ~0);
>              bar_sz = pci_readl(devfn, bar_reg);
>              pci_writel(devfn, bar_reg, bar_data);
> +
> +            if (is_64bar) {
> +                bar_data_upper = pci_readl(devfn, bar_reg + 4);
> +                pci_writel(devfn, bar_reg + 4, ~0);
> +                bar_sz_upper = pci_readl(devfn, bar_reg + 4);
> +                pci_writel(devfn, bar_reg + 4, bar_data_upper);
> +                bar_sz = (bar_sz_upper << 32) | bar_sz;
> +            }
> +            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> +                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> +                       0xfffffffffffffff0 :

This should be a proper constant (or the masking could be
done earlier, in which case you could continue to use the
existing PCI_BASE_ADDRESS_MEM_MASK).

> +                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> +            bar_sz &= ~(bar_sz - 1);
>              if ( bar_sz == 0 )
>                  continue;
>  
> -            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> -                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> -                       PCI_BASE_ADDRESS_MEM_MASK :
> -                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> -            bar_sz &= ~(bar_sz - 1);
> -
>              for ( i = 0; i < nr_bars; i++ )
>                  if ( bars[i].bar_sz < bar_sz )
>                      break;
> @@ -157,6 +178,7 @@ void pci_setup(void)
>              if ( i != nr_bars )
>                  memmove(&bars[i+1], &bars[i], (nr_bars-i) * sizeof(*bars));
>  
> +            bars[i].is_64bar = is_64bar;
>              bars[i].devfn   = devfn;
>              bars[i].bar_reg = bar_reg;
>              bars[i].bar_sz  = bar_sz;
> @@ -167,11 +189,8 @@ void pci_setup(void)
>  
>              nr_bars++;
>  
> -            /* Skip the upper-half of the address for a 64-bit BAR. */
> -            if ( (bar_data & (PCI_BASE_ADDRESS_SPACE |
> -                              PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == 
> -                 (PCI_BASE_ADDRESS_SPACE_MEMORY | 
> -                  PCI_BASE_ADDRESS_MEM_TYPE_64) )
> +            /*The upper half is already calculated, skip it! */
> +            if (is_64bar)
>                  bar++;
>          }
>  
> @@ -193,10 +212,14 @@ void pci_setup(void)
>          pci_writew(devfn, PCI_COMMAND, cmd);
>      }
>  
> -    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
> -            ((pci_mem_start << 1) != 0) )
> +    while ( mmio_total > (pci_mem_end - pci_mem_start) && pci_mem_start )

The old code here could remain in place if ...

>          pci_mem_start <<= 1;
>  
> +    if (!pci_mem_start) {

.. the condition here would get changed to the one used in the
first part of the while above.

> +        bar64_relocate = 1;
> +        pci_mem_start = PCI_MIN_MMIO_ADDR;

Which would then also make this assignment (and the
constant) unnecessary.

> +    }
> +
>      /* Relocate RAM that overlaps PCI space (in 64k-page chunks). */
>      while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend )
>      {
> @@ -218,11 +241,15 @@ void pci_setup(void)
>          hvm_info->high_mem_pgend += nr_pages;
>      }
>  
> +    high_mem_resource.base = pci_high_mem_start; 
> +    high_mem_resource.max = pci_high_mem_end;
>      mem_resource.base = pci_mem_start;
>      mem_resource.max = pci_mem_end;
>      io_resource.base = 0xc000;
>      io_resource.max = 0x10000;
>  
> +    mmio_left = pci_mem_end - pci_mem_start;
> +
>      /* Assign iomem and ioport resources in descending order of size. */
>      for ( i = 0; i < nr_bars; i++ )
>      {
> @@ -230,13 +257,21 @@ void pci_setup(void)
>          bar_reg = bars[i].bar_reg;
>          bar_sz  = bars[i].bar_sz;
>  
> +        using_64bar = bars[i].is_64bar && bar64_relocate && (mmio_left < 
> bar_sz);
>          bar_data = pci_readl(devfn, bar_reg);
>  
>          if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
>               PCI_BASE_ADDRESS_SPACE_MEMORY )
>          {
> -            resource = &mem_resource;
> -            bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> +            if (using_64bar) {
> +                resource = &high_mem_resource;
> +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> +            } 
> +            else {
> +                resource = &mem_resource;
> +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> +            }
> +            mmio_left -= bar_sz;
>          }
>          else
>          {
> @@ -244,13 +279,14 @@ void pci_setup(void)
>              bar_data &= ~PCI_BASE_ADDRESS_IO_MASK;
>          }
>  
> -        base = (resource->base + bar_sz - 1) & ~(bar_sz - 1);
> -        bar_data |= base;
> +        base = (resource->base  + bar_sz - 1) & ~(uint64_t)(bar_sz - 1);
> +        bar_data |= (uint32_t)base;
> +        bar_data_upper = (uint32_t)(base >> 32);
>          base += bar_sz;
>  
>          if ( (base < resource->base) || (base > resource->max) )
>          {
> -            printf("pci dev %02x:%x bar %02x size %08x: no space for "
> +            printf("pci dev %02x:%x bar %02x size %llx: no space for "
>                     "resource!\n", devfn>>3, devfn&7, bar_reg, bar_sz);
>              continue;
>          }
> @@ -258,7 +294,9 @@ void pci_setup(void)
>          resource->base = base;
>  
>          pci_writel(devfn, bar_reg, bar_data);
> -        printf("pci dev %02x:%x bar %02x size %08x: %08x\n",
> +        if (using_64bar)
> +            pci_writel(devfn, bar_reg + 4, bar_data_upper);
> +        printf("pci dev %02x:%x bar %02x size %llx: %08x\n",
>                 devfn>>3, devfn&7, bar_reg, bar_sz, bar_data);
>  
>          /* Now enable the memory or I/O mapping. */

Besides that, I'd encourage you to have an intermediate state
between not using BARs above 4Gb and forcing all 64-bit ones
beyond 4Gb for maximum compatibility - try fitting as many as
you can into the low 2Gb. Perhaps this would warrant an option
of some sort.

Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-15  8:18 ` [PATCH 1/3] xen/tools: Add 64 bits big bar support Pasi Kärkkäinen
@ 2012-08-15  9:17   ` Hao, Xudong
  2012-08-15 10:21     ` Pasi Kärkkäinen
  0 siblings, 1 reply; 21+ messages in thread
From: Hao, Xudong @ 2012-08-15  9:17 UTC (permalink / raw)
  To: Pasi Kärkkäinen; +Cc: ian.jackson, Zhang, Xiantao, xen-devel


> -----Original Message-----
> From: Pasi Kärkkäinen [mailto:pasik@iki.fi]
> Sent: Wednesday, August 15, 2012 4:19 PM
> To: Hao, Xudong
> Cc: xen-devel@lists.xen.org; ian.jackson@eu.citrix.com; Zhang, Xiantao
> Subject: Re: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> 
> On Wed, Aug 15, 2012 at 02:54:41PM +0800, Xudong Hao wrote:
> > Currently it is assumed PCI device BAR access < 4G memory. If there is such a
> > device whose BAR size is larger than 4G, it must access > 4G memory
> address.
> > This patch enable the 64bits big BAR support on hvmloader.
> >
> 
> Hello,
> 
> Do you have an example of such a PCI device with >4G BAR?
> 

We have a standard PCI-e device which integrates Intel MIC technology; this device has a 16 GB BAR. If we want to pass such a device through to a guest, current Xen can't support it, so we want Xen to support big BARs, hence this patch.

For Intel MIC program technology, you can refer to http://openlab.web.cern.ch/sites/openlab.web.cern.ch/files/presentations/KNC_ISA_Overview_Apr2012_SJ_New_V4.pdf  or 
http://openlab.web.cern.ch/sites/openlab.web.cern.ch/files/presentations/2012.04.25%20Andrzej%20Nowak%20-%20An%20overview%20of%20Intel%20MIC%20-%20technology,%20hardware%20and%20software%20v3.pdf

- Thanks
Xudong

> Thanks,
> 
> -- Pasi
> 
> > Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
> > Signed-off-by: Xudong Hao <xudong.hao@intel.com>
> >
> > diff -r 663eb766cdde tools/firmware/hvmloader/config.h
> > --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012 +0200
> > +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012 +0800
> > @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
> >  /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
> >  #define PCI_MEM_START       0xf0000000
> >  #define PCI_MEM_END         0xfc000000
> > +#define PCI_HIGH_MEM_START  0xa000000000ULL
> > +#define PCI_HIGH_MEM_END    0xf000000000ULL
> > +#define PCI_MIN_MMIO_ADDR   0x80000000
> > +
> >  extern unsigned long pci_mem_start, pci_mem_end;
> >
> >
> > diff -r 663eb766cdde tools/firmware/hvmloader/pci.c
> > --- a/tools/firmware/hvmloader/pci.c	Tue Jul 24 17:02:04 2012 +0200
> > +++ b/tools/firmware/hvmloader/pci.c	Thu Jul 26 15:40:01 2012 +0800
> > @@ -31,24 +31,33 @@
> >  unsigned long pci_mem_start = PCI_MEM_START;
> >  unsigned long pci_mem_end = PCI_MEM_END;
> >
> > +uint64_t pci_high_mem_start = PCI_HIGH_MEM_START;
> > +uint64_t pci_high_mem_end = PCI_HIGH_MEM_END;
> > +
> >  enum virtual_vga virtual_vga = VGA_none;
> >  unsigned long igd_opregion_pgbase = 0;
> >
> >  void pci_setup(void)
> >  {
> > -    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
> > +    uint8_t is_64bar, using_64bar, bar64_relocate = 0;
> > +    uint32_t devfn, bar_reg, cmd, bar_data, bar_data_upper;
> > +    uint64_t base, bar_sz, bar_sz_upper, mmio_total = 0;
> >      uint32_t vga_devfn = 256;
> >      uint16_t class, vendor_id, device_id;
> >      unsigned int bar, pin, link, isa_irq;
> > +    int64_t mmio_left;
> >
> >      /* Resources assignable to PCI devices via BARs. */
> >      struct resource {
> > -        uint32_t base, max;
> > -    } *resource, mem_resource, io_resource;
> > +        uint64_t base, max;
> > +    } *resource, mem_resource, high_mem_resource, io_resource;
> >
> >      /* Create a list of device BARs in descending order of size. */
> >      struct bars {
> > -        uint32_t devfn, bar_reg, bar_sz;
> > +        uint32_t is_64bar;
> > +        uint32_t devfn;
> > +        uint32_t bar_reg;
> > +        uint64_t bar_sz;
> >      } *bars = (struct bars *)scratch_start;
> >      unsigned int i, nr_bars = 0;
> >
> > @@ -133,23 +142,35 @@ void pci_setup(void)
> >          /* Map the I/O memory and port resources. */
> >          for ( bar = 0; bar < 7; bar++ )
> >          {
> > +            bar_sz_upper = 0;
> >              bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
> >              if ( bar == 6 )
> >                  bar_reg = PCI_ROM_ADDRESS;
> >
> >              bar_data = pci_readl(devfn, bar_reg);
> > +            is_64bar = !!((bar_data & (PCI_BASE_ADDRESS_SPACE |
> > +                         PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> > +                         (PCI_BASE_ADDRESS_SPACE_MEMORY |
> > +                         PCI_BASE_ADDRESS_MEM_TYPE_64));
> >              pci_writel(devfn, bar_reg, ~0);
> >              bar_sz = pci_readl(devfn, bar_reg);
> >              pci_writel(devfn, bar_reg, bar_data);
> > +
> > +            if (is_64bar) {
> > +                bar_data_upper = pci_readl(devfn, bar_reg + 4);
> > +                pci_writel(devfn, bar_reg + 4, ~0);
> > +                bar_sz_upper = pci_readl(devfn, bar_reg + 4);
> > +                pci_writel(devfn, bar_reg + 4, bar_data_upper);
> > +                bar_sz = (bar_sz_upper << 32) | bar_sz;
> > +            }
> > +            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> > +                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> > +                       0xfffffffffffffff0 :
> > +                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> > +            bar_sz &= ~(bar_sz - 1);
> >              if ( bar_sz == 0 )
> >                  continue;
> >
> > -            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> > -                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> > -                       PCI_BASE_ADDRESS_MEM_MASK :
> > -                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> > -            bar_sz &= ~(bar_sz - 1);
> > -
> >              for ( i = 0; i < nr_bars; i++ )
> >                  if ( bars[i].bar_sz < bar_sz )
> >                      break;
> > @@ -157,6 +178,7 @@ void pci_setup(void)
> >              if ( i != nr_bars )
> >                  memmove(&bars[i+1], &bars[i], (nr_bars-i) *
> sizeof(*bars));
> >
> > +            bars[i].is_64bar = is_64bar;
> >              bars[i].devfn   = devfn;
> >              bars[i].bar_reg = bar_reg;
> >              bars[i].bar_sz  = bar_sz;
> > @@ -167,11 +189,8 @@ void pci_setup(void)
> >
> >              nr_bars++;
> >
> > -            /* Skip the upper-half of the address for a 64-bit BAR. */
> > -            if ( (bar_data & (PCI_BASE_ADDRESS_SPACE |
> > -
> PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> > -                 (PCI_BASE_ADDRESS_SPACE_MEMORY |
> > -                  PCI_BASE_ADDRESS_MEM_TYPE_64) )
> > +            /*The upper half is already calculated, skip it! */
> > +            if (is_64bar)
> >                  bar++;
> >          }
> >
> > @@ -193,10 +212,14 @@ void pci_setup(void)
> >          pci_writew(devfn, PCI_COMMAND, cmd);
> >      }
> >
> > -    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
> > -            ((pci_mem_start << 1) != 0) )
> > +    while ( mmio_total > (pci_mem_end - pci_mem_start) &&
> pci_mem_start )
> >          pci_mem_start <<= 1;
> >
> > +    if (!pci_mem_start) {
> > +        bar64_relocate = 1;
> > +        pci_mem_start = PCI_MIN_MMIO_ADDR;
> > +    }
> > +
> >      /* Relocate RAM that overlaps PCI space (in 64k-page chunks). */
> >      while ( (pci_mem_start >> PAGE_SHIFT) <
> hvm_info->low_mem_pgend )
> >      {
> > @@ -218,11 +241,15 @@ void pci_setup(void)
> >          hvm_info->high_mem_pgend += nr_pages;
> >      }
> >
> > +    high_mem_resource.base = pci_high_mem_start;
> > +    high_mem_resource.max = pci_high_mem_end;
> >      mem_resource.base = pci_mem_start;
> >      mem_resource.max = pci_mem_end;
> >      io_resource.base = 0xc000;
> >      io_resource.max = 0x10000;
> >
> > +    mmio_left = pci_mem_end - pci_mem_start;
> > +
> >      /* Assign iomem and ioport resources in descending order of size. */
> >      for ( i = 0; i < nr_bars; i++ )
> >      {
> > @@ -230,13 +257,21 @@ void pci_setup(void)
> >          bar_reg = bars[i].bar_reg;
> >          bar_sz  = bars[i].bar_sz;
> >
> > +        using_64bar = bars[i].is_64bar && bar64_relocate && (mmio_left
> < bar_sz);
> >          bar_data = pci_readl(devfn, bar_reg);
> >
> >          if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
> >               PCI_BASE_ADDRESS_SPACE_MEMORY )
> >          {
> > -            resource = &mem_resource;
> > -            bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            if (using_64bar) {
> > +                resource = &high_mem_resource;
> > +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            }
> > +            else {
> > +                resource = &mem_resource;
> > +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            }
> > +            mmio_left -= bar_sz;
> >          }
> >          else
> >          {
> > @@ -244,13 +279,14 @@ void pci_setup(void)
> >              bar_data &= ~PCI_BASE_ADDRESS_IO_MASK;
> >          }
> >
> > -        base = (resource->base + bar_sz - 1) & ~(bar_sz - 1);
> > -        bar_data |= base;
> > +        base = (resource->base  + bar_sz - 1) & ~(uint64_t)(bar_sz - 1);
> > +        bar_data |= (uint32_t)base;
> > +        bar_data_upper = (uint32_t)(base >> 32);
> >          base += bar_sz;
> >
> >          if ( (base < resource->base) || (base > resource->max) )
> >          {
> > -            printf("pci dev %02x:%x bar %02x size %08x: no space for "
> > +            printf("pci dev %02x:%x bar %02x size %llx: no space for "
> >                     "resource!\n", devfn>>3, devfn&7, bar_reg,
> bar_sz);
> >              continue;
> >          }
> > @@ -258,7 +294,9 @@ void pci_setup(void)
> >          resource->base = base;
> >
> >          pci_writel(devfn, bar_reg, bar_data);
> > -        printf("pci dev %02x:%x bar %02x size %08x: %08x\n",
> > +        if (using_64bar)
> > +            pci_writel(devfn, bar_reg + 4, bar_data_upper);
> > +        printf("pci dev %02x:%x bar %02x size %llx: %08x\n",
> >                 devfn>>3, devfn&7, bar_reg, bar_sz, bar_data);
> >
> >          /* Now enable the memory or I/O mapping. */
> >
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@lists.xen.org
> > http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-15  6:54 [PATCH 1/3] xen/tools: Add 64 bits big bar support Xudong Hao
                   ` (2 preceding siblings ...)
  2012-08-15  9:16 ` Jan Beulich
@ 2012-08-15 10:20 ` Stefano Stabellini
  2012-08-16  2:57   ` Hao, Xudong
  3 siblings, 1 reply; 21+ messages in thread
From: Stefano Stabellini @ 2012-08-15 10:20 UTC (permalink / raw)
  To: Xudong Hao; +Cc: Ian Jackson, Xiantao Zhang, xen-devel

On Wed, 15 Aug 2012, Xudong Hao wrote:
> Currently it is assumed PCI device BAR access < 4G memory. If there is such a
> device whose BAR size is larger than 4G, it must access > 4G memory address.
> This patch enables 64-bit big BAR support in hvmloader.
> 
> Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
> Signed-off-by: Xudong Hao <xudong.hao@intel.com>

It is very good to see that this problem has been solved!

Considering that at this point it is too late for the 4.2 release cycle,
it might be worth spinning a version of these patches for SeaBIOS and
upstream QEMU, which now supports PCI passthrough.



> diff -r 663eb766cdde tools/firmware/hvmloader/config.h
> --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012 +0200
> +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012 +0800
> @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
>  /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
>  #define PCI_MEM_START       0xf0000000
>  #define PCI_MEM_END         0xfc000000
> +#define PCI_HIGH_MEM_START  0xa000000000ULL
> +#define PCI_HIGH_MEM_END    0xf000000000ULL
> +#define PCI_MIN_MMIO_ADDR   0x80000000
> +
>  extern unsigned long pci_mem_start, pci_mem_end;
>  
>  
> diff -r 663eb766cdde tools/firmware/hvmloader/pci.c
> --- a/tools/firmware/hvmloader/pci.c	Tue Jul 24 17:02:04 2012 +0200
> +++ b/tools/firmware/hvmloader/pci.c	Thu Jul 26 15:40:01 2012 +0800
> @@ -31,24 +31,33 @@
>  unsigned long pci_mem_start = PCI_MEM_START;
>  unsigned long pci_mem_end = PCI_MEM_END;
>  
> +uint64_t pci_high_mem_start = PCI_HIGH_MEM_START;
> +uint64_t pci_high_mem_end = PCI_HIGH_MEM_END;
> +
>  enum virtual_vga virtual_vga = VGA_none;
>  unsigned long igd_opregion_pgbase = 0;
>  
>  void pci_setup(void)
>  {
> -    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
> +    uint8_t is_64bar, using_64bar, bar64_relocate = 0;
> +    uint32_t devfn, bar_reg, cmd, bar_data, bar_data_upper;
> +    uint64_t base, bar_sz, bar_sz_upper, mmio_total = 0;
>      uint32_t vga_devfn = 256;
>      uint16_t class, vendor_id, device_id;
>      unsigned int bar, pin, link, isa_irq;
> +    int64_t mmio_left;
>  
>      /* Resources assignable to PCI devices via BARs. */
>      struct resource {
> -        uint32_t base, max;
> -    } *resource, mem_resource, io_resource;
> +        uint64_t base, max;
> +    } *resource, mem_resource, high_mem_resource, io_resource;
>  
>      /* Create a list of device BARs in descending order of size. */
>      struct bars {
> -        uint32_t devfn, bar_reg, bar_sz;
> +        uint32_t is_64bar;
> +        uint32_t devfn;
> +        uint32_t bar_reg;
> +        uint64_t bar_sz;
>      } *bars = (struct bars *)scratch_start;
>      unsigned int i, nr_bars = 0;
>  
> @@ -133,23 +142,35 @@ void pci_setup(void)
>          /* Map the I/O memory and port resources. */
>          for ( bar = 0; bar < 7; bar++ )
>          {
> +            bar_sz_upper = 0;
>              bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
>              if ( bar == 6 )
>                  bar_reg = PCI_ROM_ADDRESS;
>  
>              bar_data = pci_readl(devfn, bar_reg);
> +            is_64bar = !!((bar_data & (PCI_BASE_ADDRESS_SPACE |
> +                         PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> +                         (PCI_BASE_ADDRESS_SPACE_MEMORY |
> +                         PCI_BASE_ADDRESS_MEM_TYPE_64));
>              pci_writel(devfn, bar_reg, ~0);
>              bar_sz = pci_readl(devfn, bar_reg);
>              pci_writel(devfn, bar_reg, bar_data);
> +
> +            if (is_64bar) {
> +                bar_data_upper = pci_readl(devfn, bar_reg + 4);
> +                pci_writel(devfn, bar_reg + 4, ~0);
> +                bar_sz_upper = pci_readl(devfn, bar_reg + 4);
> +                pci_writel(devfn, bar_reg + 4, bar_data_upper);
> +                bar_sz = (bar_sz_upper << 32) | bar_sz;
> +            }
> +            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> +                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> +                       0xfffffffffffffff0 :
> +                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> +            bar_sz &= ~(bar_sz - 1);
>              if ( bar_sz == 0 )
>                  continue;
>  
> -            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> -                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> -                       PCI_BASE_ADDRESS_MEM_MASK :
> -                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> -            bar_sz &= ~(bar_sz - 1);
> -
>              for ( i = 0; i < nr_bars; i++ )
>                  if ( bars[i].bar_sz < bar_sz )
>                      break;
> @@ -157,6 +178,7 @@ void pci_setup(void)
>              if ( i != nr_bars )
>                  memmove(&bars[i+1], &bars[i], (nr_bars-i) * sizeof(*bars));
>  
> +            bars[i].is_64bar = is_64bar;
>              bars[i].devfn   = devfn;
>              bars[i].bar_reg = bar_reg;
>              bars[i].bar_sz  = bar_sz;
> @@ -167,11 +189,8 @@ void pci_setup(void)
>  
>              nr_bars++;
>  
> -            /* Skip the upper-half of the address for a 64-bit BAR. */
> -            if ( (bar_data & (PCI_BASE_ADDRESS_SPACE |
> -                              PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == 
> -                 (PCI_BASE_ADDRESS_SPACE_MEMORY | 
> -                  PCI_BASE_ADDRESS_MEM_TYPE_64) )
> +            /*The upper half is already calculated, skip it! */
> +            if (is_64bar)
>                  bar++;
>          }
>  
> @@ -193,10 +212,14 @@ void pci_setup(void)
>          pci_writew(devfn, PCI_COMMAND, cmd);
>      }
>  
> -    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
> -            ((pci_mem_start << 1) != 0) )
> +    while ( mmio_total > (pci_mem_end - pci_mem_start) && pci_mem_start )
>          pci_mem_start <<= 1;
>  
> +    if (!pci_mem_start) {
> +        bar64_relocate = 1;
> +        pci_mem_start = PCI_MIN_MMIO_ADDR;
> +    }
> +
>      /* Relocate RAM that overlaps PCI space (in 64k-page chunks). */
>      while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend )
>      {
> @@ -218,11 +241,15 @@ void pci_setup(void)
>          hvm_info->high_mem_pgend += nr_pages;
>      }
>  
> +    high_mem_resource.base = pci_high_mem_start; 
> +    high_mem_resource.max = pci_high_mem_end;
>      mem_resource.base = pci_mem_start;
>      mem_resource.max = pci_mem_end;
>      io_resource.base = 0xc000;
>      io_resource.max = 0x10000;
>  
> +    mmio_left = pci_mem_end - pci_mem_end;
> +
>      /* Assign iomem and ioport resources in descending order of size. */
>      for ( i = 0; i < nr_bars; i++ )
>      {
> @@ -230,13 +257,21 @@ void pci_setup(void)
>          bar_reg = bars[i].bar_reg;
>          bar_sz  = bars[i].bar_sz;
>  
> +        using_64bar = bars[i].is_64bar && bar64_relocate && (mmio_left < bar_sz);
>          bar_data = pci_readl(devfn, bar_reg);
>  
>          if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
>               PCI_BASE_ADDRESS_SPACE_MEMORY )
>          {
> -            resource = &mem_resource;
> -            bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> +            if (using_64bar) {
> +                resource = &high_mem_resource;
> +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> +            } 
> +            else {
> +                resource = &mem_resource;
> +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> +            }
> +            mmio_left -= bar_sz;
>          }
>          else
>          {
> @@ -244,13 +279,14 @@ void pci_setup(void)
>              bar_data &= ~PCI_BASE_ADDRESS_IO_MASK;
>          }
>  
> -        base = (resource->base + bar_sz - 1) & ~(bar_sz - 1);
> -        bar_data |= base;
> +        base = (resource->base  + bar_sz - 1) & ~(uint64_t)(bar_sz - 1);
> +        bar_data |= (uint32_t)base;
> +        bar_data_upper = (uint32_t)(base >> 32);
>          base += bar_sz;
>  
>          if ( (base < resource->base) || (base > resource->max) )
>          {
> -            printf("pci dev %02x:%x bar %02x size %08x: no space for "
> +            printf("pci dev %02x:%x bar %02x size %llx: no space for "
>                     "resource!\n", devfn>>3, devfn&7, bar_reg, bar_sz);
>              continue;
>          }
> @@ -258,7 +294,9 @@ void pci_setup(void)
>          resource->base = base;
>  
>          pci_writel(devfn, bar_reg, bar_data);
> -        printf("pci dev %02x:%x bar %02x size %08x: %08x\n",
> +        if (using_64bar)
> +            pci_writel(devfn, bar_reg + 4, bar_data_upper);
> +        printf("pci dev %02x:%x bar %02x size %llx: %08x\n",
>                 devfn>>3, devfn&7, bar_reg, bar_sz, bar_data);
>  
>          /* Now enable the memory or I/O mapping. */
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-15  9:17   ` Hao, Xudong
@ 2012-08-15 10:21     ` Pasi Kärkkäinen
  0 siblings, 0 replies; 21+ messages in thread
From: Pasi Kärkkäinen @ 2012-08-15 10:21 UTC (permalink / raw)
  To: Hao, Xudong; +Cc: ian.jackson, Zhang, Xiantao, xen-devel

On Wed, Aug 15, 2012 at 09:17:51AM +0000, Hao, Xudong wrote:
> 
> > -----Original Message-----
> > From: Pasi Kärkkäinen [mailto:pasik@iki.fi]
> > Sent: Wednesday, August 15, 2012 4:19 PM
> > To: Hao, Xudong
> > Cc: xen-devel@lists.xen.org; ian.jackson@eu.citrix.com; Zhang, Xiantao
> > Subject: Re: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> > 
> > On Wed, Aug 15, 2012 at 02:54:41PM +0800, Xudong Hao wrote:
> > > Currently it is assumed PCI device BAR access < 4G memory. If there is such a
> > > device whose BAR size is larger than 4G, it must access > 4G memory
> > address.
> > > This patch enable the 64bits big BAR support on hvmloader.
> > >
> > 
> > Hello,
> > 
> > Do you have an example of such a PCI device with >4G BAR?
> > 
> 
> We have a standard PCI-e device integrating the Intel MIC program; this device has a 16G BAR. If we want to pass such a device through to a guest, current Xen can't support it, so we want Xen to have this big BAR support, hence this patch.
> 
> For Intel MIC program technology, you can refer to http://openlab.web.cern.ch/sites/openlab.web.cern.ch/files/presentations/KNC_ISA_Overview_Apr2012_SJ_New_V4.pdf  or 
> http://openlab.web.cern.ch/sites/openlab.web.cern.ch/files/presentations/2012.04.25%20Andrzej%20Nowak%20-%20An%20overview%20of%20Intel%20MIC%20-%20technology,%20hardware%20and%20software%20v3.pdf
> 

Interesting, thanks!

-- Pasi

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-15 10:20 ` Stefano Stabellini
@ 2012-08-16  2:57   ` Hao, Xudong
  2012-08-16 10:10     ` Stefano Stabellini
  0 siblings, 1 reply; 21+ messages in thread
From: Hao, Xudong @ 2012-08-16  2:57 UTC (permalink / raw)
  To: Stefano Stabellini; +Cc: Ian Jackson, Zhang, Xiantao, xen-devel

> -----Original Message-----
> From: Stefano Stabellini [mailto:stefano.stabellini@eu.citrix.com]
> Sent: Wednesday, August 15, 2012 6:21 PM
> To: Hao, Xudong
> Cc: xen-devel@lists.xen.org; Ian Jackson; Zhang, Xiantao
> Subject: Re: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> 
> On Wed, 15 Aug 2012, Xudong Hao wrote:
> > Currently it is assumed PCI device BAR access < 4G memory. If there is such a
> > device whose BAR size is larger than 4G, it must access > 4G memory
> address.
> > This patch enable the 64bits big BAR support on hvmloader.
> >
> > Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
> > Signed-off-by: Xudong Hao <xudong.hao@intel.com>
> 
> It is very good to see that this problem has been solved!
> 
> Considering that at this point it is too late for the 4.2 release cycle,
> it might be worth spinning a version of this patches for SeaBIOS and
> upstream QEMU, that now supports PCI passthrough.
> 

Hi, Stefano

Has Xen already switched to SeaBIOS and upstream QEMU? I saw SeaBIOS has not been updated for 5 months.

You mean upstream QEMU is this tree git://git.qemu.org/qemu.git ? I heard somebody said PCI device assignment does not work for this tree, but device hot-add/remove works.

> 
> 
> > diff -r 663eb766cdde tools/firmware/hvmloader/config.h
> > --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012 +0200
> > +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012 +0800
> > @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
> >  /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
> >  #define PCI_MEM_START       0xf0000000
> >  #define PCI_MEM_END         0xfc000000
> > +#define PCI_HIGH_MEM_START  0xa000000000ULL
> > +#define PCI_HIGH_MEM_END    0xf000000000ULL
> > +#define PCI_MIN_MMIO_ADDR   0x80000000
> > +
> >  extern unsigned long pci_mem_start, pci_mem_end;
> >
> >
> > diff -r 663eb766cdde tools/firmware/hvmloader/pci.c
> > --- a/tools/firmware/hvmloader/pci.c	Tue Jul 24 17:02:04 2012 +0200
> > +++ b/tools/firmware/hvmloader/pci.c	Thu Jul 26 15:40:01 2012 +0800
> > @@ -31,24 +31,33 @@
> >  unsigned long pci_mem_start = PCI_MEM_START;
> >  unsigned long pci_mem_end = PCI_MEM_END;
> >
> > +uint64_t pci_high_mem_start = PCI_HIGH_MEM_START;
> > +uint64_t pci_high_mem_end = PCI_HIGH_MEM_END;
> > +
> >  enum virtual_vga virtual_vga = VGA_none;
> >  unsigned long igd_opregion_pgbase = 0;
> >
> >  void pci_setup(void)
> >  {
> > -    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
> > +    uint8_t is_64bar, using_64bar, bar64_relocate = 0;
> > +    uint32_t devfn, bar_reg, cmd, bar_data, bar_data_upper;
> > +    uint64_t base, bar_sz, bar_sz_upper, mmio_total = 0;
> >      uint32_t vga_devfn = 256;
> >      uint16_t class, vendor_id, device_id;
> >      unsigned int bar, pin, link, isa_irq;
> > +    int64_t mmio_left;
> >
> >      /* Resources assignable to PCI devices via BARs. */
> >      struct resource {
> > -        uint32_t base, max;
> > -    } *resource, mem_resource, io_resource;
> > +        uint64_t base, max;
> > +    } *resource, mem_resource, high_mem_resource, io_resource;
> >
> >      /* Create a list of device BARs in descending order of size. */
> >      struct bars {
> > -        uint32_t devfn, bar_reg, bar_sz;
> > +        uint32_t is_64bar;
> > +        uint32_t devfn;
> > +        uint32_t bar_reg;
> > +        uint64_t bar_sz;
> >      } *bars = (struct bars *)scratch_start;
> >      unsigned int i, nr_bars = 0;
> >
> > @@ -133,23 +142,35 @@ void pci_setup(void)
> >          /* Map the I/O memory and port resources. */
> >          for ( bar = 0; bar < 7; bar++ )
> >          {
> > +            bar_sz_upper = 0;
> >              bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
> >              if ( bar == 6 )
> >                  bar_reg = PCI_ROM_ADDRESS;
> >
> >              bar_data = pci_readl(devfn, bar_reg);
> > +            is_64bar = !!((bar_data & (PCI_BASE_ADDRESS_SPACE |
> > +                         PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> > +                         (PCI_BASE_ADDRESS_SPACE_MEMORY |
> > +                         PCI_BASE_ADDRESS_MEM_TYPE_64));
> >              pci_writel(devfn, bar_reg, ~0);
> >              bar_sz = pci_readl(devfn, bar_reg);
> >              pci_writel(devfn, bar_reg, bar_data);
> > +
> > +            if (is_64bar) {
> > +                bar_data_upper = pci_readl(devfn, bar_reg + 4);
> > +                pci_writel(devfn, bar_reg + 4, ~0);
> > +                bar_sz_upper = pci_readl(devfn, bar_reg + 4);
> > +                pci_writel(devfn, bar_reg + 4, bar_data_upper);
> > +                bar_sz = (bar_sz_upper << 32) | bar_sz;
> > +            }
> > +            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> > +                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> > +                       0xfffffffffffffff0 :
> > +                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> > +            bar_sz &= ~(bar_sz - 1);
> >              if ( bar_sz == 0 )
> >                  continue;
> >
> > -            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> > -                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> > -                       PCI_BASE_ADDRESS_MEM_MASK :
> > -                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> > -            bar_sz &= ~(bar_sz - 1);
> > -
> >              for ( i = 0; i < nr_bars; i++ )
> >                  if ( bars[i].bar_sz < bar_sz )
> >                      break;
> > @@ -157,6 +178,7 @@ void pci_setup(void)
> >              if ( i != nr_bars )
> >                  memmove(&bars[i+1], &bars[i], (nr_bars-i) *
> sizeof(*bars));
> >
> > +            bars[i].is_64bar = is_64bar;
> >              bars[i].devfn   = devfn;
> >              bars[i].bar_reg = bar_reg;
> >              bars[i].bar_sz  = bar_sz;
> > @@ -167,11 +189,8 @@ void pci_setup(void)
> >
> >              nr_bars++;
> >
> > -            /* Skip the upper-half of the address for a 64-bit BAR. */
> > -            if ( (bar_data & (PCI_BASE_ADDRESS_SPACE |
> > -
> PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> > -                 (PCI_BASE_ADDRESS_SPACE_MEMORY |
> > -                  PCI_BASE_ADDRESS_MEM_TYPE_64) )
> > +            /*The upper half is already calculated, skip it! */
> > +            if (is_64bar)
> >                  bar++;
> >          }
> >
> > @@ -193,10 +212,14 @@ void pci_setup(void)
> >          pci_writew(devfn, PCI_COMMAND, cmd);
> >      }
> >
> > -    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
> > -            ((pci_mem_start << 1) != 0) )
> > +    while ( mmio_total > (pci_mem_end - pci_mem_start) &&
> pci_mem_start )
> >          pci_mem_start <<= 1;
> >
> > +    if (!pci_mem_start) {
> > +        bar64_relocate = 1;
> > +        pci_mem_start = PCI_MIN_MMIO_ADDR;
> > +    }
> > +
> >      /* Relocate RAM that overlaps PCI space (in 64k-page chunks). */
> >      while ( (pci_mem_start >> PAGE_SHIFT) <
> hvm_info->low_mem_pgend )
> >      {
> > @@ -218,11 +241,15 @@ void pci_setup(void)
> >          hvm_info->high_mem_pgend += nr_pages;
> >      }
> >
> > +    high_mem_resource.base = pci_high_mem_start;
> > +    high_mem_resource.max = pci_high_mem_end;
> >      mem_resource.base = pci_mem_start;
> >      mem_resource.max = pci_mem_end;
> >      io_resource.base = 0xc000;
> >      io_resource.max = 0x10000;
> >
> > +    mmio_left = pci_mem_end - pci_mem_end;
> > +
> >      /* Assign iomem and ioport resources in descending order of size. */
> >      for ( i = 0; i < nr_bars; i++ )
> >      {
> > @@ -230,13 +257,21 @@ void pci_setup(void)
> >          bar_reg = bars[i].bar_reg;
> >          bar_sz  = bars[i].bar_sz;
> >
> > +        using_64bar = bars[i].is_64bar && bar64_relocate && (mmio_left
> < bar_sz);
> >          bar_data = pci_readl(devfn, bar_reg);
> >
> >          if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
> >               PCI_BASE_ADDRESS_SPACE_MEMORY )
> >          {
> > -            resource = &mem_resource;
> > -            bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            if (using_64bar) {
> > +                resource = &high_mem_resource;
> > +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            }
> > +            else {
> > +                resource = &mem_resource;
> > +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            }
> > +            mmio_left -= bar_sz;
> >          }
> >          else
> >          {
> > @@ -244,13 +279,14 @@ void pci_setup(void)
> >              bar_data &= ~PCI_BASE_ADDRESS_IO_MASK;
> >          }
> >
> > -        base = (resource->base + bar_sz - 1) & ~(bar_sz - 1);
> > -        bar_data |= base;
> > +        base = (resource->base  + bar_sz - 1) & ~(uint64_t)(bar_sz - 1);
> > +        bar_data |= (uint32_t)base;
> > +        bar_data_upper = (uint32_t)(base >> 32);
> >          base += bar_sz;
> >
> >          if ( (base < resource->base) || (base > resource->max) )
> >          {
> > -            printf("pci dev %02x:%x bar %02x size %08x: no space for "
> > +            printf("pci dev %02x:%x bar %02x size %llx: no space for "
> >                     "resource!\n", devfn>>3, devfn&7, bar_reg,
> bar_sz);
> >              continue;
> >          }
> > @@ -258,7 +294,9 @@ void pci_setup(void)
> >          resource->base = base;
> >
> >          pci_writel(devfn, bar_reg, bar_data);
> > -        printf("pci dev %02x:%x bar %02x size %08x: %08x\n",
> > +        if (using_64bar)
> > +            pci_writel(devfn, bar_reg + 4, bar_data_upper);
> > +        printf("pci dev %02x:%x bar %02x size %llx: %08x\n",
> >                 devfn>>3, devfn&7, bar_reg, bar_sz, bar_data);
> >
> >          /* Now enable the memory or I/O mapping. */
> >
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@lists.xen.org
> > http://lists.xen.org/xen-devel
> >

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-16  2:57   ` Hao, Xudong
@ 2012-08-16 10:10     ` Stefano Stabellini
  2012-08-16 10:24       ` Hao, Xudong
  0 siblings, 1 reply; 21+ messages in thread
From: Stefano Stabellini @ 2012-08-16 10:10 UTC (permalink / raw)
  To: Hao, Xudong
  Cc: xen-devel, Ian Jackson, Anthony PERARD, Zhang, Xiantao,
	Stefano Stabellini

On Thu, 16 Aug 2012, Hao, Xudong wrote:
> > -----Original Message-----
> > From: Stefano Stabellini [mailto:stefano.stabellini@eu.citrix.com]
> > Sent: Wednesday, August 15, 2012 6:21 PM
> > To: Hao, Xudong
> > Cc: xen-devel@lists.xen.org; Ian Jackson; Zhang, Xiantao
> > Subject: Re: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> > 
> > On Wed, 15 Aug 2012, Xudong Hao wrote:
> > > Currently it is assumed PCI device BAR access < 4G memory. If there is such a
> > > device whose BAR size is larger than 4G, it must access > 4G memory
> > address.
> > > This patch enable the 64bits big BAR support on hvmloader.
> > >
> > > Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
> > > Signed-off-by: Xudong Hao <xudong.hao@intel.com>
> > 
> > It is very good to see that this problem has been solved!
> > 
> > Considering that at this point it is too late for the 4.2 release cycle,
> > it might be worth spinning a version of this patches for SeaBIOS and
> > upstream QEMU, that now supports PCI passthrough.
> > 
> 
> Hi, Stefano
> 
> Does Xen already switch to SeaBIOS and upstream QEMU? I saw SeaBIOS does not update for 5 months.

Yes, they are available but not the default yet for HVM guests.
In order to enable upstream QEMU you need to pass:

device_model_version='qemu-xen'

in the VM config file.


> You mean upstream QEMU is this tree git://git.qemu.org/qemu.git ? I heard somebody said PCI device assignment does not work for this tree, but device hot-add/remove works.

qemu-upstream-unstable, the upstream QEMU based tree used with Xen 4.2,
(git://xenbits.xensource.com/qemu-upstream-unstable.git) is based on
QEMU 1.0 and doesn't support PCI passthrough.

However upstream QEMU (git://git.qemu.org/qemu.git) does, and I am going
to rebase on it early when the 4.3 development cycle opens.  So it is
probably a good idea for you to try out upstream QEMU now.
We have a wiki page on how to build and run upstream QEMU on
xen-unstable:

http://wiki.xen.org/wiki/QEMU_Upstream

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-16 10:10     ` Stefano Stabellini
@ 2012-08-16 10:24       ` Hao, Xudong
  0 siblings, 0 replies; 21+ messages in thread
From: Hao, Xudong @ 2012-08-16 10:24 UTC (permalink / raw)
  To: Stefano Stabellini; +Cc: Anthony PERARD, Ian Jackson, Zhang, Xiantao, xen-devel


> -----Original Message-----
> From: Stefano Stabellini [mailto:stefano.stabellini@eu.citrix.com]
> Sent: Thursday, August 16, 2012 6:11 PM
> To: Hao, Xudong
> Cc: Stefano Stabellini; xen-devel@lists.xen.org; Ian Jackson; Zhang, Xiantao;
> Anthony PERARD
> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> 
> On Thu, 16 Aug 2012, Hao, Xudong wrote:
> > > -----Original Message-----
> > > From: Stefano Stabellini [mailto:stefano.stabellini@eu.citrix.com]
> > > Sent: Wednesday, August 15, 2012 6:21 PM
> > > To: Hao, Xudong
> > > Cc: xen-devel@lists.xen.org; Ian Jackson; Zhang, Xiantao
> > > Subject: Re: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> > >
> > > On Wed, 15 Aug 2012, Xudong Hao wrote:
> > > > Currently it is assumed PCI device BAR access < 4G memory. If there is
> such a
> > > > device whose BAR size is larger than 4G, it must access > 4G memory
> > > address.
> > > > This patch enable the 64bits big BAR support on hvmloader.
> > > >
> > > > Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
> > > > Signed-off-by: Xudong Hao <xudong.hao@intel.com>
> > >
> > > It is very good to see that this problem has been solved!
> > >
> > > Considering that at this point it is too late for the 4.2 release cycle,
> > > it might be worth spinning a version of this patches for SeaBIOS and
> > > upstream QEMU, that now supports PCI passthrough.
> > >
> >
> > Hi, Stefano
> >
> > Does Xen already switch to SeaBIOS and upstream QEMU? I saw SeaBIOS
> does not update for 5 months.
> 
> Yes, they are available but not the default yet for HVM guests.
> In order to enable upstream QEMU you need to pass:
> 
> device_model_version='qemu-xen'
> 
> in the VM config file.
> 
> 
> > You mean upstream QEMU is this tree git://git.qemu.org/qemu.git ? I heard
> somebody said PCI device assignment does not work for this tree, but device
> hot-add/remove works.
> 
> qemu-upstream-unstable, the upstream QEMU based tree used with Xen 4.2,
> (git://xenbits.xensource.com/qemu-upstream-unstable.git) is based on
> QEMU 1.0 and doesn't support PCI passthrough.
> 
> However upstream QEMU (git://git.qemu.org/qemu.git) does, and I am going
> to rebase on it early when the 4.3 development cycle opens.  So it is
> probably a good idea for you to try out upstream QEMU now.
> We have a wiki page on how to build and run upstream QEMU on
> xen-unstable:
> 
> http://wiki.xen.org/wiki/QEMU_Upstream

I got it, I'll work my patch for QEMU upstream.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-15  9:16 ` Jan Beulich
@ 2012-08-16 10:48   ` Hao, Xudong
  2012-08-16 11:04     ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Hao, Xudong @ 2012-08-16 10:48 UTC (permalink / raw)
  To: Jan Beulich; +Cc: ian.jackson, Zhang, Xiantao, xen-devel

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Wednesday, August 15, 2012 5:16 PM
> To: Hao, Xudong
> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org
> Subject: Re: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> 
> >>> On 15.08.12 at 08:54, Xudong Hao <xudong.hao@intel.com> wrote:
> > --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012 +0200
> > +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012 +0800
> > @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
> >  /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
> >  #define PCI_MEM_START       0xf0000000
> >  #define PCI_MEM_END         0xfc000000
> > +#define PCI_HIGH_MEM_START  0xa000000000ULL
> > +#define PCI_HIGH_MEM_END    0xf000000000ULL
> 
> With such hard coded values, this is hardly meant to be anything
> more than an RFC, is it? These values should not exist in the first
> place, and the variables below should be determined from VM
> characteristics (best would presumably be to allocate them top
> down from the end of physical address space, making sure you
> don't run into RAM).
> 
> > +#define PCI_MIN_MMIO_ADDR   0x80000000
> > +
> >  extern unsigned long pci_mem_start, pci_mem_end;
> >
> >
> > diff -r 663eb766cdde tools/firmware/hvmloader/pci.c
> > --- a/tools/firmware/hvmloader/pci.c	Tue Jul 24 17:02:04 2012 +0200
> > +++ b/tools/firmware/hvmloader/pci.c	Thu Jul 26 15:40:01 2012 +0800
> > @@ -31,24 +31,33 @@
> >  unsigned long pci_mem_start = PCI_MEM_START;
> >  unsigned long pci_mem_end = PCI_MEM_END;
> >
> > +uint64_t pci_high_mem_start = PCI_HIGH_MEM_START;
> > +uint64_t pci_high_mem_end = PCI_HIGH_MEM_END;
> > +
> >  enum virtual_vga virtual_vga = VGA_none;
> >  unsigned long igd_opregion_pgbase = 0;
> >
> >  void pci_setup(void)
> >  {
> > -    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
> > +    uint8_t is_64bar, using_64bar, bar64_relocate = 0;
> > +    uint32_t devfn, bar_reg, cmd, bar_data, bar_data_upper;
> > +    uint64_t base, bar_sz, bar_sz_upper, mmio_total = 0;
> >      uint32_t vga_devfn = 256;
> >      uint16_t class, vendor_id, device_id;
> >      unsigned int bar, pin, link, isa_irq;
> > +    int64_t mmio_left;
> >
> >      /* Resources assignable to PCI devices via BARs. */
> >      struct resource {
> > -        uint32_t base, max;
> > -    } *resource, mem_resource, io_resource;
> > +        uint64_t base, max;
> > +    } *resource, mem_resource, high_mem_resource, io_resource;
> >
> >      /* Create a list of device BARs in descending order of size. */
> >      struct bars {
> > -        uint32_t devfn, bar_reg, bar_sz;
> > +        uint32_t is_64bar;
> > +        uint32_t devfn;
> > +        uint32_t bar_reg;
> > +        uint64_t bar_sz;
> >      } *bars = (struct bars *)scratch_start;
> >      unsigned int i, nr_bars = 0;
> >
> > @@ -133,23 +142,35 @@ void pci_setup(void)
> >          /* Map the I/O memory and port resources. */
> >          for ( bar = 0; bar < 7; bar++ )
> >          {
> > +            bar_sz_upper = 0;
> >              bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
> >              if ( bar == 6 )
> >                  bar_reg = PCI_ROM_ADDRESS;
> >
> >              bar_data = pci_readl(devfn, bar_reg);
> > +            is_64bar = !!((bar_data & (PCI_BASE_ADDRESS_SPACE |
> > +                         PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> > +                         (PCI_BASE_ADDRESS_SPACE_MEMORY |
> > +                         PCI_BASE_ADDRESS_MEM_TYPE_64));
> >              pci_writel(devfn, bar_reg, ~0);
> >              bar_sz = pci_readl(devfn, bar_reg);
> >              pci_writel(devfn, bar_reg, bar_data);
> > +
> > +            if (is_64bar) {
> > +                bar_data_upper = pci_readl(devfn, bar_reg + 4);
> > +                pci_writel(devfn, bar_reg + 4, ~0);
> > +                bar_sz_upper = pci_readl(devfn, bar_reg + 4);
> > +                pci_writel(devfn, bar_reg + 4, bar_data_upper);
> > +                bar_sz = (bar_sz_upper << 32) | bar_sz;
> > +            }
> > +            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> > +                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> > +                       0xfffffffffffffff0 :
> 
> This should be a proper constant (or the masking could be
> done earlier, in which case you could continue to use the
> existing PCI_BASE_ADDRESS_MEM_MASK).
> 

So the PCI_BASE_ADDRESS_MEM_MASK can be defined as 'ULL'.

> > +                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> > +            bar_sz &= ~(bar_sz - 1);
> >              if ( bar_sz == 0 )
> >                  continue;
> >
> > -            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> > -                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> > -                       PCI_BASE_ADDRESS_MEM_MASK :
> > -                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> > -            bar_sz &= ~(bar_sz - 1);
> > -
> >              for ( i = 0; i < nr_bars; i++ )
> >                  if ( bars[i].bar_sz < bar_sz )
> >                      break;
> > @@ -157,6 +178,7 @@ void pci_setup(void)
> >              if ( i != nr_bars )
> >                  memmove(&bars[i+1], &bars[i], (nr_bars-i) *
> sizeof(*bars));
> >
> > +            bars[i].is_64bar = is_64bar;
> >              bars[i].devfn   = devfn;
> >              bars[i].bar_reg = bar_reg;
> >              bars[i].bar_sz  = bar_sz;
> > @@ -167,11 +189,8 @@ void pci_setup(void)
> >
> >              nr_bars++;
> >
> > -            /* Skip the upper-half of the address for a 64-bit BAR. */
> > -            if ( (bar_data & (PCI_BASE_ADDRESS_SPACE |
> > -
> PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> > -                 (PCI_BASE_ADDRESS_SPACE_MEMORY |
> > -                  PCI_BASE_ADDRESS_MEM_TYPE_64) )
> > +            /*The upper half is already calculated, skip it! */
> > +            if (is_64bar)
> >                  bar++;
> >          }
> >
> > @@ -193,10 +212,14 @@ void pci_setup(void)
> >          pci_writew(devfn, PCI_COMMAND, cmd);
> >      }
> >
> > -    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
> > -            ((pci_mem_start << 1) != 0) )
> > +    while ( mmio_total > (pci_mem_end - pci_mem_start) &&
> pci_mem_start )
> 
> The old code here could remain in place if ...
> 
> >          pci_mem_start <<= 1;
> >
> > +    if (!pci_mem_start) {
> 
> .. the condition here would get changed to the one used in the
> first part of the while above.
> 
> > +        bar64_relocate = 1;
> > +        pci_mem_start = PCI_MIN_MMIO_ADDR;
> 
> Which would then also make this assignment (and the
> constant) unnecessary.
> 

Cool, I'll leave the old code, and just add

if (pci_mem_start = PCI_MIN_MMIO_ADDR)
    bar64_relocate = 1;

> > +    }
> > +
> >      /* Relocate RAM that overlaps PCI space (in 64k-page chunks). */
> >      while ( (pci_mem_start >> PAGE_SHIFT) <
> hvm_info->low_mem_pgend )
> >      {
> > @@ -218,11 +241,15 @@ void pci_setup(void)
> >          hvm_info->high_mem_pgend += nr_pages;
> >      }
> >
> > +    high_mem_resource.base = pci_high_mem_start;
> > +    high_mem_resource.max = pci_high_mem_end;
> >      mem_resource.base = pci_mem_start;
> >      mem_resource.max = pci_mem_end;
> >      io_resource.base = 0xc000;
> >      io_resource.max = 0x10000;
> >
> > +    mmio_left = pci_mem_end - pci_mem_end;
> > +
> >      /* Assign iomem and ioport resources in descending order of size. */
> >      for ( i = 0; i < nr_bars; i++ )
> >      {
> > @@ -230,13 +257,21 @@ void pci_setup(void)
> >          bar_reg = bars[i].bar_reg;
> >          bar_sz  = bars[i].bar_sz;
> >
> > +        using_64bar = bars[i].is_64bar && bar64_relocate && (mmio_left
> <
> > bar_sz);
> >          bar_data = pci_readl(devfn, bar_reg);
> >
> >          if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
> >               PCI_BASE_ADDRESS_SPACE_MEMORY )
> >          {
> > -            resource = &mem_resource;
> > -            bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            if (using_64bar) {
> > +                resource = &high_mem_resource;
> > +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            }
> > +            else {
> > +                resource = &mem_resource;
> > +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            }
> > +            mmio_left -= bar_sz;
> >          }
> >          else
> >          {
> > @@ -244,13 +279,14 @@ void pci_setup(void)
> >              bar_data &= ~PCI_BASE_ADDRESS_IO_MASK;
> >          }
> >
> > -        base = (resource->base + bar_sz - 1) & ~(bar_sz - 1);
> > -        bar_data |= base;
> > +        base = (resource->base  + bar_sz - 1) & ~(uint64_t)(bar_sz - 1);
> > +        bar_data |= (uint32_t)base;
> > +        bar_data_upper = (uint32_t)(base >> 32);
> >          base += bar_sz;
> >
> >          if ( (base < resource->base) || (base > resource->max) )
> >          {
> > -            printf("pci dev %02x:%x bar %02x size %08x: no space for "
> > +            printf("pci dev %02x:%x bar %02x size %llx: no space for "
> >                     "resource!\n", devfn>>3, devfn&7, bar_reg,
> bar_sz);
> >              continue;
> >          }
> > @@ -258,7 +294,9 @@ void pci_setup(void)
> >          resource->base = base;
> >
> >          pci_writel(devfn, bar_reg, bar_data);
> > -        printf("pci dev %02x:%x bar %02x size %08x: %08x\n",
> > +        if (using_64bar)
> > +            pci_writel(devfn, bar_reg + 4, bar_data_upper);
> > +        printf("pci dev %02x:%x bar %02x size %llx: %08x\n",
> >                 devfn>>3, devfn&7, bar_reg, bar_sz, bar_data);
> >
> >          /* Now enable the memory or I/O mapping. */
> 
> Besides that, I'd encourage you to have an intermediate state
> between not using BARs above 4Gb and forcing all 64-bit ones
> beyond 4Gb for maximum compatibility - try fitting as many as
> you can into the low 2Gb. Perhaps this would warrant an option
> of some sort.
> 

I'll use the BAR size to decide whether a 64-bit BAR should be placed beyond 4G; specifically, a BAR larger than 512M should be treated as a high memory resource.

> Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-16 10:48   ` Hao, Xudong
@ 2012-08-16 11:04     ` Jan Beulich
  2012-08-17  9:24       ` Hao, Xudong
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2012-08-16 11:04 UTC (permalink / raw)
  To: Xudong Hao; +Cc: ian.jackson, Xiantao Zhang, xen-devel

>>> On 16.08.12 at 12:48, "Hao, Xudong" <xudong.hao@intel.com> wrote:
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> >>> On 15.08.12 at 08:54, Xudong Hao <xudong.hao@intel.com> wrote:
>> > --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012 +0200
>> > +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012 +0800
>> > @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
>> >  /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
>> >  #define PCI_MEM_START       0xf0000000
>> >  #define PCI_MEM_END         0xfc000000
>> > +#define PCI_HIGH_MEM_START  0xa000000000ULL
>> > +#define PCI_HIGH_MEM_END    0xf000000000ULL
>> 
>> With such hard coded values, this is hardly meant to be anything
>> more than an RFC, is it? These values should not exist in the first
>> place, and the variables below should be determined from VM
>> characteristics (best would presumably be to allocate them top
>> down from the end of physical address space, making sure you
>> don't run into RAM).

No comment on this part?

>> > @@ -133,23 +142,35 @@ void pci_setup(void)
>> >          /* Map the I/O memory and port resources. */
>> >          for ( bar = 0; bar < 7; bar++ )
>> >          {
>> > +            bar_sz_upper = 0;
>> >              bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
>> >              if ( bar == 6 )
>> >                  bar_reg = PCI_ROM_ADDRESS;
>> >
>> >              bar_data = pci_readl(devfn, bar_reg);
>> > +            is_64bar = !!((bar_data & (PCI_BASE_ADDRESS_SPACE |
>> > +                         PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
>> > +                         (PCI_BASE_ADDRESS_SPACE_MEMORY |
>> > +                         PCI_BASE_ADDRESS_MEM_TYPE_64));
>> >              pci_writel(devfn, bar_reg, ~0);
>> >              bar_sz = pci_readl(devfn, bar_reg);
>> >              pci_writel(devfn, bar_reg, bar_data);
>> > +
>> > +            if (is_64bar) {
>> > +                bar_data_upper = pci_readl(devfn, bar_reg + 4);
>> > +                pci_writel(devfn, bar_reg + 4, ~0);
>> > +                bar_sz_upper = pci_readl(devfn, bar_reg + 4);
>> > +                pci_writel(devfn, bar_reg + 4, bar_data_upper);
>> > +                bar_sz = (bar_sz_upper << 32) | bar_sz;
>> > +            }
>> > +            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
>> > +                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
>> > +                       0xfffffffffffffff0 :
>> 
>> This should be a proper constant (or the masking could be
>> done earlier, in which case you could continue to use the
>> existing PCI_BASE_ADDRESS_MEM_MASK).
>> 
> 
> So the PCI_BASE_ADDRESS_MEM_MASK can be defined as 'ULL'.

I'd recommend not touching existing variables. As said before,
by re-ordering the code you can still use the constant as-is.

>> > @@ -193,10 +212,14 @@ void pci_setup(void)
>> >          pci_writew(devfn, PCI_COMMAND, cmd);
>> >      }
>> >
>> > -    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
>> > -            ((pci_mem_start << 1) != 0) )
>> > +    while ( mmio_total > (pci_mem_end - pci_mem_start) &&
>> pci_mem_start )
>> 
>> The old code here could remain in place if ...
>> 
>> >          pci_mem_start <<= 1;
>> >
>> > +    if (!pci_mem_start) {
>> 
>> .. the condition here would get changed to the one used in the
>> first part of the while above.
>> 
>> > +        bar64_relocate = 1;
>> > +        pci_mem_start = PCI_MIN_MMIO_ADDR;
>> 
>> Which would then also make this assignment (and the
>> constant) unnecessary.
>> 
> 
> Cool, I'll leave the old code, and just add
> 
> if (pci_mem_start = PCI_MIN_MMIO_ADDR)
>     bar64_relocate = 1;

No, that's not correct. It's the other half of the while()'s condition
that you need to re-check here.

Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-16 11:04     ` Jan Beulich
@ 2012-08-17  9:24       ` Hao, Xudong
  2012-08-17  9:35         ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Hao, Xudong @ 2012-08-17  9:24 UTC (permalink / raw)
  To: Jan Beulich; +Cc: ian.jackson, Zhang, Xiantao, xen-devel

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Thursday, August 16, 2012 7:04 PM
> To: Hao, Xudong
> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org
> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> 
> >>> On 16.08.12 at 12:48, "Hao, Xudong" <xudong.hao@intel.com> wrote:
> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> >>> On 15.08.12 at 08:54, Xudong Hao <xudong.hao@intel.com> wrote:
> >> > --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012
> +0200
> >> > +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012
> +0800
> >> > @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
> >> >  /* MMIO hole: Hardcoded defaults, which can be dynamically expanded.
> */
> >> >  #define PCI_MEM_START       0xf0000000
> >> >  #define PCI_MEM_END         0xfc000000
> >> > +#define PCI_HIGH_MEM_START  0xa000000000ULL
> >> > +#define PCI_HIGH_MEM_END    0xf000000000ULL
> >>
> >> With such hard coded values, this is hardly meant to be anything
> >> more than an RFC, is it? These values should not exist in the first
> >> place, and the variables below should be determined from VM
> >> characteristics (best would presumably be to allocate them top
> >> down from the end of physical address space, making sure you
> >> don't run into RAM).
> 
> No comment on this part?
> 

The MMIO high memory starts at 640G, which is already very high, so I think we don't need to allocate MMIO top-down from the top of the physical address space. Another thing you remind me of: maybe we can skip this high MMIO hole when setting up the p2m table during HVM build in libxc (setup_guest()), like the handling of the MMIO hole below 4G.

> >> > @@ -133,23 +142,35 @@ void pci_setup(void)
> >> >          /* Map the I/O memory and port resources. */
> >> >          for ( bar = 0; bar < 7; bar++ )
> >> >          {
> >> > +            bar_sz_upper = 0;
> >> >              bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
> >> >              if ( bar == 6 )
> >> >                  bar_reg = PCI_ROM_ADDRESS;
> >> >
> >> >              bar_data = pci_readl(devfn, bar_reg);
> >> > +            is_64bar = !!((bar_data & (PCI_BASE_ADDRESS_SPACE |
> >> > +                         PCI_BASE_ADDRESS_MEM_TYPE_MASK))
> ==
> >> > +                         (PCI_BASE_ADDRESS_SPACE_MEMORY |
> >> > +                         PCI_BASE_ADDRESS_MEM_TYPE_64));
> >> >              pci_writel(devfn, bar_reg, ~0);
> >> >              bar_sz = pci_readl(devfn, bar_reg);
> >> >              pci_writel(devfn, bar_reg, bar_data);
> >> > +
> >> > +            if (is_64bar) {
> >> > +                bar_data_upper = pci_readl(devfn, bar_reg + 4);
> >> > +                pci_writel(devfn, bar_reg + 4, ~0);
> >> > +                bar_sz_upper = pci_readl(devfn, bar_reg + 4);
> >> > +                pci_writel(devfn, bar_reg + 4, bar_data_upper);
> >> > +                bar_sz = (bar_sz_upper << 32) | bar_sz;
> >> > +            }
> >> > +            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> >> > +                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> >> > +                       0xfffffffffffffff0 :
> >>
> >> This should be a proper constant (or the masking could be
> >> done earlier, in which case you could continue to use the
> >> existing PCI_BASE_ADDRESS_MEM_MASK).
> >>
> >
> > So the PCI_BASE_ADDRESS_MEM_MASK can be defined as 'ULL'.
> 
> I'd recommend not touching existing variables. As said before,
> by re-ordering the code you can still use the constant as-is.
> 

Mmh, I misunderstood you in the previous mail. 
So you mean we should put the mask before "if (is_64bar)", so that we don't need to change the old mask code, right?

> >> > @@ -193,10 +212,14 @@ void pci_setup(void)
> >> >          pci_writew(devfn, PCI_COMMAND, cmd);
> >> >      }
> >> >
> >> > -    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
> >> > -            ((pci_mem_start << 1) != 0) )
> >> > +    while ( mmio_total > (pci_mem_end - pci_mem_start) &&
> >> pci_mem_start )
> >>
> >> The old code here could remain in place if ...
> >>
> >> >          pci_mem_start <<= 1;
> >> >
> >> > +    if (!pci_mem_start) {
> >>
> >> .. the condition here would get changed to the one used in the
> >> first part of the while above.
> >>
> >> > +        bar64_relocate = 1;
> >> > +        pci_mem_start = PCI_MIN_MMIO_ADDR;
> >>
> >> Which would then also make this assignment (and the
> >> constant) unnecessary.
> >>
> >
> > Cool, I'll leave the old code, and just add
> >
> > if (pci_mem_start = PCI_MIN_MMIO_ADDR)
> >     bar64_relocate = 1;
> 
> No, that's not correct. It's the other half of the while()'s condition
> that you need to re-check here.
> 

Oh, so it can avoid PCI_MIN_MMIO_ADDR usage, I'll use your suggestion.

-Thanks,
Xudong

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-17  9:24       ` Hao, Xudong
@ 2012-08-17  9:35         ` Jan Beulich
  2012-08-20  3:22           ` Hao, Xudong
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2012-08-17  9:35 UTC (permalink / raw)
  To: Xudong Hao; +Cc: ian.jackson, Xiantao Zhang, xen-devel

>>> On 17.08.12 at 11:24, "Hao, Xudong" <xudong.hao@intel.com> wrote:
>>  -----Original Message-----
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: Thursday, August 16, 2012 7:04 PM
>> To: Hao, Xudong
>> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org 
>> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
>> 
>> >>> On 16.08.12 at 12:48, "Hao, Xudong" <xudong.hao@intel.com> wrote:
>> >> From: Jan Beulich [mailto:JBeulich@suse.com]
>> >> >>> On 15.08.12 at 08:54, Xudong Hao <xudong.hao@intel.com> wrote:
>> >> > --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012
>> +0200
>> >> > +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012
>> +0800
>> >> > @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
>> >> >  /* MMIO hole: Hardcoded defaults, which can be dynamically expanded.
>> */
>> >> >  #define PCI_MEM_START       0xf0000000
>> >> >  #define PCI_MEM_END         0xfc000000
>> >> > +#define PCI_HIGH_MEM_START  0xa000000000ULL
>> >> > +#define PCI_HIGH_MEM_END    0xf000000000ULL
>> >>
>> >> With such hard coded values, this is hardly meant to be anything
>> >> more than an RFC, is it? These values should not exist in the first
>> >> place, and the variables below should be determined from VM
>> >> characteristics (best would presumably be to allocate them top
>> >> down from the end of physical address space, making sure you
>> >> don't run into RAM).
>> 
>> No comment on this part?
>> 
> 
> The MMIO high memory start from 640G, it's already very high, I think we 
> don't need allocate MMIO top down from the high of physical address space. 
> Another thing you remind me that maybe we can skip this high MMIO hole when 
> setup p2m table in build hvm of libxc(setup_guest()), like the handles for 
> MMIO below 4G.

That would be an option, but any fixed address you pick here
will look arbitrary (and will sooner or later raise questions). Plus
by allowing the RAM above 4G to remain contiguous even for
huge guests, we'd retain maximum compatibility with all sorts
of guest OSes. Furthermore, did you check that we in all cases
can use 40-bit (guest) physical addresses (I would think that 36
is the biggest common value). Bottom line - please don't use a
fixed number here.

>> >> > @@ -133,23 +142,35 @@ void pci_setup(void)
>> >> >          /* Map the I/O memory and port resources. */
>> >> >          for ( bar = 0; bar < 7; bar++ )
>> >> >          {
>> >> > +            bar_sz_upper = 0;
>> >> >              bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
>> >> >              if ( bar == 6 )
>> >> >                  bar_reg = PCI_ROM_ADDRESS;
>> >> >
>> >> >              bar_data = pci_readl(devfn, bar_reg);
>> >> > +            is_64bar = !!((bar_data & (PCI_BASE_ADDRESS_SPACE |
>> >> > +                         PCI_BASE_ADDRESS_MEM_TYPE_MASK))
>> ==
>> >> > +                         (PCI_BASE_ADDRESS_SPACE_MEMORY |
>> >> > +                         PCI_BASE_ADDRESS_MEM_TYPE_64));
>> >> >              pci_writel(devfn, bar_reg, ~0);
>> >> >              bar_sz = pci_readl(devfn, bar_reg);
>> >> >              pci_writel(devfn, bar_reg, bar_data);
>> >> > +
>> >> > +            if (is_64bar) {
>> >> > +                bar_data_upper = pci_readl(devfn, bar_reg + 4);
>> >> > +                pci_writel(devfn, bar_reg + 4, ~0);
>> >> > +                bar_sz_upper = pci_readl(devfn, bar_reg + 4);
>> >> > +                pci_writel(devfn, bar_reg + 4, bar_data_upper);
>> >> > +                bar_sz = (bar_sz_upper << 32) | bar_sz;
>> >> > +            }
>> >> > +            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
>> >> > +                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
>> >> > +                       0xfffffffffffffff0 :
>> >>
>> >> This should be a proper constant (or the masking could be
>> >> done earlier, in which case you could continue to use the
>> >> existing PCI_BASE_ADDRESS_MEM_MASK).
>> >>
>> >
>> > So the PCI_BASE_ADDRESS_MEM_MASK can be defined as 'ULL'.
>> 
>> I'd recommend not touching existing variables. As said before,
>> by re-ordering the code you can still use the constant as-is.
>> 
> 
> Mmh, I misunderstood you in the previous mail. 
> So you means we put the mask before "if(is_64bar)", and then we do not do 
> changes for old mask code, right?

Exactly.

Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-17  9:35         ` Jan Beulich
@ 2012-08-20  3:22           ` Hao, Xudong
  2012-08-20 10:45             ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Hao, Xudong @ 2012-08-20  3:22 UTC (permalink / raw)
  To: Jan Beulich; +Cc: ian.jackson, Zhang, Xiantao, xen-devel


> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Friday, August 17, 2012 5:36 PM
> To: Hao, Xudong
> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org
> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> 
> >>> On 17.08.12 at 11:24, "Hao, Xudong" <xudong.hao@intel.com> wrote:
> >>  -----Original Message-----
> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> Sent: Thursday, August 16, 2012 7:04 PM
> >> To: Hao, Xudong
> >> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org
> >> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> >>
> >> >>> On 16.08.12 at 12:48, "Hao, Xudong" <xudong.hao@intel.com> wrote:
> >> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> >> >>> On 15.08.12 at 08:54, Xudong Hao <xudong.hao@intel.com> wrote:
> >> >> > --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012
> >> +0200
> >> >> > +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012
> >> +0800
> >> >> > @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
> >> >> >  /* MMIO hole: Hardcoded defaults, which can be dynamically
> expanded.
> >> */
> >> >> >  #define PCI_MEM_START       0xf0000000
> >> >> >  #define PCI_MEM_END         0xfc000000
> >> >> > +#define PCI_HIGH_MEM_START  0xa000000000ULL
> >> >> > +#define PCI_HIGH_MEM_END    0xf000000000ULL
> >> >>
> >> >> With such hard coded values, this is hardly meant to be anything
> >> >> more than an RFC, is it? These values should not exist in the first
> >> >> place, and the variables below should be determined from VM
> >> >> characteristics (best would presumably be to allocate them top
> >> >> down from the end of physical address space, making sure you
> >> >> don't run into RAM).
> >>
> >> No comment on this part?
> >>
> >
> > The MMIO high memory start from 640G, it's already very high, I think we
> > don't need allocate MMIO top down from the high of physical address space.
> > Another thing you remind me that maybe we can skip this high MMIO hole
> when
> > setup p2m table in build hvm of libxc(setup_guest()), like the handles for
> > MMIO below 4G.
> 
> That would be an option, but any fixed address you pick here
> will look arbitrary (and will sooner or later raise questions). Plus
> by allowing the RAM above 4G to remain contiguous even for
> huge guests, we'd retain maximum compatibility with all sorts
> of guest OSes. Furthermore, did you check that we in all cases
> can use 40-bit (guest) physical addresses (I would think that 36
> is the biggest common value). Bottom line - please don't use a
> fixed number here.
> 

Hi, Jan

Where is the 36-bit physical address limit present? Could you help point it out in the current Xen code?

Thanks,
-Xudong

> Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-20  3:22           ` Hao, Xudong
@ 2012-08-20 10:45             ` Jan Beulich
  2012-08-22  1:03               ` Hao, Xudong
  2012-08-23 10:05               ` Zhang, Xiantao
  0 siblings, 2 replies; 21+ messages in thread
From: Jan Beulich @ 2012-08-20 10:45 UTC (permalink / raw)
  To: Xudong Hao; +Cc: ian.jackson, Xiantao Zhang, xen-devel

>>> On 20.08.12 at 05:22, "Hao, Xudong" <xudong.hao@intel.com> wrote:

>> -----Original Message-----
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: Friday, August 17, 2012 5:36 PM
>> To: Hao, Xudong
>> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org 
>> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
>> 
>> >>> On 17.08.12 at 11:24, "Hao, Xudong" <xudong.hao@intel.com> wrote:
>> >>  -----Original Message-----
>> >> From: Jan Beulich [mailto:JBeulich@suse.com]
>> >> Sent: Thursday, August 16, 2012 7:04 PM
>> >> To: Hao, Xudong
>> >> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org 
>> >> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
>> >>
>> >> >>> On 16.08.12 at 12:48, "Hao, Xudong" <xudong.hao@intel.com> wrote:
>> >> >> From: Jan Beulich [mailto:JBeulich@suse.com]
>> >> >> >>> On 15.08.12 at 08:54, Xudong Hao <xudong.hao@intel.com> wrote:
>> >> >> > --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012
>> >> +0200
>> >> >> > +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012
>> >> +0800
>> >> >> > @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
>> >> >> >  /* MMIO hole: Hardcoded defaults, which can be dynamically
>> expanded.
>> >> */
>> >> >> >  #define PCI_MEM_START       0xf0000000
>> >> >> >  #define PCI_MEM_END         0xfc000000
>> >> >> > +#define PCI_HIGH_MEM_START  0xa000000000ULL
>> >> >> > +#define PCI_HIGH_MEM_END    0xf000000000ULL
>> >> >>
>> >> >> With such hard coded values, this is hardly meant to be anything
>> >> >> more than an RFC, is it? These values should not exist in the first
>> >> >> place, and the variables below should be determined from VM
>> >> >> characteristics (best would presumably be to allocate them top
>> >> >> down from the end of physical address space, making sure you
>> >> >> don't run into RAM).
>> >>
>> >> No comment on this part?
>> >>
>> >
>> > The MMIO high memory start from 640G, it's already very high, I think we
>> > don't need allocate MMIO top down from the high of physical address space.
>> > Another thing you remind me that maybe we can skip this high MMIO hole
>> when
>> > setup p2m table in build hvm of libxc(setup_guest()), like the handles for
>> > MMIO below 4G.
>> 
>> That would be an option, but any fixed address you pick here
>> will look arbitrary (and will sooner or later raise questions). Plus
>> by allowing the RAM above 4G to remain contiguous even for
>> huge guests, we'd retain maximum compatibility with all sorts
>> of guest OSes. Furthermore, did you check that we in all cases
>> can use 40-bit (guest) physical addresses (I would think that 36
>> is the biggest common value). Bottom line - please don't use a
>> fixed number here.
>> 
> 
> Where does present the 36-bit physical addresses limit, could you help to 
> point out in the current Xen code?

Look at xen/arch/x86/hvm/mtrr.c, e.g. hvm_mtrr_pat_init() or
mtrr_var_range_msr_set().

Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-20 10:45             ` Jan Beulich
@ 2012-08-22  1:03               ` Hao, Xudong
  2012-08-22  7:32                 ` Jan Beulich
  2012-08-23 10:05               ` Zhang, Xiantao
  1 sibling, 1 reply; 21+ messages in thread
From: Hao, Xudong @ 2012-08-22  1:03 UTC (permalink / raw)
  To: Jan Beulich; +Cc: ian.jackson, Zhang, Xiantao, xen-devel

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Monday, August 20, 2012 6:45 PM
> To: Hao, Xudong
> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org
> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> 
> >>> On 20.08.12 at 05:22, "Hao, Xudong" <xudong.hao@intel.com> wrote:
> 
> >> -----Original Message-----
> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> Sent: Friday, August 17, 2012 5:36 PM
> >> To: Hao, Xudong
> >> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org
> >> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> >>
> >> >>> On 17.08.12 at 11:24, "Hao, Xudong" <xudong.hao@intel.com> wrote:
> >> >>  -----Original Message-----
> >> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> >> Sent: Thursday, August 16, 2012 7:04 PM
> >> >> To: Hao, Xudong
> >> >> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org
> >> >> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar
> support
> >> >>
> >> >> >>> On 16.08.12 at 12:48, "Hao, Xudong" <xudong.hao@intel.com>
> wrote:
> >> >> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> >> >> >>> On 15.08.12 at 08:54, Xudong Hao <xudong.hao@intel.com>
> wrote:
> >> >> >> > --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04 2012
> >> >> +0200
> >> >> >> > +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01 2012
> >> >> +0800
> >> >> >> > @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
> >> >> >> >  /* MMIO hole: Hardcoded defaults, which can be dynamically
> >> expanded.
> >> >> */
> >> >> >> >  #define PCI_MEM_START       0xf0000000
> >> >> >> >  #define PCI_MEM_END         0xfc000000
> >> >> >> > +#define PCI_HIGH_MEM_START  0xa000000000ULL
> >> >> >> > +#define PCI_HIGH_MEM_END    0xf000000000ULL
> >> >> >>
> >> >> >> With such hard coded values, this is hardly meant to be anything
> >> >> >> more than an RFC, is it? These values should not exist in the first
> >> >> >> place, and the variables below should be determined from VM
> >> >> >> characteristics (best would presumably be to allocate them top
> >> >> >> down from the end of physical address space, making sure you
> >> >> >> don't run into RAM).
> >> >>
> >> >> No comment on this part?
> >> >>
> >> >
> >> > The MMIO high memory start from 640G, it's already very high, I think we
> >> > don't need allocate MMIO top down from the high of physical address
> space.
> >> > Another thing you remind me that maybe we can skip this high MMIO hole
> >> when
> >> > setup p2m table in build hvm of libxc(setup_guest()), like the handles for
> >> > MMIO below 4G.
> >>
> >> That would be an option, but any fixed address you pick here
> >> will look arbitrary (and will sooner or later raise questions). Plus
> >> by allowing the RAM above 4G to remain contiguous even for
> >> huge guests, we'd retain maximum compatibility with all sorts
> >> of guest OSes. Furthermore, did you check that we in all cases
> >> can use 40-bit (guest) physical addresses (I would think that 36
> >> is the biggest common value). Bottom line - please don't use a
> >> fixed number here.
> >>
> >
> > Where does present the 36-bit physical addresses limit, could you help to
> > point out in the current Xen code?
> 
> Look at xen/arch/x86/hvm/mtrr.c, e.g. hvm_mtrr_pat_init() or
> mtrr_var_range_msr_set().

So if the common 36-bit (guest) physical address limit cannot be changed, can we allocate top down from 64G? Jan, do you have any suggestion? 

Thanks,
-Xudong

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-22  1:03               ` Hao, Xudong
@ 2012-08-22  7:32                 ` Jan Beulich
  2012-08-22  9:59                   ` Hao, Xudong
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2012-08-22  7:32 UTC (permalink / raw)
  To: xudong.hao; +Cc: ian.jackson, xiantao.zhang, xen-devel

>>> "Hao, Xudong" <xudong.hao@intel.com> 08/22/12 3:03 AM >>>
>> > Where does present the 36-bit physical addresses limit, could you help to
>> > point out in the current Xen code?
>> 
>> Look at xen/arch/x86/hvm/mtrr.c, e.g. hvm_mtrr_pat_init() or
>> mtrr_var_range_msr_set().
>
> So if common 36-bit(guest) physical address could not change, can we use
> top down from 64G, Jan, do you have any suggestion? 

Sorry, I already said that I think the only viable option is top down from
top of physical address space. No new address space holes please if at
all possible - just do it in ways real firmware would do it (which would
unlikely alter the RAM layout for this purpose).

Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-22  7:32                 ` Jan Beulich
@ 2012-08-22  9:59                   ` Hao, Xudong
  2012-08-22 13:29                     ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Hao, Xudong @ 2012-08-22  9:59 UTC (permalink / raw)
  To: Jan Beulich; +Cc: ian.jackson, Zhang, Xiantao, xen-devel

> -----Original Message-----
> From: Jan Beulich [mailto:jbeulich@suse.com]
> Sent: Wednesday, August 22, 2012 3:32 PM
> To: Hao, Xudong
> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org
> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> 
> >>> "Hao, Xudong" <xudong.hao@intel.com> 08/22/12 3:03 AM >>>
> >> > Where does present the 36-bit physical addresses limit, could you help to
> >> > point out in the current Xen code?
> >>
> >> Look at xen/arch/x86/hvm/mtrr.c, e.g. hvm_mtrr_pat_init() or
> >> mtrr_var_range_msr_set().
> >
> > So if common 36-bit(guest) physical address could not change, can we use
> > top down from 64G, Jan, do you have any suggestion?
> 
> Sorry, I already said that I think the only viable option is top down from
> top of physical address space. No new address space holes please if at
> all possible - just do it in ways real firmware would do it (which would
> unlikely alter the RAM layout for this purpose).
> 

If a PCIe device has a BAR size of 64G or more, how can it be handled within the current 36-bit limit — will we consider extending the guest physical address width? 
On real hardware whose physical address space is larger than 40 bits, this issue does not arise.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-22  9:59                   ` Hao, Xudong
@ 2012-08-22 13:29                     ` Jan Beulich
  0 siblings, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2012-08-22 13:29 UTC (permalink / raw)
  To: Xudong Hao; +Cc: ian.jackson, Xiantao Zhang, xen-devel

>>> On 22.08.12 at 11:59, "Hao, Xudong" <xudong.hao@intel.com> wrote:
>>  -----Original Message-----
>> From: Jan Beulich [mailto:jbeulich@suse.com]
>> Sent: Wednesday, August 22, 2012 3:32 PM
>> To: Hao, Xudong
>> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org 
>> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
>> 
>> >>> "Hao, Xudong" <xudong.hao@intel.com> 08/22/12 3:03 AM >>>
>> >> > Where does present the 36-bit physical addresses limit, could you help to
>> >> > point out in the current Xen code?
>> >>
>> >> Look at xen/arch/x86/hvm/mtrr.c, e.g. hvm_mtrr_pat_init() or
>> >> mtrr_var_range_msr_set().
>> >
>> > So if common 36-bit(guest) physical address could not change, can we use
>> > top down from 64G, Jan, do you have any suggestion?
>> 
>> Sorry, I already said that I think the only viable option is top down from
>> top of physical address space. No new address space holes please if at
>> all possible - just do it in ways real firmware would do it (which would
>> unlikely alter the RAM layout for this purpose).
>> 
> 
> If the PCIe device has 64G bar size or more, how to do in current 36 bit, 

Such a device makes no sense on a machine with 36-bit physical
address limit.

> will we consider to extend the guest physical address? 

???

Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] xen/tools: Add 64 bits big bar support
  2012-08-20 10:45             ` Jan Beulich
  2012-08-22  1:03               ` Hao, Xudong
@ 2012-08-23 10:05               ` Zhang, Xiantao
  1 sibling, 0 replies; 21+ messages in thread
From: Zhang, Xiantao @ 2012-08-23 10:05 UTC (permalink / raw)
  To: Jan Beulich, Hao, Xudong; +Cc: ian.jackson, Zhang, Xiantao, xen-devel



> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Monday, August 20, 2012 6:45 PM
> To: Hao, Xudong
> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao; xen-devel@lists.xen.org
> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> 
> >>> On 20.08.12 at 05:22, "Hao, Xudong" <xudong.hao@intel.com> wrote:
> 
> >> -----Original Message-----
> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> Sent: Friday, August 17, 2012 5:36 PM
> >> To: Hao, Xudong
> >> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao;
> >> xen-devel@lists.xen.org
> >> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar
> >> support
> >>
> >> >>> On 17.08.12 at 11:24, "Hao, Xudong" <xudong.hao@intel.com> wrote:
> >> >>  -----Original Message-----
> >> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> >> Sent: Thursday, August 16, 2012 7:04 PM
> >> >> To: Hao, Xudong
> >> >> Cc: ian.jackson@eu.citrix.com; Zhang, Xiantao;
> >> >> xen-devel@lists.xen.org
> >> >> Subject: RE: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big
> >> >> bar support
> >> >>
> >> >> >>> On 16.08.12 at 12:48, "Hao, Xudong" <xudong.hao@intel.com>
> wrote:
> >> >> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> >> >> >>> On 15.08.12 at 08:54, Xudong Hao <xudong.hao@intel.com>
> wrote:
> >> >> >> > --- a/tools/firmware/hvmloader/config.h	Tue Jul 24 17:02:04
> 2012
> >> >> +0200
> >> >> >> > +++ b/tools/firmware/hvmloader/config.h	Thu Jul 26 15:40:01
> 2012
> >> >> +0800
> >> >> >> > @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
> >> >> >> >  /* MMIO hole: Hardcoded defaults, which can be dynamically
> >> expanded.
> >> >> */
> >> >> >> >  #define PCI_MEM_START       0xf0000000
> >> >> >> >  #define PCI_MEM_END         0xfc000000
> >> >> >> > +#define PCI_HIGH_MEM_START  0xa000000000ULL
> >> >> >> > +#define PCI_HIGH_MEM_END    0xf000000000ULL
> >> >> >>
> >> >> >> With such hard coded values, this is hardly meant to be
> >> >> >> anything more than an RFC, is it? These values should not exist
> >> >> >> in the first place, and the variables below should be
> >> >> >> determined from VM characteristics (best would presumably be to
> >> >> >> allocate them top down from the end of physical address space,
> >> >> >> making sure you don't run into RAM).
> >> >>
> >> >> No comment on this part?
> >> >>
> >> >
> >> > The MMIO high memory start from 640G, it's already very high, I
> >> > think we don't need allocate MMIO top down from the high of physical
> address space.
> >> > Another thing you remind me that maybe we can skip this high MMIO
> >> > hole
> >> when
> >> > setup p2m table in build hvm of libxc(setup_guest()), like the
> >> > handles for MMIO below 4G.
> >>
> >> That would be an option, but any fixed address you pick here will
> >> look arbitrary (and will sooner or later raise questions). Plus by
> >> allowing the RAM above 4G to remain contiguous even for huge guests,
> >> we'd retain maximum compatibility with all sorts of guest OSes.
> >> Furthermore, did you check that we in all cases can use 40-bit
> >> (guest) physical addresses (I would think that 36 is the biggest
> >> common value). Bottom line - please don't use a fixed number here.
> >>
> >
> > Where does present the 36-bit physical addresses limit, could you help
> > to point out in the current Xen code?
> 
> Look at xen/arch/x86/hvm/mtrr.c, e.g. hvm_mtrr_pat_init() or
> mtrr_var_range_msr_set().

I think the BIOS may fix it — you can refer to tools/firmware/hvmloader/cacheattr.c; the BIOS can set the right physical address bit number after checking. 
Xiantao

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2012-08-23 10:05 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-15  6:54 [PATCH 1/3] xen/tools: Add 64 bits big bar support Xudong Hao
2012-08-15  6:54 ` [PATCH 3/3] qemu-xen: Add 64 bits big bar support on qemu xen Xudong Hao
2012-08-15  8:18 ` [PATCH 1/3] xen/tools: Add 64 bits big bar support Pasi Kärkkäinen
2012-08-15  9:17   ` Hao, Xudong
2012-08-15 10:21     ` Pasi Kärkkäinen
2012-08-15  9:16 ` Jan Beulich
2012-08-16 10:48   ` Hao, Xudong
2012-08-16 11:04     ` Jan Beulich
2012-08-17  9:24       ` Hao, Xudong
2012-08-17  9:35         ` Jan Beulich
2012-08-20  3:22           ` Hao, Xudong
2012-08-20 10:45             ` Jan Beulich
2012-08-22  1:03               ` Hao, Xudong
2012-08-22  7:32                 ` Jan Beulich
2012-08-22  9:59                   ` Hao, Xudong
2012-08-22 13:29                     ` Jan Beulich
2012-08-23 10:05               ` Zhang, Xiantao
2012-08-15 10:20 ` Stefano Stabellini
2012-08-16  2:57   ` Hao, Xudong
2012-08-16 10:10     ` Stefano Stabellini
2012-08-16 10:24       ` Hao, Xudong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.