TODO: currently dependent upon IOMMU being enabled (due to bus scan only happening in that case) TODO: SR-IOV (and alike?) devices not visible at time of MMCFG registration TODO: remove //temp-s --- unstable.orig/xen/arch/x86/mm.c +++ unstable/xen/arch/x86/mm.c @@ -5208,6 +5208,7 @@ int ptwr_do_page_fault(struct vcpu *v, u /* We are looking only for read-only mappings of p.t. pages. */ if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) || + rangeset_contains_singleton(mmio_ro_ranges, l1e_get_pfn(pte)) || !get_page_from_pagenr(l1e_get_pfn(pte), d) ) goto bail; @@ -5255,6 +5256,7 @@ int ptwr_do_page_fault(struct vcpu *v, u struct mmio_ro_emulate_ctxt { struct x86_emulate_ctxt ctxt; unsigned long cr2; + unsigned int seg, bdf; }; static int mmio_ro_emulated_read( @@ -5294,6 +5296,50 @@ static const struct x86_emulate_ops mmio .write = mmio_ro_emulated_write, }; +static int mmio_intercept_write( + enum x86_segment seg, + unsigned long offset, + void *p_data, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + struct mmio_ro_emulate_ctxt *mmio_ctxt = + container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt); + + /* + * Only allow naturally-aligned stores no wider than 4 bytes to the + * original %cr2 address. + */ + if ( ((bytes | offset) & (bytes - 1)) || bytes > 4 || + offset != mmio_ctxt->cr2 ) + { + MEM_LOG("mmio_intercept: bad write (cr2=%lx, addr=%lx, bytes=%u)", + mmio_ctxt->cr2, offset, bytes); + return X86EMUL_UNHANDLEABLE; + } + + offset &= 0xfff; +printk("%04x:%02x:%02x.%u write [%lx] %0*x\n",//temp + mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf), PCI_SLOT(mmio_ctxt->bdf), PCI_FUNC(mmio_ctxt->bdf),//temp + offset, bytes * 2, *(uint32_t*)p_data);//temp + pci_conf_write_intercept(mmio_ctxt->seg, mmio_ctxt->bdf, offset, bytes, + p_data); +printk("%04x:%02x:%02x.%u write [%lx]=%0*x\n",//temp + mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf), PCI_SLOT(mmio_ctxt->bdf), PCI_FUNC(mmio_ctxt->bdf),//temp + offset, bytes * 2, *(uint32_t*)p_data);//temp + pci_mmcfg_write(mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf), + PCI_DEVFN2(mmio_ctxt->bdf), offset, bytes, + *(uint32_t *)p_data); + + return X86EMUL_OKAY; +} + +static const struct x86_emulate_ops mmio_intercept_ops = { + .read = mmio_ro_emulated_read, + .insn_fetch = ptwr_emulated_read, + .write = mmio_intercept_write, +}; + /* Check if guest is trying to modify a r/o MMIO page. */ int mmio_ro_do_page_fault(struct vcpu *v, unsigned long addr, struct cpu_user_regs *regs) @@ -5308,6 +5354,7 @@ int mmio_ro_do_page_fault(struct vcpu *v .ctxt.swint_emulate = x86_swint_emulate_none, .cr2 = addr }; + const unsigned long *map; int rc; /* Attempt to read the PTE that maps the VA being accessed. */ @@ -5332,7 +5379,14 @@ int mmio_ro_do_page_fault(struct vcpu *v if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) ) return 0; - rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops); + if ( pci_mmcfg_decode(mfn, &mmio_ro_ctxt.seg, &mmio_ro_ctxt.bdf) && + (map = pci_get_intercept_map(mmio_ro_ctxt.seg)) != NULL && + test_bit(mmio_ro_ctxt.bdf, map) && + ((map = pci_get_ro_map(mmio_ro_ctxt.seg)) == NULL || + !test_bit(mmio_ro_ctxt.bdf, map)) ) + rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_intercept_ops); + else + rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops); return rc != X86EMUL_UNHANDLEABLE ? EXCRET_fault_fixed : 0; } --- unstable.orig/xen/arch/x86/pci.c +++ unstable/xen/arch/x86/pci.c @@ -75,6 +75,15 @@ int pci_conf_write_intercept(unsigned in struct pci_dev *pdev; int rc = 0; + if ( !data ) /* probe */ + { + ASSERT(!~reg && !size); + return pci_find_cap_offset(seg, PCI_BUS(bdf), PCI_SLOT(bdf), + PCI_FUNC(bdf), PCI_CAP_ID_MSI) || + pci_find_cap_offset(seg, PCI_BUS(bdf), PCI_SLOT(bdf), + PCI_FUNC(bdf), PCI_CAP_ID_MSIX); + } + /* * Avoid expensive operations when no hook is going to do anything * for the access anyway. --- unstable.orig/xen/arch/x86/x86_64/mmconfig_64.c +++ unstable/xen/arch/x86/x86_64/mmconfig_64.c @@ -154,10 +154,25 @@ void arch_pci_ro_device(int seg, int bdf } } +static void set_ro(const struct acpi_mcfg_allocation *cfg, + const unsigned long *map) +{ + unsigned int bdf = PCI_BDF(cfg->start_bus_number, 0, 0); + unsigned int end = PCI_BDF(cfg->end_bus_number, -1, -1); + + if (!map) + return; + + while ((bdf = find_next_bit(map, end + 1, bdf)) <= end) { + arch_pci_ro_device(cfg->pci_segment, bdf); + if (bdf++ == end) + break; + } +} + int pci_mmcfg_arch_enable(unsigned int idx) { const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg; - const unsigned long *ro_map = pci_get_ro_map(cfg->pci_segment); if (pci_mmcfg_virt[idx].virt) return 0; @@ -169,16 +184,10 @@ int pci_mmcfg_arch_enable(unsigned int i } printk(KERN_INFO "PCI: Using MCFG for segment %04x bus %02x-%02x\n", cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number); - if (ro_map) { - unsigned int bdf = PCI_BDF(cfg->start_bus_number, 0, 0); - unsigned int end = PCI_BDF(cfg->end_bus_number, -1, -1); - - while ((bdf = find_next_bit(ro_map, end + 1, bdf)) <= end) { - arch_pci_ro_device(cfg->pci_segment, bdf); - if (bdf++ == end) - break; - } - } + + set_ro(cfg, pci_get_ro_map(cfg->pci_segment)); + set_ro(cfg, pci_get_intercept_map(cfg->pci_segment)); + return 0; } @@ -197,6 +206,32 @@ void pci_mmcfg_arch_disable(unsigned int cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number); } +bool_t pci_mmcfg_decode(unsigned long mfn, unsigned int *seg, + unsigned int *bdf) +{ + unsigned int idx; + + for (idx = 0; idx < pci_mmcfg_config_num; ++idx) { + const struct acpi_mcfg_allocation *cfg = pci_mmcfg_virt[idx].cfg; + + if (pci_mmcfg_virt[idx].virt && + mfn >= PFN_DOWN(cfg->address + (cfg->start_bus_number << 20)) && + mfn < PFN_DOWN(cfg->address + ((cfg->end_bus_number + 1) << 20))) { +static unsigned long cnt, thr;//temp + *seg = cfg->pci_segment; + *bdf = mfn - PFN_DOWN(cfg->address); +if(++cnt > thr) {//temp + thr |= cnt; + printk("%lx -> %04x:%02x:%02x.%u\n", mfn, *seg, PCI_BUS(*bdf), PCI_SLOT(*bdf), PCI_FUNC(*bdf)); +} + return 1; + } + } + +printk("%lx -> ???\n", mfn);//temp + return 0; +} + int __init pci_mmcfg_arch_init(void) { int i; --- unstable.orig/xen/drivers/passthrough/pci.c +++ unstable/xen/drivers/passthrough/pci.c @@ -39,6 +39,7 @@ struct pci_seg { struct list_head alldevs_list; u16 nr; unsigned long *ro_map; + unsigned long *intercept_map; /* bus2bridge_lock protects bus2bridge array */ spinlock_t bus2bridge_lock; #define MAX_BUSES 256 @@ -123,6 +124,13 @@ const unsigned long *pci_get_ro_map(u16 return pseg ? pseg->ro_map : NULL; } +const unsigned long *pci_get_intercept_map(u16 seg) +{ + struct pci_seg *pseg = get_pseg(seg); + + return pseg ? pseg->intercept_map : NULL; +} + static struct phantom_dev { u16 seg; u8 bus, slot, stride; @@ -284,6 +292,20 @@ static struct pci_dev *alloc_pdev(struct pdev->domain = NULL; INIT_LIST_HEAD(&pdev->msi_list); + if ( !pseg->intercept_map && + pci_conf_write_intercept(pseg->nr, PCI_BDF2(bus, devfn), ~0, 0, + NULL) > 0 ) + { + size_t sz = BITS_TO_LONGS(PCI_BDF(-1, -1, -1) + 1) * sizeof(long); + + pseg->intercept_map = vzalloc(sz); + if ( !pseg->intercept_map ) + { + xfree(pdev); + return NULL; + } + } + if ( pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), PCI_CAP_ID_MSIX) ) { @@ -366,6 +388,13 @@ static struct pci_dev *alloc_pdev(struct check_pdev(pdev); + if ( pci_conf_write_intercept(pseg->nr, PCI_BDF2(bus, devfn), ~0, 0, + NULL) > 0 ) + { + set_bit(PCI_BDF2(bus, devfn), pseg->intercept_map); + arch_pci_ro_device(pseg->nr, PCI_BDF2(bus, devfn)); + } + return pdev; } --- unstable.orig/xen/include/asm-x86/pci.h +++ unstable/xen/include/asm-x86/pci.h @@ -20,5 +20,7 @@ int pci_conf_write_intercept(unsigned in uint32_t *data); int pci_msi_conf_write_intercept(struct pci_dev *, unsigned int reg, unsigned int size, uint32_t *data); +bool_t pci_mmcfg_decode(unsigned long mfn, unsigned int *seg, + unsigned int *bdf); #endif /* __X86_PCI_H__ */ --- unstable.orig/xen/include/xen/pci.h +++ unstable/xen/include/xen/pci.h @@ -106,6 +106,7 @@ void setup_hwdom_pci_devices(struct doma int pci_release_devices(struct domain *d); int pci_add_segment(u16 seg); const unsigned long *pci_get_ro_map(u16 seg); +const unsigned long *pci_get_intercept_map(u16 seg); int pci_add_device(u16 seg, u8 bus, u8 devfn, const struct pci_dev_info *, nodeid_t node); int pci_remove_device(u16 seg, u8 bus, u8 devfn);