qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: "ehabkost@redhat.com" <ehabkost@redhat.com>,
	Sergio Lopez <slp@redhat.com>,
	"maran.wilson@oracle.com" <maran.wilson@oracle.com>,
	"Montes, Julio" <julio.montes@intel.com>,
	Stefan Hajnoczi <stefanha@gmail.com>,
	"qemu-devel@nongnu.org" <qemu-devel@nongnu.org>,
	"kraxel@redhat.com" <kraxel@redhat.com>,
	"rth@twiddle.net" <rth@twiddle.net>,
	"sgarzare@redhat.com" <sgarzare@redhat.com>
Subject: Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type
Date: Thu, 25 Jul 2019 06:03:05 -0400	[thread overview]
Message-ID: <20190725060105-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <c9c811f4-6108-f5b1-31f5-3f757f51cf3c@redhat.com>

On Wed, Jul 24, 2019 at 01:14:35PM +0200, Paolo Bonzini wrote:
> On 23/07/19 12:01, Paolo Bonzini wrote:
> > The number of buses is determined by the firmware, not by QEMU, so
> > fw_cfg would not be the right interface.  In fact (as I have just
> > learnt) lastbus is an x86-specific option that overrides the last bus
> > returned by SeaBIOS's handle_1ab101.
> > 
> > So the next step could be to figure out what is the lastbus returned by
> > handle_1ab101 and possibly why it isn't zero.
> 
> Some update:
> 
> - for 64-bit, PCIBIOS (and thus handle_1ab101) is not called.  PCIBIOS is
> only used by 32-bit kernels.  As a side effect, PCI expander bridges do not
> work on 32-bit kernels with ACPI disabled, because they are located beyond
> pcibios_last_bus (with ACPI enabled, the DSDT exposes them).
> 
> - for -M pc, pcibios_last_bus in Linux remains -1 and no "legacy scanning" is done.
> 
> - for -M q35, pcibios_last_bus in Linux is set based on the size of the 
> MMCONFIG aperture and Linux ends up scanning all 32*255 (bus,dev) pairs 
> for buses above 0.
> 
> Here is a patch that only scans devfn==0, which should mostly remove the need
> for pci=lastbus=0.  (Testing is welcome).
> 
> Actually, KVM could probably avoid the scanning altogether.  The only "hidden" root
> buses we expect are from PCI expander bridges and if you found an MMCONFIG area
> through the ACPI MCFG table, you can also use the DSDT to find PCI expander bridges.
> However, I am being conservative.
> 
> A possible alternative could be a mechanism whereby the vmlinuz real mode entry
> point, or the 32-bit PVH entry point, fetches lastbus and passes it to the
> kernel via the vmlinuz or PVH boot information structs.  However, I don't think
> that's very useful, and there is some risk of breaking real hardware too.
> 
> Paolo
> 
> diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
> index 73bb404f4d2a..17012aa60d22 100644
> --- a/arch/x86/include/asm/pci_x86.h
> +++ b/arch/x86/include/asm/pci_x86.h
> @@ -61,6 +61,7 @@ enum pci_bf_sort_state {
>  extern struct pci_ops pci_root_ops;
>  
>  void pcibios_scan_specific_bus(int busn);
> +void pcibios_scan_bus_by_device(int busn);
>  
>  /* pci-irq.c */
>  
> @@ -216,8 +217,10 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val)
>  # endif
>  # define x86_default_pci_init_irq	pcibios_irq_init
>  # define x86_default_pci_fixup_irqs	pcibios_fixup_irqs
> +# define x86_default_pci_scan_bus	pcibios_scan_bus_by_device
>  #else
>  # define x86_default_pci_init		NULL
>  # define x86_default_pci_init_irq	NULL
>  # define x86_default_pci_fixup_irqs	NULL
> +# define x86_default_pci_scan_bus      NULL
>  #endif
> diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
> index b85a7c54c6a1..4c3a0a17a600 100644
> --- a/arch/x86/include/asm/x86_init.h
> +++ b/arch/x86/include/asm/x86_init.h
> @@ -251,6 +251,7 @@ struct x86_hyper_runtime {
>   * @save_sched_clock_state:	save state for sched_clock() on suspend
>   * @restore_sched_clock_state:	restore state for sched_clock() on resume
>   * @apic_post_init:		adjust apic if needed
> + * @pci_scan_bus:		scan a PCI bus
>   * @legacy:			legacy features
>   * @set_legacy_features:	override legacy features. Use of this callback
>   * 				is highly discouraged. You should only need
> @@ -273,6 +274,7 @@ struct x86_platform_ops {
>  	void (*save_sched_clock_state)(void);
>  	void (*restore_sched_clock_state)(void);
>  	void (*apic_post_init)(void);
> +	void (*pci_scan_bus)(int busn);
>  	struct x86_legacy_features legacy;
>  	void (*set_legacy_features)(void);
>  	struct x86_hyper_runtime hyper;
> diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
> index 6857b4577f17..b248d7036dd3 100644
> --- a/arch/x86/kernel/jailhouse.c
> +++ b/arch/x86/kernel/jailhouse.c
> @@ -11,12 +11,14 @@
>  #include <linux/acpi_pmtmr.h>
>  #include <linux/kernel.h>
>  #include <linux/reboot.h>
> +#include <linux/pci.h>
>  #include <asm/apic.h>
>  #include <asm/cpu.h>
>  #include <asm/hypervisor.h>
>  #include <asm/i8259.h>
>  #include <asm/irqdomain.h>
>  #include <asm/pci_x86.h>
> +#include <asm/pci.h>
>  #include <asm/reboot.h>
>  #include <asm/setup.h>
>  #include <asm/jailhouse_para.h>
> @@ -136,6 +138,22 @@ static int __init jailhouse_pci_arch_init(void)
>  	return 0;
>  }
>  
> +static void jailhouse_pci_scan_bus_by_function(int busn)
> +{
> +        int devfn;
> +        u32 l;
> +
> +        for (devfn = 0; devfn < 256; devfn++) {
> +                if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) &&
> +                    l != 0x0000 && l != 0xffff) {
> +                        DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l);
> +                        pr_info("PCI: Discovered peer bus %02x\n", busn);
> +                        pcibios_scan_root(busn);
> +                        return;
> +                }
> +        }
> +}
> +
>  static void __init jailhouse_init_platform(void)
>  {
>  	u64 pa_data = boot_params.hdr.setup_data;
> @@ -153,6 +171,7 @@ static void __init jailhouse_init_platform(void)
>  	x86_platform.legacy.rtc		= 0;
>  	x86_platform.legacy.warm_reset	= 0;
>  	x86_platform.legacy.i8042	= X86_LEGACY_I8042_PLATFORM_ABSENT;
> +	x86_platform.pci_scan_bus	= jailhouse_pci_scan_bus_by_function;
>  
>  	legacy_pic			= &null_legacy_pic;
>  
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 82caf01b63dd..59f7204ed8f3 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -24,6 +24,7 @@
>  #include <linux/debugfs.h>
>  #include <linux/nmi.h>
>  #include <linux/swait.h>
> +#include <linux/pci.h>
>  #include <asm/timer.h>
>  #include <asm/cpu.h>
>  #include <asm/traps.h>
> @@ -33,6 +34,7 @@
>  #include <asm/apicdef.h>
>  #include <asm/hypervisor.h>
>  #include <asm/tlb.h>
> +#include <asm/pci.h>
>  
>  static int kvmapf = 1;
>  
> @@ -621,10 +623,31 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
>  	native_flush_tlb_others(flushmask, info);
>  }
>  
> +#ifdef CONFIG_PCI
> +static void kvm_pci_scan_bus(int busn)
> +{
> +        u32 l;
> +
> +	/*
> +	 * Assume that there are no "hidden" buses, i.e. all PCI root buses
> +	 * have a host bridge at device 0, function 0.
> +	 */
> +	if (!raw_pci_read(0, busn, 0, PCI_VENDOR_ID, 2, &l) &&
> +	    l != 0x0000 && l != 0xffff) {
> +		pr_info("PCI: Discovered peer bus %02x\n", busn);
> +		pcibios_scan_root(busn);
> +        }
> +}
> +#endif
> +
>  static void __init kvm_guest_init(void)
>  {
>  	int i;
>  
> +#ifdef CONFIG_PCI
> +	x86_platform.pci_scan_bus = kvm_pci_scan_bus;
> +#endif
> +
>  	if (!kvm_para_available())
>  		return;
>  

Shouldn't this happen after the kvm_para_available() check?
In fact, let's add a CPUID flag for this, so it's
easy to tell the guest whether to scan extra buses.
What do you say?

> diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
> index 50a2b492fdd6..19e1cc2cb6e0 100644
> --- a/arch/x86/kernel/x86_init.c
> +++ b/arch/x86/kernel/x86_init.c
> @@ -118,6 +118,7 @@ struct x86_platform_ops x86_platform __ro_after_init = {
>  	.get_nmi_reason			= default_get_nmi_reason,
>  	.save_sched_clock_state 	= tsc_save_sched_clock_state,
>  	.restore_sched_clock_state 	= tsc_restore_sched_clock_state,
> +	.pci_scan_bus			= x86_default_pci_scan_bus,
>  	.hyper.pin_vcpu			= x86_op_int_noop,
>  };
>  
> diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
> index 467311b1eeea..6214dbce26d3 100644
> --- a/arch/x86/pci/legacy.c
> +++ b/arch/x86/pci/legacy.c
> @@ -36,14 +36,19 @@ int __init pci_legacy_init(void)
>  
>  void pcibios_scan_specific_bus(int busn)
>  {
> -	int stride = jailhouse_paravirt() ? 1 : 8;
> -	int devfn;
> -	u32 l;
> -
>  	if (pci_find_bus(0, busn))
>  		return;
>  
> -	for (devfn = 0; devfn < 256; devfn += stride) {
> +	x86_platform.pci_scan_bus(busn);
> +}
> +EXPORT_SYMBOL_GPL(pcibios_scan_specific_bus);
> +
> +void pcibios_scan_bus_by_device(int busn)
> +{
> +	int devfn;
> +	u32 l;
> +
> +	for (devfn = 0; devfn < 256; devfn += 8) {
>  		if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) &&
>  		    l != 0x0000 && l != 0xffff) {
>  			DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l);
> @@ -53,7 +58,6 @@ void pcibios_scan_specific_bus(int busn)
>  		}
>  	}
>  }
> -EXPORT_SYMBOL_GPL(pcibios_scan_specific_bus);
>  
>  static int __init pci_subsys_init(void)
>  {


  parent reply	other threads:[~2019-07-25 10:03 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-02 12:11 [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type Sergio Lopez
2019-07-02 12:11 ` [Qemu-devel] [PATCH v3 1/4] hw/virtio: Factorize virtio-mmio headers Sergio Lopez
2019-07-25  9:46   ` Liam Merwick
2019-07-25  9:58     ` Michael S. Tsirkin
2019-07-25 10:03       ` Peter Maydell
2019-07-25 10:36       ` Paolo Bonzini
2019-07-02 12:11 ` [Qemu-devel] [PATCH v3 2/4] hw/i386: Add an Intel MPTable generator Sergio Lopez
2019-07-02 12:11 ` [Qemu-devel] [PATCH v3 3/4] hw/i386: Factorize PVH related functions Sergio Lopez
2019-07-23  8:39   ` Liam Merwick
2019-07-02 12:11 ` [Qemu-devel] [PATCH v3 4/4] hw/i386: Introduce the microvm machine type Sergio Lopez
2019-07-02 13:58   ` Gerd Hoffmann
2019-07-25 10:47   ` Paolo Bonzini
2019-07-02 15:01 ` [Qemu-devel] [PATCH v3 0/4] " no-reply
2019-07-02 15:23 ` Peter Maydell
2019-07-02 17:34   ` Sergio Lopez
2019-07-02 18:04     ` Peter Maydell
2019-07-02 22:04       ` Sergio Lopez
2019-07-25  9:59         ` Michael S. Tsirkin
2019-07-25 10:05           ` Peter Maydell
2019-07-25 10:10             ` Michael S. Tsirkin
2019-07-25 14:52               ` Sergio Lopez
2019-07-25 10:42             ` Sergio Lopez
2019-07-25 11:23               ` Paolo Bonzini
2019-07-25 12:01                 ` Stefan Hajnoczi
2019-07-25 12:10                   ` Michael S. Tsirkin
2019-07-25 13:26                     ` Stefan Hajnoczi
2019-07-25 13:43                       ` Paolo Bonzini
2019-07-25 13:54                         ` Michael S. Tsirkin
2019-07-25 14:13                           ` Paolo Bonzini
2019-07-25 14:42                             ` Michael S. Tsirkin
2019-07-25 14:04                         ` Peter Maydell
2019-07-25 14:26                           ` Paolo Bonzini
2019-07-25 14:35                             ` Michael S. Tsirkin
2019-07-25 14:42                         ` Sergio Lopez
2019-07-25 14:58                           ` Michael S. Tsirkin
2019-07-25 15:01                             ` Michael S. Tsirkin
2019-07-25 15:39                               ` Paolo Bonzini
2019-07-25 17:38                                 ` Michael S. Tsirkin
2019-07-26 12:46                                   ` Igor Mammedov
2019-07-25 15:49                               ` Sergio Lopez
2019-07-25 13:48                       ` Michael S. Tsirkin
2019-07-02 15:30 ` no-reply
2019-07-03  9:58 ` Stefan Hajnoczi
2019-07-18 15:21   ` Sergio Lopez
2019-07-19 10:29     ` Stefan Hajnoczi
2019-07-19 13:48       ` Sergio Lopez
2019-07-19 15:09         ` Stefan Hajnoczi
2019-07-19 15:42           ` Montes, Julio
2019-07-23  8:43             ` Sergio Lopez
2019-07-23  9:47               ` Stefan Hajnoczi
2019-07-23 10:01                 ` Paolo Bonzini
2019-07-24 11:14                   ` Paolo Bonzini
2019-07-25  9:35                     ` Sergio Lopez
2019-07-25 10:03                     ` Michael S. Tsirkin [this message]
2019-07-25 10:55                       ` Paolo Bonzini
2019-07-25 14:46                     ` Michael S. Tsirkin
2019-07-25 15:35                       ` Paolo Bonzini
2019-07-25 17:33                         ` Michael S. Tsirkin
2019-07-25 20:30                         ` Michael S. Tsirkin
2019-07-26  7:57                           ` Paolo Bonzini
2019-07-26 11:10                             ` Michael S. Tsirkin
2019-07-23 11:30                 ` Stefano Garzarella
2019-07-24 15:23                   ` Stefano Garzarella
2019-08-29  9:02 ` Jing Liu
2019-08-29 15:46   ` Sergio Lopez
2019-08-30  4:53     ` Jing Liu
2019-08-30 14:27       ` Sergio Lopez
2019-09-02  5:43         ` Jing Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190725060105-mutt-send-email-mst@kernel.org \
    --to=mst@redhat.com \
    --cc=ehabkost@redhat.com \
    --cc=julio.montes@intel.com \
    --cc=kraxel@redhat.com \
    --cc=maran.wilson@oracle.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    --cc=sgarzare@redhat.com \
    --cc=slp@redhat.com \
    --cc=stefanha@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).