linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Liu, Yi L" <yi.l.liu@intel.com>
To: Alex Williamson <alex.williamson@redhat.com>
Cc: "kwankhede@nvidia.com" <kwankhede@nvidia.com>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>,
	"Tian, Kevin" <kevin.tian@intel.com>,
	"joro@8bytes.org" <joro@8bytes.org>,
	"peterx@redhat.com" <peterx@redhat.com>,
	"baolu.lu@linux.intel.com" <baolu.lu@linux.intel.com>
Subject: RE: [PATCH v4 07/12] vfio_pci: shrink vfio_pci.c
Date: Thu, 16 Jan 2020 12:42:42 +0000	[thread overview]
Message-ID: <A2975661238FB949B60364EF0F2C25743A183EF9@SHSMSX104.ccr.corp.intel.com> (raw)
In-Reply-To: <20200109154826.7a818be8@w520.home>

> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Friday, January 10, 2020 6:48 AM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Subject: Re: [PATCH v4 07/12] vfio_pci: shrink vfio_pci.c
> 
> On Tue,  7 Jan 2020 20:01:44 +0800
> Liu Yi L <yi.l.liu@intel.com> wrote:
> 
> > This patch removes the common codes in vfio_pci.c, leave the module
> > specific codes, new vfio_pci.c will leverage the common functions
> > implemented in vfio_pci_common.c.
> >
> > Cc: Kevin Tian <kevin.tian@intel.com>
> > Cc: Lu Baolu <baolu.lu@linux.intel.com>
> > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > ---
> >  drivers/vfio/pci/Makefile           |    3 +-
> >  drivers/vfio/pci/vfio_pci.c         | 1442 -----------------------------------
> >  drivers/vfio/pci/vfio_pci_common.c  |    2 +-
> >  drivers/vfio/pci/vfio_pci_private.h |    2 +
> >  4 files changed, 5 insertions(+), 1444 deletions(-)
> >
> > diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
> > index f027f8a..d94317a 100644
> > --- a/drivers/vfio/pci/Makefile
> > +++ b/drivers/vfio/pci/Makefile
> > @@ -1,6 +1,7 @@
> >  # SPDX-License-Identifier: GPL-2.0-only
> >
> > -vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
> > +vfio-pci-y := vfio_pci.o vfio_pci_common.o vfio_pci_intrs.o \
> > +		vfio_pci_rdwr.o vfio_pci_config.o
> >  vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
> >  vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
> >
> > diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> > index 103e493..7e24da2 100644
> > --- a/drivers/vfio/pci/vfio_pci.c
> > +++ b/drivers/vfio/pci/vfio_pci.c
> 
> I think there are a bunch of headers that are no longer needed here
> too.  It at least compiles without these:
> 
> -#include <linux/eventfd.h>
> -#include <linux/file.h>
> -#include <linux/interrupt.h>
> -#include <linux/notifier.h>
> -#include <linux/pm_runtime.h>
> -#include <linux/uaccess.h>
> -#include <linux/nospec.h>

Got it, let me remove them.

> 
> 
> > @@ -54,411 +54,6 @@ module_param(disable_idle_d3, bool, S_IRUGO |
> S_IWUSR);
> >  MODULE_PARM_DESC(disable_idle_d3,
> >  		 "Disable using the PCI D3 low power state for idle, unused devices");
> >
> > -/*
> > - * Our VGA arbiter participation is limited since we don't know anything
> > - * about the device itself.  However, if the device is the only VGA device
> > - * downstream of a bridge and VFIO VGA support is disabled, then we can
> > - * safely return legacy VGA IO and memory as not decoded since the user
> > - * has no way to get to it and routing can be disabled externally at the
> > - * bridge.
> > - */
> > -unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga)
> > -{
> > -	struct vfio_pci_device *vdev = opaque;
> > -	struct pci_dev *tmp = NULL, *pdev = vdev->pdev;
> > -	unsigned char max_busnr;
> > -	unsigned int decodes;
> > -
> > -	if (single_vga || !vfio_vga_disabled(vdev) ||
> > -		pci_is_root_bus(pdev->bus))
> > -		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
> > -		       VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
> > -
> > -	max_busnr = pci_bus_max_busnr(pdev->bus);
> > -	decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
> > -
> > -	while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) {
> > -		if (tmp == pdev ||
> > -		    pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) ||
> > -		    pci_is_root_bus(tmp->bus))
> > -			continue;
> > -
> > -		if (tmp->bus->number >= pdev->bus->number &&
> > -		    tmp->bus->number <= max_busnr) {
> > -			pci_dev_put(tmp);
> > -			decodes |= VGA_RSRC_LEGACY_IO |
> VGA_RSRC_LEGACY_MEM;
> > -			break;
> > -		}
> > -	}
> > -
> > -	return decodes;
> > -}
> > -
> > -static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
> > -{
> > -	struct resource *res;
> > -	int i;
> > -	struct vfio_pci_dummy_resource *dummy_res;
> > -
> > -	INIT_LIST_HEAD(&vdev->dummy_resources_list);
> > -
> > -	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
> > -		int bar = i + PCI_STD_RESOURCES;
> > -
> > -		res = &vdev->pdev->resource[bar];
> > -
> > -		if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
> > -			goto no_mmap;
> > -
> > -		if (!(res->flags & IORESOURCE_MEM))
> > -			goto no_mmap;
> > -
> > -		/*
> > -		 * The PCI core shouldn't set up a resource with a
> > -		 * type but zero size. But there may be bugs that
> > -		 * cause us to do that.
> > -		 */
> > -		if (!resource_size(res))
> > -			goto no_mmap;
> > -
> > -		if (resource_size(res) >= PAGE_SIZE) {
> > -			vdev->bar_mmap_supported[bar] = true;
> > -			continue;
> > -		}
> > -
> > -		if (!(res->start & ~PAGE_MASK)) {
> > -			/*
> > -			 * Add a dummy resource to reserve the remainder
> > -			 * of the exclusive page in case that hot-add
> > -			 * device's bar is assigned into it.
> > -			 */
> > -			dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
> > -			if (dummy_res == NULL)
> > -				goto no_mmap;
> > -
> > -			dummy_res->resource.name = "vfio sub-page reserved";
> > -			dummy_res->resource.start = res->end + 1;
> > -			dummy_res->resource.end = res->start + PAGE_SIZE - 1;
> > -			dummy_res->resource.flags = res->flags;
> > -			if (request_resource(res->parent,
> > -						&dummy_res->resource)) {
> > -				kfree(dummy_res);
> > -				goto no_mmap;
> > -			}
> > -			dummy_res->index = bar;
> > -			list_add(&dummy_res->res_next,
> > -					&vdev->dummy_resources_list);
> > -			vdev->bar_mmap_supported[bar] = true;
> > -			continue;
> > -		}
> > -		/*
> > -		 * Here we don't handle the case when the BAR is not page
> > -		 * aligned because we can't expect the BAR will be
> > -		 * assigned into the same location in a page in guest
> > -		 * when we passthrough the BAR. And it's hard to access
> > -		 * this BAR in userspace because we have no way to get
> > -		 * the BAR's location in a page.
> > -		 */
> > -no_mmap:
> > -		vdev->bar_mmap_supported[bar] = false;
> > -	}
> > -}
> > -
> > -static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
> > -
> > -/*
> > - * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
> > - * _and_ the ability detect when the device is asserting INTx via PCI_STATUS.
> > - * If a device implements the former but not the latter we would typically
> > - * expect broken_intx_masking be set and require an exclusive interrupt.
> > - * However since we do have control of the device's ability to assert INTx,
> > - * we can instead pretend that the device does not implement INTx, virtualizing
> > - * the pin register to report zero and maintaining DisINTx set on the host.
> > - */
> > -static bool vfio_pci_nointx(struct pci_dev *pdev)
> > -{
> > -	switch (pdev->vendor) {
> > -	case PCI_VENDOR_ID_INTEL:
> > -		switch (pdev->device) {
> > -		/* All i40e (XL710/X710/XXV710) 10/20/25/40GbE NICs */
> > -		case 0x1572:
> > -		case 0x1574:
> > -		case 0x1580 ... 0x1581:
> > -		case 0x1583 ... 0x158b:
> > -		case 0x37d0 ... 0x37d2:
> > -			return true;
> > -		default:
> > -			return false;
> > -		}
> > -	}
> > -
> > -	return false;
> > -}
> > -
> > -void vfio_pci_probe_power_state(struct vfio_pci_device *vdev)
> > -{
> > -	struct pci_dev *pdev = vdev->pdev;
> > -	u16 pmcsr;
> > -
> > -	if (!pdev->pm_cap)
> > -		return;
> > -
> > -	pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr);
> > -
> > -	vdev->needs_pm_restore = !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET);
> > -}
> > -
> > -/*
> > - * pci_set_power_state() wrapper handling devices which perform a soft reset on
> > - * D3->D0 transition.  Save state prior to D0/1/2->D3, stash it on the vdev,
> > - * restore when returned to D0.  Saved separately from pci_saved_state for use
> > - * by PM capability emulation and separately from pci_dev internal saved state
> > - * to avoid it being overwritten and consumed around other resets.
> > - */
> > -int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state)
> > -{
> > -	struct pci_dev *pdev = vdev->pdev;
> > -	bool needs_restore = false, needs_save = false;
> > -	int ret;
> > -
> > -	if (vdev->needs_pm_restore) {
> > -		if (pdev->current_state < PCI_D3hot && state >= PCI_D3hot) {
> > -			pci_save_state(pdev);
> > -			needs_save = true;
> > -		}
> > -
> > -		if (pdev->current_state >= PCI_D3hot && state <= PCI_D0)
> > -			needs_restore = true;
> > -	}
> > -
> > -	ret = pci_set_power_state(pdev, state);
> > -
> > -	if (!ret) {
> > -		/* D3 might be unsupported via quirk, skip unless in D3 */
> > -		if (needs_save && pdev->current_state >= PCI_D3hot) {
> > -			vdev->pm_save = pci_store_saved_state(pdev);
> > -		} else if (needs_restore) {
> > -			pci_load_and_free_saved_state(pdev, &vdev->pm_save);
> > -			pci_restore_state(pdev);
> > -		}
> > -	}
> 
> 
> This gets a bit ugly, vfio_pci_remove() retains:
> 
> kfree(vdev->pm_save)
> 
> But vfio_pci.c otherwise has no use of this field on the
> vfio_pci_device.  I'm afraid we're really just doing a pretty rough
> splitting of the code rather than massaging the callbacks between the
> modules into an actual API, for example maybe there should be init and
> exit callbacks into the common code to handle such things.
> ioeventfds_{list,lock} are similar, vfio_pci.c inits and destroys them,
> but otherwise doesn't know what they're for.  I wonder how many more
> such things exist.  Thanks,

yeah, I tried to keep the code as what it looks like today. So it is
now much more like a code splitting). But I agree we need to make it
more thorough. I had been considering how to make the code work as
what you described here since I saw your comment last week. I may
need a more detailed investigation on it per your direction, and answer
your question better.

Thanks very much for your guidelines.

Regards,
Yi Liu

> 
> Alex


  reply	other threads:[~2020-01-16 12:42 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-07 12:01 [PATCH v4 00/12] vfio_pci: wrap pci device as a mediated device Liu Yi L
2020-01-07 12:01 ` [PATCH v4 01/12] vfio_pci: refine user config reference in vfio-pci module Liu Yi L
2020-01-09 22:48   ` Alex Williamson
2020-01-16 12:19     ` Liu, Yi L
2020-01-07 12:01 ` [PATCH v4 02/12] vfio_pci: move vfio_pci_is_vga/vfio_vga_disabled to header file Liu Yi L
2020-01-15 10:43   ` Cornelia Huck
2020-01-16 12:46     ` Liu, Yi L
2020-01-07 12:01 ` [PATCH v4 03/12] vfio_pci: refine vfio_pci_driver reference in vfio_pci.c Liu Yi L
2020-01-09 22:48   ` Alex Williamson
2020-01-10  7:35     ` Liu, Yi L
2020-01-07 12:01 ` [PATCH v4 04/12] vfio_pci: make common functions be extern Liu Yi L
2020-01-15 10:56   ` Cornelia Huck
2020-01-16 12:48     ` Liu, Yi L
2020-01-07 12:01 ` [PATCH v4 05/12] vfio_pci: duplicate vfio_pci.c Liu Yi L
2020-01-15 11:03   ` Cornelia Huck
2020-01-15 15:12     ` Alex Williamson
2020-01-07 12:01 ` [PATCH v4 06/12] vfio_pci: shrink vfio_pci_common.c Liu Yi L
2020-01-07 12:01 ` [PATCH v4 07/12] vfio_pci: shrink vfio_pci.c Liu Yi L
2020-01-08 11:24   ` kbuild test robot
2020-01-09 22:48   ` Alex Williamson
2020-01-16 12:42     ` Liu, Yi L [this message]
2020-01-07 12:01 ` [PATCH v4 08/12] vfio_pci: duplicate vfio_pci_private.h to include/linux Liu Yi L
2020-01-07 12:01 ` [PATCH v4 09/12] vfio: split vfio_pci_private.h into two files Liu Yi L
2020-01-09 22:48   ` Alex Williamson
2020-01-16 11:59     ` Liu, Yi L
2020-01-07 12:01 ` [PATCH v4 10/12] vfio: build vfio_pci_common.c into a kernel module Liu Yi L
2020-01-07 12:01 ` [PATCH v4 11/12] samples: add vfio-mdev-pci driver Liu Yi L
2020-01-09 22:48   ` Alex Williamson
2020-01-16 12:33     ` Liu, Yi L
2020-01-16 21:24       ` Alex Williamson
2020-01-18 14:25         ` Liu, Yi L
2020-01-20 21:07           ` Alex Williamson
2020-01-21  7:43             ` Tian, Kevin
2020-01-21  8:43               ` Yan Zhao
2020-01-21 20:04                 ` Alex Williamson
2020-01-21 21:54                   ` Yan Zhao
2020-01-23 23:33                     ` Alex Williamson
2020-01-31  2:26                       ` Yan Zhao
2020-01-15 12:30   ` Cornelia Huck
2020-01-16 13:23     ` Liu, Yi L
2020-01-16 17:40       ` Cornelia Huck
2020-01-18 14:23         ` Liu, Yi L
2020-01-20  8:55           ` Cornelia Huck
2020-01-07 12:01 ` [PATCH v4 12/12] samples: refine " Liu Yi L

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=A2975661238FB949B60364EF0F2C25743A183EF9@SHSMSX104.ccr.corp.intel.com \
    --to=yi.l.liu@intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=kwankhede@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterx@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).