From mboxrd@z Thu Jan 1 00:00:00 1970 From: Boris Ostrovsky Subject: Re: [PATCH v3 4/7] sysctl: Add sysctl interface for querying PCI topology Date: Tue, 10 Feb 2015 09:45:56 -0500 Message-ID: <54DA19A4.8070603__12617.7259292037$1423579686$gmane$org@oracle.com> References: <1423512275-6531-1-git-send-email-boris.ostrovsky@oracle.com> <1423512275-6531-5-git-send-email-boris.ostrovsky@oracle.com> <54D9E7DE.6080609@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <54D9E7DE.6080609@citrix.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Andrew Cooper , jbeulich@suse.com, keir@xen.org, ian.jackson@eu.citrix.com, ian.campbell@citrix.com, stefano.stabellini@eu.citrix.com, wei.liu2@citrix.com Cc: dario.faggioli@citrix.com, port-xen@netbsd.org, ufimtseva@gmail.com, xen-devel@lists.xen.org List-Id: xen-devel@lists.xenproject.org On 02/10/2015 06:13 AM, Andrew Cooper wrote: > On 09/02/15 20:04, Boris Ostrovsky wrote: >> Signed-off-by: Boris Ostrovsky >> --- >> xen/common/sysctl.c | 73 +++++++++++++++++++++++++++++++++++++++++++ >> xen/include/public/sysctl.h | 29 +++++++++++++++++ >> 2 files changed, 102 insertions(+), 0 deletions(-) >> >> diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c >> index 30c5806..ea6557f 100644 >> --- a/xen/common/sysctl.c >> +++ b/xen/common/sysctl.c >> @@ -384,7 +384,80 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl) >> xfree(cputopo); >> } >> break; >> +#ifdef HAS_PCI >> + case XEN_SYSCTL_pcitopoinfo: >> + { >> + xen_sysctl_pcitopoinfo_t *ti = &op->u.pcitopoinfo; >> + physdev_pci_device_t *devs; >> + uint8_t *nodes; >> + unsigned int first_dev, i; >> + int num_devs; >> + >> + num_devs = ti->num_devs - ti->first_dev; >> + >> + if ( guest_handle_is_null(ti->devs) || >> + guest_handle_is_null(ti->nodes) || >> + (num_devs <= 0) ) >> + { >> + ret = -EINVAL; >> + break; >> + } >> + >> + devs = xmalloc_array(physdev_pci_device_t, num_devs); >> + nodes = xmalloc_array(uint8_t, num_devs); > You can do all of this without any memory allocation at all, which will > simplify your error handling substantially. > > Something along the lines of > > for(...) > { > copy one physdev_pci_device_t from the guest > > do the lookup > > copy one node id back to the guest > } I am trying to avoid doing multiple copies. For lots of devices (IIRC, you said you had a system with a few thousand), having two copies per loop will add up, I think. If you look at changes to cputopolgy and numatopology in earlier patch (the one you said you'd postpone reviewing until I split the third patch) I you will see that I did the same thing there. -boris > > ~Andrew > >> + if ( !devs || !nodes ) >> + { >> + xfree(devs); >> + xfree(nodes); >> + ret = -ENOMEM; >> + break; >> + } >> + >> + first_dev = ti->first_dev; >> + >> + if ( copy_from_guest_offset(devs, ti->devs, first_dev, num_devs) ) >> + { >> + xfree(devs); >> + xfree(nodes); >> + ret = -EFAULT; >> + break; >> + } >> >> + for ( i = 0; i < num_devs; i++ ) >> + { >> + struct pci_dev *pdev; >> + >> + spin_lock(&pcidevs_lock); >> + pdev = pci_get_pdev(devs[i].seg, devs[i].bus, devs[i].devfn); >> + if ( !pdev || (pdev->node == NUMA_NO_NODE) ) >> + nodes[i] = INVALID_NODE_ID; >> + else >> + nodes[i] = pdev->node; >> + spin_unlock(&pcidevs_lock); >> + >> + if ( hypercall_preempt_check() ) >> + break; >> + } >> + >> + ti->first_dev += i + 1; >> + >> + if ( __copy_field_to_guest(u_sysctl, op, >> + u.pcitopoinfo.first_dev) || >> + copy_to_guest_offset(ti->nodes, first_dev, nodes,num_devs) ) >> + { >> + ret = -EFAULT; >> + break; >> + } >> + >> + if ( ti->first_dev < ti->num_devs ) >> + ret = hypercall_create_continuation(__HYPERVISOR_sysctl, >> + "h", u_sysctl); >> + >> + xfree(devs); >> + xfree(nodes); >> + } >> + break; >> +#endif >> #ifdef TEST_COVERAGE >> case XEN_SYSCTL_coverage_op: >> ret = sysctl_coverage_op(&op->u.coverage_op); >> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h >> index 7c78f81..044c3a1 100644 >> --- a/xen/include/public/sysctl.h >> +++ b/xen/include/public/sysctl.h >> @@ -33,6 +33,7 @@ >> >> #include "xen.h" >> #include "domctl.h" >> +#include "physdev.h" >> >> #define XEN_SYSCTL_INTERFACE_VERSION 0x0000000C >> >> @@ -494,6 +495,32 @@ struct xen_sysctl_cputopoinfo { >> typedef struct xen_sysctl_cputopoinfo xen_sysctl_cputopoinfo_t; >> DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopoinfo_t); >> >> +/* XEN_SYSCTL_pcitopoinfo */ >> +struct xen_sysctl_pcitopoinfo { >> + /* IN: Size of pcitopo array */ >> + uint32_t num_devs; >> + >> + /* >> + * IN/OUT: First element of pcitopo array that needs to be processed by >> + * hypervisor. >> + * This is used primarily by hypercall continuations and callers will >> + * typically set it to zero >> + */ >> + uint32_t first_dev; >> + >> + /* IN: list of devices */ >> + XEN_GUEST_HANDLE_64(physdev_pci_device_t) devs; >> + >> + /* >> + * OUT: node identifier for each device. >> + * If information for a particular device is not avalable then set >> + * to INVALID_NODE_ID. >> + */ >> + XEN_GUEST_HANDLE_64(uint8) nodes; >> +}; >> +typedef struct xen_sysctl_pcitopoinfo xen_sysctl_pcitopoinfo_t; >> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_pcitopoinfo_t); >> + >> /* XEN_SYSCTL_numainfo */ >> #define INVALID_MEM_SZ (~0U) >> >> @@ -692,12 +719,14 @@ struct xen_sysctl { >> #define XEN_SYSCTL_scheduler_op 19 >> #define XEN_SYSCTL_coverage_op 20 >> #define XEN_SYSCTL_psr_cmt_op 21 >> +#define XEN_SYSCTL_pcitopoinfo 22 >> uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ >> union { >> struct xen_sysctl_readconsole readconsole; >> struct xen_sysctl_tbuf_op tbuf_op; >> struct xen_sysctl_physinfo physinfo; >> struct xen_sysctl_cputopoinfo cputopoinfo; >> + struct xen_sysctl_pcitopoinfo pcitopoinfo; >> struct xen_sysctl_numainfo numainfo; >> struct xen_sysctl_sched_id sched_id; >> struct xen_sysctl_perfc_op perfc_op; >