From: Wei Yang <weiyang@linux.vnet.ibm.com>
To: benh@au1.ibm.com
Cc: Wei Yang <weiyang@linux.vnet.ibm.com>,
	linux-pci@vger.kernel.org, gwshan@linux.vnet.ibm.com,
	qiudayu@linux.vnet.ibm.com, bhelgaas@google.com,
	yan@linux.vnet.ibm.com, linuxppc-dev@lists.ozlabs.org
Subject: [RFC PATCH V3 17/17] ppc/pnv: Group VF PE when IOV BAR is big on PHB3
Date: Tue, 10 Jun 2014 09:56:39 +0800	[thread overview]
Message-ID: <1402365399-5121-18-git-send-email-weiyang@linux.vnet.ibm.com> (raw)
In-Reply-To: <1402365399-5121-1-git-send-email-weiyang@linux.vnet.ibm.com>

When the IOV BAR is big, each IOV BAR is covered by 4 M64 windows instead
of one. This leads to several VF PEs sitting behind the same M64 window,
and thus in one PE in terms of M64.
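
For example, with 16 VFs enabled, each of the 4 M64 windows covers
__roundup_pow_of_two(16) / 4 = 4 VF BARs, so VFs 0-3 sit behind the
first window, VFs 4-7 behind the second, and so on.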

This patch groups VF PEs according to the M64 allocation: VF PEs that
share an M64 window are linked to each other through the PELT-V, so
they are frozen and unfrozen as a group.
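
Below is a minimal standalone sketch of the grouping arithmetic,
assuming M64_PER_IOV = 4 as defined in pci-bridge.h (the printf
harness and helper are illustrative only; the real logic lives in
pnv_pci_vf_assign_m64() below):

  #include <stdio.h>

  #define M64_PER_IOV 4

  /* userspace stand-in for the kernel's __roundup_pow_of_two() */
  static unsigned int roundup_pow_of_two(unsigned int v)
  {
          unsigned int p = 1;

          while (p < v)
                  p <<= 1;
          return p;
  }

  int main(void)
  {
          unsigned int vf_num = 16;       /* example: 16 VFs enabled */
          unsigned int vf_groups, vf_per_group, j;

          if (vf_num <= M64_PER_IOV) {
                  vf_groups = vf_num;     /* one VF per M64 window */
                  vf_per_group = 1;
          } else {
                  vf_groups = M64_PER_IOV;        /* always 4 windows */
                  vf_per_group = roundup_pow_of_two(vf_num) / M64_PER_IOV;
          }

          /* each window maps vf_per_group consecutive VF BARs */
          for (j = 0; j < vf_groups; j++)
                  printf("M64 window %u -> VFs %u..%u\n", j,
                         j * vf_per_group, (j + 1) * vf_per_group - 1);
          return 0;
  }

With vf_num = 16 this prints four groups of four (VFs 0..3 behind
window 0, and so on), matching the PELT-V domains that
pnv_ioda_setup_vf_PE() links with opal_pci_set_peltv().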

Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/pci-bridge.h     |    2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |  183 +++++++++++++++++++++++------
 2 files changed, 145 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 36b88e4..f0a21f5 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -173,7 +173,7 @@ struct pci_dn {
 	int     offset;
 #define M64_PER_IOV 4
 	int     m64_per_iov;
-	int     m64_wins[PCI_SRIOV_NUM_BARS];
+	int     m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV];
 #endif /* CONFIG_PCI_IOV */
 #endif
 };
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 98fc163..86688cd 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -804,26 +804,27 @@ static int pnv_pci_vf_release_m64(struct pci_dev *pdev)
 	struct pci_controller *hose;
 	struct pnv_phb        *phb;
 	struct pci_dn         *pdn;
-	int                    i;
+	int                    i, j;
 
 	bus = pdev->bus;
 	hose = pci_bus_to_host(bus);
 	phb = hose->private_data;
 	pdn = pci_get_pdn(pdev);
 
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		if (pdn->m64_wins[i] == -1)
-			continue;
-		opal_pci_phb_mmio_enable(phb->opal_id,
-				OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i], 0);
-		clear_bit(pdn->m64_wins[i], &phb->ioda.m64win_alloc);
-		pdn->m64_wins[i] = -1;
-	}
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+		for (j = 0; j < M64_PER_IOV; j++) {
+			if (pdn->m64_wins[i][j] == -1)
+				continue;
+			opal_pci_phb_mmio_enable(phb->opal_id,
+				OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0);
+			clear_bit(pdn->m64_wins[i][j], &phb->ioda.m64win_alloc);
+			pdn->m64_wins[i][j] = -1;
+		}
 
 	return 0;
 }
 
-static int pnv_pci_vf_assign_m64(struct pci_dev *pdev)
+static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 vf_num)
 {
 	struct pci_bus        *bus;
 	struct pci_controller *hose;
@@ -831,17 +832,33 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev)
 	struct pci_dn         *pdn;
 	unsigned int           win;
 	struct resource       *res;
-	int                    i;
+	int                    i, j;
 	int64_t                rc;
+	int                    total_vfs;
+	resource_size_t        size, start;
+	int                    pe_num;
+	int                    vf_groups;
+	int                    vf_per_group;
 
 	bus = pdev->bus;
 	hose = pci_bus_to_host(bus);
 	phb = hose->private_data;
 	pdn = pci_get_pdn(pdev);
+	total_vfs = pci_sriov_get_totalvfs(pdev);
 
 	/* Initialize the m64_wins to -1 */
 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
-		pdn->m64_wins[i] = -1;
+		for (j = 0; j < M64_PER_IOV; j++)
+			pdn->m64_wins[i][j] = -1;
+
+	if (pdn->m64_per_iov == M64_PER_IOV) {
+		vf_groups = (vf_num <= M64_PER_IOV) ? vf_num : M64_PER_IOV;
+		vf_per_group = (vf_num <= M64_PER_IOV) ? 1 :
+			__roundup_pow_of_two(vf_num) / pdn->m64_per_iov;
+	} else {
+		vf_groups = 1;
+		vf_per_group = 1;
+	}
 
 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 		res = pdev->resource + PCI_IOV_RESOURCES + i;
@@ -851,33 +868,61 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev)
 		if (!is_mem_pref_64_type(res->flags))
 			continue;
 
-		do {
-			win = find_next_zero_bit(&phb->ioda.m64win_alloc,
-					phb->ioda.m64_bars, 0);
-
-			if (win >= phb->ioda.m64_bars)
-				goto m64_failed;
-		} while (test_and_set_bit(win, &phb->ioda.m64win_alloc));
+		for (j = 0; j < vf_groups; j++) {
+			do {
+				win = find_next_zero_bit(&phb->ioda.m64win_alloc,
+						phb->ioda.m64_bars, 0);
+
+				if (win >= phb->ioda.m64_bars)
+					goto m64_failed;
+			} while (test_and_set_bit(win, &phb->ioda.m64win_alloc));
+
+			pdn->m64_wins[i][j] = win;
+
+			if (pdn->m64_per_iov == M64_PER_IOV) {
+				size = pci_sriov_resource_size(pdev,
+						PCI_IOV_RESOURCES + i);
+				size = size * vf_per_group;
+				start = res->start + size * j;
+			} else {
+				size = resource_size(res);
+				start = res->start;
+			}
 
-		pdn->m64_wins[i] = win;
+			/* Map the M64 here */
+			if (pdn->m64_per_iov == M64_PER_IOV) {
+				pe_num = pdn->offset + j;
+				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+						pe_num, OPAL_M64_WINDOW_TYPE,
+						pdn->m64_wins[i][j], 0);
+			}
 
-		/* Map the M64 here */
-		rc = opal_pci_set_phb_mem_window(phb->opal_id,
+			rc = opal_pci_set_phb_mem_window(phb->opal_id,
 						 OPAL_M64_WINDOW_TYPE,
-						 pdn->m64_wins[i],
-						 res->start,
+						 pdn->m64_wins[i][j],
+						 start,
 						 0, /* unused */
-						 resource_size(res));
-		if (rc != OPAL_SUCCESS) {
-			pr_err("Failed to map M64 BAR #%d: %lld\n", win, rc);
-			goto m64_failed;
-		}
+						 size);
 
-		rc = opal_pci_phb_mmio_enable(phb->opal_id,
-				OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i], 1);
-		if (rc != OPAL_SUCCESS) {
-			pr_err("Failed to enable M64 BAR #%d: %llx\n", win, rc);
-			goto m64_failed;
+
+			if (rc != OPAL_SUCCESS) {
+				pr_err("Failed to set M64 BAR #%d: %lld\n",
+						win, rc);
+				goto m64_failed;
+			}
+
+			if (pdn->m64_per_iov == M64_PER_IOV)
+				rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				     OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 2);
+			else
+				rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				     OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 1);
+
+			if (rc != OPAL_SUCCESS) {
+				pr_err("Failed to enable M64 BAR #%d: %llx\n",
+						win, rc);
+				goto m64_failed;
+			}
 		}
 	}
 	return 0;
@@ -987,21 +1032,51 @@ static void pnv_pci_release_vf_node(struct pci_dev *dev, u16 vf_num)
 	}
 }
 
-static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
+static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 vf_num)
 {
 	struct pci_bus        *bus;
 	struct pci_controller *hose;
 	struct pnv_phb        *phb;
 	struct pnv_ioda_pe    *pe, *pe_n;
 	struct pci_dn         *pdn;
+	u16                    vf_index;
+	int64_t                rc;
 
 	bus = pdev->bus;
 	hose = pci_bus_to_host(bus);
 	phb = hose->private_data;
+	pdn = pci_get_pdn(pdev);
 
 	if (!pdev->is_physfn)
 		return;
 
+	if (pdn->m64_per_iov == M64_PER_IOV && vf_num > M64_PER_IOV) {
+		int   vf_group;
+		int   vf_per_group;
+		int   vf_index1;
+
+		vf_per_group = __roundup_pow_of_two(vf_num) / pdn->m64_per_iov;
+
+		for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++)
+			for (vf_index = vf_group * vf_per_group;
+				vf_index < (vf_group + 1) * vf_per_group;
+				vf_index++)
+				for (vf_index1 = vf_group * vf_per_group;
+					vf_index1 < (vf_group + 1) * vf_per_group;
+					vf_index1++) {
+
+					rc = opal_pci_set_peltv(phb->opal_id,
+						pdn->offset + vf_index,
+						pdn->offset + vf_index1,
+						OPAL_REMOVE_PE_FROM_DOMAIN);
+
+					if (rc)
+					    pr_warn("%s: Failed to unlink same"
+						" group PE#%d(%lld)\n", __func__,
+						pdn->offset + vf_index1, rc);
+				}
+	}
+
 	pdn = pci_get_pdn(pdev);
 	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
 		if (pe->parent_dev != pdev)
@@ -1037,11 +1112,12 @@ int pcibios_sriov_disable(struct pci_dev *pdev)
 	vf_num = iov->num_VFs;
 
 	/* Release VF PEs */
-	pnv_ioda_release_vf_PE(pdev);
+	pnv_ioda_release_vf_PE(pdev, vf_num);
 	pnv_pci_release_vf_node(pdev, vf_num);
 
 	if (phb->type == PNV_PHB_IODA2) {
-		pnv_pci_vf_resource_shift(pdev, -pdn->offset);
+		if (pdn->m64_per_iov == 1)
+			pnv_pci_vf_resource_shift(pdev, -pdn->offset);
 
 		/* Release M64 BARs */
 		pnv_pci_vf_release_m64(pdev);
@@ -1065,6 +1141,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 vf_num)
 	int                    pe_num;
 	u16                    vf_index;
 	struct pci_dn         *pdn;
+	int64_t                rc;
 
 	bus = pdev->bus;
 	hose = pci_bus_to_host(bus);
@@ -1112,7 +1189,34 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 vf_num)
 		mutex_unlock(&phb->ioda.pe_list_mutex);
 
 		pnv_pci_ioda2_setup_dma_pe(phb, pe);
+	}
 
+	if (pdn->m64_per_iov == M64_PER_IOV && vf_num > M64_PER_IOV) {
+		int   vf_group;
+		int   vf_per_group;
+		int   vf_index1;
+
+		vf_per_group = __roundup_pow_of_two(vf_num) / pdn->m64_per_iov;
+
+		for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++)
+			for (vf_index = vf_group * vf_per_group;
+				vf_index < (vf_group + 1) * vf_per_group;
+				vf_index++)
+				for (vf_index1 = vf_group * vf_per_group;
+					vf_index1 < (vf_group + 1) * vf_per_group;
+					vf_index1++) {
+
+					rc = opal_pci_set_peltv(phb->opal_id,
+						pdn->offset + vf_index,
+						pdn->offset + vf_index1,
+						OPAL_ADD_PE_TO_DOMAIN);
+
+					if (rc)
+					    pr_warn("%s: Failed to link same "
+						"group PE#%d(%lld)\n",
+						__func__,
+						pdn->offset + vf_index1, rc);
+			}
 	}
 }
 
@@ -1146,14 +1250,15 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 vf_num)
 		mutex_unlock(&phb->ioda.pe_alloc_mutex);
 
 		/* Assign M64 BAR accordingly */
-		ret = pnv_pci_vf_assign_m64(pdev);
+		ret = pnv_pci_vf_assign_m64(pdev, vf_num);
 		if (ret) {
 			pr_info("No enough M64 resource\n");
 			goto m64_failed;
 		}
 
 		/* Do some magic shift */
-		pnv_pci_vf_resource_shift(pdev, pdn->offset);
+		if (pdn->m64_per_iov == 1)
+			pnv_pci_vf_resource_shift(pdev, pdn->offset);
 	}
 
 	/* Setup VF PEs */
-- 
1.7.9.5

Thread overview: 53+ messages
2014-06-10  1:56 [RFC PATCH V3 00/17] Enable SRIOV on POWER8 Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 01/17] pci/iov: Export interface for retrieve VF's BDF Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 02/17] pci/of: Match PCI VFs to dev-tree nodes dynamically Wei Yang
2014-06-23  5:07   ` Gavin Shan
2014-06-23  6:29     ` Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 03/17] ppc/pci: don't unset pci resources for VFs Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 04/17] PCI: SRIOV: add VF enable/disable hook Wei Yang
2014-06-23  5:03   ` Gavin Shan
2014-06-23  6:29     ` Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 05/17] ppc/pnv: user macro to define the TCE size Wei Yang
2014-06-23  5:12   ` Gavin Shan
2014-06-23  6:31     ` Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 06/17] ppc/pnv: allocate pe->iommu_table dynamically Wei Yang
2014-06-24 10:06   ` Alexey Kardashevskiy
2014-06-25  1:12     ` Wei Yang
2014-06-25  4:12       ` Alexey Kardashevskiy
2014-06-25  5:27         ` Wei Yang
2014-06-25  7:50           ` Alexey Kardashevskiy
2014-06-25  7:56             ` Benjamin Herrenschmidt
2014-06-25  9:18               ` Wei Yang
2014-06-25  9:13             ` Wei Yang
2014-06-25  9:20           ` David Laight
2014-06-25  9:31             ` Wei Yang
2014-06-25 10:30             ` Alexey Kardashevskiy
2014-07-14  3:12             ` Benjamin Herrenschmidt
2014-06-10  1:56 ` [RFC PATCH V3 07/17] ppc/pnv: Add function to deconfig a PE Wei Yang
2014-06-23  5:27   ` Gavin Shan
2014-06-23  9:07     ` Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 08/17] PCI: Add weak pcibios_sriov_resource_size() interface Wei Yang
2014-06-23  5:41   ` Gavin Shan
2014-06-23  7:56     ` Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 09/17] PCI: Add weak pcibios_sriov_resource_alignment() interface Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 10/17] PCI: take additional IOV BAR alignment in sizing and assigning Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 11/17] ppc/pnv: Expand VF resources according to the number of total_pe Wei Yang
2014-06-23  6:07   ` Gavin Shan
2014-06-23  6:56     ` Wei Yang
2014-06-23  7:08       ` Gavin Shan
2014-06-10  1:56 ` [RFC PATCH V3 12/17] powerpc/powernv: implement pcibios_sriov_resource_alignment on powernv Wei Yang
2014-06-23  6:09   ` Gavin Shan
2014-06-23  8:21     ` Wei Yang
2014-06-23 23:29       ` Gavin Shan
2014-06-24  1:24         ` Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 13/17] powerpc/powernv: shift VF resource with an offset Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 14/17] ppc/pci: create/release dev-tree node for VFs Wei Yang
2014-06-18 18:26   ` Grant Likely
2014-06-18 20:51     ` Benjamin Herrenschmidt
2014-06-19  2:46     ` Wei Yang
2014-06-19  8:30       ` Grant Likely
2014-06-19  9:42         ` Wei Yang
2014-06-20  3:46         ` Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 15/17] powerpc/powernv: allocate VF PE Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 16/17] ppc/pci: Expanding IOV BAR, with m64_per_iov supported Wei Yang
2014-06-10  1:56 ` [RFC PATCH V3 17/17] ppc/pnv: Group VF PE when IOV BAR is big on PHB3 Wei Yang [this message]
