From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alexey Kardashevskiy Subject: Re: [PATCH v8 16/45] powerpc/powernv: Remove DMA32 PE list Date: Wed, 13 Apr 2016 18:59:40 +1000 Message-ID: <570E0A7C.70103@ozlabs.ru> References: <1455680668-23298-1-git-send-email-gwshan@linux.vnet.ibm.com> <1455680668-23298-17-git-send-email-gwshan@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=koi8-r; format=flowed Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1455680668-23298-17-git-send-email-gwshan@linux.vnet.ibm.com> Sender: linux-pci-owner@vger.kernel.org To: Gavin Shan , linuxppc-dev@lists.ozlabs.org Cc: linux-pci@vger.kernel.org, devicetree@vger.kernel.org, benh@kernel.crashing.org, mpe@ellerman.id.au, dja@axtens.net, bhelgaas@google.com, robherring2@gmail.com, grant.likely@linaro.org List-Id: devicetree@vger.kernel.org On 02/17/2016 02:43 PM, Gavin Shan wrote: > PEs are put into PHB DMA32 list (phb->ioda.pe_dma_list) according > to their DMA32 weight. The PEs on the list are iterated to setup > their TCE32 tables at system booting time. The list is used for > once and there is for keep having it. Maybe "there is no need to keep it"? > > This moves the logic calculating DMA32 weight of PHB and PE to > pnv_ioda_setup_dma() to drop PHB's DMA32 list. Also, every PE > traces the consumed DMA32 segment by @tce32_seg and @tce32_segcount > are useless and they're removed. > > Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy with a few comments below... 
> --- > arch/powerpc/platforms/powernv/pci-ioda.c | 168 +++++++++++++----------------- > arch/powerpc/platforms/powernv/pci.h | 19 ---- > 2 files changed, 75 insertions(+), 112 deletions(-) > > diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c > index e60cff6..0fc2309 100644 > --- a/arch/powerpc/platforms/powernv/pci-ioda.c > +++ b/arch/powerpc/platforms/powernv/pci-ioda.c > @@ -886,44 +886,6 @@ out: > return 0; > } > > -static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, > - struct pnv_ioda_pe *pe) > -{ > - struct pnv_ioda_pe *lpe; > - > - list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) { > - if (lpe->dma_weight < pe->dma_weight) { > - list_add_tail(&pe->dma_link, &lpe->dma_link); > - return; > - } > - } > - list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list); > -} > - > -static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) > -{ > - /* This is quite simplistic. The "base" weight of a device > - * is 10. 0 means no DMA is to be accounted for it. 
> - */ > - > - /* If it's a bridge, no DMA */ > - if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) > - return 0; > - > - /* Reduce the weight of slow USB controllers */ > - if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || > - dev->class == PCI_CLASS_SERIAL_USB_OHCI || > - dev->class == PCI_CLASS_SERIAL_USB_EHCI) > - return 3; > - > - /* Increase the weight of RAID (includes Obsidian) */ > - if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) > - return 15; > - > - /* Default */ > - return 10; > -} > - > #ifdef CONFIG_PCI_IOV > static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) > { > @@ -1028,7 +990,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) > pe->flags = PNV_IODA_PE_DEV; > pe->pdev = dev; > pe->pbus = NULL; > - pe->tce32_seg = -1; > pe->mve_number = -1; > pe->rid = dev->bus->number << 8 | pdn->devfn; > > @@ -1044,16 +1005,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) > return NULL; > } > > - /* Assign a DMA weight to the device */ > - pe->dma_weight = pnv_ioda_dma_weight(dev); > - if (pe->dma_weight != 0) { > - phb->ioda.dma_weight += pe->dma_weight; > - phb->ioda.dma_pe_count++; > - } > - > - /* Link the PE */ > - pnv_ioda_link_pe_by_weight(phb, pe); > - > return pe; > } > > @@ -1071,7 +1022,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) > } > pdn->pcidev = dev; > pdn->pe_number = pe->pe_number; > - pe->dma_weight += pnv_ioda_dma_weight(dev); > if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) > pnv_ioda_setup_same_PE(dev->subordinate, pe); > } > @@ -1108,10 +1058,8 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) > pe->flags |= (all ? 
PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); > pe->pbus = bus; > pe->pdev = NULL; > - pe->tce32_seg = -1; > pe->mve_number = -1; > pe->rid = bus->busn_res.start << 8; > - pe->dma_weight = 0; > > if (all) > pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", > @@ -1133,17 +1081,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) > > /* Put PE to the list */ > list_add_tail(&pe->list, &phb->ioda.pe_list); > - > - /* Account for one DMA PE if at least one DMA capable device exist > - * below the bridge > - */ > - if (pe->dma_weight != 0) { > - phb->ioda.dma_weight += pe->dma_weight; > - phb->ioda.dma_pe_count++; > - } > - > - /* Link the PE */ > - pnv_ioda_link_pe_by_weight(phb, pe); > } > > static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) > @@ -1184,7 +1121,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) > rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; > npu_pdn->pcidev = npu_pdev; > npu_pdn->pe_number = pe_num; > - pe->dma_weight += pnv_ioda_dma_weight(npu_pdev); > phb->ioda.pe_rmap[rid] = pe->pe_number; > > /* Map the PE to this link */ > @@ -1532,7 +1468,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) > pe->flags = PNV_IODA_PE_VF; > pe->pbus = NULL; > pe->parent_dev = pdev; > - pe->tce32_seg = -1; > pe->mve_number = -1; > pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | > pci_iov_virtfn_devfn(pdev, vf_index); > @@ -2023,6 +1958,54 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = { > .free = pnv_ioda2_table_free, > }; > > +static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data) > +{ > + unsigned int *weight = (unsigned int *)data; > + > + /* This is quite simplistic. The "base" weight of a device > + * is 10. 0 means no DMA is to be accounted for it. 
> + */ > + if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) > + return 0; > + > + if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || > + dev->class == PCI_CLASS_SERIAL_USB_OHCI || > + dev->class == PCI_CLASS_SERIAL_USB_EHCI) > + *weight += 3; > + else if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) > + *weight += 15; > + else > + *weight += 10; > + > + return 0; > +} > + > +static unsigned int pnv_pci_ioda_pe_dma_weight(struct pnv_ioda_pe *pe) > +{ > + unsigned int weight = 0; > + > + if ((pe->flags & PNV_IODA_PE_DEV) && pe->pdev) { > + pnv_pci_ioda_dev_dma_weight(pe->pdev, &weight); > + } else if ((pe->flags & PNV_IODA_PE_BUS) && pe->pbus) { > + struct pci_dev *pdev; > + > + list_for_each_entry(pdev, &pe->pbus->devices, bus_list) > + pnv_pci_ioda_dev_dma_weight(pdev, &weight); > + } else if ((pe->flags & PNV_IODA_PE_BUS_ALL) && pe->pbus) { > + pci_walk_bus(pe->pbus, pnv_pci_ioda_dev_dma_weight, &weight); > + } > + > + return weight; > +} > + > +static unsigned int pnv_pci_ioda_total_dma_weight(struct pnv_phb *phb) s/pnv_pci_ioda_total_dma_weight/pnv_pci_ioda1_phb_dma_weight/ ? "total" does not say much. Or just merge it into pnv_pci_ioda1_setup_dma_pe() as it is useless for anything but IODA1. > +{ > + unsigned int weight = 0; > + > + pci_walk_bus(phb->hose->bus, pnv_pci_ioda_dev_dma_weight, &weight); > + return weight; > +} > + > static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, > struct pnv_ioda_pe *pe, > unsigned int base, > @@ -2039,17 +2022,12 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, > /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. 
*/ > /* XXX FIXME: Allocate multi-level tables on PHB3 */ > > - /* We shouldn't already have a 32-bit DMA associated */ > - if (WARN_ON(pe->tce32_seg >= 0)) > - return; > - > tbl = pnv_pci_table_alloc(phb->hose->node); > iommu_register_group(&pe->table_group, phb->hose->global_number, > pe->pe_number); > pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); > > /* Grab a 32-bit TCE table */ > - pe->tce32_seg = base; > pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", > base * PNV_IODA1_DMA32_SEGSIZE, > (base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1); > @@ -2116,8 +2094,6 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, > return; > fail: > /* XXX Failure: Try to fallback to 64-bit only ? */ > - if (pe->tce32_seg >= 0) > - pe->tce32_seg = -1; > if (tce_mem) > __free_pages(tce_mem, get_order(tce32_segsz * segs)); > if (tbl) { > @@ -2528,10 +2504,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, > { > int64_t rc; > > - /* We shouldn't already have a 32-bit DMA associated */ > - if (WARN_ON(pe->tce32_seg >= 0)) > - return; > - > /* TVE #1 is selected by PCI address bit 59 */ > pe->tce_bypass_base = 1ull << 59; > > @@ -2539,7 +2511,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, > pe->pe_number); > > /* The PE will reserve all possible 32-bits space */ > - pe->tce32_seg = 0; > pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", > phb->ioda.m32_pci_base); > > @@ -2555,11 +2526,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, > #endif > > rc = pnv_pci_ioda2_setup_default_config(pe); > - if (rc) { > - if (pe->tce32_seg >= 0) > - pe->tce32_seg = -1; > + if (rc) > return; > - } > > if (pe->flags & PNV_IODA_PE_DEV) > iommu_add_device(&pe->pdev->dev); > @@ -2570,24 +2538,32 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, > static void pnv_ioda_setup_dma(struct pnv_phb *phb) > { > struct pci_controller *hose = phb->hose; > - unsigned int residual, remaining, segs, tw, base; 
> + unsigned int weight, total_weight, dma_pe_count; > + unsigned int residual, remaining, segs, base; > struct pnv_ioda_pe *pe; > > + total_weight = pnv_pci_ioda_total_dma_weight(phb); > + dma_pe_count = 0; > + list_for_each_entry(pe, &phb->ioda.pe_list, list) { > + weight = pnv_pci_ioda_pe_dma_weight(pe); > + if (weight > 0) > + dma_pe_count++; > + } > + > /* If we have more PE# than segments available, hand out one > * per PE until we run out and let the rest fail. If not, > * then we assign at least one segment per PE, plus more based > * on the amount of devices under that PE > */ > - if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) > + if (dma_pe_count > phb->ioda.tce32_count) > residual = 0; > else > - residual = phb->ioda.tce32_count - > - phb->ioda.dma_pe_count; > + residual = phb->ioda.tce32_count - dma_pe_count; > > pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", > hose->global_number, phb->ioda.tce32_count); > pr_info("PCI: %d PE# for a total weight of %d\n", > - phb->ioda.dma_pe_count, phb->ioda.dma_weight); > + dma_pe_count, total_weight); > > pnv_pci_ioda_setup_opal_tce_kill(phb); > > @@ -2596,18 +2572,20 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) > * weight > */ > remaining = phb->ioda.tce32_count; > - tw = phb->ioda.dma_weight; > base = 0; > - list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { > - if (!pe->dma_weight) > + list_for_each_entry(pe, &phb->ioda.pe_list, list) { > + weight = pnv_pci_ioda_pe_dma_weight(pe); > + if (!weight) > continue; > + > if (!remaining) { > pe_warn(pe, "No DMA32 resources available\n"); > continue; > } > segs = 1; > if (residual) { > - segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; > + segs += ((weight * residual) + (total_weight / 2)) / > + total_weight; > if (segs > remaining) > segs = remaining; > } > @@ -2619,7 +2597,7 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) > */ > if (phb->type == PNV_PHB_IODA1) { > pe_info(pe, "DMA weight %d, assigned %d DMA32 
segments\n", > - pe->dma_weight, segs); > + weight, segs); > pnv_pci_ioda1_setup_dma_pe(phb, pe, base, segs); > } else if (phb->type == PNV_PHB_IODA2) { > pe_info(pe, "Assign DMA32 space\n"); > @@ -3156,13 +3134,18 @@ static void pnv_npu_ioda_fixup(void) > struct pci_controller *hose, *tmp; > struct pnv_phb *phb; > struct pnv_ioda_pe *pe; > + unsigned int weight; > > list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { > phb = hose->private_data; > if (phb->type != PNV_PHB_NPU) > continue; > > - list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { > + list_for_each_entry(pe, &phb->ioda.pe_list, list) { > + weight = pnv_pci_ioda_pe_dma_weight(pe); > + if (!weight) > + continue; Is it even possible for an NPU PE to get weight==0? WARN_ON()? BUG_ON()? > + > enable_bypass = dma_get_mask(&pe->pdev->dev) == > DMA_BIT_MASK(64); > pnv_npu_init_dma_pe(pe); > @@ -3443,7 +3426,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, > phb->ioda.pe_array = aux + pemap_off; > set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc); > > - INIT_LIST_HEAD(&phb->ioda.pe_dma_list); > INIT_LIST_HEAD(&phb->ioda.pe_list); > mutex_init(&phb->ioda.pe_list_mutex); > > diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h > index 1d8e775..e90bcbe 100644 > --- a/arch/powerpc/platforms/powernv/pci.h > +++ b/arch/powerpc/platforms/powernv/pci.h > @@ -53,14 +53,7 @@ struct pnv_ioda_pe { > /* PE number */ > unsigned int pe_number; > > - /* "Weight" assigned to the PE for the sake of DMA resource > - * allocations > - */ > - unsigned int dma_weight; > - > /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */ > - int tce32_seg; > - int tce32_segcount; > struct iommu_table_group table_group; > > /* 64-bit TCE bypass region */ > @@ -78,7 +71,6 @@ struct pnv_ioda_pe { > struct list_head slaves; > > /* Link in list of PE#s */ > - struct list_head dma_link; > struct list_head list; > }; > > @@ -173,17 +165,6 @@ struct pnv_phb { > /* 32-bit TCE 
tables allocation */ > unsigned long tce32_count; > > - /* Total "weight" for the sake of DMA resources > - * allocation > - */ > - unsigned int dma_weight; > - unsigned int dma_pe_count; > - > - /* Sorted list of used PE's, sorted at > - * boot for resource allocation purposes > - */ > - struct list_head pe_dma_list; > - > /* TCE cache invalidate registers (physical and > * remapped) > */ > -- Alexey