From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752136AbdLEGet (ORCPT ); Tue, 5 Dec 2017 01:34:49 -0500 Received: from mga03.intel.com ([134.134.136.65]:6985 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751062AbdLEGer (ORCPT ); Tue, 5 Dec 2017 01:34:47 -0500 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.45,363,1508828400"; d="scan'208";a="167902" Subject: Re: [PATCH v3 12/16] iommu/vt-d: report unrecoverable device faults To: Jacob Pan , iommu@lists.linux-foundation.org, LKML , Joerg Roedel , David Woodhouse , Greg Kroah-Hartman , Rafael Wysocki , Alex Williamson References: <1510944914-54430-1-git-send-email-jacob.jun.pan@linux.intel.com> <1510944914-54430-13-git-send-email-jacob.jun.pan@linux.intel.com> Cc: Lan Tianyu , Yi L , Liu@mail.linuxfoundation.org, Jean Delvare From: Lu Baolu Message-ID: <5A263E05.10507@linux.intel.com> Date: Tue, 5 Dec 2017 14:34:45 +0800 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.5.1 MIME-Version: 1.0 In-Reply-To: <1510944914-54430-13-git-send-email-jacob.jun.pan@linux.intel.com> Content-Type: text/plain; charset=windows-1252 Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Hi, On 11/18/2017 02:55 AM, Jacob Pan wrote: > Currently, when device DMA faults are detected by IOMMU the fault > reasons are printed but the driver of the offending device is A "not" appears to be missing here; this should read: "... but the driver of the offending device is not involved in ..." Best regards, Lu Baolu > involved in fault handling. > This patch uses per device fault reporting API to send fault event > data for further processing. > Offending device is identified by the source ID in VT-d fault reason > report registers. 
> > Signed-off-by: Liu, Yi L > Signed-off-by: Jacob Pan > Signed-off-by: Ashok Raj > --- > drivers/iommu/dmar.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 93 insertions(+), 1 deletion(-) > > diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c > index 38ee91b..b1f67fc2 100644 > --- a/drivers/iommu/dmar.c > +++ b/drivers/iommu/dmar.c > @@ -1555,6 +1555,31 @@ static const char *irq_remap_fault_reasons[] = > "Blocked an interrupt request due to source-id verification failure", > }; > > +/* fault data and status */ > +enum intel_iommu_fault_reason { > + INTEL_IOMMU_FAULT_REASON_SW, > + INTEL_IOMMU_FAULT_REASON_ROOT_NOT_PRESENT, > + INTEL_IOMMU_FAULT_REASON_CONTEXT_NOT_PRESENT, > + INTEL_IOMMU_FAULT_REASON_CONTEXT_INVALID, > + INTEL_IOMMU_FAULT_REASON_BEYOND_ADDR_WIDTH, > + INTEL_IOMMU_FAULT_REASON_PTE_WRITE_ACCESS, > + INTEL_IOMMU_FAULT_REASON_PTE_READ_ACCESS, > + INTEL_IOMMU_FAULT_REASON_NEXT_PT_INVALID, > + INTEL_IOMMU_FAULT_REASON_ROOT_ADDR_INVALID, > + INTEL_IOMMU_FAULT_REASON_CONTEXT_PTR_INVALID, > + INTEL_IOMMU_FAULT_REASON_NONE_ZERO_RTP, > + INTEL_IOMMU_FAULT_REASON_NONE_ZERO_CTP, > + INTEL_IOMMU_FAULT_REASON_NONE_ZERO_PTE, > + NR_INTEL_IOMMU_FAULT_REASON, > +}; > + > +/* fault reasons that are allowed to be reported outside IOMMU subsystem */ > +#define INTEL_IOMMU_FAULT_REASON_ALLOWED \ > + ((1ULL << INTEL_IOMMU_FAULT_REASON_BEYOND_ADDR_WIDTH) | \ > + (1ULL << INTEL_IOMMU_FAULT_REASON_PTE_WRITE_ACCESS) | \ > + (1ULL << INTEL_IOMMU_FAULT_REASON_PTE_READ_ACCESS)) > + > + > static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) > { > if (fault_reason >= 0x20 && (fault_reason - 0x20 < > @@ -1635,6 +1660,69 @@ void dmar_msi_read(int irq, struct msi_msg *msg) > raw_spin_unlock_irqrestore(&iommu->register_lock, flag); > } > > +static enum iommu_fault_reason to_iommu_fault_reason(u8 reason) > +{ > + if (reason >= NR_INTEL_IOMMU_FAULT_REASON) { > + pr_warn("unknown DMAR fault reason %d\n", reason); > + return 
IOMMU_FAULT_REASON_UNKNOWN; > + } > + switch (reason) { > + case INTEL_IOMMU_FAULT_REASON_SW: > + case INTEL_IOMMU_FAULT_REASON_ROOT_NOT_PRESENT: > + case INTEL_IOMMU_FAULT_REASON_CONTEXT_NOT_PRESENT: > + case INTEL_IOMMU_FAULT_REASON_CONTEXT_INVALID: > + case INTEL_IOMMU_FAULT_REASON_BEYOND_ADDR_WIDTH: > + case INTEL_IOMMU_FAULT_REASON_ROOT_ADDR_INVALID: > + case INTEL_IOMMU_FAULT_REASON_CONTEXT_PTR_INVALID: > + return IOMMU_FAULT_REASON_INTERNAL; > + case INTEL_IOMMU_FAULT_REASON_NEXT_PT_INVALID: > + case INTEL_IOMMU_FAULT_REASON_PTE_WRITE_ACCESS: > + case INTEL_IOMMU_FAULT_REASON_PTE_READ_ACCESS: > + return IOMMU_FAULT_REASON_PERMISSION; > + default: > + return IOMMU_FAULT_REASON_UNKNOWN; > + } > +} > + > +static void report_fault_to_device(struct intel_iommu *iommu, u64 addr, int type, > + int fault_type, enum intel_iommu_fault_reason reason, u16 sid) > +{ > + struct iommu_fault_event event; > + struct pci_dev *pdev; > + u8 bus, devfn; > + > + /* check if fault reason is worth reporting outside IOMMU */ > + if (!((1 << reason) & INTEL_IOMMU_FAULT_REASON_ALLOWED)) { > + pr_debug("Fault reason %d not allowed to report to device\n", > + reason); > + return; > + } > + > + bus = PCI_BUS_NUM(sid); > + devfn = PCI_DEVFN(PCI_SLOT(sid), PCI_FUNC(sid)); > + /* > + * we need to check if the fault reporting is requested for the > + * offending device. > + */ > + pdev = pci_get_bus_and_slot(bus, devfn); > + if (!pdev) { > + pr_warn("No PCI device found for source ID %x\n", sid); > + return; > + } > + /* > + * unrecoverable fault is reported per IOMMU, notifier handler can > + * resolve PCI device based on source ID. > + */ > + event.reason = to_iommu_fault_reason(reason); > + event.addr = addr; > + event.type = IOMMU_FAULT_DMA_UNRECOV; > + event.prot = type ? 
IOMMU_READ : IOMMU_WRITE; > + dev_warn(&pdev->dev, "report device unrecoverable fault: %d, %x, %d\n", > + event.reason, sid, event.type); > + iommu_report_device_fault(&pdev->dev, &event); > + pci_dev_put(pdev); > +} > + > static int dmar_fault_do_one(struct intel_iommu *iommu, int type, > u8 fault_reason, u16 source_id, unsigned long long addr) > { > @@ -1648,11 +1736,15 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, > source_id >> 8, PCI_SLOT(source_id & 0xFF), > PCI_FUNC(source_id & 0xFF), addr >> 48, > fault_reason, reason); > - else > + else { > pr_err("[%s] Request device [%02x:%02x.%d] fault addr %llx [fault reason %02d] %s\n", > type ? "DMA Read" : "DMA Write", > source_id >> 8, PCI_SLOT(source_id & 0xFF), > PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); > + } > + report_fault_to_device(iommu, addr, type, fault_type, > + fault_reason, source_id); > + > return 0; > } >