From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752532AbdF1PGC (ORCPT ); Wed, 28 Jun 2017 11:06:02 -0400 Received: from foss.arm.com ([217.140.101.70]:43076 "EHLO foss.arm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752378AbdF1PF3 (ORCPT ); Wed, 28 Jun 2017 11:05:29 -0400 From: Marc Zyngier To: linux-kernel@vger.kernel.org, linux-arm-kernel@lists.infradead.org, kvmarm@lists.cs.columbia.edu Cc: Christoffer Dall , Thomas Gleixner , Jason Cooper , Eric Auger , Shanker Donthineni , Mark Rutland Subject: [PATCH v2 27/52] irqchip/gic-v3-its: Support VPE doorbell invalidation even when !DirectLPI Date: Wed, 28 Jun 2017 16:03:46 +0100 Message-Id: <20170628150411.15846-28-marc.zyngier@arm.com> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20170628150411.15846-1-marc.zyngier@arm.com> References: <20170628150411.15846-1-marc.zyngier@arm.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org When we don't have the DirectLPI feature, we must work around the architecture shortcomings to be able to perform the required invalidation. For this, we create a fake device whose sole purpose is to provide a way to issue a map/inv/unmap sequence (and the corresponding sync operations). That's 6 commands and a full serialization point to be able to do this. You just have to hope the hypervisor won't do that too often... Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 66 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 64893f0427f2..4979916cd3d7 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -138,6 +138,9 @@ struct its_device { u32 device_id; }; +static struct its_device *vpe_proxy_dev; +static DEFINE_RAW_SPINLOCK(vpe_proxy_dev_lock); + static LIST_HEAD(its_nodes); static DEFINE_SPINLOCK(its_lock); static struct rdists *gic_rdists; @@ -2074,6 +2077,14 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, msi_info = msi_get_domain_info(domain); its = msi_info->data; + if (its->is_v4 && !gic_rdists->has_direct_lpi && + dev_id == vpe_proxy_dev->device_id) { + /* Bad luck. Get yourself a better implementation */ + WARN_ONCE(1, "DevId %x clashes with GICv4 VPE proxy device\n", + dev_id); + return -EINVAL; + } + its_dev = its_find_device(its, dev_id); if (its_dev) { /* @@ -2318,10 +2329,38 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) static void its_vpe_send_inv(struct irq_data *d) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); - void __iomem *rdbase; - rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; - gic_write_invlpir(d->hwirq, rdbase + GICR_INVLPIR); + if (gic_rdists->has_direct_lpi) { + void __iomem *rdbase; + + rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; + gic_write_invlpir(d->hwirq, rdbase + GICR_INVLPIR); + } else { + /* + * This is insane. + * + * If a GICv4 doesn't implement Direct LPIs, the only + * way to perform an invalidate is to use a fake + * device to issue a MAP/INV/UNMAP sequence. Since + * each of these commands has a sync operation, this + * is really fast. Not. + * + * We always use event 0, and thus serialize all VPE + * invalidations in the system. + * + * Broken by design(tm). + */ + unsigned long flags; + + raw_spin_lock_irqsave(&vpe_proxy_dev_lock, flags); + + vpe_proxy_dev->event_map.col_map[0] = vpe->col_idx; + its_send_mapti(vpe_proxy_dev, vpe->vpe_db_lpi, 0); + its_send_inv(vpe_proxy_dev, 0); + its_send_discard(vpe_proxy_dev, 0); + + raw_spin_unlock_irqrestore(&vpe_proxy_dev_lock, flags); + } } static void its_vpe_mask_irq(struct irq_data *d) @@ -2616,6 +2655,27 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) static int its_init_vpe_domain(void) { + struct its_node *its; + u32 devid; + + if (gic_rdists->has_direct_lpi) { + pr_info("ITS: Using DirectLPI for VPE invalidation\n"); + return 0; + } + + /* Any ITS will do, even if not v4 */ + its = list_first_entry(&its_nodes, struct its_node, entry); + + /* Use the last possible DevID */ + devid = GENMASK(its->device_ids - 1, 0); + vpe_proxy_dev = its_create_device(its, devid, 1); + if (!vpe_proxy_dev) { + pr_err("ITS: Can't allocate GICv4 proxy device\n"); + return -ENODEV; + } + + pr_info("ITS: Allocated DevID %x as GICv4 proxy device\n", devid); + return 0; } -- 2.11.0 From mboxrd@z Thu Jan 1 00:00:00 1970 From: Marc Zyngier Subject: [PATCH v2 27/52] irqchip/gic-v3-its: Support VPE doorbell invalidation even when !DirectLPI Date: Wed, 28 Jun 2017 16:03:46 +0100 Message-ID: <20170628150411.15846-28-marc.zyngier@arm.com> References: <20170628150411.15846-1-marc.zyngier@arm.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Received: from localhost (localhost [127.0.0.1]) by mm01.cs.columbia.edu (Postfix) with ESMTP id 15E9949C1A for ; Wed, 28 Jun 2017 11:05:18 -0400 (EDT) Received: from mm01.cs.columbia.edu ([127.0.0.1]) by localhost (mm01.cs.columbia.edu [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 4hZ9KYILt9YC for ; Wed, 28 Jun 2017 11:05:16 -0400 (EDT) Received: from foss.arm.com (foss.arm.com [217.140.101.70]) by mm01.cs.columbia.edu (Postfix) with ESMTP id BEC6A40FA5 for ; Wed, 28 Jun 2017 11:05:14 -0400 (EDT) In-Reply-To: <20170628150411.15846-1-marc.zyngier@arm.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: kvmarm-bounces@lists.cs.columbia.edu Sender: kvmarm-bounces@lists.cs.columbia.edu To: linux-kernel@vger.kernel.org, linux-arm-kernel@lists.infradead.org, kvmarm@lists.cs.columbia.edu Cc: Jason Cooper , Thomas Gleixner List-Id: kvmarm@lists.cs.columbia.edu When we don't have the DirectLPI feature, we must work around the architecture shortcomings to be able to perform the required invalidation. For this, we create a fake device whose sole purpose is to provide a way to issue a map/inv/unmap sequence (and the corresponding sync operations). That's 6 commands and a full serialization point to be able to do this. You just have to hope the hypervisor won't do that too often... Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 66 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 64893f0427f2..4979916cd3d7 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -138,6 +138,9 @@ struct its_device { u32 device_id; }; +static struct its_device *vpe_proxy_dev; +static DEFINE_RAW_SPINLOCK(vpe_proxy_dev_lock); + static LIST_HEAD(its_nodes); static DEFINE_SPINLOCK(its_lock); static struct rdists *gic_rdists; @@ -2074,6 +2077,14 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, msi_info = msi_get_domain_info(domain); its = msi_info->data; + if (its->is_v4 && !gic_rdists->has_direct_lpi && + dev_id == vpe_proxy_dev->device_id) { + /* Bad luck. Get yourself a better implementation */ + WARN_ONCE(1, "DevId %x clashes with GICv4 VPE proxy device\n", + dev_id); + return -EINVAL; + } + its_dev = its_find_device(its, dev_id); if (its_dev) { /* @@ -2318,10 +2329,38 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) static void its_vpe_send_inv(struct irq_data *d) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); - void __iomem *rdbase; - rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; - gic_write_invlpir(d->hwirq, rdbase + GICR_INVLPIR); + if (gic_rdists->has_direct_lpi) { + void __iomem *rdbase; + + rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; + gic_write_invlpir(d->hwirq, rdbase + GICR_INVLPIR); + } else { + /* + * This is insane. + * + * If a GICv4 doesn't implement Direct LPIs, the only + * way to perform an invalidate is to use a fake + * device to issue a MAP/INV/UNMAP sequence. Since + * each of these commands has a sync operation, this + * is really fast. Not. + * + * We always use event 0, and thus serialize all VPE + * invalidations in the system. + * + * Broken by design(tm). + */ + unsigned long flags; + + raw_spin_lock_irqsave(&vpe_proxy_dev_lock, flags); + + vpe_proxy_dev->event_map.col_map[0] = vpe->col_idx; + its_send_mapti(vpe_proxy_dev, vpe->vpe_db_lpi, 0); + its_send_inv(vpe_proxy_dev, 0); + its_send_discard(vpe_proxy_dev, 0); + + raw_spin_unlock_irqrestore(&vpe_proxy_dev_lock, flags); + } } static void its_vpe_mask_irq(struct irq_data *d) @@ -2616,6 +2655,27 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) static int its_init_vpe_domain(void) { + struct its_node *its; + u32 devid; + + if (gic_rdists->has_direct_lpi) { + pr_info("ITS: Using DirectLPI for VPE invalidation\n"); + return 0; + } + + /* Any ITS will do, even if not v4 */ + its = list_first_entry(&its_nodes, struct its_node, entry); + + /* Use the last possible DevID */ + devid = GENMASK(its->device_ids - 1, 0); + vpe_proxy_dev = its_create_device(its, devid, 1); + if (!vpe_proxy_dev) { + pr_err("ITS: Can't allocate GICv4 proxy device\n"); + return -ENODEV; + } + + pr_info("ITS: Allocated DevID %x as GICv4 proxy device\n", devid); + return 0; } -- 2.11.0 From mboxrd@z Thu Jan 1 00:00:00 1970 From: marc.zyngier@arm.com (Marc Zyngier) Date: Wed, 28 Jun 2017 16:03:46 +0100 Subject: [PATCH v2 27/52] irqchip/gic-v3-its: Support VPE doorbell invalidation even when !DirectLPI In-Reply-To: <20170628150411.15846-1-marc.zyngier@arm.com> References: <20170628150411.15846-1-marc.zyngier@arm.com> Message-ID: <20170628150411.15846-28-marc.zyngier@arm.com> To: linux-arm-kernel@lists.infradead.org List-Id: linux-arm-kernel.lists.infradead.org When we don't have the DirectLPI feature, we must work around the architecture shortcomings to be able to perform the required invalidation. For this, we create a fake device whose sole purpose is to provide a way to issue a map/inv/unmap sequence (and the corresponding sync operations). That's 6 commands and a full serialization point to be able to do this. You just have to hope the hypervisor won't do that too often... Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 66 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 64893f0427f2..4979916cd3d7 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -138,6 +138,9 @@ struct its_device { u32 device_id; }; +static struct its_device *vpe_proxy_dev; +static DEFINE_RAW_SPINLOCK(vpe_proxy_dev_lock); + static LIST_HEAD(its_nodes); static DEFINE_SPINLOCK(its_lock); static struct rdists *gic_rdists; @@ -2074,6 +2077,14 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, msi_info = msi_get_domain_info(domain); its = msi_info->data; + if (its->is_v4 && !gic_rdists->has_direct_lpi && + dev_id == vpe_proxy_dev->device_id) { + /* Bad luck. Get yourself a better implementation */ + WARN_ONCE(1, "DevId %x clashes with GICv4 VPE proxy device\n", + dev_id); + return -EINVAL; + } + its_dev = its_find_device(its, dev_id); if (its_dev) { /* @@ -2318,10 +2329,38 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) static void its_vpe_send_inv(struct irq_data *d) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); - void __iomem *rdbase; - rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; - gic_write_invlpir(d->hwirq, rdbase + GICR_INVLPIR); + if (gic_rdists->has_direct_lpi) { + void __iomem *rdbase; + + rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; + gic_write_invlpir(d->hwirq, rdbase + GICR_INVLPIR); + } else { + /* + * This is insane. + * + * If a GICv4 doesn't implement Direct LPIs, the only + * way to perform an invalidate is to use a fake + * device to issue a MAP/INV/UNMAP sequence. Since + * each of these commands has a sync operation, this + * is really fast. Not. + * + * We always use event 0, and thus serialize all VPE + * invalidations in the system. + * + * Broken by design(tm). + */ + unsigned long flags; + + raw_spin_lock_irqsave(&vpe_proxy_dev_lock, flags); + + vpe_proxy_dev->event_map.col_map[0] = vpe->col_idx; + its_send_mapti(vpe_proxy_dev, vpe->vpe_db_lpi, 0); + its_send_inv(vpe_proxy_dev, 0); + its_send_discard(vpe_proxy_dev, 0); + + raw_spin_unlock_irqrestore(&vpe_proxy_dev_lock, flags); + } } static void its_vpe_mask_irq(struct irq_data *d) @@ -2616,6 +2655,27 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) static int its_init_vpe_domain(void) { + struct its_node *its; + u32 devid; + + if (gic_rdists->has_direct_lpi) { + pr_info("ITS: Using DirectLPI for VPE invalidation\n"); + return 0; + } + + /* Any ITS will do, even if not v4 */ + its = list_first_entry(&its_nodes, struct its_node, entry); + + /* Use the last possible DevID */ + devid = GENMASK(its->device_ids - 1, 0); + vpe_proxy_dev = its_create_device(its, devid, 1); + if (!vpe_proxy_dev) { + pr_err("ITS: Can't allocate GICv4 proxy device\n"); + return -ENODEV; + } + + pr_info("ITS: Allocated DevID %x as GICv4 proxy device\n", devid); + return 0; } -- 2.11.0