dmaengine.vger.kernel.org archive mirror
* [PATCH v5 3/5] dmaengine: idxd: Add shared workqueue support
       [not found] <160090233730.44288.4446779116422752486.stgit@djiang5-desk3.ch.intel.com>
@ 2020-09-23 23:10 ` Dave Jiang
  2020-09-23 23:11 ` [PATCH v5 4/5] dmaengine: idxd: Clean up descriptors with fault error Dave Jiang
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: Dave Jiang @ 2020-09-23 23:10 UTC (permalink / raw)
  To: vkoul, tglx, mingo, bp, dan.j.williams, tony.luck, jing.lin,
	ashok.raj, sanjay.k.kumar, fenghua.yu, kevin.tian, David.Laight,
	dmaengine, linux-kernel

Add shared workqueue support, which includes support for Shared Virtual
Memory (SVM), also known as On Demand Paging (ODP). The shared workqueue
uses the ENQCMDS instruction in the kernel and will respond with a retry
if the workqueue is full. A shared workqueue only works when there is
PASID support from the IOMMU.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/Kconfig          |   10 +++
 drivers/dma/idxd/cdev.c      |   49 ++++++++++++++++
 drivers/dma/idxd/device.c    |   91 +++++++++++++++++++++++++++---
 drivers/dma/idxd/dma.c       |    9 ---
 drivers/dma/idxd/idxd.h      |   28 +++++++++
 drivers/dma/idxd/init.c      |   91 +++++++++++++++++++++++-------
 drivers/dma/idxd/registers.h |   14 +++++
 drivers/dma/idxd/submit.c    |   35 ++++++++++--
 drivers/dma/idxd/sysfs.c     |  127 ++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 410 insertions(+), 44 deletions(-)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 518a1437862a..6a908785a5f7 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -296,6 +296,16 @@ config INTEL_IDXD
 
 	  If unsure, say N.
 
+# Config symbol that collects all the dependencies that are necessary to
+# support shared virtual memory for the devices supported by idxd.
+config INTEL_IDXD_SVM
+	bool "Accelerator Shared Virtual Memory Support"
+	depends on INTEL_IDXD
+	depends on INTEL_IOMMU_SVM
+	depends on PCI_PRI
+	depends on PCI_PASID
+	depends on PCI_IOV
+
 config INTEL_IOATDMA
 	tristate "Intel I/OAT DMA support"
 	depends on PCI && X86_64
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index c3976156db2f..010b820d8f74 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -11,6 +11,7 @@
 #include <linux/cdev.h>
 #include <linux/fs.h>
 #include <linux/poll.h>
+#include <linux/iommu.h>
 #include <uapi/linux/idxd.h>
 #include "registers.h"
 #include "idxd.h"
@@ -32,7 +33,9 @@ static struct idxd_cdev_context ictx[IDXD_TYPE_MAX] = {
 struct idxd_user_context {
 	struct idxd_wq *wq;
 	struct task_struct *task;
+	unsigned int pasid;
 	unsigned int flags;
+	struct iommu_sva *sva;
 };
 
 enum idxd_cdev_cleanup {
@@ -75,6 +78,8 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
 	struct idxd_wq *wq;
 	struct device *dev;
 	int rc = 0;
+	struct iommu_sva *sva;
+	unsigned int pasid;
 
 	wq = inode_wq(inode);
 	idxd = wq->idxd;
@@ -95,6 +100,34 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
 
 	ctx->wq = wq;
 	filp->private_data = ctx;
+
+	if (device_pasid_enabled(idxd)) {
+		sva = iommu_sva_bind_device(dev, current->mm, NULL);
+		if (IS_ERR(sva)) {
+			rc = PTR_ERR(sva);
+			dev_err(dev, "pasid allocation failed: %d\n", rc);
+			goto failed;
+		}
+
+		pasid = iommu_sva_get_pasid(sva);
+		if (pasid == IOMMU_PASID_INVALID) {
+			iommu_sva_unbind_device(sva);
+			goto failed;
+		}
+
+		ctx->sva = sva;
+		ctx->pasid = pasid;
+
+		if (wq_dedicated(wq)) {
+			rc = idxd_wq_set_pasid(wq, pasid);
+			if (rc < 0) {
+				iommu_sva_unbind_device(sva);
+				dev_err(dev, "wq set pasid failed: %d\n", rc);
+				goto failed;
+			}
+		}
+	}
+
 	idxd_wq_get(wq);
 	mutex_unlock(&wq->wq_lock);
 	return 0;
@@ -111,13 +144,27 @@ static int idxd_cdev_release(struct inode *node, struct file *filep)
 	struct idxd_wq *wq = ctx->wq;
 	struct idxd_device *idxd = wq->idxd;
 	struct device *dev = &idxd->pdev->dev;
+	int rc;
 
 	dev_dbg(dev, "%s called\n", __func__);
 	filep->private_data = NULL;
 
 	/* Wait for in-flight operations to complete. */
-	idxd_wq_drain(wq);
+	if (wq_shared(wq)) {
+		idxd_device_drain_pasid(idxd, ctx->pasid);
+	} else {
+		if (device_pasid_enabled(idxd)) {
+			/* The wq disable in the disable pasid function will drain the wq */
+			rc = idxd_wq_disable_pasid(wq);
+			if (rc < 0)
+				dev_err(dev, "wq disable pasid failed.\n");
+		} else {
+			idxd_wq_drain(wq);
+		}
+	}
 
+	if (ctx->sva)
+		iommu_sva_unbind_device(ctx->sva);
 	kfree(ctx);
 	mutex_lock(&wq->wq_lock);
 	idxd_wq_put(wq);
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 200b9109cacf..3c6c6df9d2e8 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -273,10 +273,9 @@ int idxd_wq_map_portal(struct idxd_wq *wq)
 	start = pci_resource_start(pdev, IDXD_WQ_BAR);
 	start = start + wq->id * IDXD_PORTAL_SIZE;
 
-	wq->dportal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE);
-	if (!wq->dportal)
+	wq->portal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE);
+	if (!wq->portal)
 		return -ENOMEM;
-	dev_dbg(dev, "wq %d portal mapped at %p\n", wq->id, wq->dportal);
 
 	return 0;
 }
@@ -285,7 +284,61 @@ void idxd_wq_unmap_portal(struct idxd_wq *wq)
 {
 	struct device *dev = &wq->idxd->pdev->dev;
 
-	devm_iounmap(dev, wq->dportal);
+	devm_iounmap(dev, wq->portal);
+}
+
+int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid)
+{
+	struct idxd_device *idxd = wq->idxd;
+	int rc;
+	union wqcfg wqcfg;
+	unsigned int offset;
+	unsigned long flags;
+
+	rc = idxd_wq_disable(wq);
+	if (rc < 0)
+		return rc;
+
+	offset = WQCFG_OFFSET(idxd, wq->id, 2);
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	wqcfg.bits[2] = ioread32(idxd->reg_base + offset);
+	wqcfg.pasid_en = 1;
+	wqcfg.pasid = pasid;
+	iowrite32(wqcfg.bits[2], idxd->reg_base + offset);
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+
+	rc = idxd_wq_enable(wq);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
+
+int idxd_wq_disable_pasid(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	int rc;
+	union wqcfg wqcfg;
+	unsigned int offset;
+	unsigned long flags;
+
+	rc = idxd_wq_disable(wq);
+	if (rc < 0)
+		return rc;
+
+	offset = WQCFG_OFFSET(idxd, wq->id, 2);
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	wqcfg.bits[2] = ioread32(idxd->reg_base + offset);
+	wqcfg.pasid_en = 0;
+	wqcfg.pasid = 0;
+	iowrite32(wqcfg.bits[2], idxd->reg_base + offset);
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+
+	rc = idxd_wq_enable(wq);
+	if (rc < 0)
+		return rc;
+
+	return 0;
 }
 
 void idxd_wq_disable_cleanup(struct idxd_wq *wq)
@@ -468,6 +521,17 @@ void idxd_device_reset(struct idxd_device *idxd)
 	spin_unlock_irqrestore(&idxd->dev_lock, flags);
 }
 
+void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid)
+{
+	struct device *dev = &idxd->pdev->dev;
+	u32 operand;
+
+	operand = pasid;
+	dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_DRAIN_PASID, operand);
+	idxd_cmd_exec(idxd, IDXD_CMD_DRAIN_PASID, operand, NULL);
+	dev_dbg(dev, "pasid %d drained\n", pasid);
+}
+
 /* Device configuration bits */
 static void idxd_group_config_write(struct idxd_group *group)
 {
@@ -553,11 +617,22 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
 	wq->wqcfg.wq_thresh = wq->threshold;
 
 	/* byte 8-11 */
-	wq->wqcfg.priv = !!(wq->type == IDXD_WQT_KERNEL);
-	wq->wqcfg.mode = 1;
+	wq->wqcfg.priv = wq->type == IDXD_WQT_KERNEL ? 1 : 0;
+	if (wq_dedicated(wq))
+		wq->wqcfg.mode = 1;
+
+	if (device_pasid_enabled(idxd)) {
+		wq->wqcfg.pasid_en = 1;
+		if (wq->type == IDXD_WQT_KERNEL && wq_dedicated(wq))
+			wq->wqcfg.pasid = idxd->pasid;
+	}
 
 	wq->wqcfg.priority = wq->priority;
 
+	if (idxd->hw.gen_cap.block_on_fault &&
+	    test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags))
+		wq->wqcfg.bof = 1;
+
 	/* bytes 12-15 */
 	wq->wqcfg.max_xfer_shift = ilog2(wq->max_xfer_bytes);
 	wq->wqcfg.max_batch_shift = ilog2(wq->max_batch_size);
@@ -665,8 +740,8 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
 		if (!wq->size)
 			continue;
 
-		if (!wq_dedicated(wq)) {
-			dev_warn(dev, "No shared workqueue support.\n");
+		if (wq_shared(wq) && !device_swq_supported(idxd)) {
+			dev_warn(dev, "No shared wq support but configured.\n");
 			return -EINVAL;
 		}
 
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
index 0c892cbd72e0..8ed2773d8285 100644
--- a/drivers/dma/idxd/dma.c
+++ b/drivers/dma/idxd/dma.c
@@ -61,8 +61,6 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq,
 					 u64 addr_f1, u64 addr_f2, u64 len,
 					 u64 compl, u32 flags)
 {
-	struct idxd_device *idxd = wq->idxd;
-
 	hw->flags = flags;
 	hw->opcode = opcode;
 	hw->src_addr = addr_f1;
@@ -70,13 +68,6 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq,
 	hw->xfer_size = len;
 	hw->priv = !!(wq->type == IDXD_WQT_KERNEL);
 	hw->completion_addr = compl;
-
-	/*
-	 * Descriptor completion vectors are 1-8 for MSIX. We will round
-	 * robin through the 8 vectors.
-	 */
-	wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1;
-	hw->int_handle =  wq->vec_ptr;
 }
 
 static struct dma_async_tx_descriptor *
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index c64df197e724..43a216c42d25 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -59,6 +59,7 @@ enum idxd_wq_state {
 
 enum idxd_wq_flag {
 	WQ_FLAG_DEDICATED = 0,
+	WQ_FLAG_BLOCK_ON_FAULT,
 };
 
 enum idxd_wq_type {
@@ -86,10 +87,11 @@ enum idxd_op_type {
 enum idxd_complete_type {
 	IDXD_COMPLETE_NORMAL = 0,
 	IDXD_COMPLETE_ABORT,
+	IDXD_COMPLETE_DEV_FAIL,
 };
 
 struct idxd_wq {
-	void __iomem *dportal;
+	void __iomem *portal;
 	struct device conf_dev;
 	struct idxd_cdev idxd_cdev;
 	struct idxd_device *idxd;
@@ -145,6 +147,7 @@ enum idxd_device_state {
 enum idxd_device_flag {
 	IDXD_FLAG_CONFIGURABLE = 0,
 	IDXD_FLAG_CMD_RUNNING,
+	IDXD_FLAG_PASID_ENABLED,
 };
 
 struct idxd_device {
@@ -167,6 +170,9 @@ struct idxd_device {
 	struct idxd_wq *wqs;
 	struct idxd_engine *engines;
 
+	struct iommu_sva *sva;
+	unsigned int pasid;
+
 	int num_groups;
 
 	u32 msix_perm_offset;
@@ -214,11 +220,28 @@ struct idxd_desc {
 
 extern struct bus_type dsa_bus_type;
 
+extern bool support_enqcmd;
+
 static inline bool wq_dedicated(struct idxd_wq *wq)
 {
 	return test_bit(WQ_FLAG_DEDICATED, &wq->flags);
 }
 
+static inline bool wq_shared(struct idxd_wq *wq)
+{
+	return !test_bit(WQ_FLAG_DEDICATED, &wq->flags);
+}
+
+static inline bool device_pasid_enabled(struct idxd_device *idxd)
+{
+	return test_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+}
+
+static inline bool device_swq_supported(struct idxd_device *idxd)
+{
+	return (support_enqcmd && device_pasid_enabled(idxd));
+}
+
 enum idxd_portal_prot {
 	IDXD_PORTAL_UNLIMITED = 0,
 	IDXD_PORTAL_LIMITED,
@@ -287,6 +310,7 @@ void idxd_device_reset(struct idxd_device *idxd);
 void idxd_device_cleanup(struct idxd_device *idxd);
 int idxd_device_config(struct idxd_device *idxd);
 void idxd_device_wqs_clear_state(struct idxd_device *idxd);
+void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid);
 
 /* work queue control */
 int idxd_wq_alloc_resources(struct idxd_wq *wq);
@@ -297,6 +321,8 @@ void idxd_wq_drain(struct idxd_wq *wq);
 int idxd_wq_map_portal(struct idxd_wq *wq);
 void idxd_wq_unmap_portal(struct idxd_wq *wq);
 void idxd_wq_disable_cleanup(struct idxd_wq *wq);
+int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid);
+int idxd_wq_disable_pasid(struct idxd_wq *wq);
 
 /* submission */
 int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 11e5ce168177..626401a71fdd 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -14,6 +14,8 @@
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/device.h>
 #include <linux/idr.h>
+#include <linux/intel-svm.h>
+#include <linux/iommu.h>
 #include <uapi/linux/idxd.h>
 #include <linux/dmaengine.h>
 #include "../dmaengine.h"
@@ -26,6 +28,8 @@ MODULE_AUTHOR("Intel Corporation");
 
 #define DRV_NAME "idxd"
 
+bool support_enqcmd;
+
 static struct idr idxd_idrs[IDXD_TYPE_MAX];
 static struct mutex idxd_idr_lock;
 
@@ -53,6 +57,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
 	struct idxd_irq_entry *irq_entry;
 	int i, msixcnt;
 	int rc = 0;
+	union msix_perm mperm;
 
 	msixcnt = pci_msix_vec_count(pdev);
 	if (msixcnt < 0) {
@@ -131,6 +136,13 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
 
 	idxd_unmask_error_interrupts(idxd);
 
+	/* Setup MSIX permission table */
+	mperm.bits = 0;
+	mperm.pasid = idxd->pasid;
+	mperm.pasid_en = device_pasid_enabled(idxd);
+	for (i = 1; i < msixcnt; i++)
+		iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
+
 	return 0;
 
  err_no_irq:
@@ -260,8 +272,7 @@ static void idxd_read_caps(struct idxd_device *idxd)
 	}
 }
 
-static struct idxd_device *idxd_alloc(struct pci_dev *pdev,
-				      void __iomem * const *iomap)
+static struct idxd_device *idxd_alloc(struct pci_dev *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct idxd_device *idxd;
@@ -271,12 +282,45 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev,
 		return NULL;
 
 	idxd->pdev = pdev;
-	idxd->reg_base = iomap[IDXD_MMIO_BAR];
 	spin_lock_init(&idxd->dev_lock);
 
 	return idxd;
 }
 
+static int idxd_enable_system_pasid(struct idxd_device *idxd)
+{
+	int flags;
+	unsigned int pasid;
+	struct iommu_sva *sva;
+
+	flags = SVM_FLAG_SUPERVISOR_MODE;
+
+	sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
+	if (IS_ERR(sva)) {
+		dev_warn(&idxd->pdev->dev,
+			 "iommu sva bind failed: %ld\n", PTR_ERR(sva));
+		return PTR_ERR(sva);
+	}
+
+	pasid = iommu_sva_get_pasid(sva);
+	if (pasid == IOMMU_PASID_INVALID) {
+		iommu_sva_unbind_device(sva);
+		return -ENODEV;
+	}
+
+	idxd->sva = sva;
+	idxd->pasid = pasid;
+	dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid);
+	return 0;
+}
+
+static void idxd_disable_system_pasid(struct idxd_device *idxd)
+{
+
+	iommu_sva_unbind_device(idxd->sva);
+	idxd->sva = NULL;
+}
+
 static int idxd_probe(struct idxd_device *idxd)
 {
 	struct pci_dev *pdev = idxd->pdev;
@@ -287,6 +331,14 @@ static int idxd_probe(struct idxd_device *idxd)
 	idxd_device_init_reset(idxd);
 	dev_dbg(dev, "IDXD reset complete\n");
 
+	if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM)) {
+		rc = idxd_enable_system_pasid(idxd);
+		if (rc < 0)
+			dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc);
+		else
+			set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+	}
+
 	idxd_read_caps(idxd);
 	idxd_read_table_offsets(idxd);
 
@@ -317,29 +369,29 @@ static int idxd_probe(struct idxd_device *idxd)
 	idxd_mask_error_interrupts(idxd);
 	idxd_mask_msix_vectors(idxd);
  err_setup:
+	if (device_pasid_enabled(idxd))
+		idxd_disable_system_pasid(idxd);
 	return rc;
 }
 
 static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-	void __iomem * const *iomap;
 	struct device *dev = &pdev->dev;
 	struct idxd_device *idxd;
 	int rc;
-	unsigned int mask;
 
 	rc = pcim_enable_device(pdev);
 	if (rc)
 		return rc;
 
-	dev_dbg(dev, "Mapping BARs\n");
-	mask = (1 << IDXD_MMIO_BAR);
-	rc = pcim_iomap_regions(pdev, mask, DRV_NAME);
-	if (rc)
-		return rc;
+	dev_dbg(dev, "Alloc IDXD context\n");
+	idxd = idxd_alloc(pdev);
+	if (!idxd)
+		return -ENOMEM;
 
-	iomap = pcim_iomap_table(pdev);
-	if (!iomap)
+	dev_dbg(dev, "Mapping BARs\n");
+	idxd->reg_base = pcim_iomap(pdev, IDXD_MMIO_BAR, 0);
+	if (!idxd->reg_base)
 		return -ENOMEM;
 
 	dev_dbg(dev, "Set DMA masks\n");
@@ -355,11 +407,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		return rc;
 
-	dev_dbg(dev, "Alloc IDXD context\n");
-	idxd = idxd_alloc(pdev, iomap);
-	if (!idxd)
-		return -ENOMEM;
-
 	idxd_set_type(idxd);
 
 	dev_dbg(dev, "Set PCI master\n");
@@ -447,6 +494,8 @@ static void idxd_remove(struct pci_dev *pdev)
 	dev_dbg(&pdev->dev, "%s called\n", __func__);
 	idxd_cleanup_sysfs(idxd);
 	idxd_shutdown(pdev);
+	if (device_pasid_enabled(idxd))
+		idxd_disable_system_pasid(idxd);
 	mutex_lock(&idxd_idr_lock);
 	idr_remove(&idxd_idrs[idxd->type], idxd->id);
 	mutex_unlock(&idxd_idr_lock);
@@ -465,7 +514,7 @@ static int __init idxd_init_module(void)
 	int err, i;
 
 	/*
-	 * If the CPU does not support write512, there's no point in
+	 * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in
 	 * enumerating the device. We can not utilize it.
 	 */
 	if (!boot_cpu_has(X86_FEATURE_MOVDIR64B)) {
@@ -473,8 +522,10 @@ static int __init idxd_init_module(void)
 		return -ENODEV;
 	}
 
-	pr_info("%s: Intel(R) Accelerator Devices Driver %s\n",
-		DRV_NAME, IDXD_DRIVER_VERSION);
+	if (!boot_cpu_has(X86_FEATURE_ENQCMD))
+		pr_warn("Platform does not have ENQCMD(S) support.\n");
+	else
+		support_enqcmd = true;
 
 	mutex_init(&idxd_idr_lock);
 	for (i = 0; i < IDXD_TYPE_MAX; i++)
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
index a39e7ae6b3d9..a0df4f3fe1fb 100644
--- a/drivers/dma/idxd/registers.h
+++ b/drivers/dma/idxd/registers.h
@@ -333,4 +333,18 @@ union wqcfg {
 	};
 	u32 bits[8];
 } __packed;
+
+/*
+ * This macro calculates the offset into the WQCFG register
+ * idxd_dev - struct idxd_device *
+ * n - wq id
+ * ofs - the index of the 32b dword for the config register
+ *
+ * The WQCFG register block is divided into a group of registers for each
+ * wq. The n index moves us to the register group for that particular wq.
+ * Each register is 32 bits. The ofs gives the index of the register to access.
+ */
+#define WQCFG_OFFSET(idxd_dev, n, ofs) ((idxd_dev)->wqcfg_offset +\
+					(n) * sizeof(union wqcfg) +\
+					sizeof(u32) * (ofs))
 #endif
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index 156a1ee233aa..efca5d8468a6 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -11,11 +11,22 @@
 static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
 {
 	struct idxd_desc *desc;
+	struct idxd_device *idxd = wq->idxd;
 
 	desc = wq->descs[idx];
 	memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
 	memset(desc->completion, 0, sizeof(struct dsa_completion_record));
 	desc->cpu = cpu;
+
+	if (device_pasid_enabled(idxd))
+		desc->hw->pasid = idxd->pasid;
+
+	/*
+	 * Descriptor completion vectors are 1-8 for MSIX. We will round
+	 * robin through the 8 vectors.
+	 */
+	wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1;
+	desc->hw->int_handle = wq->vec_ptr;
 	return desc;
 }
 
@@ -70,18 +81,32 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 	struct idxd_device *idxd = wq->idxd;
 	int vec = desc->hw->int_handle;
 	void __iomem *portal;
+	int rc;
 
 	if (idxd->state != IDXD_DEV_ENABLED)
 		return -EIO;
 
-	portal = wq->dportal + idxd_get_wq_portal_offset(IDXD_PORTAL_UNLIMITED);
+	portal = wq->portal + idxd_get_wq_portal_offset(IDXD_PORTAL_LIMITED);
+
 	/*
-	 * The wmb() flushes writes to coherent DMA data before possibly
-	 * triggering a DMA read. The wmb() is necessary even on UP because
-	 * the recipient is a device.
+	 * The wmb() flushes writes to coherent DMA data before
+	 * possibly triggering a DMA read. The wmb() is necessary
+	 * even on UP because the recipient is a device.
 	 */
 	wmb();
-	iosubmit_cmds512(portal, desc->hw, 1);
+	if (wq_dedicated(wq)) {
+		iosubmit_cmds512(portal, desc->hw, 1);
+	} else {
+		/*
+		 * It's not likely that we would receive a queue full rejection
+		 * since the descriptor allocation gates at wq size. If we
+		 * receive a -EAGAIN, that means something went wrong, such as
+		 * the device not accepting descriptors at all.
+		 */
+		rc = enqcmds(portal, desc->hw);
+		if (rc < 0)
+			return rc;
+	}
 
 	/*
 	 * Pending the descriptor to the lockless list for the irq_entry
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 07a5db06a29a..6d292eb79bf3 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -175,6 +175,30 @@ static int idxd_config_bus_probe(struct device *dev)
 			return -EINVAL;
 		}
 
+		/* Shared WQ checks */
+		if (wq_shared(wq)) {
+			if (!device_swq_supported(idxd)) {
+				dev_warn(dev,
+					 "PASID not enabled and shared WQ.\n");
+				mutex_unlock(&wq->wq_lock);
+				return -ENXIO;
+			}
+			/*
+			 * Shared wq with the threshold set to 0 means the user
+			 * did not set the threshold or transitioned from a
+			 * dedicated wq but did not set threshold. A value
+			 * of 0 would effectively disable the shared wq. The
+			 * driver does not allow a value of 0 to be set for
+			 * threshold via sysfs.
+			 */
+			if (wq->threshold == 0) {
+				dev_warn(dev,
+					 "Shared WQ and threshold 0.\n");
+				mutex_unlock(&wq->wq_lock);
+				return -EINVAL;
+			}
+		}
+
 		rc = idxd_wq_alloc_resources(wq);
 		if (rc < 0) {
 			mutex_unlock(&wq->wq_lock);
@@ -875,6 +899,8 @@ static ssize_t wq_mode_store(struct device *dev,
 	if (sysfs_streq(buf, "dedicated")) {
 		set_bit(WQ_FLAG_DEDICATED, &wq->flags);
 		wq->threshold = 0;
+	} else if (sysfs_streq(buf, "shared") && device_swq_supported(idxd)) {
+		clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
 	} else {
 		return -EINVAL;
 	}
@@ -973,6 +999,87 @@ static ssize_t wq_priority_store(struct device *dev,
 static struct device_attribute dev_attr_wq_priority =
 		__ATTR(priority, 0644, wq_priority_show, wq_priority_store);
 
+static ssize_t wq_block_on_fault_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%u\n",
+		       test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags));
+}
+
+static ssize_t wq_block_on_fault_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_device *idxd = wq->idxd;
+	bool bof;
+	int rc;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -ENXIO;
+
+	rc = kstrtobool(buf, &bof);
+	if (rc < 0)
+		return rc;
+
+	if (bof)
+		set_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
+	else
+		clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
+
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_block_on_fault =
+		__ATTR(block_on_fault, 0644, wq_block_on_fault_show,
+		       wq_block_on_fault_store);
+
+static ssize_t wq_threshold_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%u\n", wq->threshold);
+}
+
+static ssize_t wq_threshold_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_device *idxd = wq->idxd;
+	unsigned int val;
+	int rc;
+
+	rc = kstrtouint(buf, 0, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (val > wq->size || val <= 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -ENXIO;
+
+	if (test_bit(WQ_FLAG_DEDICATED, &wq->flags))
+		return -EINVAL;
+
+	wq->threshold = val;
+
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_threshold =
+		__ATTR(threshold, 0644, wq_threshold_show, wq_threshold_store);
+
 static ssize_t wq_type_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
@@ -1044,6 +1151,13 @@ static ssize_t wq_name_store(struct device *dev,
 	if (strlen(buf) > WQ_NAME_SIZE || strlen(buf) == 0)
 		return -EINVAL;
 
+	/*
+	 * This is temporarily placed here until we have SVM support for
+	 * dmaengine.
+	 */
+	if (wq->type == IDXD_WQT_KERNEL && device_pasid_enabled(wq->idxd))
+		return -EOPNOTSUPP;
+
 	memset(wq->name, 0, WQ_NAME_SIZE + 1);
 	strncpy(wq->name, buf, WQ_NAME_SIZE);
 	strreplace(wq->name, '\n', '\0');
@@ -1154,6 +1268,8 @@ static struct attribute *idxd_wq_attributes[] = {
 	&dev_attr_wq_mode.attr,
 	&dev_attr_wq_size.attr,
 	&dev_attr_wq_priority.attr,
+	&dev_attr_wq_block_on_fault.attr,
+	&dev_attr_wq_threshold.attr,
 	&dev_attr_wq_type.attr,
 	&dev_attr_wq_name.attr,
 	&dev_attr_wq_cdev_minor.attr,
@@ -1305,6 +1421,16 @@ static ssize_t clients_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(clients);
 
+static ssize_t pasid_enabled_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", device_pasid_enabled(idxd));
+}
+static DEVICE_ATTR_RO(pasid_enabled);
+
 static ssize_t state_show(struct device *dev,
 			  struct device_attribute *attr, char *buf)
 {
@@ -1424,6 +1550,7 @@ static struct attribute *idxd_device_attributes[] = {
 	&dev_attr_gen_cap.attr,
 	&dev_attr_configurable.attr,
 	&dev_attr_clients.attr,
+	&dev_attr_pasid_enabled.attr,
 	&dev_attr_state.attr,
 	&dev_attr_errors.attr,
 	&dev_attr_max_tokens.attr,



* [PATCH v5 4/5] dmaengine: idxd: Clean up descriptors with fault error
       [not found] <160090233730.44288.4446779116422752486.stgit@djiang5-desk3.ch.intel.com>
  2020-09-23 23:10 ` [PATCH v5 3/5] dmaengine: idxd: Add shared workqueue support Dave Jiang
@ 2020-09-23 23:11 ` Dave Jiang
  2020-09-23 23:11 ` [PATCH v5 5/5] dmaengine: idxd: Add ABI documentation for shared wq Dave Jiang
       [not found] ` <160090264332.44288.7575027054245105525.stgit@djiang5-desk3.ch.intel.com>
  3 siblings, 0 replies; 12+ messages in thread
From: Dave Jiang @ 2020-09-23 23:11 UTC (permalink / raw)
  To: vkoul, tglx, mingo, bp, dan.j.williams, tony.luck, jing.lin,
	ashok.raj, sanjay.k.kumar, fenghua.yu, kevin.tian, David.Laight,
	dmaengine, linux-kernel

Add code to "complete" a descriptor when the descriptor or its completion
address hits a fault error while SVA mode is being used. This error can be
triggered by bad programming on the user's part. A lock is introduced in
order to protect the descriptor completion lists, since the fault handler
will run from the system work queue after being scheduled in the interrupt
handler.
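
Reduced to a hedged sketch, the deferral pattern looks roughly like this
(fault_work, fault_worker() and schedule_fault() are invented names for
illustration; the patch itself queues onto the device's own workqueue and
walks both completion lists):

	struct fault_work {
		struct work_struct work;
		u64 addr;	/* faulting address reported by the device */
	};

	static void fault_worker(struct work_struct *work)
	{
		struct fault_work *f = container_of(work, struct fault_work, work);

		/*
		 * Walk the completion lists under the new list_lock and
		 * complete any descriptor whose hw descriptor or completion
		 * record address matches f->addr.
		 */
		kfree(f);
	}

	/* Called from the interrupt thread: cannot sleep, hence GFP_ATOMIC. */
	static int schedule_fault(u64 fault_addr)
	{
		struct fault_work *f = kmalloc(sizeof(*f), GFP_ATOMIC);

		if (!f)
			return -ENOMEM;

		f->addr = fault_addr;
		INIT_WORK(&f->work, fault_worker);
		schedule_work(&f->work);
		return 0;
	}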

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/idxd/idxd.h |    5 ++
 drivers/dma/idxd/init.c |    1 
 drivers/dma/idxd/irq.c  |  143 +++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 137 insertions(+), 12 deletions(-)

diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 43a216c42d25..b64b6266ca97 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -34,6 +34,11 @@ struct idxd_irq_entry {
 	int id;
 	struct llist_head pending_llist;
 	struct list_head work_list;
+	/*
+	 * Lock to protect access between the irq thread processing
+	 * descriptors and the fault work processing error descriptors.
+	 */
+	spinlock_t list_lock;
 };
 
 struct idxd_group {
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 626401a71fdd..1bb7637b02eb 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -97,6 +97,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
 	for (i = 0; i < msixcnt; i++) {
 		idxd->irq_entries[i].id = i;
 		idxd->irq_entries[i].idxd = idxd;
+		spin_lock_init(&idxd->irq_entries[i].list_lock);
 	}
 
 	msix = &idxd->msix_entries[0];
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 17a65a13fb64..9e6cc55ad22f 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -11,6 +11,24 @@
 #include "idxd.h"
 #include "registers.h"
 
+enum irq_work_type {
+	IRQ_WORK_NORMAL = 0,
+	IRQ_WORK_PROCESS_FAULT,
+};
+
+struct idxd_fault {
+	struct work_struct work;
+	u64 addr;
+	struct idxd_device *idxd;
+};
+
+static int irq_process_work_list(struct idxd_irq_entry *irq_entry,
+				 enum irq_work_type wtype,
+				 int *processed, u64 data);
+static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry,
+				     enum irq_work_type wtype,
+				     int *processed, u64 data);
+
 static void idxd_device_reinit(struct work_struct *work)
 {
 	struct idxd_device *idxd = container_of(work, struct idxd_device, work);
@@ -44,6 +62,46 @@ static void idxd_device_reinit(struct work_struct *work)
 	idxd_device_wqs_clear_state(idxd);
 }
 
+static void idxd_device_fault_work(struct work_struct *work)
+{
+	struct idxd_fault *fault = container_of(work, struct idxd_fault, work);
+	struct idxd_irq_entry *ie;
+	int i;
+	int processed;
+	int irqcnt = fault->idxd->num_wq_irqs + 1;
+
+	for (i = 1; i < irqcnt; i++) {
+		ie = &fault->idxd->irq_entries[i];
+		irq_process_work_list(ie, IRQ_WORK_PROCESS_FAULT,
+				      &processed, fault->addr);
+		if (processed)
+			break;
+
+		irq_process_pending_llist(ie, IRQ_WORK_PROCESS_FAULT,
+					  &processed, fault->addr);
+		if (processed)
+			break;
+	}
+
+	kfree(fault);
+}
+
+static int idxd_device_schedule_fault_process(struct idxd_device *idxd,
+					      u64 fault_addr)
+{
+	struct idxd_fault *fault;
+
+	fault = kmalloc(sizeof(*fault), GFP_ATOMIC);
+	if (!fault)
+		return -ENOMEM;
+
+	fault->addr = fault_addr;
+	fault->idxd = idxd;
+	INIT_WORK(&fault->work, idxd_device_fault_work);
+	queue_work(idxd->wq, &fault->work);
+	return 0;
+}
+
 irqreturn_t idxd_irq_handler(int vec, void *data)
 {
 	struct idxd_irq_entry *irq_entry = data;
@@ -125,6 +183,16 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
 	if (!err)
 		goto out;
 
+	/*
+	 * This case should rarely happen and is typically due to a
+	 * software programming error by the driver.
+	 */
+	if (idxd->sw_err.valid &&
+	    idxd->sw_err.desc_valid &&
+	    idxd->sw_err.fault_addr)
+		idxd_device_schedule_fault_process(idxd,
+						   idxd->sw_err.fault_addr);
+
 	gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
 	if (gensts.state == IDXD_DEVICE_STATE_HALT) {
 		idxd->state = IDXD_DEV_HALTED;
@@ -152,57 +220,106 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
 	return IRQ_HANDLED;
 }
 
+static bool process_fault(struct idxd_desc *desc, u64 fault_addr)
+{
+	if ((u64)desc->hw == fault_addr ||
+	    (u64)desc->completion == fault_addr) {
+		idxd_dma_complete_txd(desc, IDXD_COMPLETE_DEV_FAIL);
+		return true;
+	}
+
+	return false;
+}
+
+static bool complete_desc(struct idxd_desc *desc)
+{
+	if (desc->completion->status) {
+		idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL);
+		return true;
+	}
+
+	return false;
+}
+
 static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry,
-				     int *processed)
+				     enum irq_work_type wtype,
+				     int *processed, u64 data)
 {
 	struct idxd_desc *desc, *t;
 	struct llist_node *head;
 	int queued = 0;
+	bool completed = false;
+	unsigned long flags;
 
 	*processed = 0;
 	head = llist_del_all(&irq_entry->pending_llist);
 	if (!head)
-		return 0;
+		goto out;
 
 	llist_for_each_entry_safe(desc, t, head, llnode) {
-		if (desc->completion->status) {
-			idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL);
+		if (wtype == IRQ_WORK_NORMAL)
+			completed = complete_desc(desc);
+		else if (wtype == IRQ_WORK_PROCESS_FAULT)
+			completed = process_fault(desc, data);
+
+		if (completed) {
 			idxd_free_desc(desc->wq, desc);
 			(*processed)++;
+			if (wtype == IRQ_WORK_PROCESS_FAULT)
+				break;
 		} else {
-			list_add_tail(&desc->list, &irq_entry->work_list);
+			spin_lock_irqsave(&irq_entry->list_lock, flags);
+			list_add_tail(&desc->list,
+				      &irq_entry->work_list);
+			spin_unlock_irqrestore(&irq_entry->list_lock, flags);
 			queued++;
 		}
 	}
 
+ out:
 	return queued;
 }
 
 static int irq_process_work_list(struct idxd_irq_entry *irq_entry,
-				 int *processed)
+				 enum irq_work_type wtype,
+				 int *processed, u64 data)
 {
 	struct list_head *node, *next;
 	int queued = 0;
+	bool completed = false;
+	unsigned long flags;
 
 	*processed = 0;
+	spin_lock_irqsave(&irq_entry->list_lock, flags);
 	if (list_empty(&irq_entry->work_list))
-		return 0;
+		goto out;
 
 	list_for_each_safe(node, next, &irq_entry->work_list) {
 		struct idxd_desc *desc =
 			container_of(node, struct idxd_desc, list);
 
-		if (desc->completion->status) {
+		spin_unlock_irqrestore(&irq_entry->list_lock, flags);
+		if (wtype == IRQ_WORK_NORMAL)
+			completed = complete_desc(desc);
+		else if (wtype == IRQ_WORK_PROCESS_FAULT)
+			completed = process_fault(desc, data);
+
+		if (completed) {
+			spin_lock_irqsave(&irq_entry->list_lock, flags);
 			list_del(&desc->list);
-			/* process and callback */
-			idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL);
+			spin_unlock_irqrestore(&irq_entry->list_lock, flags);
 			idxd_free_desc(desc->wq, desc);
 			(*processed)++;
+			if (wtype == IRQ_WORK_PROCESS_FAULT)
+				return queued;
 		} else {
 			queued++;
 		}
+		spin_lock_irqsave(&irq_entry->list_lock, flags);
 	}
 
+ out:
+	spin_unlock_irqrestore(&irq_entry->list_lock, flags);
 	return queued;
 }
 
@@ -230,12 +347,14 @@ static int idxd_desc_process(struct idxd_irq_entry *irq_entry)
 	 * 5. Repeat until no more descriptors.
 	 */
 	do {
-		rc = irq_process_work_list(irq_entry, &processed);
+		rc = irq_process_work_list(irq_entry, IRQ_WORK_NORMAL,
+					   &processed, 0);
 		total += processed;
 		if (rc != 0)
 			continue;
 
-		rc = irq_process_pending_llist(irq_entry, &processed);
+		rc = irq_process_pending_llist(irq_entry, IRQ_WORK_NORMAL,
+					       &processed, 0);
 		total += processed;
 	} while (rc != 0);
 



* [PATCH v5 5/5] dmaengine: idxd: Add ABI documentation for shared wq
       [not found] <160090233730.44288.4446779116422752486.stgit@djiang5-desk3.ch.intel.com>
  2020-09-23 23:10 ` [PATCH v5 3/5] dmaengine: idxd: Add shared workqueue support Dave Jiang
  2020-09-23 23:11 ` [PATCH v5 4/5] dmaengine: idxd: Clean up descriptors with fault error Dave Jiang
@ 2020-09-23 23:11 ` Dave Jiang
       [not found] ` <160090264332.44288.7575027054245105525.stgit@djiang5-desk3.ch.intel.com>
  3 siblings, 0 replies; 12+ messages in thread
From: Dave Jiang @ 2020-09-23 23:11 UTC (permalink / raw)
  To: vkoul, tglx, mingo, bp, dan.j.williams, tony.luck, jing.lin,
	ashok.raj, sanjay.k.kumar, fenghua.yu, kevin.tian, David.Laight,
	dmaengine, linux-kernel

Add the sysfs attribute bits in ABI/stable for shared wq support.

Signed-off-by: Jing Lin <jing.lin@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/ABI/stable/sysfs-driver-dma-idxd |   14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index b44183880935..42d3dc03ffea 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -77,6 +77,13 @@ Contact:        dmaengine@vger.kernel.org
 Description:    The operation capability bit mask specifies the operation types
 		supported by this device.
 
+What:		/sys/bus/dsa/devices/dsa<m>/pasid_enabled
+Date:		Sep 17, 2020
+KernelVersion:	5.10.0
+Contact:	dmaengine@vger.kernel.org
+Description:	To indicate whether PASID (process address space identifier)
+		is enabled for this device.
+
 What:           /sys/bus/dsa/devices/dsa<m>/state
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
@@ -122,6 +129,13 @@ KernelVersion:	5.10.0
 Contact:	dmaengine@vger.kernel.org
 Description:	The last executed device administrative command's status/error.
 
+What:		/sys/bus/dsa/devices/wq<m>.<n>/block_on_fault
+Date:		Sep 17, 2020
+KernelVersion:	5.10.0
+Contact:	dmaengine@vger.kernel.org
+Description:	To indicate whether block on fault is allowed for the work
+		queue to support on-demand paging.
+
 What:           /sys/bus/dsa/devices/wq<m>.<n>/group_id
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0



* RE: [PATCH v5 1/5] x86/asm: Carve out a generic movdir64b() helper for general usage
       [not found] ` <160090264332.44288.7575027054245105525.stgit@djiang5-desk3.ch.intel.com>
@ 2020-09-24  8:24   ` David Laight
  2020-09-24 10:15     ` Borislav Petkov
  2020-09-24 13:07   ` Borislav Petkov
  1 sibling, 1 reply; 12+ messages in thread
From: David Laight @ 2020-09-24  8:24 UTC (permalink / raw)
  To: 'Dave Jiang',
	vkoul, tglx, mingo, bp, dan.j.williams, tony.luck, jing.lin,
	ashok.raj, sanjay.k.kumar, fenghua.yu, kevin.tian, dmaengine,
	linux-kernel

From: Dave Jiang
> Sent: 24 September 2020 00:11
>
> The MOVDIR64B instruction can be used by other wrapper functions. Move
> the asm code to special_insns.h and have iosubmit_cmds512() call the
> asm function.
> 
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> Reviewed-by: Tony Luck <tony.luck@intel.com>
> ---
>  arch/x86/include/asm/io.h            |   17 +++--------------
>  arch/x86/include/asm/special_insns.h |   19 +++++++++++++++++++
>  2 files changed, 22 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
> index e1aa17a468a8..d726459d08e5 100644
> --- a/arch/x86/include/asm/io.h
> +++ b/arch/x86/include/asm/io.h
...
> diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
> index 59a3e13204c3..2a5abd27bb86 100644
> --- a/arch/x86/include/asm/special_insns.h
> +++ b/arch/x86/include/asm/special_insns.h
> @@ -234,6 +234,25 @@ static inline void clwb(volatile void *__p)
> 
>  #define nop() asm volatile ("nop")
> 
> +/* The dst parameter must be 64-bytes aligned */
> +static inline void movdir64b(void *dst, const void *src)
> +{
> +	/*
> +	 * Note that this isn't an "on-stack copy", just definition of "dst"
> +	 * as a pointer to 64-bytes of stuff that is going to be overwritten.
> +	 * In the MOVDIR64B case that may be needed as you can use the
> +	 * MOVDIR64B instruction to copy arbitrary memory around. This trick
> +	 * lets the compiler know how much gets clobbered.
> +	 */
> +	volatile struct { char _[64]; } *__dst = dst;
> +
> +	/* MOVDIR64B [rdx], rax */
> +	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
> +		     :
> +		     : "m" (*(struct { char _[64];} **)src), "a" (__dst)
> +		     : "memory");
> +}
> +
>  #endif /* __KERNEL__ */

You've lost the "d" (src).
You don't need the 'memory' clobber, just:

static inline void movdir64b(void *dst, const void *src)
{
	/*
	 * 64 bytes from dst are marked as modified for completeness.
	 * Since the writes bypass the cache later reads may return
	 * old data anyway.
	 */
	/* MOVDIR64B [rdx], rax */
	asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
	     : "=m" ((struct { char _[64];} *)dst),
	     : "m" ((struct { char _[64];} *)src), "d" (src), "a" (dst));
}

I've checked that the "m" constraint on src does force (at least one
version of) gcc to actually write to the supplied buffer.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


* Re: [PATCH v5 1/5] x86/asm: Carve out a generic movdir64b() helper for general usage
  2020-09-24  8:24   ` [PATCH v5 1/5] x86/asm: Carve out a generic movdir64b() helper for general usage David Laight
@ 2020-09-24 10:15     ` Borislav Petkov
  2020-09-24 10:42       ` David Laight
  2020-09-24 14:07       ` Michael Matz
  0 siblings, 2 replies; 12+ messages in thread
From: Borislav Petkov @ 2020-09-24 10:15 UTC (permalink / raw)
  To: David Laight, Michael Matz
  Cc: 'Dave Jiang',
	vkoul, tglx, mingo, dan.j.williams, tony.luck, jing.lin,
	ashok.raj, sanjay.k.kumar, fenghua.yu, kevin.tian, dmaengine,
	linux-kernel

On Thu, Sep 24, 2020 at 08:24:46AM +0000, David Laight wrote:
> static inline void movdir64b(void *dst, const void *src)
> {
> 	/*
> 	 * 64 bytes from dst are marked as modified for completeness.
> 	 * Since the writes bypass the cache later reads may return
> 	 * old data anyway.
> 	 */
> 	/* MOVDIR64B [rdx], rax */
> 	asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
> 	     : "=m" ((struct { char _[64];} *)dst),
> 	     : "m" ((struct { char _[64];} *)src), "d" (src), "a" (dst));

Now since you're so generous with your advice on random threads, please
explain what you're advising here?

The destination operand - in this case in %rax - is "destination memory
address specified as offset to ES segment in the register operand."

So what is the difference between:

	...(void *dst, ... )

	volatile struct { char _[64]; } *__dst = dst;

	...

	: "=m" (__dst)
	: "a" (__dst)

and

	...(void *dst, ... )

	...

	: "=m" ((struct { char _[64];} *)dst)
	: "a" (__dst)

and why?

Point me to the gcc documentation where this is explained.

To cut to the chase, I don't think you need to do that, otherwise clwb()
would be broken too but perhaps you know something I don't.

Looking at clwb(), I believe the proper specification should be:

	volatile struct { char _[64]; } *__dst = dst;

	...

	: "+m" (__dst)
	: "a" (__dst)

And if anything, the source specification should be something like that:

	volatile struct { char x[64]; } *__src = src;

	...


	"d" (__src)

because this tells gcc that the source operand would read 64 bytes
through the pointer in the %rdx reg.

So this ends up close to what you're saying but it is using local
variables to make the asm actually readable.

Lemme add Micha to Cc for sanity-checking:

Micha, the instruction is:

MOVDIR64B %(rdx), rax

"Move 64-bytes as direct-store with guaranteed 64-byte write atomicity
from the source memory operand address to destination memory address
specified as offset to ES segment in the register operand."

Do I need to tell gcc that both operands are referencing 64 bytes,
source operand is a memory reference, destination operand is an address
specified in a register?

What we have currently is:

		volatile struct { char _[64]; } *dst = __dst;

                /* MOVDIR64B [rdx], rax */
                asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
                             : "=m" (dst)
                             : "d" (from), "a" (dst));


Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette


* RE: [PATCH v5 1/5] x86/asm: Carve out a generic movdir64b() helper for general usage
  2020-09-24 10:15     ` Borislav Petkov
@ 2020-09-24 10:42       ` David Laight
  2020-09-24 11:02         ` Borislav Petkov
  2020-09-24 14:07       ` Michael Matz
  1 sibling, 1 reply; 12+ messages in thread
From: David Laight @ 2020-09-24 10:42 UTC (permalink / raw)
  To: 'Borislav Petkov', Michael Matz
  Cc: 'Dave Jiang',
	vkoul, tglx, mingo, dan.j.williams, tony.luck, jing.lin,
	ashok.raj, sanjay.k.kumar, fenghua.yu, kevin.tian, dmaengine,
	linux-kernel

From: Borislav Petkov
> Sent: 24 September 2020 11:15
> On Thu, Sep 24, 2020 at 08:24:46AM +0000, David Laight wrote:
> > static inline void movdir64b(void *dst, const void *src)
> > {
> > 	/*
> > 	 * 64 bytes from dst are marked as modified for completeness.
> > 	 * Since the writes bypass the cache later reads may return
> > 	 * old data anyway.
> > 	 */
> > 	/* MOVDIR64B [rdx], rax */
> > 	asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
> > 	     : "=m" ((struct { char _[64];} *)dst),
> > 	     : "m" ((struct { char _[64];} *)src), "d" (src), "a" (dst));
> 
> Now since you're so generous with your advice on random threads, please
> explain what you're advising here?
> 
> The destination operand - in this case in %rax - is "destination memory
> address specified as offset to ES segment in the register operand."

The movdir64b instruction does a 'normal' read of 64 bytes (which can be
misaligned), then a cache-bypassing (probably write-combining) single
64-byte write to an address that must be aligned.
Any reference to segment registers is largely irrelevant since we are
not in real mode.


> So what is the difference between:
> 
> 	...(void *dst, ... )
> 
> 	volatile struct { char _[64]; } *__dst = dst;
> 	...
> 	: "=m" (__dst)
> 	: "a" (__dst)
> 
> and
> 
> 	...(void *dst, ... )
> 	...
> 	: "=m" ((struct { char _[64];} *)dst)
> 	: "a" (__dst)
> 
> and why?
> 
> Point me to the gcc documentation where this is explained.

Mainly less lines of code to look at.

> To cut to the chase, I don't think you need to do that, otherwise clwb()
> would be broken too but perhaps you know something I don't.
> 
> Looking at clwb(), I believe the proper specification should be:
> 
> 	volatile struct { char _[64]; } *__dst = dst;
> 
> 	...
> 
> 	: "+m" (__dst)
> 	: "a" (__dst)

No idea what clwb() is doing.
But the "+m" (dst) tells gcc it depends on, and modifies the 64 bytes
at *dst.

I believe the 'volatile' is pointless.

> And if anything, the source specification should be something like that:
> 
> 	volatile struct { char x[64]; } *__src = src;
> 
> 	...
> 
> 
> 	"d" (__src)
> 
> because this tells gcc that the source operand would read 64 bytes
> through the pointer in the %rdx reg.

No, that just says the asm uses the value of the pointer.
Not what it points to.

> So this ends up close to what you're saying but it is using local
> variables to make the asm actually readable.
> 
> Lemme add Micha to Cc for sanity-checking:
> 
> Micha, the instruction is:
> 
> MOVDIR64B %(rdx), rax
> 
> "Move 64-bytes as direct-store with guaranteed 64-byte write atomicity
> from the source memory operand address to destination memory address
> specified as offset to ES segment in the register operand."
> 
> Do I need to tell gcc that both operands are referencing 64 bytes,
> source operand is a memory reference, destination operand is an address
> specified in a register?
> 
> What we have currently is:
> 
> 		volatile struct { char _[64]; } *dst = __dst;
> 
>                 /* MOVDIR64B [rdx], rax */
>                 asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
>                              : "=m" (dst)
>                              : "d" (from), "a" (dst));

That is wrong.
Feed this into cc -S -O2 and look at the .s file

static inline void movdir64b(void *dst, const void *src)
{
       asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
                    :
                    : /*"m" ((struct { char _[64];} *)src),*/ "d" (src), "a" (dst)
                    );
}

void foo(void *dst, int val)
{
        long b64[8] = { 0 };

        b64[0] = val;
        movdir64b(dst, b64);
}

Note that all the code that writes into b64[] is optimised away.
Repeat after uncommenting the "m" constraint and spot the difference.

The "=m" (dst) constraint is much less important here.
The write itself will always happen.
So do we need to tell gcc we did it?
Doing so just ensures gcc doesn't move any instructions that it knows
access the same memory above the movdir64b instruction.
But, because this is a cache bypassing write they are going
to be invalid anyway - without extra strong barriers.
So it is fairly safe to miss it out.
OTOH putting it in does no harm and helps annotate what the
instruction is doing.

I just failed to spot an example of a 'memory size' cast in the
kernel source tree - I'm sure there is an example somewhere.
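
One invented illustration of such a sized cast (not taken from the tree;
MEM64() and touch64() are made-up names):

	/*
	 * Make gcc treat the asm as reading and writing exactly 64
	 * bytes at p, without a full "memory" clobber.
	 */
	#define MEM64(p) (*(struct { char _[64]; } *)(p))

	static inline void touch64(void *p)
	{
		asm volatile("" : "+m" (MEM64(p)));
	}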

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


* Re: [PATCH v5 1/5] x86/asm: Carve out a generic movdir64b() helper for general usage
  2020-09-24 10:42       ` David Laight
@ 2020-09-24 11:02         ` Borislav Petkov
  2020-09-24 11:25           ` David Laight
  0 siblings, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2020-09-24 11:02 UTC (permalink / raw)
  To: David Laight
  Cc: Michael Matz, 'Dave Jiang',
	vkoul, tglx, mingo, dan.j.williams, tony.luck, jing.lin,
	ashok.raj, sanjay.k.kumar, fenghua.yu, kevin.tian, dmaengine,
	linux-kernel

On Thu, Sep 24, 2020 at 10:42:16AM +0000, David Laight wrote:
> The movdir64b instruction does a 'normal' read of 64 bytes (which can
> be misaligned), then a cache-bypassing (probably write-combining)
> single 64-byte write to an address that must be aligned. Any reference to
> segment registers is largely irrelevant since we are not in real mode.

Sounds like you know better than the SDM.

> Mainly less lines of code to look at.

Yeah, no. Readability is what I would prefer any day of the week.

> No idea what clwb() is doing.

Sounds like you need to read another part of the SDM.

> But the "+m" (dst) tells gcc it depends on, and modifies the 64 bytes
> at *dst.
> 
> I believe the 'volatile' is pointless.

I discussed this at the time with a gcc person. And nope, it ain't
pointless.

> No, that just says the asm uses the value of the pointer.
> Not what it points to.

Err, no, it is *exactly* what it points to that is important here and
you're telling the compiler that the instruction will read that much
memory through the pointer.

Ok, I've read enough babble. I'll discuss it with a gcc person before I
take anything.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette


* RE: [PATCH v5 1/5] x86/asm: Carve out a generic movdir64b() helper for general usage
  2020-09-24 11:02         ` Borislav Petkov
@ 2020-09-24 11:25           ` David Laight
  0 siblings, 0 replies; 12+ messages in thread
From: David Laight @ 2020-09-24 11:25 UTC (permalink / raw)
  To: 'Borislav Petkov'
  Cc: Michael Matz, 'Dave Jiang',
	vkoul, tglx, mingo, dan.j.williams, tony.luck, jing.lin,
	ashok.raj, sanjay.k.kumar, fenghua.yu, kevin.tian, dmaengine,
	linux-kernel

> > No, that just says the asm uses the value of the pointer.
> > Not what it points to.
> 
> Err, no, it is *exactly* what it points to that is important here and
> you're telling the compiler that the instruction will read that much
> memory through the pointer.

You need to use an "m" constraint for that.
A 'register' constraint just requires the value of the address
to be valid.

Look at the asm output from the example code I posted.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


* Re: [PATCH v5 1/5] x86/asm: Carve out a generic movdir64b() helper for general usage
       [not found] ` <160090264332.44288.7575027054245105525.stgit@djiang5-desk3.ch.intel.com>
  2020-09-24  8:24   ` [PATCH v5 1/5] x86/asm: Carve out a generic movdir64b() helper for general usage David Laight
@ 2020-09-24 13:07   ` Borislav Petkov
  2020-09-24 13:27     ` David Laight
  2020-09-24 15:07     ` Dave Jiang
  1 sibling, 2 replies; 12+ messages in thread
From: Borislav Petkov @ 2020-09-24 13:07 UTC (permalink / raw)
  To: Dave Jiang
  Cc: vkoul, tglx, mingo, dan.j.williams, tony.luck, jing.lin,
	ashok.raj, sanjay.k.kumar, fenghua.yu, kevin.tian, David.Laight,
	dmaengine, linux-kernel, Michael Matz

On Wed, Sep 23, 2020 at 04:10:43PM -0700, Dave Jiang wrote:
> +/* The dst parameter must be 64-bytes aligned */
> +static inline void movdir64b(void *dst, const void *src)
> +{
> +	/*
> +	 * Note that this isn't an "on-stack copy", just definition of "dst"
> +	 * as a pointer to 64-bytes of stuff that is going to be overwritten.
> +	 * In the MOVDIR64B case that may be needed as you can use the
> +	 * MOVDIR64B instruction to copy arbitrary memory around. This trick
> +	 * lets the compiler know how much gets clobbered.
> +	 */
> +	volatile struct { char _[64]; } *__dst = dst;
> +
> +	/* MOVDIR64B [rdx], rax */
> +	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
> +		     :
> +		     : "m" (*(struct { char _[64];} **)src), "a" (__dst)
> +		     : "memory");
> +}

Ok, Micha and I hashed it out on IRC, here's what you do. Please keep
the comments too because we will forget soon again.

static inline void movdir64b(void *dst, const void *src)
{
	const struct { char _[64]; } *__src = src;
	struct { char _[64]; } *__dst = dst;

	/*
	 * MOVDIR64B %(rdx), rax.
	 *
	 * Both __src and __dst must be memory constraints in order to tell the
	 * compiler that no other memory accesses should be reordered around
	 * this one.
	 *
	 * Also, both must be supplied as lvalues because this tells
	 * the compiler what the object is (its size) the instruction accesses.
	 * I.e., not the pointers but what they point to, thus the deref'ing '*'.
	 */
	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
		     : "+m" (*__dst)
		     :  "m" (*__src), "a" (__dst), "d" (__src));
}

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette


* RE: [PATCH v5 1/5] x86/asm: Carve out a generic movdir64b() helper for general usage
  2020-09-24 13:07   ` Borislav Petkov
@ 2020-09-24 13:27     ` David Laight
  2020-09-24 15:07     ` Dave Jiang
  1 sibling, 0 replies; 12+ messages in thread
From: David Laight @ 2020-09-24 13:27 UTC (permalink / raw)
  To: 'Borislav Petkov', Dave Jiang
  Cc: vkoul, tglx, mingo, dan.j.williams, tony.luck, jing.lin,
	ashok.raj, sanjay.k.kumar, fenghua.yu, kevin.tian, dmaengine,
	linux-kernel, Michael Matz

From: Borislav Petkov
> Sent: 24 September 2020 14:08
> 
> On Wed, Sep 23, 2020 at 04:10:43PM -0700, Dave Jiang wrote:
> > +/* The dst parameter must be 64-bytes aligned */
> > +static inline void movdir64b(void *dst, const void *src)
> > +{
> > +	/*
> > +	 * Note that this isn't an "on-stack copy", just definition of "dst"
> > +	 * as a pointer to 64-bytes of stuff that is going to be overwritten.
> > +	 * In the MOVDIR64B case that may be needed as you can use the
> > +	 * MOVDIR64B instruction to copy arbitrary memory around. This trick
> > +	 * lets the compiler know how much gets clobbered.
> > +	 */
> > +	volatile struct { char _[64]; } *__dst = dst;
> > +
> > +	/* MOVDIR64B [rdx], rax */
> > +	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
> > +		     :
> > +		     : "m" (*(struct { char _[64];} **)src), "a" (__dst)
> > +		     : "memory");
> > +}
> 
> Ok, Micha and I hashed it out on IRC, here's what you do. Please keep
> the comments too because we will forget soon again.
> 
> static inline void movdir64b(void *dst, const void *src)
> {
> 	const struct { char _[64]; } *__src = src;
> 	struct { char _[64]; } *__dst = dst;
> 
> 	/*
> 	 * MOVDIR64B %(rdx), rax.
> 	 *
> 	 * Both __src and __dst must be memory constraints in order to tell the
> 	 * compiler that no other memory accesses should be reordered around
> 	 * this one.
> 	 *
> 	 * Also, both must be supplied as lvalues because this tells
> 	 * the compiler what the object is (its size) the instruction accesses.
> 	 * I.e., not the pointers but what they point to, thus the deref'ing '*'.
> 	 */
> 	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
> 		     : "+m" (*__dst)
> 		     :  "m" (*__src), "a" (__dst), "d" (__src));
> }

Doesn't look wrong now.
I'd still paint it a slightly different colour :-)

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


* Re: [PATCH v5 1/5] x86/asm: Carve out a generic movdir64b() helper for general usage
  2020-09-24 10:15     ` Borislav Petkov
  2020-09-24 10:42       ` David Laight
@ 2020-09-24 14:07       ` Michael Matz
  1 sibling, 0 replies; 12+ messages in thread
From: Michael Matz @ 2020-09-24 14:07 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: David Laight, 'Dave Jiang',
	vkoul, tglx, mingo, dan.j.williams, tony.luck, jing.lin,
	ashok.raj, sanjay.k.kumar, fenghua.yu, kevin.tian, dmaengine,
	linux-kernel

Hello,

even though we hashed it out downthread, let me make some additional 
remarks:

On Thu, 24 Sep 2020, Borislav Petkov wrote:

> > 	/* MOVDIR64B [rdx], rax */

This comment is confusing as it uses Intel syntax for the operand forms, 
but AT&T order (dest last).

> 	volatile struct { char _[64]; } *__dst = dst;
> 
> 	...
> 
> 	: "=m" (__dst)

This and the other occurrences in this thread up to now always miss that
the 'm' constraints want the object itself, not the address of the object.
So you want '"m" (*__src)', same for dst, and so on.
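
A compile-only illustration of that distinction (the helper names are
invented; compare the generated assembly with gcc -O2 -S):

	/* MOVDIR64B (%rdx), %rax -- same opcode bytes as in the patch */
	#define MOVDIR64B_INSN ".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"

	void copy64_wrong(void *dst, const void *src)
	{
		/*
		 * "m" (src) only names the pointer variable as an input, so
		 * gcc remains free to discard earlier stores into *src.
		 */
		asm volatile(MOVDIR64B_INSN : : "m" (src), "d" (src), "a" (dst));
	}

	void copy64_right(void *dst, const void *src)
	{
		const struct { char _[64]; } *s = src;
		struct { char _[64]; } *d = dst;

		/*
		 * "m" (*s) names the 64 pointed-to bytes as the input and
		 * "+m" (*d) marks the 64 destination bytes as written.
		 */
		asm volatile(MOVDIR64B_INSN
			     : "+m" (*d)
			     : "m" (*s), "d" (s), "a" (d));
	}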

> Micha, the instruction is:
> 
> MOVDIR64B %(rdx), rax
> 
> "Move 64-bytes as direct-store with guaranteed 64-byte write atomicity
> from the source memory operand address to destination memory address
> specified as offset to ES segment in the register operand."

It's unfortunate that the introduction of this mnemonic into binutils 
did it wrong already, but what the instruction should really read like in 
AT&T mode is:

  movdir64b (%rdx), (%rax)
or even
  movdir64b (%rdx), es:(%rax)

because both are memory operands really (even though the destination can 
only be encoded with a direct register, as these are the constraints of 
x86 insn encodings).  It's comparable to movs, which, also having two 
memory operands is written:

  movsb  %ds:(%rsi),%es:(%rdi)


Ciao,
Michael.


* Re: [PATCH v5 1/5] x86/asm: Carve out a generic movdir64b() helper for general usage
  2020-09-24 13:07   ` Borislav Petkov
  2020-09-24 13:27     ` David Laight
@ 2020-09-24 15:07     ` Dave Jiang
  1 sibling, 0 replies; 12+ messages in thread
From: Dave Jiang @ 2020-09-24 15:07 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: vkoul, tglx, mingo, dan.j.williams, tony.luck, jing.lin,
	ashok.raj, sanjay.k.kumar, fenghua.yu, kevin.tian, David.Laight,
	dmaengine, linux-kernel, Michael Matz



On 9/24/2020 6:07 AM, Borislav Petkov wrote:
> On Wed, Sep 23, 2020 at 04:10:43PM -0700, Dave Jiang wrote:
>> +/* The dst parameter must be 64-bytes aligned */
>> +static inline void movdir64b(void *dst, const void *src)
>> +{
>> +	/*
>> +	 * Note that this isn't an "on-stack copy", just definition of "dst"
>> +	 * as a pointer to 64-bytes of stuff that is going to be overwritten.
>> +	 * In the MOVDIR64B case that may be needed as you can use the
>> +	 * MOVDIR64B instruction to copy arbitrary memory around. This trick
>> +	 * lets the compiler know how much gets clobbered.
>> +	 */
>> +	volatile struct { char _[64]; } *__dst = dst;
>> +
>> +	/* MOVDIR64B [rdx], rax */
>> +	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
>> +		     :
>> +		     : "m" (*(struct { char _[64];} **)src), "a" (__dst)
>> +		     : "memory");
>> +}
> 
> Ok, Micha and I hashed it out on IRC, here's what you do. Please keep
> the comments too because we will forget soon again.
> 
> static inline void movdir64b(void *dst, const void *src)
> {
> 	const struct { char _[64]; } *__src = src;
> 	struct { char _[64]; } *__dst = dst;
> 
> 	/*
> 	 * MOVDIR64B %(rdx), rax.
> 	 *
> 	 * Both __src and __dst must be memory constraints in order to tell the
> 	 * compiler that no other memory accesses should be reordered around
> 	 * this one.
> 	 *
> 	 * Also, both must be supplied as lvalues because this tells
> 	 * the compiler what the object is (its size) the instruction accesses.
> 	 * I.e., not the pointers but what they point to, thus the deref'ing '*'.
> 	 */
> 	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
> 		     : "+m" (*__dst)
> 		     :  "m" (*__src), "a" (__dst), "d" (__src));
> }

Thanks Boris. I will update and resend.

> 
> Thx.
> 
