* [PATCH v1 01/13] xen/pvcalls: introduce the pvcalls xenbus frontend
  2017-07-22  0:11 [PATCH v1 00/13] introduce the Xen PV Calls frontend Stefano Stabellini
  2017-07-22  0:11 ` [PATCH v1 01/13] xen/pvcalls: introduce the pvcalls xenbus frontend Stefano Stabellini
@ 2017-07-22  0:11 ` Stefano Stabellini
  2017-07-22  0:11     ` Stefano Stabellini
                     ` (20 more replies)
  1 sibling, 21 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:11 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

Introduce a xenbus frontend for the pvcalls protocol, as defined by
https://xenbits.xen.org/docs/unstable/misc/pvcalls.html.

This patch only adds the stubs; the code will be added by the following
patches.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 drivers/xen/pvcalls-front.c

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
new file mode 100644
index 0000000..173e204
--- /dev/null
+++ b/drivers/xen/pvcalls-front.c
@@ -0,0 +1,68 @@
+/*
+ * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+#include <xen/interface/io/pvcalls.h>
+
+static const struct xenbus_device_id pvcalls_front_ids[] = {
+	{ "pvcalls" },
+	{ "" }
+};
+
+static int pvcalls_front_remove(struct xenbus_device *dev)
+{
+	return 0;
+}
+
+static int pvcalls_front_probe(struct xenbus_device *dev,
+			  const struct xenbus_device_id *id)
+{
+	return 0;
+}
+
+static int pvcalls_front_resume(struct xenbus_device *dev)
+{
+	dev_warn(&dev->dev, "suspend/resume unsupported\n");
+	return 0;
+}
+
+static void pvcalls_front_changed(struct xenbus_device *dev,
+			    enum xenbus_state backend_state)
+{
+}
+
+static struct xenbus_driver pvcalls_front_driver = {
+	.ids = pvcalls_front_ids,
+	.probe = pvcalls_front_probe,
+	.remove = pvcalls_front_remove,
+	.resume = pvcalls_front_resume,
+	.otherend_changed = pvcalls_front_changed,
+};
+
+static int __init pvcalls_frontend_init(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	pr_info("Initialising Xen pvcalls frontend driver\n");
+
+	return xenbus_register_frontend(&pvcalls_front_driver);
+}
+
+module_init(pvcalls_frontend_init);
-- 
1.9.1

* [PATCH v1 02/13] xen/pvcalls: connect to the backend
  2017-07-22  0:11 ` Stefano Stabellini
@ 2017-07-22  0:11     ` Stefano Stabellini
  2017-07-22  0:11     ` Stefano Stabellini
                       ` (19 subsequent siblings)
  20 siblings, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:11 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

Implement the probe function for the pvcalls frontend. Read the
supported versions, max-page-order and function-calls nodes from
xenstore.
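
As a sketch, the xenstore nodes involved look roughly like this (the
exact path prefixes are the usual xenbus frontend and backend
directories; values are illustrative):

	<backend>/versions        = "1"        # read by the frontend
	<backend>/max-page-order  = "<order>"  # must be >= RING_ORDER
	<backend>/function-calls  = "1"        # must be 1
	<frontend>/version        = "1"        # written during probe
	<frontend>/ring-ref       = "<gref>"   # grant ref of the command ring
	<frontend>/port           = "<evtchn>" # event channel port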

Introduce a data structure named pvcalls_bedata. It contains pointers to
the command ring, the event channel, a list of active sockets and a list
of passive sockets. List accesses are protected by a spinlock.

Introduce a waitqueue to allow waiting for a response on commands sent
to the backend.

Introduce an array of struct xen_pvcalls_response to store command
responses.

Only one frontend<->backend connection is supported at any given time
for a guest. Store the active frontend device in a static pointer.

Introduce a stub function for the event handler.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 153 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 153 insertions(+)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 173e204..fb08ebf 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -20,6 +20,29 @@
 #include <xen/xenbus.h>
 #include <xen/interface/io/pvcalls.h>
 
+#define PVCALLS_INVALID_ID (UINT_MAX)
+#define RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
+#define PVCALLS_NR_REQ_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
+
+struct pvcalls_bedata {
+	struct xen_pvcalls_front_ring ring;
+	grant_ref_t ref;
+	int irq;
+
+	struct list_head socket_mappings;
+	struct list_head socketpass_mappings;
+	spinlock_t pvcallss_lock;
+
+	wait_queue_head_t inflight_req;
+	struct xen_pvcalls_response rsp[PVCALLS_NR_REQ_PER_RING];
+};
+struct xenbus_device *pvcalls_front_dev;
+
+static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
+{
+	return IRQ_HANDLED;
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
@@ -33,7 +56,114 @@ static int pvcalls_front_remove(struct xenbus_device *dev)
 static int pvcalls_front_probe(struct xenbus_device *dev,
 			  const struct xenbus_device_id *id)
 {
+	int ret = -EFAULT, evtchn, ref = -1, i;
+	unsigned int max_page_order, function_calls, len;
+	char *versions;
+	grant_ref_t gref_head = 0;
+	struct xenbus_transaction xbt;
+	struct pvcalls_bedata *bedata = NULL;
+	struct xen_pvcalls_sring *sring;
+
+	if (pvcalls_front_dev != NULL) {
+		dev_err(&dev->dev, "only one PV Calls connection supported\n");
+		return -EINVAL;
+	}
+
+	versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
+	if (!len)
+		return -EINVAL;
+	if (strcmp(versions, "1")) {
+		kfree(versions);
+		return -EINVAL;
+	}
+	kfree(versions);
+	ret = xenbus_scanf(XBT_NIL, dev->otherend,
+			   "max-page-order", "%u", &max_page_order);
+	if (ret <= 0)
+		return -ENODEV;
+	if (max_page_order < RING_ORDER)
+		return -ENODEV;
+	ret = xenbus_scanf(XBT_NIL, dev->otherend,
+			   "function-calls", "%u", &function_calls);
+	if (ret <= 0 || function_calls != 1)
+		return -ENODEV;
+	pr_info("%s max-page-order is %u\n", __func__, max_page_order);
+
+	bedata = kzalloc(sizeof(struct pvcalls_bedata), GFP_KERNEL);
+	if (!bedata)
+		return -ENOMEM;
+
+	init_waitqueue_head(&bedata->inflight_req);
+	for (i = 0; i < PVCALLS_NR_REQ_PER_RING; i++)
+		bedata->rsp[i].req_id = PVCALLS_INVALID_ID;
+
+	sring = (struct xen_pvcalls_sring *) __get_free_page(GFP_KERNEL |
+							     __GFP_ZERO);
+	if (!sring)
+		goto error;
+	SHARED_RING_INIT(sring);
+	FRONT_RING_INIT(&bedata->ring, sring, XEN_PAGE_SIZE);
+
+	ret = xenbus_alloc_evtchn(dev, &evtchn);
+	if (ret)
+		goto error;
+
+	bedata->irq = bind_evtchn_to_irqhandler(evtchn,
+						pvcalls_front_event_handler,
+						0, "pvcalls-frontend", dev);
+	if (bedata->irq < 0) {
+		ret = bedata->irq;
+		goto error;
+	}
+
+	ret = gnttab_alloc_grant_references(1, &gref_head);
+	if (ret < 0)
+		goto error;
+	bedata->ref = ref = gnttab_claim_grant_reference(&gref_head);
+	if (ref < 0)
+		goto error;
+	gnttab_grant_foreign_access_ref(ref, dev->otherend_id,
+					virt_to_gfn((void *)sring), 0);
+
+ again:
+	ret = xenbus_transaction_start(&xbt);
+	if (ret) {
+		xenbus_dev_fatal(dev, ret, "starting transaction");
+		goto error;
+	}
+	ret = xenbus_printf(xbt, dev->nodename, "version", "%u", 1);
+	if (ret)
+		goto error_xenbus;
+	ret = xenbus_printf(xbt, dev->nodename, "ring-ref", "%d", ref);
+	if (ret)
+		goto error_xenbus;
+	ret = xenbus_printf(xbt, dev->nodename, "port", "%u",
+			    evtchn);
+	if (ret)
+		goto error_xenbus;
+	ret = xenbus_transaction_end(xbt, 0);
+	if (ret) {
+		if (ret == -EAGAIN)
+			goto again;
+		xenbus_dev_fatal(dev, ret, "completing transaction");
+		goto error;
+	}
+
+	INIT_LIST_HEAD(&bedata->socket_mappings);
+	INIT_LIST_HEAD(&bedata->socketpass_mappings);
+	spin_lock_init(&bedata->pvcallss_lock);
+	dev_set_drvdata(&dev->dev, bedata);
+	pvcalls_front_dev = dev;
+	xenbus_switch_state(dev, XenbusStateInitialised);
+
 	return 0;
+
+ error_xenbus:
+	xenbus_transaction_end(xbt, 1);
+	xenbus_dev_fatal(dev, ret, "writing xenstore");
+ error:
+	pvcalls_front_remove(dev);
+	return ret;
 }
 
 static int pvcalls_front_resume(struct xenbus_device *dev)
@@ -45,6 +175,29 @@ static int pvcalls_front_resume(struct xenbus_device *dev)
 static void pvcalls_front_changed(struct xenbus_device *dev,
 			    enum xenbus_state backend_state)
 {
+	switch (backend_state) {
+	case XenbusStateReconfiguring:
+	case XenbusStateReconfigured:
+	case XenbusStateInitialising:
+	case XenbusStateInitialised:
+	case XenbusStateUnknown:
+		break;
+
+	case XenbusStateInitWait:
+		break;
+
+	case XenbusStateConnected:
+		xenbus_switch_state(dev, XenbusStateConnected);
+		break;
+
+	case XenbusStateClosed:
+		if (dev->state == XenbusStateClosed)
+			break;
+		/* Missed the backend's CLOSING state -- fallthrough */
+	case XenbusStateClosing:
+		xenbus_frontend_closed(dev);
+		break;
+	}
 }
 
 static struct xenbus_driver pvcalls_front_driver = {
-- 
1.9.1

* [PATCH v1 03/13] xen/pvcalls: implement socket command and handle events
  2017-07-22  0:11 ` Stefano Stabellini
@ 2017-07-22  0:11     ` Stefano Stabellini
  2017-07-22  0:11     ` Stefano Stabellini
                       ` (19 subsequent siblings)
  20 siblings, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:11 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

Send a PVCALLS_SOCKET command to the backend, using the masked
req_prod_pvt as the req_id. This way, req_id is guaranteed to be between
0 and PVCALLS_NR_REQ_PER_RING - 1. We already have a slot in the rsp
array ready for the response, and there cannot be two outstanding
responses with the same req_id.
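
As an illustrative sketch of the slot arithmetic (the patch open-codes
the same computation):

	/* RING_SIZE() is a power of two, so the mask cycles req_id
	 * through 0 .. RING_SIZE - 1 as req_prod_pvt grows */
	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);

The code also checks that rsp[req_id].req_id is PVCALLS_INVALID_ID
before claiming the slot, so a slot is never handed out while its
previous response is still pending.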

Wait for the response by waiting on the inflight_req waitqueue and
checking the req_id field in rsp[req_id]. Use atomic accesses to
read the field. Once a response is received, clear the corresponding rsp
slot by setting req_id to PVCALLS_INVALID_ID. Note that
PVCALLS_INVALID_ID is invalid only from the frontend point of view. It
is not part of the PVCalls protocol.

pvcalls_front_event_handler is in charge of copying responses from the
ring to the appropriate rsp slot. It is done by copying the body of the
response first, then by copying req_id atomically. After the copies,
wake up anybody waiting on the waitqueue.

pvcallss_lock protects accesses to the ring.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 91 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |  8 ++++
 2 files changed, 99 insertions(+)
 create mode 100644 drivers/xen/pvcalls-front.h

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index fb08ebf..7933c73 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -40,9 +40,100 @@ struct pvcalls_bedata {
 
 static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
 {
+	struct xenbus_device *dev = dev_id;
+	struct pvcalls_bedata *bedata;
+	struct xen_pvcalls_response *rsp;
+	uint8_t *src, *dst;
+	int req_id = 0, more = 0;
+
+	if (dev == NULL)
+		return IRQ_HANDLED;
+
+	bedata = dev_get_drvdata(&dev->dev);
+	if (bedata == NULL)
+		return IRQ_HANDLED;
+
+again:
+	while (RING_HAS_UNCONSUMED_RESPONSES(&bedata->ring)) {
+		rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
+
+		req_id = rsp->req_id;
+		src = (uint8_t *)&bedata->rsp[req_id];
+		src += sizeof(rsp->req_id);
+		dst = (uint8_t *)rsp;
+		dst += sizeof(rsp->req_id);
+		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
+		/*
+		 * First copy the rest of the data, then req_id. It is
+		 * paired with the barrier when accessing bedata->rsp.
+		 */
+		smp_wmb();
+		WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
+
+		bedata->ring.rsp_cons++;
+		wake_up(&bedata->inflight_req);
+	}
+
+	RING_FINAL_CHECK_FOR_RESPONSES(&bedata->ring, more);
+	if (more)
+		goto again;
 	return IRQ_HANDLED;
 }
 
+int pvcalls_front_socket(struct socket *sock)
+{
+	struct pvcalls_bedata *bedata;
+	struct xen_pvcalls_request *req;
+	int notify, req_id, ret;
+
+	if (!pvcalls_front_dev)
+		return -EACCES;
+	/*
+	 * PVCalls only supports domain AF_INET,
+	 * type SOCK_STREAM and protocol 0 sockets for now.
+	 *
+	 * Check socket type here, AF_INET and protocol checks are done
+	 * by the caller.
+	 */
+	if (sock->type != SOCK_STREAM)
+	    return -ENOTSUPP;
+
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	spin_lock(&bedata->pvcallss_lock);
+	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
+	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
+	if (RING_FULL(&bedata->ring) ||
+	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
+		spin_unlock(&bedata->pvcallss_lock);
+		return -EAGAIN;
+	}
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
+	req->req_id = req_id;
+	req->cmd = PVCALLS_SOCKET;
+	req->u.socket.id = (uint64_t) sock;
+	req->u.socket.domain = AF_INET;
+	req->u.socket.type = SOCK_STREAM;
+	req->u.socket.protocol = 0;
+
+	bedata->ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+	spin_unlock(&bedata->pvcallss_lock);
+	if (notify)
+		notify_remote_via_irq(bedata->irq);
+
+	if (wait_event_interruptible(bedata->inflight_req,
+		READ_ONCE(bedata->rsp[req_id].req_id) == req_id) != 0)
+		return -EINTR;
+
+	ret = bedata->rsp[req_id].ret;
+	/* read ret, then set this rsp slot to be reused */
+	smp_mb();
+	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
+
+	return ret;
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
new file mode 100644
index 0000000..b7dabed
--- /dev/null
+++ b/drivers/xen/pvcalls-front.h
@@ -0,0 +1,8 @@
+#ifndef __PVCALLS_FRONT_H__
+#define __PVCALLS_FRONT_H__
+
+#include <linux/net.h>
+
+int pvcalls_front_socket(struct socket *sock);
+
+#endif
-- 
1.9.1

* [PATCH v1 00/13] introduce the Xen PV Calls frontend
@ 2017-07-22  0:11 Stefano Stabellini
  2017-07-22  0:11 ` [PATCH v1 01/13] xen/pvcalls: introduce the pvcalls xenbus frontend Stefano Stabellini
  2017-07-22  0:11 ` Stefano Stabellini
  0 siblings, 2 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:11 UTC (permalink / raw)
  To: xen-devel; +Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky

Hi all,

This series introduces the frontend for the newly introduced PV Calls
protocol.

PV Calls is a paravirtualized protocol that allows the implementation of
a set of POSIX functions in a different domain. The PV Calls frontend
sends POSIX function calls to the backend, which acts on them and
returns the result to the frontend.

For more information about PV Calls, please read:

https://xenbits.xen.org/docs/unstable/misc/pvcalls.html

This patch series only implements the frontend driver. It doesn't
attempt to redirect POSIX calls to it. The functions exported in
pvcalls-front.h are meant to be used for that. A separate patch series
will be sent to use them and hook them into the system.
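
For illustration only, a hypothetical caller from such a series might
drive the exported functions like this (pvcalls_create_and_connect is
made up; only the pvcalls_front_* functions come from this series):

	#include "pvcalls-front.h"

	/* hypothetical glue: create a PV Calls socket and connect it */
	static int pvcalls_create_and_connect(struct socket *sock,
					      struct sockaddr *addr, int len)
	{
		int ret;

		ret = pvcalls_front_socket(sock);	/* PVCALLS_SOCKET */
		if (ret < 0)
			return ret;
		/* PVCALLS_CONNECT, blocking, no flags */
		return pvcalls_front_connect(sock, addr, len, 0);
	}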


Stefano Stabellini (13):
      xen/pvcalls: introduce the pvcalls xenbus frontend
      xen/pvcalls: connect to the backend
      xen/pvcalls: implement socket command and handle events
      xen/pvcalls: implement connect command
      xen/pvcalls: implement bind command
      xen/pvcalls: implement listen command
      xen/pvcalls: implement accept command
      xen/pvcalls: implement sendmsg
      xen/pvcalls: implement recvmsg
      xen/pvcalls: implement poll command
      xen/pvcalls: implement release command
      xen/pvcalls: implement frontend disconnect
      xen: introduce a Kconfig option to enable the pvcalls frontend

 drivers/xen/Kconfig         |    9 +
 drivers/xen/Makefile        |    1 +
 drivers/xen/pvcalls-front.c | 1097 +++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |   28 ++
 4 files changed, 1135 insertions(+)
 create mode 100644 drivers/xen/pvcalls-front.c
 create mode 100644 drivers/xen/pvcalls-front.h

* [PATCH v1 04/13] xen/pvcalls: implement connect command
  2017-07-22  0:11 ` Stefano Stabellini
@ 2017-07-22  0:11     ` Stefano Stabellini
  2017-07-22  0:11     ` Stefano Stabellini
                       ` (19 subsequent siblings)
  20 siblings, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:11 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

Send PVCALLS_CONNECT to the backend. Allocate a new ring and evtchn for
the active socket.

Introduce a data structure to keep track of sockets. Introduce a
waitqueue to allow the frontend to wait on data coming from the backend
on the active socket (recvmsg command).

Two mutexes (one for reads and one for writes) will be used to protect
the active socket in and out rings from concurrent accesses.
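
For reference, a sketch of the per-socket ring layout set up by
create_active() below (field names follow the PV Calls specification):

	one indexes page, granted via req->u.connect.ref:
	  struct pvcalls_data_intf: in_cons/in_prod, out_cons/out_prod,
	  ring_order, ref[] (grant refs of the data pages)

	1 << ring_order data pages, granted via ref[]:
	  [ in  buffer: XEN_FLEX_RING_SIZE(ring_order) bytes ]
	  [ out buffer: XEN_FLEX_RING_SIZE(ring_order) bytes ]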

sock->sk->sk_send_head is not used for IP sockets: reuse the field to
store a pointer to the struct sock_mapping corresponding to the socket.
This way, we can easily get the struct sock_mapping from the struct
socket.
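
A minimal sketch of the lookup this enables (hypothetical helper; the
patches open-code the cast instead):

	static inline struct sock_mapping *to_sock_mapping(struct socket *sock)
	{
		/* sk_send_head is unused for IP sockets; connect and
		 * bind store the per-socket frontend state there */
		return (struct sock_mapping *)READ_ONCE(sock->sk->sk_send_head);
	}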

Convert the struct socket pointer into a uint64_t and use it as the id
for the new socket to pass to the backend.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 153 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |   2 +
 2 files changed, 155 insertions(+)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 7933c73..0d305e0 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -13,6 +13,8 @@
  */
 
 #include <linux/module.h>
+#include <linux/net.h>
+#include <linux/socket.h>
 
 #include <xen/events.h>
 #include <xen/grant_table.h>
@@ -20,6 +22,8 @@
 #include <xen/xenbus.h>
 #include <xen/interface/io/pvcalls.h>
 
+#include <net/sock.h>
+
 #define PVCALLS_INVALID_ID (UINT_MAX)
 #define RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
 #define PVCALLS_NR_REQ_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
@@ -38,6 +42,24 @@ struct pvcalls_bedata {
 };
 struct xenbus_device *pvcalls_front_dev;
 
+struct sock_mapping {
+	bool active_socket;
+	struct list_head list;
+	struct socket *sock;
+	union {
+		struct {
+			int irq;
+			grant_ref_t ref;
+			struct pvcalls_data_intf *ring;
+			struct pvcalls_data data;
+			struct mutex in_mutex;
+			struct mutex out_mutex;
+
+			wait_queue_head_t inflight_conn_req;
+		} active;
+	};
+};
+
 static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
 {
 	struct xenbus_device *dev = dev_id;
@@ -80,6 +102,18 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map)
+{
+	struct sock_mapping *map = sock_map;
+
+	if (map == NULL)
+		return IRQ_HANDLED;
+
+	wake_up_interruptible(&map->active.inflight_conn_req);
+
+	return IRQ_HANDLED;
+}
+
 int pvcalls_front_socket(struct socket *sock)
 {
 	struct pvcalls_bedata *bedata;
@@ -134,6 +168,125 @@ int pvcalls_front_socket(struct socket *sock)
 	return ret;
 }
 
+static struct sock_mapping *create_active(int *evtchn)
+{
+	struct sock_mapping *map = NULL;
+	void *bytes;
+	int ret, irq = -1, i;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (map == NULL)
+		return NULL;
+
+	init_waitqueue_head(&map->active.inflight_conn_req);
+
+	map->active.ring = (struct pvcalls_data_intf *)
+		__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	if (map->active.ring == NULL)
+		goto out_error;
+	memset(map->active.ring, 0, XEN_PAGE_SIZE);
+	map->active.ring->ring_order = RING_ORDER;
+	bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					map->active.ring->ring_order);
+	if (bytes == NULL)
+		goto out_error;
+	for (i = 0; i < (1 << map->active.ring->ring_order); i++)
+		map->active.ring->ref[i] = gnttab_grant_foreign_access(
+			pvcalls_front_dev->otherend_id,
+			pfn_to_gfn(virt_to_pfn(bytes) + i), 0);
+
+	map->active.ref = gnttab_grant_foreign_access(
+		pvcalls_front_dev->otherend_id,
+		pfn_to_gfn(virt_to_pfn((void *)map->active.ring)), 0);
+
+	ret = xenbus_alloc_evtchn(pvcalls_front_dev, evtchn);
+	if (ret)
+		goto out_error;
+	map->active.data.in = bytes;
+	map->active.data.out = bytes +
+		XEN_FLEX_RING_SIZE(map->active.ring->ring_order);
+	irq = bind_evtchn_to_irqhandler(*evtchn, pvcalls_front_conn_handler,
+					0, "pvcalls-frontend", map);
+	if (irq < 0)
+		goto out_error;
+
+	map->active.irq = irq;
+	map->active_socket = true;
+	mutex_init(&map->active.in_mutex);
+	mutex_init(&map->active.out_mutex);
+
+	return map;
+
+out_error:
+	if (irq >= 0)
+		unbind_from_irqhandler(irq, map);
+	else if (*evtchn >= 0)
+		xenbus_free_evtchn(pvcalls_front_dev, *evtchn);
+	kfree(map->active.data.in);
+	kfree(map->active.ring);
+	kfree(map);
+	return NULL;
+}
+
+int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
+				int addr_len, int flags)
+{
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map = NULL;
+	struct xen_pvcalls_request *req;
+	int notify, req_id, ret, evtchn;
+
+	if (!pvcalls_front_dev)
+		return -ENETUNREACH;
+	if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
+		return -ENOTSUPP;
+
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	spin_lock(&bedata->pvcallss_lock);
+	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
+	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
+	if (RING_FULL(&bedata->ring) ||
+	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
+		spin_unlock(&bedata->pvcallss_lock);
+		return -EAGAIN;
+	}
+
+	map = create_active(&evtchn);
+	if (!map)
+	    return -ENOMEM;
+
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
+	req->req_id = req_id;
+	req->cmd = PVCALLS_CONNECT;
+	req->u.connect.id = (uint64_t)sock;
+	memcpy(req->u.connect.addr, addr, sizeof(*addr));
+	req->u.connect.len = addr_len;
+	req->u.connect.flags = flags;
+	req->u.connect.ref = map->active.ref;
+	req->u.connect.evtchn = evtchn;
+
+	list_add_tail(&map->list, &bedata->socket_mappings);
+	map->sock = sock;
+	WRITE_ONCE(sock->sk->sk_send_head, (void *)map);
+
+	bedata->ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+	spin_unlock(&bedata->pvcallss_lock);
+
+	if (notify)
+		notify_remote_via_irq(bedata->irq);
+
+	wait_event(bedata->inflight_req,
+		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
+
+	ret = bedata->rsp[req_id].ret;
+	/* read ret, then set this rsp slot to be reused */
+	smp_mb();
+	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
+	return ret;
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index b7dabed..63b0417 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -4,5 +4,7 @@
 #include <linux/net.h>
 
 int pvcalls_front_socket(struct socket *sock);
+int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
+			  int addr_len, int flags);
 
 #endif
-- 
1.9.1

* [PATCH v1 05/13] xen/pvcalls: implement bind command
  2017-07-22  0:11 ` Stefano Stabellini
                     ` (3 preceding siblings ...)
  2017-07-22  0:11   ` [PATCH v1 05/13] xen/pvcalls: implement bind command Stefano Stabellini
@ 2017-07-22  0:11   ` Stefano Stabellini
  2017-07-24 19:43     ` Juergen Gross
  2017-07-22  0:11   ` [PATCH v1 06/13] xen/pvcalls: implement listen command Stefano Stabellini
                     ` (15 subsequent siblings)
  20 siblings, 2 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:11 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

Send PVCALLS_BIND to the backend. Introduce a new structure, part of
struct sock_mapping, to store information specific to passive sockets.

Introduce a status field to keep track of the status of the passive
socket.

Introduce a waitqueue for the "accept" command (see the accept command
implementation): it is used to allow only one outstanding accept
command at any given time and to implement polling on the passive
socket. Introduce a flags field to keep track of in-flight accept and
poll commands.
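
As a sketch, such a flag bit can gate a single in-flight operation via
test_and_set_bit() (illustrative; the actual accept implementation
arrives later in the series):

	/* claim the accept slot; bail out if one is already in flight */
	if (test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
			     (void *)&map->passive.flags))
		return -EAGAIN;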

sock->sk->sk_send_head is not used for IP sockets: reuse the field to
store a pointer to the struct sock_mapping corresponding to the socket.

Convert the struct socket pointer into a uint64_t and use it as the id
for the socket to pass to the backend.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 74 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |  3 ++
 2 files changed, 77 insertions(+)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 0d305e0..71619bc 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -57,6 +57,23 @@ struct sock_mapping {
 
 			wait_queue_head_t inflight_conn_req;
 		} active;
+		struct {
+		/* Socket status */
+#define PVCALLS_STATUS_UNINITALIZED  0
+#define PVCALLS_STATUS_BIND          1
+#define PVCALLS_STATUS_LISTEN        2
+			uint8_t status;
+		/*
+		 * Internal state-machine flags.
+		 * Only one accept operation can be inflight for a socket.
+		 * Only one poll operation can be inflight for a given socket.
+		 */
+#define PVCALLS_FLAG_ACCEPT_INFLIGHT 0
+#define PVCALLS_FLAG_POLL_INFLIGHT   1
+#define PVCALLS_FLAG_POLL_RET        2
+			uint8_t flags;
+			wait_queue_head_t inflight_accept_req;
+		} passive;
 	};
 };
 
@@ -287,6 +304,63 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
 	return ret;
 }
 
+int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map = NULL;
+	struct xen_pvcalls_request *req;
+	int notify, req_id, ret;
+
+	if (!pvcalls_front_dev)
+		return -ENOTCONN;
+	if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
+		return -ENOTSUPP;
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (map == NULL)
+		return -ENOMEM;
+
+	spin_lock(&bedata->pvcallss_lock);
+	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
+	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
+	if (RING_FULL(&bedata->ring) ||
+	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
+		kfree(map);
+		spin_unlock(&bedata->pvcallss_lock);
+		return -EAGAIN;
+	}
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
+	req->req_id = req_id;
+	map->sock = sock;
+	req->cmd = PVCALLS_BIND;
+	req->u.bind.id = (uint64_t) sock;
+	memcpy(req->u.bind.addr, addr, sizeof(*addr));
+	req->u.bind.len = addr_len;
+
+	init_waitqueue_head(&map->passive.inflight_accept_req);
+
+	list_add_tail(&map->list, &bedata->socketpass_mappings);
+	WRITE_ONCE(sock->sk->sk_send_head, (void *)map);
+	map->active_socket = false;
+
+	bedata->ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+	spin_unlock(&bedata->pvcallss_lock);
+	if (notify)
+		notify_remote_via_irq(bedata->irq);
+
+	wait_event(bedata->inflight_req,
+		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
+
+	map->passive.status = PVCALLS_STATUS_BIND;
+	ret = bedata->rsp[req_id].ret;
+	/* read ret, then set this rsp slot to be reused */
+	smp_mb();
+	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
+	return 0;
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index 63b0417..8b0a274 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -6,5 +6,8 @@
 int pvcalls_front_socket(struct socket *sock);
 int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
 			  int addr_len, int flags);
+int pvcalls_front_bind(struct socket *sock,
+		       struct sockaddr *addr,
+		       int addr_len);
 
 #endif
-- 
1.9.1

* [PATCH v1 06/13] xen/pvcalls: implement listen command
  2017-07-22  0:11 ` Stefano Stabellini
                     ` (4 preceding siblings ...)
  2017-07-22  0:11   ` Stefano Stabellini
@ 2017-07-22  0:11   ` Stefano Stabellini
  2017-07-24 19:44     ` Juergen Gross
  2017-07-22  0:11   ` Stefano Stabellini
                     ` (14 subsequent siblings)
  20 siblings, 2 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:11 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

Send PVCALLS_LISTEN to the backend.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 49 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |  1 +
 2 files changed, 50 insertions(+)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 71619bc..80fd5fb 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -361,6 +361,55 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	return 0;
 }
 
+int pvcalls_front_listen(struct socket *sock, int backlog)
+{
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map;
+	struct xen_pvcalls_request *req;
+	int notify, req_id, ret;
+
+	if (!pvcalls_front_dev)
+		return -ENOTCONN;
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
+	if (!map)
+		return -ENOTSOCK;
+
+	if (map->passive.status != PVCALLS_STATUS_BIND)
+		return -EOPNOTSUPP;
+
+	spin_lock(&bedata->pvcallss_lock);
+	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
+	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
+	if (RING_FULL(&bedata->ring) ||
+	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
+		spin_unlock(&bedata->pvcallss_lock);
+		return -EAGAIN;
+	}
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
+	req->req_id = req_id;
+	req->cmd = PVCALLS_LISTEN;
+	req->u.listen.id = (uint64_t) sock;
+	req->u.listen.backlog = backlog;
+
+	bedata->ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+	spin_unlock(&bedata->pvcallss_lock);
+	if (notify)
+		notify_remote_via_irq(bedata->irq);
+
+	wait_event(bedata->inflight_req,
+		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
+
+	map->passive.status = PVCALLS_STATUS_LISTEN;
+	ret = bedata->rsp[req_id].ret;
+	/* read ret, then set this rsp slot to be reused */
+	smp_mb();
+	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
+	return ret;
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index 8b0a274..aa8fe10 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -9,5 +9,6 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
 int pvcalls_front_bind(struct socket *sock,
 		       struct sockaddr *addr,
 		       int addr_len);
+int pvcalls_front_listen(struct socket *sock, int backlog);
 
 #endif
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH v1 07/13] xen/pvcalls: implement accept command
  2017-07-22  0:11 ` Stefano Stabellini
@ 2017-07-22  0:11     ` Stefano Stabellini
  2017-07-22  0:11     ` Stefano Stabellini
                       ` (19 subsequent siblings)
  20 siblings, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:11 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

Send PVCALLS_ACCEPT to the backend. Allocate a new active socket. Make
sure that only one accept command is executed at any given time by
setting PVCALLS_FLAG_ACCEPT_INFLIGHT and waiting on the
inflight_accept_req waitqueue.

sock->sk->sk_send_head is not used for IP sockets: reuse the field to
store a pointer to the struct sock_mapping corresponding to the socket.

Convert the new struct socket pointer into a uint64_t and use it as the
id for the new socket to pass to the backend.
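
The id is completely opaque to the backend; the frontend just casts the
pointer. A minimal sketch of the convention (assuming pointers fit in 64
bits, which holds on the builds this driver targets; the recovery side
mirrors what the poll patch later in this series does):

	/* frontend -> backend: use the pointer value as an opaque id */
	req->u.accept.id     = (uint64_t)sock;
	req->u.accept.id_new = (uint64_t)newsock;

	/* backend -> frontend: recover the pointer from the id */
	struct socket *sock = (struct socket *)rsp->u.accept.id;
	struct sock_mapping *map = (struct sock_mapping *)
				   READ_ONCE(sock->sk->sk_send_head);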

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 79 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |  3 ++
 2 files changed, 82 insertions(+)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 80fd5fb..f3a04a2 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -410,6 +410,85 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
 	return ret;
 }
 
+int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map;
+	struct sock_mapping *map2 = NULL;
+	struct xen_pvcalls_request *req;
+	int notify, req_id, ret, evtchn;
+
+	if (!pvcalls_front_dev)
+		return -ENOTCONN;
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
+	if (!map)
+		return -ENOTSOCK;
+
+	if (map->passive.status != PVCALLS_STATUS_LISTEN)
+		return -EINVAL;
+
+	/*
+	 * The backend only supports one inflight accept request; it will
+	 * return errors for the others
+	 */
+	if (test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
+			     (void *)&map->passive.flags)) {
+		if (wait_event_interruptible(map->passive.inflight_accept_req,
+			!test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
+					  (void *)&map->passive.flags))
+			!= 0)
+			return -EINTR;
+	}
+
+
+	newsock->sk = kzalloc(sizeof(*newsock->sk), GFP_KERNEL);
+	if (newsock->sk == NULL)
+		return -ENOMEM;
+
+	spin_lock(&bedata->pvcallss_lock);
+	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
+	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
+	if (RING_FULL(&bedata->ring) ||
+	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
+		spin_unlock(&bedata->pvcallss_lock);
+		return -EAGAIN;
+	}
+
+	map2 = create_active(&evtchn);
+
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
+	req->req_id = req_id;
+	req->cmd = PVCALLS_ACCEPT;
+	req->u.accept.id = (uint64_t) sock;
+	req->u.accept.ref = map2->active.ref;
+	req->u.accept.id_new = (uint64_t) newsock;
+	req->u.accept.evtchn = evtchn;
+
+	list_add_tail(&map2->list, &bedata->socket_mappings);
+	WRITE_ONCE(newsock->sk->sk_send_head, (void *)map2);
+	map2->sock = newsock;
+
+	bedata->ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+	spin_unlock(&bedata->pvcallss_lock);
+	if (notify)
+		notify_remote_via_irq(bedata->irq);
+
+	wait_event(bedata->inflight_req,
+		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
+
+	clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags);
+	wake_up(&map->passive.inflight_accept_req);
+
+	ret = bedata->rsp[req_id].ret;
+	/* read ret, then set this rsp slot to be reused */
+	smp_mb();
+	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
+	return ret;
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index aa8fe10..ab4f1da 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -10,5 +10,8 @@ int pvcalls_front_bind(struct socket *sock,
 		       struct sockaddr *addr,
 		       int addr_len);
 int pvcalls_front_listen(struct socket *sock, int backlog);
+int pvcalls_front_accept(struct socket *sock,
+			 struct socket *newsock,
+			 int flags);
 
 #endif
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH v1 08/13] xen/pvcalls: implement sendmsg
  2017-07-22  0:11 ` Stefano Stabellini
@ 2017-07-22  0:11     ` Stefano Stabellini
  2017-07-22  0:11     ` Stefano Stabellini
                       ` (19 subsequent siblings)
  20 siblings, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:11 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

Send data to an active socket by copying data to the "out" ring. Take
the active socket out_mutex so that only one function can access the
ring at any given time.

If not enough room is available on the ring, rather than returning
immediately or sleep-waiting, spin for up to 5000 cycles. This small
optimization turns out to improve performance significantly.
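
Since the "out" ring is a circular buffer, __write_ring may have to
split one send into two copies when the writable region wraps past the
end of the array. A worked example of the index arithmetic (the numbers
are illustrative):

	RING_IDX array_size = 4096;	/* XEN_FLEX_RING_SIZE(ring_order) */
	RING_IDX cons = 100, prod = 300;	/* free-running indexes */
	RING_IDX queued = prod - cons;	/* 200 bytes already on the ring */
	RING_IDX free_bytes = array_size - queued;	/* 3896 writable */

	/*
	 * masked_prod = 300 and masked_cons = 100, so the writable region
	 * wraps: a 3896-byte write copies 4096 - 300 = 3796 bytes at
	 * offset 300 first, then the remaining 100 bytes at offset 0.
	 * out_prod is advanced by the total only after the virt_wmb().
	 */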

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 109 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |   3 ++
 2 files changed, 112 insertions(+)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index f3a04a2..bf29f40 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -27,6 +27,7 @@
 #define PVCALLS_INVALID_ID (UINT_MAX)
 #define RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
 #define PVCALLS_NR_REQ_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
+#define PVCALLS_FRONT_MAX_SPIN 5000
 
 struct pvcalls_bedata {
 	struct xen_pvcalls_front_ring ring;
@@ -77,6 +78,22 @@ struct sock_mapping {
 	};
 };
 
+static int pvcalls_front_write_todo(struct sock_mapping *map)
+{
+	struct pvcalls_data_intf *intf = map->active.ring;
+	RING_IDX cons, prod, size = XEN_FLEX_RING_SIZE(intf->ring_order);
+	int32_t error;
+
+	cons = intf->out_cons;
+	prod = intf->out_prod;
+	error = intf->out_error;
+	if (error == -ENOTCONN)
+		return 0;
+	if (error != 0)
+		return error;
+	return size - pvcalls_queued(prod, cons, size);
+}
+
 static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
 {
 	struct xenbus_device *dev = dev_id;
@@ -304,6 +321,98 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
 	return ret;
 }
 
+static int __write_ring(struct pvcalls_data_intf *intf,
+			struct pvcalls_data *data,
+			struct iov_iter *msg_iter,
+			size_t len)
+{
+	RING_IDX cons, prod, size, masked_prod, masked_cons;
+	RING_IDX array_size = XEN_FLEX_RING_SIZE(intf->ring_order);
+	int32_t error;
+
+	cons = intf->out_cons;
+	prod = intf->out_prod;
+	error = intf->out_error;
+	/* read indexes before continuing */
+	virt_mb();
+
+	if (error < 0)
+		return error;
+
+	size = pvcalls_queued(prod, cons, array_size);
+	if (size >= array_size)
+		return 0;
+	if (len > array_size - size)
+		len = array_size - size;
+
+	masked_prod = pvcalls_mask(prod, array_size);
+	masked_cons = pvcalls_mask(cons, array_size);
+
+	if (masked_prod < masked_cons) {
+		copy_from_iter(data->out + masked_prod, len, msg_iter);
+	} else {
+		if (len > array_size - masked_prod) {
+			copy_from_iter(data->out + masked_prod,
+				       array_size - masked_prod, msg_iter);
+			copy_from_iter(data->out,
+				       len - (array_size - masked_prod),
+				       msg_iter);
+		} else {
+			copy_from_iter(data->out + masked_prod, len, msg_iter);
+		}
+	}
+	/* write to ring before updating pointer */
+	virt_wmb();
+	intf->out_prod += len;
+
+	return len;
+}
+
+int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg,
+			  size_t len)
+{
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map;
+	int sent = 0, tot_sent = 0;
+	int count = 0, flags;
+
+	if (!pvcalls_front_dev)
+		return -ENOTCONN;
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
+	if (!map)
+		return -ENOTSOCK;
+
+	flags = msg->msg_flags;
+	if (flags & (MSG_CONFIRM|MSG_DONTROUTE|MSG_EOR|MSG_OOB))
+		return -EOPNOTSUPP;
+
+	mutex_lock(&map->active.out_mutex);
+	if ((flags & MSG_DONTWAIT) && !pvcalls_front_write_todo(map)) {
+		mutex_unlock(&map->active.out_mutex);
+		return -EAGAIN;
+	}
+
+again:
+	count++;
+	sent = __write_ring(map->active.ring,
+			    &map->active.data, &msg->msg_iter,
+			    len);
+	if (sent > 0) {
+		len -= sent;
+		tot_sent += sent;
+		notify_remote_via_irq(map->active.irq);
+	}
+	if (sent >= 0 && len > 0 && count < PVCALLS_FRONT_MAX_SPIN)
+		goto again;
+	if (sent < 0)
+		tot_sent = sent;
+
+	mutex_unlock(&map->active.out_mutex);
+	return tot_sent;
+}
+
 int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
 	struct pvcalls_bedata *bedata;
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index ab4f1da..d937c24 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -13,5 +13,8 @@ int pvcalls_front_bind(struct socket *sock,
 int pvcalls_front_accept(struct socket *sock,
 			 struct socket *newsock,
 			 int flags);
+int pvcalls_front_sendmsg(struct socket *sock,
+			  struct msghdr *msg,
+			  size_t len);
 
 #endif
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH v1 09/13] xen/pvcalls: implement recvmsg
  2017-07-22  0:11 ` Stefano Stabellini
                     ` (9 preceding siblings ...)
  2017-07-22  0:11   ` [PATCH v1 09/13] xen/pvcalls: implement recvmsg Stefano Stabellini
@ 2017-07-22  0:11   ` Stefano Stabellini
  2017-07-24 19:56     ` Juergen Gross
  2017-07-24 19:56     ` Juergen Gross
  2017-07-22  0:12   ` [PATCH v1 10/13] xen/pvcalls: implement poll command Stefano Stabellini
                     ` (9 subsequent siblings)
  20 siblings, 2 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:11 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

Implement recvmsg by copying data from the "in" ring. If not enough data
is available and the recvmsg call is blocking, then wait on the
inflight_conn_req waitqueue. Take the active socket in_mutex so that
only one function can access the ring at any given time.

If not enough data is available on the ring, rather than returning
immediately or sleep-waiting, spin for up to 5000 cycles. This small
optimization turns out to improve performance and latency significantly.
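
The return-value mapping that results for callers, in sketch form (an
illustrative caller, not from this series; msg and len are assumed to
be set up already):

	ret = pvcalls_front_recvmsg(sock, msg, len, MSG_DONTWAIT);
	if (ret > 0)
		/* ret bytes were copied into msg->msg_iter */;
	else if (ret == -EAGAIN)
		/* the "in" ring was empty at the time of the call */;
	else if (ret == 0)
		/* the backend posted -ENOTCONN: treat as end of stream */;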

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 106 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |   4 ++
 2 files changed, 110 insertions(+)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index bf29f40..3d1041a 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -94,6 +94,20 @@ static int pvcalls_front_write_todo(struct sock_mapping *map)
 	return size - pvcalls_queued(prod, cons, size);
 }
 
+static int pvcalls_front_read_todo(struct sock_mapping *map)
+{
+	struct pvcalls_data_intf *intf = map->active.ring;
+	RING_IDX cons, prod;
+	int32_t error;
+
+	cons = intf->in_cons;
+	prod = intf->in_prod;
+	error = intf->in_error;
+	return (error != 0 ||
+		pvcalls_queued(prod, cons,
+			       XEN_FLEX_RING_SIZE(intf->ring_order))) != 0;
+}
+
 static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
 {
 	struct xenbus_device *dev = dev_id;
@@ -413,6 +427,98 @@ int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg,
 	return tot_sent;
 }
 
+static int __read_ring(struct pvcalls_data_intf *intf,
+		       struct pvcalls_data *data,
+		       struct iov_iter *msg_iter,
+		       size_t len, int flags)
+{
+	RING_IDX cons, prod, size, masked_prod, masked_cons;
+	RING_IDX array_size = XEN_FLEX_RING_SIZE(intf->ring_order);
+	int32_t error;
+
+	cons = intf->in_cons;
+	prod = intf->in_prod;
+	error = intf->in_error;
+	/* get pointers before reading from the ring */
+	virt_rmb();
+	if (error < 0)
+		return error;
+
+	size = pvcalls_queued(prod, cons, array_size);
+	masked_prod = pvcalls_mask(prod, array_size);
+	masked_cons = pvcalls_mask(cons, array_size);
+
+	if (size == 0)
+		return 0;
+
+	if (len > size)
+		len = size;
+
+	if (masked_prod > masked_cons) {
+		copy_to_iter(data->in + masked_cons, len, msg_iter);
+	} else {
+		if (len > (array_size - masked_cons)) {
+			copy_to_iter(data->in + masked_cons,
+				     array_size - masked_cons, msg_iter);
+			copy_to_iter(data->in,
+				     len - (array_size - masked_cons),
+				     msg_iter);
+		} else {
+			copy_to_iter(data->in + masked_cons, len, msg_iter);
+		}
+	}
+	/* read data from the ring before increasing the index */
+	virt_mb();
+	if (!(flags & MSG_PEEK))
+		intf->in_cons += len;
+
+	return len;
+}
+
+int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+		     int flags)
+{
+	struct pvcalls_bedata *bedata;
+	int ret = -EAGAIN;
+	struct sock_mapping *map;
+	int count = 0;
+
+	if (!pvcalls_front_dev)
+		return -ENOTCONN;
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
+	if (!map)
+		return -ENOTSOCK;
+
+	if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC))
+		return -EOPNOTSUPP;
+
+	mutex_lock(&map->active.in_mutex);
+	if (len > XEN_FLEX_RING_SIZE(map->active.ring->ring_order))
+		len = XEN_FLEX_RING_SIZE(map->active.ring->ring_order);
+
+	while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) {
+		if (count < PVCALLS_FRONT_MAX_SPIN)
+			count++;
+		else
+			wait_event_interruptible(map->active.inflight_conn_req,
+						 pvcalls_front_read_todo(map));
+	}
+	ret = __read_ring(map->active.ring, &map->active.data,
+			  &msg->msg_iter, len, flags);
+
+	if (ret > 0)
+		notify_remote_via_irq(map->active.irq);
+	if (ret == 0)
+		ret = -EAGAIN;
+	if (ret == -ENOTCONN)
+		ret = 0;
+
+	mutex_unlock(&map->active.in_mutex);
+	return ret;
+}
+
 int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
 	struct pvcalls_bedata *bedata;
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index d937c24..de24041 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -16,5 +16,9 @@ int pvcalls_front_accept(struct socket *sock,
 int pvcalls_front_sendmsg(struct socket *sock,
 			  struct msghdr *msg,
 			  size_t len);
+int pvcalls_front_recvmsg(struct socket *sock,
+			  struct msghdr *msg,
+			  size_t len,
+			  int flags);
 
 #endif
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH v1 10/13] xen/pvcalls: implement poll command
  2017-07-22  0:11 ` Stefano Stabellini
                     ` (11 preceding siblings ...)
  2017-07-22  0:12   ` [PATCH v1 10/13] xen/pvcalls: implement poll command Stefano Stabellini
@ 2017-07-22  0:12   ` Stefano Stabellini
  2017-07-24 20:08       ` Juergen Gross
  2017-07-22  0:12   ` [PATCH v1 11/13] xen/pvcalls: implement release command Stefano Stabellini
                     ` (7 subsequent siblings)
  20 siblings, 1 reply; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:12 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

For active sockets, check the indexes and use the inflight_conn_req
waitqueue to wait.

For passive sockets, send PVCALLS_POLL to the backend. Use the
inflight_accept_req waitqueue if an accept is outstanding. Otherwise use
the inflight_req waitqueue: inflight_req is woken up when a new response
is received; on wakeup we check whether the POLL response has arrived by
looking at the PVCALLS_FLAG_POLL_RET flag. We set the flag from
pvcalls_front_event_handler, if the response was for a POLL command.

In pvcalls_front_event_handler, get the struct socket pointer from the
poll id (we previously converted struct socket* to uint64_t and used it
as id).
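
The two flags implement a small hand-off between the poller and the
interrupt handler. Schematically (condensed from the patch below, with
flags standing for (void *)&map->passive.flags):

	/* poller, pvcalls_front_poll_passive() */
	if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET, flags))
		return POLLIN;		/* a POLL response already arrived */
	if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT, flags)) {
		poll_wait(file, &bedata->inflight_req, wait);
		return 0;		/* a PVCALLS_POLL is already pending */
	}
	/* otherwise queue a new PVCALLS_POLL request and poll_wait() */

	/* event handler, on seeing a PVCALLS_POLL response */
	set_bit(PVCALLS_FLAG_POLL_RET, flags);
	clear_bit(PVCALLS_FLAG_POLL_INFLIGHT, flags);
	wake_up(&bedata->inflight_req);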

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 123 ++++++++++++++++++++++++++++++++++++++++----
 drivers/xen/pvcalls-front.h |   3 ++
 2 files changed, 115 insertions(+), 11 deletions(-)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 3d1041a..b6cfb7d 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -128,17 +128,29 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
 		rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
 
 		req_id = rsp->req_id;
-		src = (uint8_t *)&bedata->rsp[req_id];
-		src += sizeof(rsp->req_id);
-		dst = (uint8_t *)rsp;
-		dst += sizeof(rsp->req_id);
-		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
-		/*
-		 * First copy the rest of the data, then req_id. It is
-		 * paired with the barrier when accessing bedata->rsp.
-		 */
-		smp_wmb();
-		WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
+		if (rsp->cmd == PVCALLS_POLL) {
+			struct socket *sock = (struct socket *) rsp->u.poll.id;
+			struct sock_mapping *map =
+				(struct sock_mapping *)
+				READ_ONCE(sock->sk->sk_send_head);
+
+			set_bit(PVCALLS_FLAG_POLL_RET,
+				(void *)&map->passive.flags);
+			clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
+				  (void *)&map->passive.flags);
+		} else {
+			src = (uint8_t *)&bedata->rsp[req_id];
+			src += sizeof(rsp->req_id);
+			dst = (uint8_t *)rsp;
+			dst += sizeof(rsp->req_id);
+			memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
+			/*
+			 * First copy the rest of the data, then req_id. It is
+			 * paired with the barrier when accessing bedata->rsp.
+			 */
+			smp_wmb();
+			WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
+		}
 
 		bedata->ring.rsp_cons++;
 		wake_up(&bedata->inflight_req);
@@ -704,6 +716,95 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
 	return ret;
 }
 
+static unsigned int pvcalls_front_poll_passive(struct file *file,
+					       struct pvcalls_bedata *bedata,
+					       struct sock_mapping *map,
+					       poll_table *wait)
+{
+	int notify, req_id;
+	struct xen_pvcalls_request *req;
+
+	if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
+		     (void *)&map->passive.flags)) {
+		poll_wait(file, &map->passive.inflight_accept_req, wait);
+		return 0;
+	}
+
+	if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
+			       (void *)&map->passive.flags))
+		return POLLIN;
+
+	if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT,
+			     (void *)&map->passive.flags)) {
+		poll_wait(file, &bedata->inflight_req, wait);
+		return 0;
+	}
+
+	spin_lock(&bedata->pvcallss_lock);
+	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
+	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
+	if (RING_FULL(&bedata->ring) ||
+	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
+		spin_unlock(&bedata->pvcallss_lock);
+		return -EAGAIN;
+	}
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
+	req->req_id = req_id;
+	req->cmd = PVCALLS_POLL;
+	req->u.poll.id = (uint64_t) map->sock;
+
+	bedata->ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+	spin_unlock(&bedata->pvcallss_lock);
+	if (notify)
+		notify_remote_via_irq(bedata->irq);
+
+	poll_wait(file, &bedata->inflight_req, wait);
+	return 0;
+}
+
+static unsigned int pvcalls_front_poll_active(struct file *file,
+					      struct pvcalls_bedata *bedata,
+					      struct sock_mapping *map,
+					      poll_table *wait)
+{
+	unsigned int mask = 0;
+	int32_t in_error, out_error;
+	struct pvcalls_data_intf *intf = map->active.ring;
+
+	out_error = intf->out_error;
+	in_error = intf->in_error;
+
+	poll_wait(file, &map->active.inflight_conn_req, wait);
+	if (pvcalls_front_write_todo(map))
+		mask |= POLLOUT | POLLWRNORM;
+	if (pvcalls_front_read_todo(map))
+		mask |= POLLIN | POLLRDNORM;
+	if (in_error != 0 || out_error != 0)
+		mask |= POLLERR;
+
+	return mask;
+}
+
+unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
+			       poll_table *wait)
+{
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map;
+
+	if (!pvcalls_front_dev)
+		return POLLNVAL;
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
+	if (!map)
+		return POLLNVAL;
+	if (map->active_socket)
+		return pvcalls_front_poll_active(file, bedata, map, wait);
+	else
+		return pvcalls_front_poll_passive(file, bedata, map, wait);
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index de24041..25e05b8 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -20,5 +20,8 @@ int pvcalls_front_recvmsg(struct socket *sock,
 			  struct msghdr *msg,
 			  size_t len,
 			  int flags);
+unsigned int pvcalls_front_poll(struct file *file,
+				struct socket *sock,
+				poll_table *wait);
 
 #endif
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH v1 11/13] xen/pvcalls: implement release command
  2017-07-22  0:11 ` Stefano Stabellini
                     ` (13 preceding siblings ...)
  2017-07-22  0:12   ` [PATCH v1 11/13] xen/pvcalls: implement release command Stefano Stabellini
@ 2017-07-22  0:12   ` Stefano Stabellini
  2017-07-24 20:14     ` Juergen Gross
  2017-07-24 20:14     ` Juergen Gross
  2017-07-22  0:12   ` [PATCH v1 12/13] xen/pvcalls: implement frontend disconnect Stefano Stabellini
                     ` (5 subsequent siblings)
  20 siblings, 2 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:12 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

Send PVCALLS_RELEASE to the backend and wait for a reply. Take both
in_mutex and out_mutex to avoid concurrent accesses. Then, free the
socket.
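
Holding both mutexes is what makes the teardown safe: sendmsg runs
under out_mutex and recvmsg under in_mutex, so once release owns both,
no other thread can still be touching the data ring. The active-socket
path, in outline (condensed from the patch below):

	/* force blocked recvmsg waiters off the ring first */
	map->active.ring->in_error = -EBADF;
	wake_up_interruptible(&map->active.inflight_conn_req);

	mutex_lock(&map->active.in_mutex);	/* wait out any recvmsg */
	mutex_lock(&map->active.out_mutex);	/* wait out any sendmsg */
	pvcalls_front_free_map(bedata, map);	/* end grants, free ring/irq */
	mutex_unlock(&map->active.out_mutex);
	mutex_unlock(&map->active.in_mutex);
	kfree(map);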

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 86 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |  1 +
 2 files changed, 87 insertions(+)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index b6cfb7d..bd3dfac 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -174,6 +174,24 @@ static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map)
 	return IRQ_HANDLED;
 }
 
+static void pvcalls_front_free_map(struct pvcalls_bedata *bedata,
+				   struct sock_mapping *map)
+{
+	int i;
+
+	spin_lock(&bedata->pvcallss_lock);
+	if (!list_empty(&map->list))
+		list_del_init(&map->list);
+	spin_unlock(&bedata->pvcallss_lock);
+
+	/* what if a waiting thread still needs access? */
+	for (i = 0; i < (1 << map->active.ring->ring_order); i++)
+		gnttab_end_foreign_access(map->active.ring->ref[i], 0, 0);
+	gnttab_end_foreign_access(map->active.ref, 0, 0);
+	free_page((unsigned long)map->active.ring);
+	unbind_from_irqhandler(map->active.irq, map);
+}
+
 int pvcalls_front_socket(struct socket *sock)
 {
 	struct pvcalls_bedata *bedata;
@@ -805,6 +823,74 @@ unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
 		return pvcalls_front_poll_passive(file, bedata, map, wait);
 }
 
+int pvcalls_front_release(struct socket *sock)
+{
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map;
+	int req_id, notify;
+	struct xen_pvcalls_request *req;
+
+	if (!pvcalls_front_dev)
+		return -EIO;
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+	if (!bedata)
+		return -EIO;
+
+	if (sock->sk == NULL)
+		return 0;
+
+	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
+	if (map == NULL)
+		return 0;
+	WRITE_ONCE(sock->sk->sk_send_head, NULL);
+
+	spin_lock(&bedata->pvcallss_lock);
+	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
+	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
+	if (RING_FULL(&bedata->ring) ||
+	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
+		spin_unlock(&bedata->pvcallss_lock);
+		return -EAGAIN;
+	}
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
+	req->req_id = req_id;
+	req->cmd = PVCALLS_RELEASE;
+	req->u.release.id = (uint64_t)sock;
+
+	bedata->ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+	spin_unlock(&bedata->pvcallss_lock);
+	if (notify)
+		notify_remote_via_irq(bedata->irq);
+
+	wait_event(bedata->inflight_req,
+		READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
+
+	if (map->active_socket) {
+		/* 
+		 * Set in_error and wake up inflight_conn_req to force
+		 * recvmsg waiters to exit.
+		 */
+		map->active.ring->in_error = -EBADF;
+		wake_up_interruptible(&map->active.inflight_conn_req);
+
+		mutex_lock(&map->active.in_mutex);
+		mutex_lock(&map->active.out_mutex);
+		pvcalls_front_free_map(bedata, map);
+		mutex_unlock(&map->active.out_mutex);
+		mutex_unlock(&map->active.in_mutex);
+		kfree(map);
+	} else {
+		spin_lock(&bedata->pvcallss_lock);
+		list_del_init(&map->list);
+		kfree(map);
+		spin_unlock(&bedata->pvcallss_lock);
+	}
+	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
+
+	return 0;
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index 25e05b8..3332978 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -23,5 +23,6 @@ int pvcalls_front_recvmsg(struct socket *sock,
 unsigned int pvcalls_front_poll(struct file *file,
 				struct socket *sock,
 				poll_table *wait);
+int pvcalls_front_release(struct socket *sock);
 
 #endif
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH v1 11/13] xen/pvcalls: implement release command
  2017-07-22  0:11 ` Stefano Stabellini
                     ` (12 preceding siblings ...)
  2017-07-22  0:12   ` Stefano Stabellini
@ 2017-07-22  0:12   ` Stefano Stabellini
  2017-07-22  0:12   ` Stefano Stabellini
                     ` (6 subsequent siblings)
  20 siblings, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:12 UTC (permalink / raw)
  To: xen-devel
  Cc: jgross, Stefano Stabellini, boris.ostrovsky, sstabellini, linux-kernel

Send PVCALLS_RELEASE to the backend and wait for a reply. Take both
in_mutex and out_mutex to avoid concurrent accesses. Then, free the
socket.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 86 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |  1 +
 2 files changed, 87 insertions(+)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index b6cfb7d..bd3dfac 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -174,6 +174,24 @@ static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map)
 	return IRQ_HANDLED;
 }
 
+static void pvcalls_front_free_map(struct pvcalls_bedata *bedata,
+				   struct sock_mapping *map)
+{
+	int i;
+
+	spin_lock(&bedata->pvcallss_lock);
+	if (!list_empty(&map->list))
+		list_del_init(&map->list);
+	spin_unlock(&bedata->pvcallss_lock);
+
+	/* what if the thread waiting still need access? */
+	for (i = 0; i < (1 << map->active.ring->ring_order); i++)
+		gnttab_end_foreign_access(map->active.ring->ref[i], 0, 0);
+	gnttab_end_foreign_access(map->active.ref, 0, 0);
+	free_page((unsigned long)map->active.ring);
+	unbind_from_irqhandler(map->active.irq, map);
+}
+
 int pvcalls_front_socket(struct socket *sock)
 {
 	struct pvcalls_bedata *bedata;
@@ -805,6 +823,74 @@ unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
 		return pvcalls_front_poll_passive(file, bedata, map, wait);
 }
 
+int pvcalls_front_release(struct socket *sock)
+{
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map;
+	int req_id, notify;
+	struct xen_pvcalls_request *req;
+
+	if (!pvcalls_front_dev)
+		return -EIO;
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+	if (!bedata)
+		return -EIO;
+
+	if (sock->sk == NULL)
+		return 0;
+
+	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
+	if (map == NULL)
+		return 0;
+	WRITE_ONCE(sock->sk->sk_send_head, NULL);
+
+	spin_lock(&bedata->pvcallss_lock);
+	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
+	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
+	if (RING_FULL(&bedata->ring) ||
+	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
+		spin_unlock(&bedata->pvcallss_lock);
+		return -EAGAIN;
+	}
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
+	req->req_id = req_id;
+	req->cmd = PVCALLS_RELEASE;
+	req->u.release.id = (uint64_t)sock;
+
+	bedata->ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+	spin_unlock(&bedata->pvcallss_lock);
+	if (notify)
+		notify_remote_via_irq(bedata->irq);
+
+	wait_event(bedata->inflight_req,
+		READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
+
+	if (map->active_socket) {
+		/* 
+		 * Set in_error and wake up inflight_conn_req to force
+		 * recvmsg waiters to exit.
+		 */
+		map->active.ring->in_error = -EBADF;
+		wake_up_interruptible(&map->active.inflight_conn_req);
+
+		mutex_lock(&map->active.in_mutex);
+		mutex_lock(&map->active.out_mutex);
+		pvcalls_front_free_map(bedata, map);
+		mutex_unlock(&map->active.out_mutex);
+		mutex_unlock(&map->active.in_mutex);
+		kfree(map);
+	} else {
+		spin_lock(&bedata->pvcallss_lock);
+		list_del_init(&map->list);
+		kfree(map);
+		spin_unlock(&bedata->pvcallss_lock);
+	}
+	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
+
+	return 0;
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index 25e05b8..3332978 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -23,5 +23,6 @@ int pvcalls_front_recvmsg(struct socket *sock,
 unsigned int pvcalls_front_poll(struct file *file,
 				struct socket *sock,
 				poll_table *wait);
+int pvcalls_front_release(struct socket *sock);
 
 #endif
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH v1 12/13] xen/pvcalls: implement frontend disconnect
  2017-07-22  0:11 ` Stefano Stabellini
                     ` (15 preceding siblings ...)
  2017-07-22  0:12   ` [PATCH v1 12/13] xen/pvcalls: implement frontend disconnect Stefano Stabellini
@ 2017-07-22  0:12   ` Stefano Stabellini
  2017-07-24 20:16     ` Juergen Gross
  2017-07-24 20:16     ` Juergen Gross
  2017-07-22  0:12   ` [PATCH v1 13/13] xen: introduce a Kconfig option to enable the pvcalls frontend Stefano Stabellini
                     ` (3 subsequent siblings)
  20 siblings, 2 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:12 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

Implement the pvcalls frontend removal function. Go through the lists of
active and passive sockets and free them all, one at a time.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index bd3dfac..fcc15fb 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -898,6 +898,34 @@ int pvcalls_front_release(struct socket *sock)
 
 static int pvcalls_front_remove(struct xenbus_device *dev)
 {
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map = NULL, *n;
+
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	list_for_each_entry_safe(map, n, &bedata->socket_mappings, list) {
+		mutex_lock(&map->active.in_mutex);
+		mutex_lock(&map->active.out_mutex);
+		pvcalls_front_free_map(bedata, map);
+		mutex_unlock(&map->active.out_mutex);
+		mutex_unlock(&map->active.in_mutex);
+		kfree(map);
+	}
+	list_for_each_entry_safe(map, n, &bedata->socketpass_mappings, list) {
+		spin_lock(&bedata->pvcallss_lock);
+		list_del_init(&map->list);
+		spin_unlock(&bedata->pvcallss_lock);
+		kfree(map);
+	}
+	if (bedata->irq > 0)
+		unbind_from_irqhandler(bedata->irq, dev);
+	if (bedata->ref >= 0)
+		gnttab_end_foreign_access(bedata->ref, 0, 0);
+	kfree(bedata->ring.sring);
+	kfree(bedata);
+	dev_set_drvdata(&dev->dev, NULL);
+	xenbus_switch_state(dev, XenbusStateClosed);
+	pvcalls_front_dev = NULL;
 	return 0;
 }
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH v1 13/13] xen: introduce a Kconfig option to enable the pvcalls frontend
  2017-07-22  0:11 ` Stefano Stabellini
                     ` (16 preceding siblings ...)
  2017-07-22  0:12   ` Stefano Stabellini
@ 2017-07-22  0:12   ` Stefano Stabellini
  2017-07-24 20:17     ` Juergen Gross
  2017-07-24 20:17     ` Juergen Gross
  2017-07-22  0:12   ` Stefano Stabellini
                     ` (2 subsequent siblings)
  20 siblings, 2 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-22  0:12 UTC (permalink / raw)
  To: xen-devel
  Cc: linux-kernel, sstabellini, jgross, boris.ostrovsky, Stefano Stabellini

Also add pvcalls-front to the Makefile.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/Kconfig  | 9 +++++++++
 drivers/xen/Makefile | 1 +
 2 files changed, 10 insertions(+)

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 4545561..ea5e99f 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -196,6 +196,15 @@ config XEN_PCIDEV_BACKEND
 
 	  If in doubt, say m.
 
+config XEN_PVCALLS_FRONTEND
+	bool "XEN PV Calls frontend driver"
+	depends on INET && XEN
+	help
+	  Experimental frontend for the Xen PV Calls protocol
+	  (https://xenbits.xen.org/docs/unstable/misc/pvcalls.html). It
+	  sends a small set of POSIX calls to the backend, which
+	  implements them.
+
 config XEN_PVCALLS_BACKEND
 	bool "XEN PV Calls backend driver"
 	depends on INET && XEN && XEN_BACKEND
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 480b928..afb9e03 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_XEN_EFI)			+= efi.o
 obj-$(CONFIG_XEN_SCSI_BACKEND)		+= xen-scsiback.o
 obj-$(CONFIG_XEN_AUTO_XLATE)		+= xlate_mmu.o
 obj-$(CONFIG_XEN_PVCALLS_BACKEND)	+= pvcalls-back.o
+obj-$(CONFIG_XEN_PVCALLS_FRONTEND)	+= pvcalls-front.o
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
 xen-gntalloc-y				:= gntalloc.o
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 01/13] xen/pvcalls: introduce the pvcalls xenbus frontend
  2017-07-22  0:11 ` Stefano Stabellini
                     ` (19 preceding siblings ...)
  2017-07-24 19:06   ` [PATCH v1 01/13] xen/pvcalls: introduce the pvcalls xenbus frontend Juergen Gross
@ 2017-07-24 19:06   ` Juergen Gross
  2017-07-24 22:32     ` Stefano Stabellini
  2017-07-24 22:32     ` Stefano Stabellini
  20 siblings, 2 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 19:06 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:11, Stefano Stabellini wrote:
> Introduce a xenbus frontend for the pvcalls protocol, as defined by
> https://xenbits.xen.org/docs/unstable/misc/pvcalls.html.
> 
> This patch only adds the stubs, the code will be added by the following
> patches.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 68 insertions(+)
>  create mode 100644 drivers/xen/pvcalls-front.c
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> new file mode 100644
> index 0000000..173e204
> --- /dev/null
> +++ b/drivers/xen/pvcalls-front.c
> @@ -0,0 +1,68 @@
> +/*
> + * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/module.h>
> +
> +#include <xen/events.h>
> +#include <xen/grant_table.h>
> +#include <xen/xen.h>
> +#include <xen/xenbus.h>
> +#include <xen/interface/io/pvcalls.h>
> +
> +static const struct xenbus_device_id pvcalls_front_ids[] = {
> +	{ "pvcalls" },
> +	{ "" }
> +};
> +
> +static int pvcalls_front_remove(struct xenbus_device *dev)
> +{
> +	return 0;
> +}
> +
> +static int pvcalls_front_probe(struct xenbus_device *dev,
> +			  const struct xenbus_device_id *id)
> +{
> +	return 0;
> +}
> +
> +static int pvcalls_front_resume(struct xenbus_device *dev)
> +{
> +	dev_warn(&dev->dev, "suspend/resume unsupported\n");
> +	return 0;
> +}

Why are you adding a resume function doing nothing but issuing a
message? Just omit it.
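
If I'm reading xenbus_dev_resume() right, a NULL .resume hook is simply
skipped, so (untested) the registration could shrink to:

	static struct xenbus_driver pvcalls_front_driver = {
		.ids = pvcalls_front_ids,
		.probe = pvcalls_front_probe,
		.remove = pvcalls_front_remove,
		.otherend_changed = pvcalls_front_changed,
	};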


Juergen

> +
> +static void pvcalls_front_changed(struct xenbus_device *dev,
> +			    enum xenbus_state backend_state)
> +{
> +}
> +
> +static struct xenbus_driver pvcalls_front_driver = {
> +	.ids = pvcalls_front_ids,
> +	.probe = pvcalls_front_probe,
> +	.remove = pvcalls_front_remove,
> +	.resume = pvcalls_front_resume,
> +	.otherend_changed = pvcalls_front_changed,
> +};
> +
> +static int __init pvcalls_frontend_init(void)
> +{
> +	if (!xen_domain())
> +		return -ENODEV;
> +
> +	pr_info("Initialising Xen pvcalls frontend driver\n");
> +
> +	return xenbus_register_frontend(&pvcalls_front_driver);
> +}
> +
> +module_init(pvcalls_frontend_init);
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 02/13] xen/pvcalls: connect to the backend
  2017-07-22  0:11     ` Stefano Stabellini
  (?)
  (?)
@ 2017-07-24 19:23     ` Juergen Gross
  2017-07-25 20:10       ` Stefano Stabellini
  2017-07-25 20:10       ` Stefano Stabellini
  -1 siblings, 2 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 19:23 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:11, Stefano Stabellini wrote:
> Implement the probe function for the pvcalls frontend. Read the
> supported versions, max-page-order and function-calls nodes from
> xenstore.
> 
> Introduce a data structure named pvcalls_bedata. It contains pointers to
> the command ring, the event channel, a list of active sockets and a list
> > of passive sockets. List accesses are protected by a spin_lock.
> 
> Introduce a waitqueue to allow waiting for a response on commands sent
> to the backend.
> 
> Introduce an array of struct xen_pvcalls_response to store commands
> responses.
> 
> Only one frontend<->backend connection is supported at any given time
> for a guest. Store the active frontend device to a static pointer.
> 
> > Introduce a stub function for the event handler.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 153 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 153 insertions(+)
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index 173e204..fb08ebf 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -20,6 +20,29 @@
>  #include <xen/xenbus.h>
>  #include <xen/interface/io/pvcalls.h>
>  
> +#define PVCALLS_INVALID_ID (UINT_MAX)
> +#define RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
> +#define PVCALLS_NR_REQ_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
> +
> +struct pvcalls_bedata {
> +	struct xen_pvcalls_front_ring ring;
> +	grant_ref_t ref;
> +	int irq;
> +
> +	struct list_head socket_mappings;
> +	struct list_head socketpass_mappings;
> +	spinlock_t pvcallss_lock;
> +
> +	wait_queue_head_t inflight_req;
> +	struct xen_pvcalls_response rsp[PVCALLS_NR_REQ_PER_RING];
> +};
> +struct xenbus_device *pvcalls_front_dev;
> +
> +static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
> +{
> +	return IRQ_HANDLED;
> +}
> +
>  static const struct xenbus_device_id pvcalls_front_ids[] = {
>  	{ "pvcalls" },
>  	{ "" }
> @@ -33,7 +56,114 @@ static int pvcalls_front_remove(struct xenbus_device *dev)
>  static int pvcalls_front_probe(struct xenbus_device *dev,
>  			  const struct xenbus_device_id *id)
>  {
> +	int ret = -EFAULT, evtchn, ref = -1, i;
> +	unsigned int max_page_order, function_calls, len;
> +	char *versions;
> +	grant_ref_t gref_head = 0;
> +	struct xenbus_transaction xbt;
> +	struct pvcalls_bedata *bedata = NULL;
> +	struct xen_pvcalls_sring *sring;
> +
> +	if (pvcalls_front_dev != NULL) {
> +		dev_err(&dev->dev, "only one PV Calls connection supported\n");
> +		return -EINVAL;
> +	}
> +
> +	versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
> +	if (!len)
> +		return -EINVAL;
> +	if (strcmp(versions, "1")) {
> +		kfree(versions);
> +		return -EINVAL;
> +	}
> +	kfree(versions);
> +	ret = xenbus_scanf(XBT_NIL, dev->otherend,
> +			   "max-page-order", "%u", &max_page_order);

Use xenbus_read_unsigned() instead?
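
Something along these lines (untested), with 0 as the default so a
missing node still fails the RING_ORDER check:

	max_page_order = xenbus_read_unsigned(dev->otherend,
					      "max-page-order", 0);
	if (max_page_order < RING_ORDER)
		return -ENODEV;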

> +	if (ret <= 0)
> +		return -ENODEV;
> +	if (max_page_order < RING_ORDER)
> +		return -ENODEV;
> +	ret = xenbus_scanf(XBT_NIL, dev->otherend,
> +			   "function-calls", "%u", &function_calls);

xenbus_read_unsigned() again?

> +	if (ret <= 0 || function_calls != 1)
> +		return -ENODEV;
> +	pr_info("%s max-page-order is %u\n", __func__, max_page_order);
> +
> +	bedata = kzalloc(sizeof(struct pvcalls_bedata), GFP_KERNEL);
> +	if (!bedata)
> +		return -ENOMEM;
> +

You should call dev_set_drvdata() here already, otherwise entering the
error path will dereference a NULL pointer instead of bedata.
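
I.e. directly after the allocation (untested):

	bedata = kzalloc(sizeof(struct pvcalls_bedata), GFP_KERNEL);
	if (!bedata)
		return -ENOMEM;
	/*
	 * Set early: the error path calls pvcalls_front_remove(),
	 * which looks bedata up via dev_get_drvdata().
	 */
	dev_set_drvdata(&dev->dev, bedata);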

> +	init_waitqueue_head(&bedata->inflight_req);
> +	for (i = 0; i < PVCALLS_NR_REQ_PER_RING; i++)
> +		bedata->rsp[i].req_id = PVCALLS_INVALID_ID;
> +
> +	sring = (struct xen_pvcalls_sring *) __get_free_page(GFP_KERNEL |
> +							     __GFP_ZERO);
> +	if (!sring)
> +		goto error;

ret will be 1 here. Shouldn't you set it to -ENOMEM?
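
E.g. (untested):

	sring = (struct xen_pvcalls_sring *)__get_free_page(GFP_KERNEL |
							    __GFP_ZERO);
	if (!sring) {
		ret = -ENOMEM;
		goto error;
	}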

> +	SHARED_RING_INIT(sring);
> +	FRONT_RING_INIT(&bedata->ring, sring, XEN_PAGE_SIZE);
> +
> +	ret = xenbus_alloc_evtchn(dev, &evtchn);
> +	if (ret)
> +		goto error;
> +
> +	bedata->irq = bind_evtchn_to_irqhandler(evtchn,
> +						pvcalls_front_event_handler,
> +						0, "pvcalls-frontend", dev);
> +	if (bedata->irq < 0) {
> +		ret = bedata->irq;
> +		goto error;
> +	}
> +
> +	ret = gnttab_alloc_grant_references(1, &gref_head);
> +	if (ret < 0)
> +		goto error;
> +	bedata->ref = ref = gnttab_claim_grant_reference(&gref_head);
> +	if (ref < 0)
> +		goto error;

Setting ret?
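
E.g. (untested):

	bedata->ref = ref = gnttab_claim_grant_reference(&gref_head);
	if (ref < 0) {
		ret = ref;
		goto error;
	}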


Juergen

> +	gnttab_grant_foreign_access_ref(ref, dev->otherend_id,
> +					virt_to_gfn((void *)sring), 0);
> +
> + again:
> +	ret = xenbus_transaction_start(&xbt);
> +	if (ret) {
> +		xenbus_dev_fatal(dev, ret, "starting transaction");
> +		goto error;
> +	}
> +	ret = xenbus_printf(xbt, dev->nodename, "version", "%u", 1);
> +	if (ret)
> +		goto error_xenbus;
> +	ret = xenbus_printf(xbt, dev->nodename, "ring-ref", "%d", ref);
> +	if (ret)
> +		goto error_xenbus;
> +	ret = xenbus_printf(xbt, dev->nodename, "port", "%u",
> +			    evtchn);
> +	if (ret)
> +		goto error_xenbus;
> +	ret = xenbus_transaction_end(xbt, 0);
> +	if (ret) {
> +		if (ret == -EAGAIN)
> +			goto again;
> +		xenbus_dev_fatal(dev, ret, "completing transaction");
> +		goto error;
> +	}
> +
> +	INIT_LIST_HEAD(&bedata->socket_mappings);
> +	INIT_LIST_HEAD(&bedata->socketpass_mappings);
> +	spin_lock_init(&bedata->pvcallss_lock);
> +	dev_set_drvdata(&dev->dev, bedata);
> +	pvcalls_front_dev = dev;
> +	xenbus_switch_state(dev, XenbusStateInitialised);
> +
>  	return 0;
> +
> + error_xenbus:
> +	xenbus_transaction_end(xbt, 1);
> +	xenbus_dev_fatal(dev, ret, "writing xenstore");
> + error:
> +	pvcalls_front_remove(dev);
> +	return ret;
>  }
>  
>  static int pvcalls_front_resume(struct xenbus_device *dev)
> @@ -45,6 +175,29 @@ static int pvcalls_front_resume(struct xenbus_device *dev)
>  static void pvcalls_front_changed(struct xenbus_device *dev,
>  			    enum xenbus_state backend_state)
>  {
> +	switch (backend_state) {
> +	case XenbusStateReconfiguring:
> +	case XenbusStateReconfigured:
> +	case XenbusStateInitialising:
> +	case XenbusStateInitialised:
> +	case XenbusStateUnknown:
> +		break;
> +
> +	case XenbusStateInitWait:
> +		break;
> +
> +	case XenbusStateConnected:
> +		xenbus_switch_state(dev, XenbusStateConnected);
> +		break;
> +
> +	case XenbusStateClosed:
> +		if (dev->state == XenbusStateClosed)
> +			break;
> +		/* Missed the backend's CLOSING state -- fallthrough */
> +	case XenbusStateClosing:
> +		xenbus_frontend_closed(dev);
> +		break;
> +	}
>  }
>  
>  static struct xenbus_driver pvcalls_front_driver = {
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 03/13] xen/pvcalls: implement socket command and handle events
  2017-07-22  0:11     ` Stefano Stabellini
  (?)
  (?)
@ 2017-07-24 19:29     ` Juergen Gross
  2017-07-25 20:43       ` Stefano Stabellini
  2017-07-25 20:43       ` Stefano Stabellini
  -1 siblings, 2 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 19:29 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:11, Stefano Stabellini wrote:
> Send a PVCALLS_SOCKET command to the backend, using the masked
> req_prod_pvt as req_id. This way, req_id is guaranteed to be between 0
> and PVCALLS_NR_REQ_PER_RING. We already have a slot in the rsp array
> ready for the response, and there cannot be two outstanding responses
> with the same req_id.
> 
> Wait for the response by waiting on the inflight_req waitqueue and
> check for the req_id field in rsp[req_id]. Use atomic accesses to
> read the field. Once a response is received, clear the corresponding rsp
> slot by setting req_id to PVCALLS_INVALID_ID. Note that
> PVCALLS_INVALID_ID is invalid only from the frontend point of view. It
> is not part of the PVCalls protocol.
> 
> pvcalls_front_event_handler is in charge of copying responses from the
> ring to the appropriate rsp slot. It is done by copying the body of the
> response first, then by copying req_id atomically. After the copies,
> wake up anybody waiting on waitqueue.
> 
> pvcallss_lock protects accesses to the ring.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 91 +++++++++++++++++++++++++++++++++++++++++++++
>  drivers/xen/pvcalls-front.h |  8 ++++
>  2 files changed, 99 insertions(+)
>  create mode 100644 drivers/xen/pvcalls-front.h
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index fb08ebf..7933c73 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c

Shouldn't you include pvcalls-front.h?
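
I.e.:

	#include "pvcalls-front.h"

so the prototypes there are checked against the definitions here.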

> @@ -40,9 +40,100 @@ struct pvcalls_bedata {
>  
>  static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
>  {
> +	struct xenbus_device *dev = dev_id;
> +	struct pvcalls_bedata *bedata;
> +	struct xen_pvcalls_response *rsp;
> +	uint8_t *src, *dst;
> +	int req_id = 0, more = 0;
> +
> +	if (dev == NULL)
> +		return IRQ_HANDLED;
> +
> +	bedata = dev_get_drvdata(&dev->dev);
> +	if (bedata == NULL)
> +		return IRQ_HANDLED;
> +
> +again:
> +	while (RING_HAS_UNCONSUMED_RESPONSES(&bedata->ring)) {
> +		rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
> +
> +		req_id = rsp->req_id;
> +		src = (uint8_t *)&bedata->rsp[req_id];
> +		src += sizeof(rsp->req_id);
> +		dst = (uint8_t *)rsp;
> +		dst += sizeof(rsp->req_id);
> +		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> +		/*
> +		 * First copy the rest of the data, then req_id. It is
> +		 * paired with the barrier when accessing bedata->rsp.
> +		 */
> +		smp_wmb();
> +		WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> +
> +		bedata->ring.rsp_cons++;
> +		wake_up(&bedata->inflight_req);
> +	}
> +
> +	RING_FINAL_CHECK_FOR_RESPONSES(&bedata->ring, more);
> +	if (more)
> +		goto again;

Wouldn't it make more sense to use wake_up() just once if there is any
response pending and do the consuming loop outside the irq handler?
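
Roughly like this (untested) -- the waiters would then consume their
own responses, so the rsp_cons updates and the related locking would
need rework:

	static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
	{
		struct xenbus_device *dev = dev_id;
		struct pvcalls_bedata *bedata;

		if (dev == NULL)
			return IRQ_HANDLED;

		bedata = dev_get_drvdata(&dev->dev);
		if (bedata == NULL)
			return IRQ_HANDLED;

		if (RING_HAS_UNCONSUMED_RESPONSES(&bedata->ring))
			wake_up(&bedata->inflight_req);

		return IRQ_HANDLED;
	}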


Juergen

>  	return IRQ_HANDLED;
>  }
>  
> +int pvcalls_front_socket(struct socket *sock)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct xen_pvcalls_request *req;
> +	int notify, req_id, ret;
> +
> +	if (!pvcalls_front_dev)
> +		return -EACCES;
> +	/*
> +	 * PVCalls only supports domain AF_INET,
> +	 * type SOCK_STREAM and protocol 0 sockets for now.
> +	 *
> +	 * Check socket type here, AF_INET and protocol checks are done
> +	 * by the caller.
> +	 */
> +	if (sock->type != SOCK_STREAM)
> +	    return -ENOTSUPP;
> +
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	spin_lock(&bedata->pvcallss_lock);
> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
> +	if (RING_FULL(&bedata->ring) ||
> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> +		spin_unlock(&bedata->pvcallss_lock);
> +		return -EAGAIN;
> +	}
> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> +	req->req_id = req_id;
> +	req->cmd = PVCALLS_SOCKET;
> +	req->u.socket.id = (uint64_t) sock;
> +	req->u.socket.domain = AF_INET;
> +	req->u.socket.type = SOCK_STREAM;
> +	req->u.socket.protocol = 0;
> +
> +	bedata->ring.req_prod_pvt++;
> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> +	spin_unlock(&bedata->pvcallss_lock);
> +	if (notify)
> +		notify_remote_via_irq(bedata->irq);
> +
> +	if (wait_event_interruptible(bedata->inflight_req,
> +		READ_ONCE(bedata->rsp[req_id].req_id) == req_id) != 0)
> +		return -EINTR;
> +
> +	ret = bedata->rsp[req_id].ret;
> +	/* read ret, then set this rsp slot to be reused */
> +	smp_mb();
> +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> +
> +	return ret;
> +}
> +
>  static const struct xenbus_device_id pvcalls_front_ids[] = {
>  	{ "pvcalls" },
>  	{ "" }
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> new file mode 100644
> index 0000000..b7dabed
> --- /dev/null
> +++ b/drivers/xen/pvcalls-front.h
> @@ -0,0 +1,8 @@
> +#ifndef __PVCALLS_FRONT_H__
> +#define __PVCALLS_FRONT_H__
> +
> +#include <linux/net.h>
> +
> +int pvcalls_front_socket(struct socket *sock);
> +
> +#endif
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 04/13] xen/pvcalls: implement connect command
  2017-07-22  0:11     ` Stefano Stabellini
  (?)
  (?)
@ 2017-07-24 19:40     ` Juergen Gross
  2017-07-24 22:45       ` Stefano Stabellini
  2017-07-24 22:45       ` Stefano Stabellini
  -1 siblings, 2 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 19:40 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:11, Stefano Stabellini wrote:
> Send PVCALLS_CONNECT to the backend. Allocate a new ring and evtchn for
> the active socket.
> 
> Introduce a data structure to keep track of sockets. Introduce a
> waitqueue to allow the frontend to wait on data coming from the backend
> on the active socket (recvmsg command).
> 
> > Two mutexes (one for reads and one for writes) will be used to protect
> the active socket in and out rings from concurrent accesses.
> 
> sock->sk->sk_send_head is not used for ip sockets: reuse the field to
> store a pointer to the struct sock_mapping corresponding to the socket.
> This way, we can easily get the struct sock_mapping from the struct
> socket.
> 
> Convert the struct socket pointer into an uint64_t and use it as id for
> the new socket to pass to the backend.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 153 ++++++++++++++++++++++++++++++++++++++++++++
>  drivers/xen/pvcalls-front.h |   2 +
>  2 files changed, 155 insertions(+)
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index 7933c73..0d305e0 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -13,6 +13,8 @@
>   */
>  
>  #include <linux/module.h>
> +#include <linux/net.h>
> +#include <linux/socket.h>
>  
>  #include <xen/events.h>
>  #include <xen/grant_table.h>
> @@ -20,6 +22,8 @@
>  #include <xen/xenbus.h>
>  #include <xen/interface/io/pvcalls.h>
>  
> +#include <net/sock.h>
> +
>  #define PVCALLS_INVALID_ID (UINT_MAX)
>  #define RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
>  #define PVCALLS_NR_REQ_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
> @@ -38,6 +42,24 @@ struct pvcalls_bedata {
>  };
>  struct xenbus_device *pvcalls_front_dev;
>  
> +struct sock_mapping {
> +	bool active_socket;
> +	struct list_head list;
> +	struct socket *sock;
> +	union {
> +		struct {
> +			int irq;
> +			grant_ref_t ref;
> +			struct pvcalls_data_intf *ring;
> +			struct pvcalls_data data;
> +			struct mutex in_mutex;
> +			struct mutex out_mutex;
> +
> +			wait_queue_head_t inflight_conn_req;
> +		} active;
> +	};
> +};
> +
>  static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
>  {
>  	struct xenbus_device *dev = dev_id;
> @@ -80,6 +102,18 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
>  	return IRQ_HANDLED;
>  }
>  
> +static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map)
> +{
> +	struct sock_mapping *map = sock_map;
> +
> +	if (map == NULL)
> +		return IRQ_HANDLED;
> +
> +	wake_up_interruptible(&map->active.inflight_conn_req);
> +
> +	return IRQ_HANDLED;
> +}
> +
>  int pvcalls_front_socket(struct socket *sock)
>  {
>  	struct pvcalls_bedata *bedata;
> @@ -134,6 +168,125 @@ int pvcalls_front_socket(struct socket *sock)
>  	return ret;
>  }
>  
> +static struct sock_mapping *create_active(int *evtchn)
> +{
> +	struct sock_mapping *map = NULL;
> +	void *bytes;
> +	int ret, irq = -1, i;
> +
> +	map = kzalloc(sizeof(*map), GFP_KERNEL);
> +	if (map == NULL)
> +		return NULL;
> +
> +	init_waitqueue_head(&map->active.inflight_conn_req);
> +
> +	map->active.ring = (struct pvcalls_data_intf *)
> +		__get_free_page(GFP_KERNEL | __GFP_ZERO);
> +	if (map->active.ring == NULL)
> +		goto out_error;
> +	memset(map->active.ring, 0, XEN_PAGE_SIZE);
> +	map->active.ring->ring_order = RING_ORDER;
> +	bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
> +					map->active.ring->ring_order);
> +	if (bytes == NULL)
> +		goto out_error;
> +	for (i = 0; i < (1 << map->active.ring->ring_order); i++)
> +		map->active.ring->ref[i] = gnttab_grant_foreign_access(
> +			pvcalls_front_dev->otherend_id,
> +			pfn_to_gfn(virt_to_pfn(bytes) + i), 0);
> +
> +	map->active.ref = gnttab_grant_foreign_access(
> +		pvcalls_front_dev->otherend_id,
> +		pfn_to_gfn(virt_to_pfn((void *)map->active.ring)), 0);
> +
> +	ret = xenbus_alloc_evtchn(pvcalls_front_dev, evtchn);
> +	if (ret)
> +		goto out_error;

You are leaking bytes here in case of error.
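
E.g. (untested):

	ret = xenbus_alloc_evtchn(pvcalls_front_dev, evtchn);
	if (ret) {
		free_pages((unsigned long)bytes,
			   map->active.ring->ring_order);
		goto out_error;
	}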

> +	map->active.data.in = bytes;
> +	map->active.data.out = bytes +
> +		XEN_FLEX_RING_SIZE(map->active.ring->ring_order);
> +	irq = bind_evtchn_to_irqhandler(*evtchn, pvcalls_front_conn_handler,
> +					0, "pvcalls-frontend", map);
> +	if (irq < 0)
> +		goto out_error;
> +
> +	map->active.irq = irq;
> +	map->active_socket = true;
> +	mutex_init(&map->active.in_mutex);
> +	mutex_init(&map->active.out_mutex);
> +
> +	return map;
> +
> +out_error:
> +	if (irq >= 0)
> +		unbind_from_irqhandler(irq, map);
> +	else if (*evtchn >= 0)
> +		xenbus_free_evtchn(pvcalls_front_dev, *evtchn);
> +	kfree(map->active.data.in);
> +	kfree(map->active.ring);
> +	kfree(map);
> +	return NULL;
> +}
> +
> +int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
> +				int addr_len, int flags)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct sock_mapping *map = NULL;
> +	struct xen_pvcalls_request *req;
> +	int notify, req_id, ret, evtchn;
> +
> +	if (!pvcalls_front_dev)
> +		return -ENETUNREACH;
> +	if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
> +		return -ENOTSUPP;
> +
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	spin_lock(&bedata->pvcallss_lock);
> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
> +	if (RING_FULL(&bedata->ring) ||
> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> +		spin_unlock(&bedata->pvcallss_lock);
> +		return -EAGAIN;
> +	}
> +
> +	map = create_active(&evtchn);
> +	if (!map)
> +	    return -ENOMEM;

spin_unlock()?
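
E.g. (untested):

	map = create_active(&evtchn);
	if (!map) {
		spin_unlock(&bedata->pvcallss_lock);
		return -ENOMEM;
	}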


Juergen

> +
> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> +	req->req_id = req_id;
> +	req->cmd = PVCALLS_CONNECT;
> +	req->u.connect.id = (uint64_t)sock;
> +	memcpy(req->u.connect.addr, addr, sizeof(*addr));
> +	req->u.connect.len = addr_len;
> +	req->u.connect.flags = flags;
> +	req->u.connect.ref = map->active.ref;
> +	req->u.connect.evtchn = evtchn;
> +	
> +	list_add_tail(&map->list, &bedata->socket_mappings);
> +	map->sock = sock;
> +	WRITE_ONCE(sock->sk->sk_send_head, (void *)map);
> +
> +	bedata->ring.req_prod_pvt++;
> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> +	spin_unlock(&bedata->pvcallss_lock);
> +
> +	if (notify)
> +		notify_remote_via_irq(bedata->irq);
> +
> +	wait_event(bedata->inflight_req,
> +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> +
> +	ret = bedata->rsp[req_id].ret;
> +	/* read ret, then set this rsp slot to be reused */
> +	smp_mb();
> +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> +	return ret;
> +}
> +
>  static const struct xenbus_device_id pvcalls_front_ids[] = {
>  	{ "pvcalls" },
>  	{ "" }
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> index b7dabed..63b0417 100644
> --- a/drivers/xen/pvcalls-front.h
> +++ b/drivers/xen/pvcalls-front.h
> @@ -4,5 +4,7 @@
>  #include <linux/net.h>
>  
>  int pvcalls_front_socket(struct socket *sock);
> +int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
> +			  int addr_len, int flags);
>  
>  #endif
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 04/13] xen/pvcalls: implement connect command
  2017-07-22  0:11     ` Stefano Stabellini
  (?)
@ 2017-07-24 19:40     ` Juergen Gross
  -1 siblings, 0 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 19:40 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: Stefano Stabellini, boris.ostrovsky, linux-kernel

On 22/07/17 02:11, Stefano Stabellini wrote:
> Send PVCALLS_CONNECT to the backend. Allocate a new ring and evtchn for
> the active socket.
> 
> Introduce a data structure to keep track of sockets. Introduce a
> waitqueue to allow the frontend to wait on data coming from the backend
> on the active socket (recvmsg command).
> 
> Two mutexes (one of reads and one for writes) will be used to protect
> the active socket in and out rings from concurrent accesses.
> 
> sock->sk->sk_send_head is not used for ip sockets: reuse the field to
> store a pointer to the struct sock_mapping corresponding to the socket.
> This way, we can easily get the struct sock_mapping from the struct
> socket.
> 
> Convert the struct socket pointer into an uint64_t and use it as id for
> the new socket to pass to the backend.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 153 ++++++++++++++++++++++++++++++++++++++++++++
>  drivers/xen/pvcalls-front.h |   2 +
>  2 files changed, 155 insertions(+)
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index 7933c73..0d305e0 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -13,6 +13,8 @@
>   */
>  
>  #include <linux/module.h>
> +#include <linux/net.h>
> +#include <linux/socket.h>
>  
>  #include <xen/events.h>
>  #include <xen/grant_table.h>
> @@ -20,6 +22,8 @@
>  #include <xen/xenbus.h>
>  #include <xen/interface/io/pvcalls.h>
>  
> +#include <net/sock.h>
> +
>  #define PVCALLS_INVALID_ID (UINT_MAX)
>  #define RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
>  #define PVCALLS_NR_REQ_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
> @@ -38,6 +42,24 @@ struct pvcalls_bedata {
>  };
>  struct xenbus_device *pvcalls_front_dev;
>  
> +struct sock_mapping {
> +	bool active_socket;
> +	struct list_head list;
> +	struct socket *sock;
> +	union {
> +		struct {
> +			int irq;
> +			grant_ref_t ref;
> +			struct pvcalls_data_intf *ring;
> +			struct pvcalls_data data;
> +			struct mutex in_mutex;
> +			struct mutex out_mutex;
> +
> +			wait_queue_head_t inflight_conn_req;
> +		} active;
> +	};
> +};
> +
>  static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
>  {
>  	struct xenbus_device *dev = dev_id;
> @@ -80,6 +102,18 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
>  	return IRQ_HANDLED;
>  }
>  
> +static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map)
> +{
> +	struct sock_mapping *map = sock_map;
> +
> +	if (map == NULL)
> +		return IRQ_HANDLED;
> +
> +	wake_up_interruptible(&map->active.inflight_conn_req);
> +
> +	return IRQ_HANDLED;
> +}
> +
>  int pvcalls_front_socket(struct socket *sock)
>  {
>  	struct pvcalls_bedata *bedata;
> @@ -134,6 +168,125 @@ int pvcalls_front_socket(struct socket *sock)
>  	return ret;
>  }
>  
> +static struct sock_mapping *create_active(int *evtchn)
> +{
> +	struct sock_mapping *map = NULL;
> +	void *bytes;
> +	int ret, irq = -1, i;
> +
> +	map = kzalloc(sizeof(*map), GFP_KERNEL);
> +	if (map == NULL)
> +		return NULL;
> +
> +	init_waitqueue_head(&map->active.inflight_conn_req);
> +
> +	map->active.ring = (struct pvcalls_data_intf *)
> +		__get_free_page(GFP_KERNEL | __GFP_ZERO);
> +	if (map->active.ring == NULL)
> +		goto out_error;
> +	memset(map->active.ring, 0, XEN_PAGE_SIZE);
> +	map->active.ring->ring_order = RING_ORDER;
> +	bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
> +					map->active.ring->ring_order);
> +	if (bytes == NULL)
> +		goto out_error;
> +	for (i = 0; i < (1 << map->active.ring->ring_order); i++)
> +		map->active.ring->ref[i] = gnttab_grant_foreign_access(
> +			pvcalls_front_dev->otherend_id,
> +			pfn_to_gfn(virt_to_pfn(bytes) + i), 0);
> +
> +	map->active.ref = gnttab_grant_foreign_access(
> +		pvcalls_front_dev->otherend_id,
> +		pfn_to_gfn(virt_to_pfn((void *)map->active.ring)), 0);
> +
> +	ret = xenbus_alloc_evtchn(pvcalls_front_dev, evtchn);
> +	if (ret)
> +		goto out_error;

You are leaking bytes here in case of error.

> +	map->active.data.in = bytes;
> +	map->active.data.out = bytes +
> +		XEN_FLEX_RING_SIZE(map->active.ring->ring_order);
> +	irq = bind_evtchn_to_irqhandler(*evtchn, pvcalls_front_conn_handler,
> +					0, "pvcalls-frontend", map);
> +	if (irq < 0)
> +		goto out_error;
> +
> +	map->active.irq = irq;
> +	map->active_socket = true;
> +	mutex_init(&map->active.in_mutex);
> +	mutex_init(&map->active.out_mutex);
> +
> +	return map;
> +
> +out_error:
> +	if (irq >= 0)
> +		unbind_from_irqhandler(irq, map);
> +	else if (*evtchn >= 0)
> +		xenbus_free_evtchn(pvcalls_front_dev, *evtchn);
> +	kfree(map->active.data.in);
> +	kfree(map->active.ring);
> +	kfree(map);
> +	return NULL;
> +}
> +
> +int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
> +				int addr_len, int flags)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct sock_mapping *map = NULL;
> +	struct xen_pvcalls_request *req;
> +	int notify, req_id, ret, evtchn;
> +
> +	if (!pvcalls_front_dev)
> +		return -ENETUNREACH;
> +	if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
> +		return -ENOTSUPP;
> +
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	spin_lock(&bedata->pvcallss_lock);
> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
> +	if (RING_FULL(&bedata->ring) ||
> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> +		spin_unlock(&bedata->pvcallss_lock);
> +		return -EAGAIN;
> +	}
> +
> +	map = create_active(&evtchn);
> +	if (!map)
> +	    return -ENOMEM;

spin_unlock()?


Juergen

> +
> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> +	req->req_id = req_id;
> +	req->cmd = PVCALLS_CONNECT;
> +	req->u.connect.id = (uint64_t)sock;
> +	memcpy(req->u.connect.addr, addr, sizeof(*addr));
> +	req->u.connect.len = addr_len;
> +	req->u.connect.flags = flags;
> +	req->u.connect.ref = map->active.ref;
> +	req->u.connect.evtchn = evtchn;
> +	
> +	list_add_tail(&map->list, &bedata->socket_mappings);
> +	map->sock = sock;
> +	WRITE_ONCE(sock->sk->sk_send_head, (void *)map);
> +
> +	bedata->ring.req_prod_pvt++;
> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> +	spin_unlock(&bedata->pvcallss_lock);
> +
> +	if (notify)
> +		notify_remote_via_irq(bedata->irq);
> +
> +	wait_event(bedata->inflight_req,
> +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> +
> +	ret = bedata->rsp[req_id].ret;
> +	/* read ret, then set this rsp slot to be reused */
> +	smp_mb();
> +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> +	return ret;
> +}
> +
>  static const struct xenbus_device_id pvcalls_front_ids[] = {
>  	{ "pvcalls" },
>  	{ "" }
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> index b7dabed..63b0417 100644
> --- a/drivers/xen/pvcalls-front.h
> +++ b/drivers/xen/pvcalls-front.h
> @@ -4,5 +4,7 @@
>  #include <linux/net.h>
>  
>  int pvcalls_front_socket(struct socket *sock);
> +int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
> +			  int addr_len, int flags);
>  
>  #endif
> 


^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 05/13] xen/pvcalls: implement bind command
  2017-07-22  0:11   ` Stefano Stabellini
@ 2017-07-24 19:43     ` Juergen Gross
  2017-07-24 22:51       ` Stefano Stabellini
  2017-07-24 22:51       ` Stefano Stabellini
  2017-07-24 19:43     ` Juergen Gross
  1 sibling, 2 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 19:43 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:11, Stefano Stabellini wrote:
> Send PVCALLS_BIND to the backend. Introduce a new structure, part of
> struct sock_mapping, to store information specific to passive sockets.
> 
> Introduce a status field to keep track of the status of the passive
> socket.
> 
> Introduce a waitqueue for the "accept" command (see the accept command
> implementation): it is used to allow only one outstanding accept
> command at any given time and to implement polling on the passive
> socket. Introduce a flags field to keep track of in-flight accept and
> poll commands.
> 
> sock->sk->sk_send_head is not used for ip sockets: reuse the field to
> store a pointer to the struct sock_mapping corresponding to the socket.
> 
> Convert the struct socket pointer into an uint64_t and use it as id for
> the socket to pass to the backend.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 74 +++++++++++++++++++++++++++++++++++++++++++++
>  drivers/xen/pvcalls-front.h |  3 ++
>  2 files changed, 77 insertions(+)
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index 0d305e0..71619bc 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -57,6 +57,23 @@ struct sock_mapping {
>  
>  			wait_queue_head_t inflight_conn_req;
>  		} active;
> +		struct {
> +		/* Socket status */
> +#define PVCALLS_STATUS_UNINITALIZED  0
> +#define PVCALLS_STATUS_BIND          1
> +#define PVCALLS_STATUS_LISTEN        2
> +			uint8_t status;
> +		/*
> +		 * Internal state-machine flags.
> +		 * Only one accept operation can be inflight for a socket.
> +		 * Only one poll operation can be inflight for a given socket.
> +		 */
> +#define PVCALLS_FLAG_ACCEPT_INFLIGHT 0
> +#define PVCALLS_FLAG_POLL_INFLIGHT   1
> +#define PVCALLS_FLAG_POLL_RET        2
> +			uint8_t flags;
> +			wait_queue_head_t inflight_accept_req;
> +		} passive;
>  	};
>  };
>  
> @@ -287,6 +304,63 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
>  	return ret;
>  }
>  
> +int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct sock_mapping *map = NULL;
> +	struct xen_pvcalls_request *req;
> +	int notify, req_id, ret;
> +
> +	if (!pvcalls_front_dev)
> +		return -ENOTCONN;
> +	if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
> +		return -ENOTSUPP;
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	map = kzalloc(sizeof(*map), GFP_KERNEL);
> +	if (map == NULL)
> +		return -ENOMEM;
> +
> +	spin_lock(&bedata->pvcallss_lock);
> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);

BUG_ON() isn't appropriate here; this isn't a fatal condition and the
system can still be used, so return an error instead.
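
E.g. (sketch; -EINVAL is just a placeholder, and the map allocated above
needs freeing, as the branch below already does):

	if (req_id >= PVCALLS_NR_REQ_PER_RING) {
		kfree(map);
		spin_unlock(&bedata->pvcallss_lock);
		return -EINVAL;
	}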


Juergen

> +	if (RING_FULL(&bedata->ring) ||
> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> +		kfree(map);
> +		spin_unlock(&bedata->pvcallss_lock);
> +		return -EAGAIN;
> +	}
> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> +	req->req_id = req_id;
> +	map->sock = sock;
> +	req->cmd = PVCALLS_BIND;
> +	req->u.bind.id = (uint64_t) sock;
> +	memcpy(req->u.bind.addr, addr, sizeof(*addr));
> +	req->u.bind.len = addr_len;
> +
> +	init_waitqueue_head(&map->passive.inflight_accept_req);
> +
> +	list_add_tail(&map->list, &bedata->socketpass_mappings);
> +	WRITE_ONCE(sock->sk->sk_send_head, (void *)map);
> +	map->active_socket = false;
> +
> +	bedata->ring.req_prod_pvt++;
> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> +	spin_unlock(&bedata->pvcallss_lock);
> +	if (notify)
> +		notify_remote_via_irq(bedata->irq);
> +
> +	wait_event(bedata->inflight_req,
> +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> +
> +	map->passive.status = PVCALLS_STATUS_BIND;
> +	ret = bedata->rsp[req_id].ret;
> +	/* read ret, then set this rsp slot to be reused */
> +	smp_mb();
> +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> +	return 0;
> +}
> +
>  static const struct xenbus_device_id pvcalls_front_ids[] = {
>  	{ "pvcalls" },
>  	{ "" }
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> index 63b0417..8b0a274 100644
> --- a/drivers/xen/pvcalls-front.h
> +++ b/drivers/xen/pvcalls-front.h
> @@ -6,5 +6,8 @@
>  int pvcalls_front_socket(struct socket *sock);
>  int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
>  			  int addr_len, int flags);
> +int pvcalls_front_bind(struct socket *sock,
> +		       struct sockaddr *addr,
> +		       int addr_len);
>  
>  #endif
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 06/13] xen/pvcalls: implement listen command
  2017-07-22  0:11   ` [PATCH v1 06/13] xen/pvcalls: implement listen command Stefano Stabellini
@ 2017-07-24 19:44     ` Juergen Gross
  2017-07-24 22:51       ` Stefano Stabellini
  2017-07-24 22:51       ` Stefano Stabellini
  2017-07-24 19:44     ` Juergen Gross
  1 sibling, 2 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 19:44 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:11, Stefano Stabellini wrote:
> Send PVCALLS_LISTEN to the backend.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 49 +++++++++++++++++++++++++++++++++++++++++++++
>  drivers/xen/pvcalls-front.h |  1 +
>  2 files changed, 50 insertions(+)
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index 71619bc..80fd5fb 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -361,6 +361,55 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
>  	return 0;
>  }
>  
> +int pvcalls_front_listen(struct socket *sock, int backlog)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct sock_mapping *map;
> +	struct xen_pvcalls_request *req;
> +	int notify, req_id, ret;
> +
> +	if (!pvcalls_front_dev)
> +		return -ENOTCONN;
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> +	if (!map)
> +		return -ENOTSOCK;
> +
> +	if (map->passive.status != PVCALLS_STATUS_BIND)
> +		return -EOPNOTSUPP;
> +
> +	spin_lock(&bedata->pvcallss_lock);
> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);

BUG_ON() again!


Juergen

> +	if (RING_FULL(&bedata->ring) ||
> +	    bedata->rsp[req_id].req_id != PVCALLS_INVALID_ID) {
> +		spin_unlock(&bedata->pvcallss_lock);
> +		return -EAGAIN;
> +	}
> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> +	req->req_id = req_id;
> +	req->cmd = PVCALLS_LISTEN;
> +	req->u.listen.id = (uint64_t) sock;
> +	req->u.listen.backlog = backlog;
> +
> +	bedata->ring.req_prod_pvt++;
> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> +	spin_unlock(&bedata->pvcallss_lock);
> +	if (notify)
> +		notify_remote_via_irq(bedata->irq);
> +
> +	wait_event(bedata->inflight_req,
> +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> +
> +	map->passive.status = PVCALLS_STATUS_LISTEN;
> +	ret = bedata->rsp[req_id].ret;
> +	/* read ret, then set this rsp slot to be reused */
> +	smp_mb();
> +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> +	return ret;
> +}
> +
>  static const struct xenbus_device_id pvcalls_front_ids[] = {
>  	{ "pvcalls" },
>  	{ "" }
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> index 8b0a274..aa8fe10 100644
> --- a/drivers/xen/pvcalls-front.h
> +++ b/drivers/xen/pvcalls-front.h
> @@ -9,5 +9,6 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
>  int pvcalls_front_bind(struct socket *sock,
>  		       struct sockaddr *addr,
>  		       int addr_len);
> +int pvcalls_front_listen(struct socket *sock, int backlog);
>  
>  #endif
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 07/13] xen/pvcalls: implement accept command
  2017-07-22  0:11     ` Stefano Stabellini
  (?)
  (?)
@ 2017-07-24 19:47     ` Juergen Gross
  2017-07-24 22:53       ` Stefano Stabellini
  2017-07-24 22:53       ` Stefano Stabellini
  -1 siblings, 2 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 19:47 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:11, Stefano Stabellini wrote:
> Send PVCALLS_ACCEPT to the backend. Allocate a new active socket. Make
> sure that only one accept command is executed at any given time by
> setting PVCALLS_FLAG_ACCEPT_INFLIGHT and waiting on the
> inflight_accept_req waitqueue.
> 
> sock->sk->sk_send_head is not used for ip sockets: reuse the field to
> store a pointer to the struct sock_mapping corresponding to the socket.
> 
> Convert the new struct socket pointer into an uint64_t and use it as id
> for the new socket to pass to the backend.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 79 +++++++++++++++++++++++++++++++++++++++++++++
>  drivers/xen/pvcalls-front.h |  3 ++
>  2 files changed, 82 insertions(+)
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index 80fd5fb..f3a04a2 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -410,6 +410,85 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
>  	return ret;
>  }
>  
> +int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct sock_mapping *map;
> +	struct sock_mapping *map2 = NULL;
> +	struct xen_pvcalls_request *req;
> +	int notify, req_id, ret, evtchn;
> +
> +	if (!pvcalls_front_dev)
> +		return -ENOTCONN;
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> +	if (!map)
> +		return -ENOTSOCK;
> +
> +	if (map->passive.status != PVCALLS_STATUS_LISTEN)
> +		return -EINVAL;
> +
> +	/*
> +	 * Backend only supports 1 inflight accept request, will return
> +	 * errors for the others
> +	 */
> +	if (test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> +			     (void *)&map->passive.flags)) {
> +		if (wait_event_interruptible(map->passive.inflight_accept_req,
> +			!test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> +					  (void *)&map->passive.flags))
> +			!= 0)
> +			return -EINTR;
> +	}
> +
> +
> +	newsock->sk = kzalloc(sizeof(*newsock->sk), GFP_KERNEL);
> +	if (newsock->sk == NULL)
> +		return -ENOMEM;
> +
> +	spin_lock(&bedata->pvcallss_lock);
> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);

BUG_ON()?

> +	if (RING_FULL(&bedata->ring) ||
> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> +		spin_unlock(&bedata->pvcallss_lock);
> +		return -EAGAIN;

Leaking newsock->sk?
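
E.g. (sketch; the PVCALLS_FLAG_ACCEPT_INFLIGHT bit set above would
presumably need clearing, and its waiters waking, on this path as well):

	if (RING_FULL(&bedata->ring) ||
	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
		kfree(newsock->sk);
		newsock->sk = NULL;
		spin_unlock(&bedata->pvcallss_lock);
		return -EAGAIN;
	}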


Juergen

> +	}
> +
> +	map2 = create_active(&evtchn);
> +
> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> +	req->req_id = req_id;
> +	req->cmd = PVCALLS_ACCEPT;
> +	req->u.accept.id = (uint64_t) sock;
> +	req->u.accept.ref = map2->active.ref;
> +	req->u.accept.id_new = (uint64_t) newsock;
> +	req->u.accept.evtchn = evtchn;
> +
> +	list_add_tail(&map2->list, &bedata->socket_mappings);
> +	WRITE_ONCE(newsock->sk->sk_send_head, (void *)map2);
> +	map2->sock = newsock;
> +
> +	bedata->ring.req_prod_pvt++;
> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> +	spin_unlock(&bedata->pvcallss_lock);
> +	if (notify)
> +		notify_remote_via_irq(bedata->irq);
> +
> +	wait_event(bedata->inflight_req,
> +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> +
> +	clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags);
> +	wake_up(&map->passive.inflight_accept_req);
> +
> +	ret = bedata->rsp[req_id].ret;
> +	/* read ret, then set this rsp slot to be reused */
> +	smp_mb();
> +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> +	return ret;
> +}
> +
>  static const struct xenbus_device_id pvcalls_front_ids[] = {
>  	{ "pvcalls" },
>  	{ "" }
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> index aa8fe10..ab4f1da 100644
> --- a/drivers/xen/pvcalls-front.h
> +++ b/drivers/xen/pvcalls-front.h
> @@ -10,5 +10,8 @@ int pvcalls_front_bind(struct socket *sock,
>  		       struct sockaddr *addr,
>  		       int addr_len);
>  int pvcalls_front_listen(struct socket *sock, int backlog);
> +int pvcalls_front_accept(struct socket *sock,
> +			 struct socket *newsock,
> +			 int flags);
>  
>  #endif
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 08/13] xen/pvcalls: implement sendmsg
  2017-07-22  0:11     ` Stefano Stabellini
  (?)
@ 2017-07-24 19:51     ` Juergen Gross
  2017-07-24 22:38       ` Stefano Stabellini
  2017-07-24 22:38       ` Stefano Stabellini
  -1 siblings, 2 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 19:51 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:11, Stefano Stabellini wrote:
> Send data to an active socket by copying data to the "out" ring. Take
> the active socket out_mutex so that only one function can access the
> ring at any given time.
> 
> If not enough room is available on the ring, rather than returning
> immediately or sleep-waiting, spin for up to 5000 cycles. This small
> optimization turns out to improve performance significantly.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 109 ++++++++++++++++++++++++++++++++++++++++++++
>  drivers/xen/pvcalls-front.h |   3 ++
>  2 files changed, 112 insertions(+)
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index f3a04a2..bf29f40 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -27,6 +27,7 @@
>  #define PVCALLS_INVALID_ID (UINT_MAX)
>  #define RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
>  #define PVCALLS_NR_REQ_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
> +#define PVCALLS_FRON_MAX_SPIN 5000

Any reason not to name it PVCALLS_FRONT_MAX_SPIN? I first thought you
meant FROM instead.


Juergen

>  
>  struct pvcalls_bedata {
>  	struct xen_pvcalls_front_ring ring;
> @@ -77,6 +78,22 @@ struct sock_mapping {
>  	};
>  };
>  
> +static int pvcalls_front_write_todo(struct sock_mapping *map)
> +{
> +	struct pvcalls_data_intf *intf = map->active.ring;
> +	RING_IDX cons, prod, size = XEN_FLEX_RING_SIZE(intf->ring_order);
> +	int32_t error;
> +
> +	cons = intf->out_cons;
> +	prod = intf->out_prod;
> +	error = intf->out_error;
> +	if (error == -ENOTCONN)
> +		return 0;
> +	if (error != 0)
> +		return error;
> +	return size - pvcalls_queued(prod, cons, size);
> +}
> +
>  static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
>  {
>  	struct xenbus_device *dev = dev_id;
> @@ -304,6 +321,98 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
>  	return ret;
>  }
>  
> +static int __write_ring(struct pvcalls_data_intf *intf,
> +			struct pvcalls_data *data,
> +			struct iov_iter *msg_iter,
> +			size_t len)
> +{
> +	RING_IDX cons, prod, size, masked_prod, masked_cons;
> +	RING_IDX array_size = XEN_FLEX_RING_SIZE(intf->ring_order);
> +	int32_t error;
> +
> +	cons = intf->out_cons;
> +	prod = intf->out_prod;
> +	error = intf->out_error;
> +	/* read indexes before continuing */
> +	virt_mb();
> +
> +	if (error < 0)
> +		return error;
> +
> +	size = pvcalls_queued(prod, cons, array_size);
> +	if (size >= array_size)
> +		return 0;
> +	if (len > array_size - size)
> +		len = array_size - size;
> +
> +	masked_prod = pvcalls_mask(prod, array_size);
> +	masked_cons = pvcalls_mask(cons, array_size);
> +
> +	if (masked_prod < masked_cons) {
> +		copy_from_iter(data->out + masked_prod, len, msg_iter);
> +	} else {
> +		if (len > array_size - masked_prod) {
> +			copy_from_iter(data->out + masked_prod,
> +				       array_size - masked_prod, msg_iter);
> +			copy_from_iter(data->out,
> +				       len - (array_size - masked_prod),
> +				       msg_iter);
> +		} else {
> +			copy_from_iter(data->out + masked_prod, len, msg_iter);
> +		}
> +	}
> +	/* write to ring before updating pointer */
> +	virt_wmb();
> +	intf->out_prod += len;
> +
> +	return len;
> +}
> +
> +int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg,
> +			  size_t len)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct sock_mapping *map;
> +	int sent = 0, tot_sent = 0;
> +	int count = 0, flags;
> +
> +	if (!pvcalls_front_dev)
> +		return -ENOTCONN;
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> +	if (!map)
> +		return -ENOTSOCK;
> +
> +	flags = msg->msg_flags;
> +	if (flags & (MSG_CONFIRM|MSG_DONTROUTE|MSG_EOR|MSG_OOB))
> +		return -EOPNOTSUPP;
> +
> +	mutex_lock(&map->active.out_mutex);
> +	if ((flags & MSG_DONTWAIT) && !pvcalls_front_write_todo(map)) {
> +		mutex_unlock(&map->active.out_mutex);
> +		return -EAGAIN;
> +	}
> +
> +again:
> +	count++;
> +	sent = __write_ring(map->active.ring,
> +			    &map->active.data, &msg->msg_iter,
> +			    len);
> +	if (sent > 0) {
> +		len -= sent;
> +		tot_sent += sent;
> +		notify_remote_via_irq(map->active.irq);
> +	}
> +	if (sent >= 0 && len > 0 && count < PVCALLS_FRON_MAX_SPIN)
> +		goto again;
> +	if (sent < 0)
> +		tot_sent = sent;
> +
> +	mutex_unlock(&map->active.out_mutex);
> +	return tot_sent;
> +}
> +
>  int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
>  {
>  	struct pvcalls_bedata *bedata;
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> index ab4f1da..d937c24 100644
> --- a/drivers/xen/pvcalls-front.h
> +++ b/drivers/xen/pvcalls-front.h
> @@ -13,5 +13,8 @@ int pvcalls_front_bind(struct socket *sock,
>  int pvcalls_front_accept(struct socket *sock,
>  			 struct socket *newsock,
>  			 int flags);
> +int pvcalls_front_sendmsg(struct socket *sock,
> +			  struct msghdr *msg,
> +			  size_t len);
>  
>  #endif
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 09/13] xen/pvcalls: implement recvmsg
  2017-07-22  0:11   ` Stefano Stabellini
  2017-07-24 19:56     ` Juergen Gross
@ 2017-07-24 19:56     ` Juergen Gross
  2017-07-24 22:37       ` Stefano Stabellini
  2017-07-24 22:37       ` Stefano Stabellini
  1 sibling, 2 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 19:56 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:11, Stefano Stabellini wrote:
> Implement recvmsg by copying data from the "in" ring. If not enough data
> is available and the recvmsg call is blocking, then wait on the
> inflight_conn_req waitqueue. Take the active socket in_mutex so that
> only one function can access the ring at any given time.
> 
> If not enough data is available on the ring, rather than returning
> immediately or sleep-waiting, spin for up to 5000 cycles. This small
> optimization turns out to improve performance and latency significantly.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 106 ++++++++++++++++++++++++++++++++++++++++++++
>  drivers/xen/pvcalls-front.h |   4 ++
>  2 files changed, 110 insertions(+)
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index bf29f40..3d1041a 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -94,6 +94,20 @@ static int pvcalls_front_write_todo(struct sock_mapping *map)
>  	return size - pvcalls_queued(prod, cons, size);
>  }
>  
> +static int pvcalls_front_read_todo(struct sock_mapping *map)

Return type bool?
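
E.g. (sketch):

static bool pvcalls_front_read_todo(struct sock_mapping *map)
{
	struct pvcalls_data_intf *intf = map->active.ring;
	RING_IDX cons = intf->in_cons, prod = intf->in_prod;

	return intf->in_error != 0 ||
	       pvcalls_queued(prod, cons,
			      XEN_FLEX_RING_SIZE(intf->ring_order)) != 0;
}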


Juergen

> +{
> +	struct pvcalls_data_intf *intf = map->active.ring;
> +	RING_IDX cons, prod;
> +	int32_t error;
> +
> +	cons = intf->in_cons;
> +	prod = intf->in_prod;
> +	error = intf->in_error;
> +	return (error != 0 ||
> +		pvcalls_queued(prod, cons,
> +			       XEN_FLEX_RING_SIZE(intf->ring_order))) != 0;
> +}
> +
>  static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
>  {
>  	struct xenbus_device *dev = dev_id;
> @@ -413,6 +427,98 @@ int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg,
>  	return tot_sent;
>  }
>  
> +static int __read_ring(struct pvcalls_data_intf *intf,
> +		       struct pvcalls_data *data,
> +		       struct iov_iter *msg_iter,
> +		       size_t len, int flags)
> +{
> +	RING_IDX cons, prod, size, masked_prod, masked_cons;
> +	RING_IDX array_size = XEN_FLEX_RING_SIZE(intf->ring_order);
> +	int32_t error;
> +
> +	cons = intf->in_cons;
> +	prod = intf->in_prod;
> +	error = intf->in_error;
> +	/* get pointers before reading from the ring */
> +	virt_rmb();
> +	if (error < 0)
> +		return error;
> +
> +	size = pvcalls_queued(prod, cons, array_size);
> +	masked_prod = pvcalls_mask(prod, array_size);
> +	masked_cons = pvcalls_mask(cons, array_size);
> +
> +	if (size == 0)
> +		return 0;
> +
> +	if (len > size)
> +		len = size;
> +
> +	if (masked_prod > masked_cons) {
> +		copy_to_iter(data->in + masked_cons, len, msg_iter);
> +	} else {
> +		if (len > (array_size - masked_cons)) {
> +			copy_to_iter(data->in + masked_cons,
> +				     array_size - masked_cons, msg_iter);
> +			copy_to_iter(data->in,
> +				     len - (array_size - masked_cons),
> +				     msg_iter);
> +		} else {
> +			copy_to_iter(data->in + masked_cons, len, msg_iter);
> +		}
> +	}
> +	/* read data from the ring before increasing the index */
> +	virt_mb();
> +	if (!(flags & MSG_PEEK))
> +		intf->in_cons += len;
> +
> +	return len;
> +}
> +
> +int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
> +		     int flags)
> +{
> +	struct pvcalls_bedata *bedata;
> +	int ret = -EAGAIN;
> +	struct sock_mapping *map;
> +	int count = 0;
> +
> +	if (!pvcalls_front_dev)
> +		return -ENOTCONN;
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> +	if (!map)
> +		return -ENOTSOCK;
> +
> +	if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC))
> +		return -EOPNOTSUPP;
> +
> +	mutex_lock(&map->active.in_mutex);
> +	if (len > XEN_FLEX_RING_SIZE(map->active.ring->ring_order))
> +		len = XEN_FLEX_RING_SIZE(map->active.ring->ring_order);
> +
> +	while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) {
> +		if (count < PVCALLS_FRON_MAX_SPIN)
> +			count++;
> +		else
> +			wait_event_interruptible(map->active.inflight_conn_req,
> +						 pvcalls_front_read_todo(map));
> +	}
> +	ret = __read_ring(map->active.ring, &map->active.data,
> +			  &msg->msg_iter, len, flags);
> +
> +	if (ret > 0)
> +		notify_remote_via_irq(map->active.irq);
> +	if (ret == 0)
> +		ret = -EAGAIN;
> +	if (ret == -ENOTCONN)
> +		ret = 0;
> +
> +	mutex_unlock(&map->active.in_mutex);
> +	return ret;
> +}
> +
>  int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
>  {
>  	struct pvcalls_bedata *bedata;
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> index d937c24..de24041 100644
> --- a/drivers/xen/pvcalls-front.h
> +++ b/drivers/xen/pvcalls-front.h
> @@ -16,5 +16,9 @@ int pvcalls_front_accept(struct socket *sock,
>  int pvcalls_front_sendmsg(struct socket *sock,
>  			  struct msghdr *msg,
>  			  size_t len);
> +int pvcalls_front_recvmsg(struct socket *sock,
> +			  struct msghdr *msg,
> +			  size_t len,
> +			  int flags);
>  
>  #endif
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 10/13] xen/pvcalls: implement poll command
  2017-07-22  0:12   ` Stefano Stabellini
@ 2017-07-24 20:08       ` Juergen Gross
  0 siblings, 0 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 20:08 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:12, Stefano Stabellini wrote:
> For active sockets, check the indexes and use the inflight_conn_req
> waitqueue to wait.
> 
> For passive sockets, send PVCALLS_POLL to the backend. Use the
> inflight_accept_req waitqueue if an accept is outstanding. Otherwise use
> the inflight_req waitqueue: inflight_req is awaken when a new response
> is received; on wakeup we check whether the POLL response is arrived by
> looking at the PVCALLS_FLAG_POLL_RET flag. We set the flag from
> pvcalls_front_event_handler, if the response was for a POLL command.
> 
> In pvcalls_front_event_handler, get the struct socket pointer from the
> poll id (we previously converted struct socket* to uint64_t and used it
> as id).
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 123 ++++++++++++++++++++++++++++++++++++++++----
>  drivers/xen/pvcalls-front.h |   3 ++
>  2 files changed, 115 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index 3d1041a..b6cfb7d 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -128,17 +128,29 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
>  		rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
>  
>  		req_id = rsp->req_id;
> -		src = (uint8_t *)&bedata->rsp[req_id];
> -		src += sizeof(rsp->req_id);
> -		dst = (uint8_t *)rsp;
> -		dst += sizeof(rsp->req_id);
> -		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> -		/*
> -		 * First copy the rest of the data, then req_id. It is
> -		 * paired with the barrier when accessing bedata->rsp.
> -		 */
> -		smp_wmb();
> -		WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> +		if (rsp->cmd == PVCALLS_POLL) {
> +			struct socket *sock = (struct socket *) rsp->u.poll.id;
> +			struct sock_mapping *map =
> +				(struct sock_mapping *)
> +				READ_ONCE(sock->sk->sk_send_head);
> +
> +			set_bit(PVCALLS_FLAG_POLL_RET,
> +				(void *)&map->passive.flags);

Add a barrier here to make sure PVCALLS_FLAG_POLL_INFLIGHT is cleared
_after_ setting PVCALLS_FLAG_POLL_RET?
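
E.g. (sketch; set_bit()/clear_bit() are not ordered among themselves):

	set_bit(PVCALLS_FLAG_POLL_RET,
		(void *)&map->passive.flags);
	/* make POLL_RET visible before clearing POLL_INFLIGHT */
	smp_mb__after_atomic();
	clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
		  (void *)&map->passive.flags);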


Juergen

> +			clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> +				  (void *)&map->passive.flags);
> +		} else {
> +			src = (uint8_t *)&bedata->rsp[req_id];
> +			src += sizeof(rsp->req_id);
> +			dst = (uint8_t *)rsp;
> +			dst += sizeof(rsp->req_id);
> +			memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> +			/*
> +			 * First copy the rest of the data, then req_id. It is
> +			 * paired with the barrier when accessing bedata->rsp.
> +			 */
> +			smp_wmb();
> +			WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> +		}
>  
>  		bedata->ring.rsp_cons++;
>  		wake_up(&bedata->inflight_req);
> @@ -704,6 +716,95 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
>  	return ret;
>  }
>  
> +static unsigned int pvcalls_front_poll_passive(struct file *file,
> +					       struct pvcalls_bedata *bedata,
> +					       struct sock_mapping *map,
> +					       poll_table *wait)
> +{
> +	int notify, req_id;
> +	struct xen_pvcalls_request *req;
> +
> +	if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> +		     (void *)&map->passive.flags)) {
> +		poll_wait(file, &map->passive.inflight_accept_req, wait);
> +		return 0;
> +	}
> +
> +	if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
> +			       (void *)&map->passive.flags))
> +		return POLLIN;
> +
> +	if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> +			     (void *)&map->passive.flags)) {
> +		poll_wait(file, &bedata->inflight_req, wait);
> +		return 0;
> +	}
> +
> +	spin_lock(&bedata->pvcallss_lock);
> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
> +	if (RING_FULL(&bedata->ring) ||
> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> +		spin_unlock(&bedata->pvcallss_lock);
> +		return -EAGAIN;
> +	}
> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> +	req->req_id = req_id;
> +	req->cmd = PVCALLS_POLL;
> +	req->u.poll.id = (uint64_t) map->sock;
> +
> +	bedata->ring.req_prod_pvt++;
> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> +	spin_unlock(&bedata->pvcallss_lock);
> +	if (notify)
> +		notify_remote_via_irq(bedata->irq);
> +
> +	poll_wait(file, &bedata->inflight_req, wait);
> +	return 0;
> +}
> +
> +static unsigned int pvcalls_front_poll_active(struct file *file,
> +					      struct pvcalls_bedata *bedata,
> +					      struct sock_mapping *map,
> +					      poll_table *wait)
> +{
> +	unsigned int mask = 0;
> +	int32_t in_error, out_error;
> +	struct pvcalls_data_intf *intf = map->active.ring;
> +
> +	out_error = intf->out_error;
> +	in_error = intf->in_error;
> +
> +	poll_wait(file, &map->active.inflight_conn_req, wait);
> +	if (pvcalls_front_write_todo(map))
> +		mask |= POLLOUT | POLLWRNORM;
> +	if (pvcalls_front_read_todo(map))
> +		mask |= POLLIN | POLLRDNORM;
> +	if (in_error != 0 || out_error != 0)
> +		mask |= POLLERR;
> +
> +	return mask;
> +}
> +
> +unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
> +			       poll_table *wait)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct sock_mapping *map;
> +
> +	if (!pvcalls_front_dev)
> +		return POLLNVAL;
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> +	if (!map)
> +		return POLLNVAL;
> +	if (map->active_socket)
> +		return pvcalls_front_poll_active(file, bedata, map, wait);
> +	else
> +		return pvcalls_front_poll_passive(file, bedata, map, wait);
> +}
> +
>  static const struct xenbus_device_id pvcalls_front_ids[] = {
>  	{ "pvcalls" },
>  	{ "" }
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> index de24041..25e05b8 100644
> --- a/drivers/xen/pvcalls-front.h
> +++ b/drivers/xen/pvcalls-front.h
> @@ -20,5 +20,8 @@ int pvcalls_front_recvmsg(struct socket *sock,
>  			  struct msghdr *msg,
>  			  size_t len,
>  			  int flags);
> +unsigned int pvcalls_front_poll(struct file *file,
> +				struct socket *sock,
> +				poll_table *wait);
>  
>  #endif
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 11/13] xen/pvcalls: implement release command
  2017-07-22  0:12   ` Stefano Stabellini
  2017-07-24 20:14     ` Juergen Gross
@ 2017-07-24 20:14     ` Juergen Gross
  2017-07-25 21:07       ` Stefano Stabellini
  2017-07-25 21:07       ` Stefano Stabellini
  1 sibling, 2 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 20:14 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:12, Stefano Stabellini wrote:
> Send PVCALLS_RELEASE to the backend and wait for a reply. Take both
> in_mutex and out_mutex to avoid concurrent accesses. Then, free the
> socket.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 86 +++++++++++++++++++++++++++++++++++++++++++++
>  drivers/xen/pvcalls-front.h |  1 +
>  2 files changed, 87 insertions(+)
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index b6cfb7d..bd3dfac 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -174,6 +174,24 @@ static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map)
>  	return IRQ_HANDLED;
>  }
>  
> +static void pvcalls_front_free_map(struct pvcalls_bedata *bedata,
> +				   struct sock_mapping *map)
> +{
> +	int i;
> +
> +	spin_lock(&bedata->pvcallss_lock);
> +	if (!list_empty(&map->list))
> +		list_del_init(&map->list);
> +	spin_unlock(&bedata->pvcallss_lock);
> +
> +	/* what if the thread waiting still needs access? */

Is this handled? If not, why is that not a problem?

> +	for (i = 0; i < (1 << map->active.ring->ring_order); i++)
> +		gnttab_end_foreign_access(map->active.ring->ref[i], 0, 0);
> +	gnttab_end_foreign_access(map->active.ref, 0, 0);
> +	free_page((unsigned long)map->active.ring);
> +	unbind_from_irqhandler(map->active.irq, map);
> +}
> +
>  int pvcalls_front_socket(struct socket *sock)
>  {
>  	struct pvcalls_bedata *bedata;
> @@ -805,6 +823,74 @@ unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
>  		return pvcalls_front_poll_passive(file, bedata, map, wait);
>  }
>  
> +int pvcalls_front_release(struct socket *sock)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct sock_mapping *map;
> +	int req_id, notify;
> +	struct xen_pvcalls_request *req;
> +
> +	if (!pvcalls_front_dev)
> +		return -EIO;
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +	if (!bedata)
> +		return -EIO;
> +
> +	if (sock->sk == NULL)
> +		return 0;
> +
> +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> +	if (map == NULL)
> +		return 0;
> +	WRITE_ONCE(sock->sk->sk_send_head, NULL);
> +
> +	spin_lock(&bedata->pvcallss_lock);
> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
> +	if (RING_FULL(&bedata->ring) ||
> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> +		spin_unlock(&bedata->pvcallss_lock);
> +		return -EAGAIN;

Isn't it a problem that you already cleared sock->sk->sk_send_head?
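
For illustration, one way to avoid that would be to detach the mapping
only once the request slot is secured (a sketch of the idea, not the
fix that was actually posted):

	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
	if (map == NULL)
		return 0;

	spin_lock(&bedata->pvcallss_lock);
	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
	if (RING_FULL(&bedata->ring) ||
	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
		spin_unlock(&bedata->pvcallss_lock);
		/* sk_send_head is still set, so the caller can retry */
		return -EAGAIN;
	}
	/* clear it only now that the release request will go through */
	WRITE_ONCE(sock->sk->sk_send_head, NULL);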


Juergen

> +	}
> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> +	req->req_id = req_id;
> +	req->cmd = PVCALLS_RELEASE;
> +	req->u.release.id = (uint64_t)sock;
> +
> +	bedata->ring.req_prod_pvt++;
> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> +	spin_unlock(&bedata->pvcallss_lock);
> +	if (notify)
> +		notify_remote_via_irq(bedata->irq);
> +
> +	wait_event(bedata->inflight_req,
> +		READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> +
> +	if (map->active_socket) {
> +		/* 
> +		 * Set in_error and wake up inflight_conn_req to force
> +		 * recvmsg waiters to exit.
> +		 */
> +		map->active.ring->in_error = -EBADF;
> +		wake_up_interruptible(&map->active.inflight_conn_req);
> +
> +		mutex_lock(&map->active.in_mutex);
> +		mutex_lock(&map->active.out_mutex);
> +		pvcalls_front_free_map(bedata, map);
> +		mutex_unlock(&map->active.out_mutex);
> +		mutex_unlock(&map->active.in_mutex);
> +		kfree(map);
> +	} else {
> +		spin_lock(&bedata->pvcallss_lock);
> +		list_del_init(&map->list);
> +		kfree(map);
> +		spin_unlock(&bedata->pvcallss_lock);
> +	}
> +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> +
> +	return 0;
> +}
> +
>  static const struct xenbus_device_id pvcalls_front_ids[] = {
>  	{ "pvcalls" },
>  	{ "" }
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> index 25e05b8..3332978 100644
> --- a/drivers/xen/pvcalls-front.h
> +++ b/drivers/xen/pvcalls-front.h
> @@ -23,5 +23,6 @@ int pvcalls_front_recvmsg(struct socket *sock,
>  unsigned int pvcalls_front_poll(struct file *file,
>  				struct socket *sock,
>  				poll_table *wait);
> +int pvcalls_front_release(struct socket *sock);
>  
>  #endif
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 12/13] xen/pvcalls: implement frontend disconnect
  2017-07-22  0:12   ` Stefano Stabellini
  2017-07-24 20:16     ` Juergen Gross
@ 2017-07-24 20:16     ` Juergen Gross
  1 sibling, 0 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 20:16 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:12, Stefano Stabellini wrote:
> Implement pvcalls frontend removal function. Go through the list of
> active and passive sockets and free them all, one at a time.
> 
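
A sketch of what such a removal loop could look like, reconstructed
from the description above (the hunks themselves are not quoted in
this reply, and the list-head name below is an assumption):

static int pvcalls_front_remove(struct xenbus_device *dev)
{
	struct pvcalls_bedata *bedata = dev_get_drvdata(&dev->dev);
	struct sock_mapping *map, *n;

	/* walk every tracked socket; "socket_mappings" is an assumed name */
	list_for_each_entry_safe(map, n, &bedata->socket_mappings, list) {
		if (map->active_socket)
			pvcalls_front_free_map(bedata, map);
		list_del(&map->list);
		kfree(map);
	}
	return 0;
}
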
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com

Reviewed-by: Juergen Gross <jgross@suse.com>


Juergen

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 13/13] xen: introduce a Kconfig option to enable the pvcalls frontend
  2017-07-22  0:12   ` [PATCH v1 13/13] xen: introduce a Kconfig option to enable the pvcalls frontend Stefano Stabellini
  2017-07-24 20:17     ` Juergen Gross
@ 2017-07-24 20:17     ` Juergen Gross
  2017-07-24 22:36       ` Stefano Stabellini
  2017-07-24 22:36       ` Stefano Stabellini
  1 sibling, 2 replies; 77+ messages in thread
From: Juergen Gross @ 2017-07-24 20:17 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: linux-kernel, boris.ostrovsky, Stefano Stabellini

On 22/07/17 02:12, Stefano Stabellini wrote:
> Also add pvcalls-front to the Makefile.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/Kconfig  | 9 +++++++++
>  drivers/xen/Makefile | 1 +
>  2 files changed, 10 insertions(+)
> 
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index 4545561..ea5e99f 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -196,6 +196,15 @@ config XEN_PCIDEV_BACKEND
>  
>  	  If in doubt, say m.
>  
> +config XEN_PVCALLS_FRONTEND
> +	bool "XEN PV Calls frontend driver"

tristate?
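
That is, something along these lines (a sketch of the suggested
change, assuming the prompt and dependencies stay as they are):

config XEN_PVCALLS_FRONTEND
	tristate "XEN PV Calls frontend driver"
	depends on INET && XEN

With tristate the frontend can also be built as a module, not only
built into the kernel.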


Juergen

> +	depends on INET && XEN
> +	help
> +	  Experimental frontend for the Xen PV Calls protocol
> +	  (https://xenbits.xen.org/docs/unstable/misc/pvcalls.html). It
> +	  sends a small set of POSIX calls to the backend, which
> +	  implements them.
> +
>  config XEN_PVCALLS_BACKEND
>  	bool "XEN PV Calls backend driver"
>  	depends on INET && XEN && XEN_BACKEND
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 480b928..afb9e03 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -39,6 +39,7 @@ obj-$(CONFIG_XEN_EFI)			+= efi.o
>  obj-$(CONFIG_XEN_SCSI_BACKEND)		+= xen-scsiback.o
>  obj-$(CONFIG_XEN_AUTO_XLATE)		+= xlate_mmu.o
>  obj-$(CONFIG_XEN_PVCALLS_BACKEND)	+= pvcalls-back.o
> +obj-$(CONFIG_XEN_PVCALLS_FRONTEND)	+= pvcalls-front.o
>  xen-evtchn-y				:= evtchn.o
>  xen-gntdev-y				:= gntdev.o
>  xen-gntalloc-y				:= gntalloc.o
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 01/13] xen/pvcalls: introduce the pvcalls xenbus frontend
  2017-07-24 19:06   ` Juergen Gross
@ 2017-07-24 22:32     ` Stefano Stabellini
  2017-07-24 22:32     ` Stefano Stabellini
  1 sibling, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-24 22:32 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, xen-devel, linux-kernel, boris.ostrovsky,
	Stefano Stabellini

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:11, Stefano Stabellini wrote:
> > Introduce a xenbus frontend for the pvcalls protocol, as defined by
> > https://xenbits.xen.org/docs/unstable/misc/pvcalls.html.
> > 
> > This patch only adds the stubs, the code will be added by the following
> > patches.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 68 insertions(+)
> >  create mode 100644 drivers/xen/pvcalls-front.c
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > new file mode 100644
> > index 0000000..173e204
> > --- /dev/null
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -0,0 +1,68 @@
> > +/*
> > + * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + */
> > +
> > +#include <linux/module.h>
> > +
> > +#include <xen/events.h>
> > +#include <xen/grant_table.h>
> > +#include <xen/xen.h>
> > +#include <xen/xenbus.h>
> > +#include <xen/interface/io/pvcalls.h>
> > +
> > +static const struct xenbus_device_id pvcalls_front_ids[] = {
> > +	{ "pvcalls" },
> > +	{ "" }
> > +};
> > +
> > +static int pvcalls_front_remove(struct xenbus_device *dev)
> > +{
> > +	return 0;
> > +}
> > +
> > +static int pvcalls_front_probe(struct xenbus_device *dev,
> > +			  const struct xenbus_device_id *id)
> > +{
> > +	return 0;
> > +}
> > +
> > +static int pvcalls_front_resume(struct xenbus_device *dev)
> > +{
> > +	dev_warn(&dev->dev, "suspend/resume unsupported\n");
> > +	return 0;
> > +}
> 
> Why are you adding a resume function doing nothing but issuing a
> message? Just omit it.

Will do, thanks!
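
For reference, dropping the handler leaves the driver struct looking
roughly like this (a sketch of the follow-up, not the v2 actually
posted):

static struct xenbus_driver pvcalls_front_driver = {
	.ids = pvcalls_front_ids,
	.probe = pvcalls_front_probe,
	.remove = pvcalls_front_remove,
	.otherend_changed = pvcalls_front_changed,
};

The xenbus core simply skips a NULL .resume callback, so no stub
should be needed.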


> > +
> > +static void pvcalls_front_changed(struct xenbus_device *dev,
> > +			    enum xenbus_state backend_state)
> > +{
> > +}
> > +
> > +static struct xenbus_driver pvcalls_front_driver = {
> > +	.ids = pvcalls_front_ids,
> > +	.probe = pvcalls_front_probe,
> > +	.remove = pvcalls_front_remove,
> > +	.resume = pvcalls_front_resume,
> > +	.otherend_changed = pvcalls_front_changed,
> > +};
> > +
> > +static int __init pvcalls_frontend_init(void)
> > +{
> > +	if (!xen_domain())
> > +		return -ENODEV;
> > +
> > +	pr_info("Initialising Xen pvcalls frontend driver\n");
> > +
> > +	return xenbus_register_frontend(&pvcalls_front_driver);
> > +}
> > +
> > +module_init(pvcalls_frontend_init);
> > 
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 13/13] xen: introduce a Kconfig option to enable the pvcalls frontend
  2017-07-24 20:17     ` Juergen Gross
@ 2017-07-24 22:36       ` Stefano Stabellini
  2017-07-24 22:36       ` Stefano Stabellini
  1 sibling, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-24 22:36 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, xen-devel, linux-kernel, boris.ostrovsky,
	Stefano Stabellini

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:12, Stefano Stabellini wrote:
> > Also add pvcalls-front to the Makefile.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/Kconfig  | 9 +++++++++
> >  drivers/xen/Makefile | 1 +
> >  2 files changed, 10 insertions(+)
> > 
> > diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> > index 4545561..ea5e99f 100644
> > --- a/drivers/xen/Kconfig
> > +++ b/drivers/xen/Kconfig
> > @@ -196,6 +196,15 @@ config XEN_PCIDEV_BACKEND
> >  
> >  	  If in doubt, say m.
> >  
> > +config XEN_PVCALLS_FRONTEND
> > +	bool "XEN PV Calls frontend driver"
> 
> tristate?

Makes sense, I'll change.


> > +	depends on INET && XEN
> > +	help
> > +	  Experimental frontend for the Xen PV Calls protocol
> > +	  (https://xenbits.xen.org/docs/unstable/misc/pvcalls.html). It
> > +	  sends a small set of POSIX calls to the backend, which
> > +	  implements them.
> > +
> >  config XEN_PVCALLS_BACKEND
> >  	bool "XEN PV Calls backend driver"
> >  	depends on INET && XEN && XEN_BACKEND
> > diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> > index 480b928..afb9e03 100644
> > --- a/drivers/xen/Makefile
> > +++ b/drivers/xen/Makefile
> > @@ -39,6 +39,7 @@ obj-$(CONFIG_XEN_EFI)			+= efi.o
> >  obj-$(CONFIG_XEN_SCSI_BACKEND)		+= xen-scsiback.o
> >  obj-$(CONFIG_XEN_AUTO_XLATE)		+= xlate_mmu.o
> >  obj-$(CONFIG_XEN_PVCALLS_BACKEND)	+= pvcalls-back.o
> > +obj-$(CONFIG_XEN_PVCALLS_FRONTEND)	+= pvcalls-front.o
> >  xen-evtchn-y				:= evtchn.o
> >  xen-gntdev-y				:= gntdev.o
> >  xen-gntalloc-y				:= gntalloc.o
> > 
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 09/13] xen/pvcalls: implement recvmsg
  2017-07-24 19:56     ` Juergen Gross
@ 2017-07-24 22:37       ` Stefano Stabellini
  2017-07-24 22:37       ` Stefano Stabellini
  1 sibling, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-24 22:37 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, xen-devel, linux-kernel, boris.ostrovsky,
	Stefano Stabellini

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:11, Stefano Stabellini wrote:
> > Implement recvmsg by copying data from the "in" ring. If not enough data
> > is available and the recvmsg call is blocking, then wait on the
> > inflight_conn_req waitqueue. Take the active socket in_mutex so that
> > only one function can access the ring at any given time.
> > 
> > If not enough data is available on the ring, rather than returning
> > immediately or sleep-waiting, spin for up to 5000 cycles. This small
> > optimization turns out to improve performance and latency significantly.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 106 ++++++++++++++++++++++++++++++++++++++++++++
> >  drivers/xen/pvcalls-front.h |   4 ++
> >  2 files changed, 110 insertions(+)
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index bf29f40..3d1041a 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -94,6 +94,20 @@ static int pvcalls_front_write_todo(struct sock_mapping *map)
> >  	return size - pvcalls_queued(prod, cons, size);
> >  }
> >  
> > +static int pvcalls_front_read_todo(struct sock_mapping *map)
> 
> Return type bool?

Yes, good idea
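
For illustration, the bool variant is a mechanical change (a sketch,
not the hunk that was eventually posted):

static bool pvcalls_front_read_todo(struct sock_mapping *map)
{
	struct pvcalls_data_intf *intf = map->active.ring;
	RING_IDX cons, prod;
	int32_t error;

	cons = intf->in_cons;
	prod = intf->in_prod;
	error = intf->in_error;
	return error != 0 ||
	       pvcalls_queued(prod, cons,
			      XEN_FLEX_RING_SIZE(intf->ring_order)) != 0;
}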


> > +{
> > +	struct pvcalls_data_intf *intf = map->active.ring;
> > +	RING_IDX cons, prod;
> > +	int32_t error;
> > +
> > +	cons = intf->in_cons;
> > +	prod = intf->in_prod;
> > +	error = intf->in_error;
> > +	return (error != 0 ||
> > +		pvcalls_queued(prod, cons,
> > +			       XEN_FLEX_RING_SIZE(intf->ring_order))) != 0;
> > +}
> > +
> >  static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
> >  {
> >  	struct xenbus_device *dev = dev_id;
> > @@ -413,6 +427,98 @@ int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg,
> >  	return tot_sent;
> >  }
> >  
> > +static int __read_ring(struct pvcalls_data_intf *intf,
> > +		       struct pvcalls_data *data,
> > +		       struct iov_iter *msg_iter,
> > +		       size_t len, int flags)
> > +{
> > +	RING_IDX cons, prod, size, masked_prod, masked_cons;
> > +	RING_IDX array_size = XEN_FLEX_RING_SIZE(intf->ring_order);
> > +	int32_t error;
> > +
> > +	cons = intf->in_cons;
> > +	prod = intf->in_prod;
> > +	error = intf->in_error;
> > +	/* get pointers before reading from the ring */
> > +	virt_rmb();
> > +	if (error < 0)
> > +		return error;
> > +
> > +	size = pvcalls_queued(prod, cons, array_size);
> > +	masked_prod = pvcalls_mask(prod, array_size);
> > +	masked_cons = pvcalls_mask(cons, array_size);
> > +
> > +	if (size == 0)
> > +		return 0;
> > +
> > +	if (len > size)
> > +		len = size;
> > +
> > +	if (masked_prod > masked_cons) {
> > +		copy_to_iter(data->in + masked_cons, len, msg_iter);
> > +	} else {
> > +		if (len > (array_size - masked_cons)) {
> > +			copy_to_iter(data->in + masked_cons,
> > +				     array_size - masked_cons, msg_iter);
> > +			copy_to_iter(data->in,
> > +				     len - (array_size - masked_cons),
> > +				     msg_iter);
> > +		} else {
> > +			copy_to_iter(data->in + masked_cons, len, msg_iter);
> > +		}
> > +	}
> > +	/* read data from the ring before increasing the index */
> > +	virt_mb();
> > +	if (!(flags & MSG_PEEK))
> > +		intf->in_cons += len;
> > +
> > +	return len;
> > +}
> > +
> > +int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
> > +		     int flags)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	int ret = -EAGAIN;
> > +	struct sock_mapping *map;
> > +	int count = 0;
> > +
> > +	if (!pvcalls_front_dev)
> > +		return -ENOTCONN;
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +
> > +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> > +	if (!map)
> > +		return -ENOTSOCK;
> > +
> > +	if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC))
> > +		return -EOPNOTSUPP;
> > +
> > +	mutex_lock(&map->active.in_mutex);
> > +	if (len > XEN_FLEX_RING_SIZE(map->active.ring->ring_order))
> > +		len = XEN_FLEX_RING_SIZE(map->active.ring->ring_order);
> > +
> > +	while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) {
> > +		if (count < PVCALLS_FRON_MAX_SPIN)
> > +			count++;
> > +		else
> > +			wait_event_interruptible(map->active.inflight_conn_req,
> > +						 pvcalls_front_read_todo(map));
> > +	}
> > +	ret = __read_ring(map->active.ring, &map->active.data,
> > +			  &msg->msg_iter, len, flags);
> > +
> > +	if (ret > 0)
> > +		notify_remote_via_irq(map->active.irq);
> > +	if (ret == 0)
> > +		ret = -EAGAIN;
> > +	if (ret == -ENOTCONN)
> > +		ret = 0;
> > +
> > +	mutex_unlock(&map->active.in_mutex);
> > +	return ret;
> > +}
> > +
> >  int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
> >  {
> >  	struct pvcalls_bedata *bedata;
> > diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> > index d937c24..de24041 100644
> > --- a/drivers/xen/pvcalls-front.h
> > +++ b/drivers/xen/pvcalls-front.h
> > @@ -16,5 +16,9 @@ int pvcalls_front_accept(struct socket *sock,
> >  int pvcalls_front_sendmsg(struct socket *sock,
> >  			  struct msghdr *msg,
> >  			  size_t len);
> > +int pvcalls_front_recvmsg(struct socket *sock,
> > +			  struct msghdr *msg,
> > +			  size_t len,
> > +			  int flags);
> >  
> >  #endif
> > 
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 08/13] xen/pvcalls: implement sendmsg
  2017-07-24 19:51     ` Juergen Gross
@ 2017-07-24 22:38       ` Stefano Stabellini
  2017-07-24 22:38       ` Stefano Stabellini
  1 sibling, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-24 22:38 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, xen-devel, linux-kernel, boris.ostrovsky,
	Stefano Stabellini

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:11, Stefano Stabellini wrote:
> > Send data to an active socket by copying data to the "out" ring. Take
> > the active socket out_mutex so that only one function can access the
> > ring at any given time.
> > 
> > If not enough room is available on the ring, rather than returning
> > immediately or sleep-waiting, spin for up to 5000 cycles. This small
> > optimization turns out to improve performance significantly.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 109 ++++++++++++++++++++++++++++++++++++++++++++
> >  drivers/xen/pvcalls-front.h |   3 ++
> >  2 files changed, 112 insertions(+)
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index f3a04a2..bf29f40 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -27,6 +27,7 @@
> >  #define PVCALLS_INVALID_ID (UINT_MAX)
> >  #define RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
> >  #define PVCALLS_NR_REQ_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
> > +#define PVCALLS_FRON_MAX_SPIN 5000
> 
> Any reason not to name it PVCALLS_FRONT_MAX_SPIN? I first thought you
> meant FROM instead.

Clearly a typo :-)  I'll fix it.


> 
> >  
> >  struct pvcalls_bedata {
> >  	struct xen_pvcalls_front_ring ring;
> > @@ -77,6 +78,22 @@ struct sock_mapping {
> >  	};
> >  };
> >  
> > +static int pvcalls_front_write_todo(struct sock_mapping *map)
> > +{
> > +	struct pvcalls_data_intf *intf = map->active.ring;
> > +	RING_IDX cons, prod, size = XEN_FLEX_RING_SIZE(intf->ring_order);
> > +	int32_t error;
> > +
> > +	cons = intf->out_cons;
> > +	prod = intf->out_prod;
> > +	error = intf->out_error;
> > +	if (error == -ENOTCONN)
> > +		return 0;
> > +	if (error != 0)
> > +		return error;
> > +	return size - pvcalls_queued(prod, cons, size);
> > +}
> > +
> >  static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
> >  {
> >  	struct xenbus_device *dev = dev_id;
> > @@ -304,6 +321,98 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
> >  	return ret;
> >  }
> >  
> > +static int __write_ring(struct pvcalls_data_intf *intf,
> > +			struct pvcalls_data *data,
> > +			struct iov_iter *msg_iter,
> > +			size_t len)
> > +{
> > +	RING_IDX cons, prod, size, masked_prod, masked_cons;
> > +	RING_IDX array_size = XEN_FLEX_RING_SIZE(intf->ring_order);
> > +	int32_t error;
> > +
> > +	cons = intf->out_cons;
> > +	prod = intf->out_prod;
> > +	error = intf->out_error;
> > +	/* read indexes before continuing */
> > +	virt_mb();
> > +
> > +	if (error < 0)
> > +		return error;
> > +
> > +	size = pvcalls_queued(prod, cons, array_size);
> > +	if (size >= array_size)
> > +		return 0;
> > +	if (len > array_size - size)
> > +		len = array_size - size;
> > +
> > +	masked_prod = pvcalls_mask(prod, array_size);
> > +	masked_cons = pvcalls_mask(cons, array_size);
> > +
> > +	if (masked_prod < masked_cons) {
> > +		copy_from_iter(data->out + masked_prod, len, msg_iter);
> > +	} else {
> > +		if (len > array_size - masked_prod) {
> > +			copy_from_iter(data->out + masked_prod,
> > +				       array_size - masked_prod, msg_iter);
> > +			copy_from_iter(data->out,
> > +				       len - (array_size - masked_prod),
> > +				       msg_iter);
> > +		} else {
> > +			copy_from_iter(data->out + masked_prod, len, msg_iter);
> > +		}
> > +	}
> > +	/* write to ring before updating pointer */
> > +	virt_wmb();
> > +	intf->out_prod += len;
> > +
> > +	return len;
> > +}
> > +
> > +int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg,
> > +			  size_t len)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	struct sock_mapping *map;
> > +	int sent = 0, tot_sent = 0;
> > +	int count = 0, flags;
> > +
> > +	if (!pvcalls_front_dev)
> > +		return -ENOTCONN;
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +
> > +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> > +	if (!map)
> > +		return -ENOTSOCK;
> > +
> > +	flags = msg->msg_flags;
> > +	if (flags & (MSG_CONFIRM|MSG_DONTROUTE|MSG_EOR|MSG_OOB))
> > +		return -EOPNOTSUPP;
> > +
> > +	mutex_lock(&map->active.out_mutex);
> > +	if ((flags & MSG_DONTWAIT) && !pvcalls_front_write_todo(map)) {
> > +		mutex_unlock(&map->active.out_mutex);
> > +		return -EAGAIN;
> > +	}
> > +
> > +again:
> > +	count++;
> > +	sent = __write_ring(map->active.ring,
> > +			    &map->active.data, &msg->msg_iter,
> > +			    len);
> > +	if (sent > 0) {
> > +		len -= sent;
> > +		tot_sent += sent;
> > +		notify_remote_via_irq(map->active.irq);
> > +	}
> > +	if (sent >= 0 && len > 0 && count < PVCALLS_FRON_MAX_SPIN)
> > +		goto again;
> > +	if (sent < 0)
> > +		tot_sent = sent;
> > +
> > +	mutex_unlock(&map->active.out_mutex);
> > +	return tot_sent;
> > +}
> > +
> >  int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
> >  {
> >  	struct pvcalls_bedata *bedata;
> > diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> > index ab4f1da..d937c24 100644
> > --- a/drivers/xen/pvcalls-front.h
> > +++ b/drivers/xen/pvcalls-front.h
> > @@ -13,5 +13,8 @@ int pvcalls_front_bind(struct socket *sock,
> >  int pvcalls_front_accept(struct socket *sock,
> >  			 struct socket *newsock,
> >  			 int flags);
> > +int pvcalls_front_sendmsg(struct socket *sock,
> > +			  struct msghdr *msg,
> > +			  size_t len);
> >  
> >  #endif
> > 
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 04/13] xen/pvcalls: implement connect command
  2017-07-24 19:40     ` Juergen Gross
@ 2017-07-24 22:45       ` Stefano Stabellini
  2017-07-24 22:45       ` Stefano Stabellini
  1 sibling, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-24 22:45 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, xen-devel, linux-kernel, boris.ostrovsky,
	Stefano Stabellini

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:11, Stefano Stabellini wrote:
> > Send PVCALLS_CONNECT to the backend. Allocate a new ring and evtchn for
> > the active socket.
> > 
> > Introduce a data structure to keep track of sockets. Introduce a
> > waitqueue to allow the frontend to wait on data coming from the backend
> > on the active socket (recvmsg command).
> > 
> > Two mutexes (one for reads and one for writes) will be used to protect
> > the active socket in and out rings from concurrent accesses.
> > 
> > sock->sk->sk_send_head is not used for IP sockets: reuse the field to
> > store a pointer to the struct sock_mapping corresponding to the socket.
> > This way, we can easily get the struct sock_mapping from the struct
> > socket.
> > 
> > Convert the struct socket pointer into a uint64_t and use it as id for
> > the new socket to pass to the backend.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 153 ++++++++++++++++++++++++++++++++++++++++++++
> >  drivers/xen/pvcalls-front.h |   2 +
> >  2 files changed, 155 insertions(+)
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index 7933c73..0d305e0 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -13,6 +13,8 @@
> >   */
> >  
> >  #include <linux/module.h>
> > +#include <linux/net.h>
> > +#include <linux/socket.h>
> >  
> >  #include <xen/events.h>
> >  #include <xen/grant_table.h>
> > @@ -20,6 +22,8 @@
> >  #include <xen/xenbus.h>
> >  #include <xen/interface/io/pvcalls.h>
> >  
> > +#include <net/sock.h>
> > +
> >  #define PVCALLS_INVALID_ID (UINT_MAX)
> >  #define RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
> >  #define PVCALLS_NR_REQ_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
> > @@ -38,6 +42,24 @@ struct pvcalls_bedata {
> >  };
> >  struct xenbus_device *pvcalls_front_dev;
> >  
> > +struct sock_mapping {
> > +	bool active_socket;
> > +	struct list_head list;
> > +	struct socket *sock;
> > +	union {
> > +		struct {
> > +			int irq;
> > +			grant_ref_t ref;
> > +			struct pvcalls_data_intf *ring;
> > +			struct pvcalls_data data;
> > +			struct mutex in_mutex;
> > +			struct mutex out_mutex;
> > +
> > +			wait_queue_head_t inflight_conn_req;
> > +		} active;
> > +	};
> > +};
> > +
> >  static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
> >  {
> >  	struct xenbus_device *dev = dev_id;
> > @@ -80,6 +102,18 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
> >  	return IRQ_HANDLED;
> >  }
> >  
> > +static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map)
> > +{
> > +	struct sock_mapping *map = sock_map;
> > +
> > +	if (map == NULL)
> > +		return IRQ_HANDLED;
> > +
> > +	wake_up_interruptible(&map->active.inflight_conn_req);
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
> >  int pvcalls_front_socket(struct socket *sock)
> >  {
> >  	struct pvcalls_bedata *bedata;
> > @@ -134,6 +168,125 @@ int pvcalls_front_socket(struct socket *sock)
> >  	return ret;
> >  }
> >  
> > +static struct sock_mapping *create_active(int *evtchn)
> > +{
> > +	struct sock_mapping *map = NULL;
> > +	void *bytes;
> > +	int ret, irq = -1, i;
> > +
> > +	map = kzalloc(sizeof(*map), GFP_KERNEL);
> > +	if (map == NULL)
> > +		return NULL;
> > +
> > +	init_waitqueue_head(&map->active.inflight_conn_req);
> > +
> > +	map->active.ring = (struct pvcalls_data_intf *)
> > +		__get_free_page(GFP_KERNEL | __GFP_ZERO);
> > +	if (map->active.ring == NULL)
> > +		goto out_error;
> > +	memset(map->active.ring, 0, XEN_PAGE_SIZE);
> > +	map->active.ring->ring_order = RING_ORDER;
> > +	bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
> > +					map->active.ring->ring_order);
> > +	if (bytes == NULL)
> > +		goto out_error;
> > +	for (i = 0; i < (1 << map->active.ring->ring_order); i++)
> > +		map->active.ring->ref[i] = gnttab_grant_foreign_access(
> > +			pvcalls_front_dev->otherend_id,
> > +			pfn_to_gfn(virt_to_pfn(bytes) + i), 0);
> > +
> > +	map->active.ref = gnttab_grant_foreign_access(
> > +		pvcalls_front_dev->otherend_id,
> > +		pfn_to_gfn(virt_to_pfn((void *)map->active.ring)), 0);
> > +
> > +	ret = xenbus_alloc_evtchn(pvcalls_front_dev, evtchn);
> > +	if (ret)
> > +		goto out_error;
> 
> You are leaking bytes here in case of error.

well spotted, I'll move "map->active.data.in = bytes" up
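
Concretely, a minimal sketch of the reordered sequence (same fields as
in the patch): with map->active.data.in assigned before the event
channel allocation, the out_error path can reach and free "bytes" on
every failure after this point:

        map->active.data.in = bytes;
        map->active.data.out = bytes +
                XEN_FLEX_RING_SIZE(map->active.ring->ring_order);

        ret = xenbus_alloc_evtchn(pvcalls_front_dev, evtchn);
        if (ret)
                goto out_error; /* "bytes" now freed via map->active.data.in */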


> > +	map->active.data.in = bytes;
> > +	map->active.data.out = bytes +
> > +		XEN_FLEX_RING_SIZE(map->active.ring->ring_order);
> > +	irq = bind_evtchn_to_irqhandler(*evtchn, pvcalls_front_conn_handler,
> > +					0, "pvcalls-frontend", map);
> > +	if (irq < 0)
> > +		goto out_error;
> > +
> > +	map->active.irq = irq;
> > +	map->active_socket = true;
> > +	mutex_init(&map->active.in_mutex);
> > +	mutex_init(&map->active.out_mutex);
> > +
> > +	return map;
> > +
> > +out_error:
> > +	if (irq >= 0)
> > +		unbind_from_irqhandler(irq, map);
> > +	else if (*evtchn >= 0)
> > +		xenbus_free_evtchn(pvcalls_front_dev, *evtchn);
> > +	kfree(map->active.data.in);
> > +	kfree(map->active.ring);
> > +	kfree(map);
> > +	return NULL;
> > +}
> > +
> > +int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
> > +				int addr_len, int flags)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	struct sock_mapping *map = NULL;
> > +	struct xen_pvcalls_request *req;
> > +	int notify, req_id, ret, evtchn;
> > +
> > +	if (!pvcalls_front_dev)
> > +		return -ENETUNREACH;
> > +	if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
> > +		return -ENOTSUPP;
> > +
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +
> > +	spin_lock(&bedata->pvcallss_lock);
> > +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> > +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
> > +	if (RING_FULL(&bedata->ring) ||
> > +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> > +		spin_unlock(&bedata->pvcallss_lock);
> > +		return -EAGAIN;
> > +	}
> > +
> > +	map = create_active(&evtchn);
> > +	if (!map)
> > +	    return -ENOMEM;
> 
> spin_unlock()?

Yes, I'll fix
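
Something along these lines (sketch):

        map = create_active(&evtchn);
        if (!map) {
                spin_unlock(&bedata->pvcallss_lock);
                return -ENOMEM;
        }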


> > +
> > +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> > +	req->req_id = req_id;
> > +	req->cmd = PVCALLS_CONNECT;
> > +	req->u.connect.id = (uint64_t)sock;
> > +	memcpy(req->u.connect.addr, addr, sizeof(*addr));
> > +	req->u.connect.len = addr_len;
> > +	req->u.connect.flags = flags;
> > +	req->u.connect.ref = map->active.ref;
> > +	req->u.connect.evtchn = evtchn;
> > +	
> > +	list_add_tail(&map->list, &bedata->socket_mappings);
> > +	map->sock = sock;
> > +	WRITE_ONCE(sock->sk->sk_send_head, (void *)map);
> > +
> > +	bedata->ring.req_prod_pvt++;
> > +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> > +	spin_unlock(&bedata->pvcallss_lock);
> > +
> > +	if (notify)
> > +		notify_remote_via_irq(bedata->irq);
> > +
> > +	wait_event(bedata->inflight_req,
> > +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> > +
> > +	ret = bedata->rsp[req_id].ret;
> > +	/* read ret, then set this rsp slot to be reused */
> > +	smp_mb();
> > +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> > +	return ret;
> > +}
> > +
> >  static const struct xenbus_device_id pvcalls_front_ids[] = {
> >  	{ "pvcalls" },
> >  	{ "" }
> > diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> > index b7dabed..63b0417 100644
> > --- a/drivers/xen/pvcalls-front.h
> > +++ b/drivers/xen/pvcalls-front.h
> > @@ -4,5 +4,7 @@
> >  #include <linux/net.h>
> >  
> >  int pvcalls_front_socket(struct socket *sock);
> > +int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
> > +			  int addr_len, int flags);
> >  
> >  #endif
> > 
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 05/13] xen/pvcalls: implement bind command
  2017-07-24 19:43     ` Juergen Gross
  2017-07-24 22:51       ` Stefano Stabellini
@ 2017-07-24 22:51       ` Stefano Stabellini
  1 sibling, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-24 22:51 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, xen-devel, linux-kernel, boris.ostrovsky,
	Stefano Stabellini

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:11, Stefano Stabellini wrote:
> > Send PVCALLS_BIND to the backend. Introduce a new structure, part of
> > struct sock_mapping, to store information specific to passive sockets.
> > 
> > Introduce a status field to keep track of the status of the passive
> > socket.
> > 
> > Introduce a waitqueue for the "accept" command (see the accept command
> > implementation): it is used to allow only one outstanding accept
> > command at any given time and to implement polling on the passive
> > socket. Introduce a flags field to keep track of in-flight accept and
> > poll commands.
> > 
> > sock->sk->sk_send_head is not used for IP sockets: reuse the field to
> > store a pointer to the struct sock_mapping corresponding to the socket.
> > 
> > Convert the struct socket pointer into a uint64_t and use it as id for
> > the socket to pass to the backend.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 74 +++++++++++++++++++++++++++++++++++++++++++++
> >  drivers/xen/pvcalls-front.h |  3 ++
> >  2 files changed, 77 insertions(+)
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index 0d305e0..71619bc 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -57,6 +57,23 @@ struct sock_mapping {
> >  
> >  			wait_queue_head_t inflight_conn_req;
> >  		} active;
> > +		struct {
> > +		/* Socket status */
> > +#define PVCALLS_STATUS_UNINITALIZED  0
> > +#define PVCALLS_STATUS_BIND          1
> > +#define PVCALLS_STATUS_LISTEN        2
> > +			uint8_t status;
> > +		/*
> > +		 * Internal state-machine flags.
> > +		 * Only one accept operation can be inflight for a socket.
> > +		 * Only one poll operation can be inflight for a given socket.
> > +		 */
> > +#define PVCALLS_FLAG_ACCEPT_INFLIGHT 0
> > +#define PVCALLS_FLAG_POLL_INFLIGHT   1
> > +#define PVCALLS_FLAG_POLL_RET        2
> > +			uint8_t flags;
> > +			wait_queue_head_t inflight_accept_req;
> > +		} passive;
> >  	};
> >  };
> >  
> > @@ -287,6 +304,63 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
> >  	return ret;
> >  }
> >  
> > +int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	struct sock_mapping *map = NULL;
> > +	struct xen_pvcalls_request *req;
> > +	int notify, req_id, ret;
> > +
> > +	if (!pvcalls_front_dev)
> > +		return -ENOTCONN;
> > +	if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
> > +		return -ENOTSUPP;
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +
> > +	map = kzalloc(sizeof(*map), GFP_KERNEL);
> > +	if (map == NULL)
> > +		return -ENOMEM;
> > +
> > +	spin_lock(&bedata->pvcallss_lock);
> > +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> > +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
> 
> BUG_ON() isn't appropriate here. The system can still be used.

Yes, and checkpatch.pl also told me the same. This condition cannot
actually happen: req_id is the producer index masked with
(RING_SIZE - 1), so it is always smaller than PVCALLS_NR_REQ_PER_RING.
I'll just remove the BUG_ON.


> > +	if (RING_FULL(&bedata->ring) ||
> > +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> > +		kfree(map);
> > +		spin_unlock(&bedata->pvcallss_lock);
> > +		return -EAGAIN;
> > +	}
> > +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> > +	req->req_id = req_id;
> > +	map->sock = sock;
> > +	req->cmd = PVCALLS_BIND;
> > +	req->u.bind.id = (uint64_t) sock;
> > +	memcpy(req->u.bind.addr, addr, sizeof(*addr));
> > +	req->u.bind.len = addr_len;
> > +
> > +	init_waitqueue_head(&map->passive.inflight_accept_req);
> > +
> > +	list_add_tail(&map->list, &bedata->socketpass_mappings);
> > +	WRITE_ONCE(sock->sk->sk_send_head, (void *)map);
> > +	map->active_socket = false;
> > +
> > +	bedata->ring.req_prod_pvt++;
> > +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> > +	spin_unlock(&bedata->pvcallss_lock);
> > +	if (notify)
> > +		notify_remote_via_irq(bedata->irq);
> > +
> > +	wait_event(bedata->inflight_req,
> > +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> > +
> > +	map->passive.status = PVCALLS_STATUS_BIND;
> > +	ret = bedata->rsp[req_id].ret;
> > +	/* read ret, then set this rsp slot to be reused */
> > +	smp_mb();
> > +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> > +	return 0;
> > +}
> > +
> >  static const struct xenbus_device_id pvcalls_front_ids[] = {
> >  	{ "pvcalls" },
> >  	{ "" }
> > diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> > index 63b0417..8b0a274 100644
> > --- a/drivers/xen/pvcalls-front.h
> > +++ b/drivers/xen/pvcalls-front.h
> > @@ -6,5 +6,8 @@
> >  int pvcalls_front_socket(struct socket *sock);
> >  int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
> >  			  int addr_len, int flags);
> > +int pvcalls_front_bind(struct socket *sock,
> > +		       struct sockaddr *addr,
> > +		       int addr_len);
> >  
> >  #endif
> > 
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 06/13] xen/pvcalls: implement listen command
  2017-07-24 19:44     ` Juergen Gross
@ 2017-07-24 22:51       ` Stefano Stabellini
  2017-07-24 22:51       ` Stefano Stabellini
  1 sibling, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-24 22:51 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, xen-devel, linux-kernel, boris.ostrovsky,
	Stefano Stabellini

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:11, Stefano Stabellini wrote:
> > Send PVCALLS_LISTEN to the backend.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 49 +++++++++++++++++++++++++++++++++++++++++++++
> >  drivers/xen/pvcalls-front.h |  1 +
> >  2 files changed, 50 insertions(+)
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index 71619bc..80fd5fb 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -361,6 +361,55 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
> >  	return 0;
> >  }
> >  
> > +int pvcalls_front_listen(struct socket *sock, int backlog)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	struct sock_mapping *map;
> > +	struct xen_pvcalls_request *req;
> > +	int notify, req_id, ret;
> > +
> > +	if (!pvcalls_front_dev)
> > +		return -ENOTCONN;
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +
> > +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> > +	if (!map)
> > +		return -ENOTSOCK;
> > +
> > +	if (map->passive.status != PVCALLS_STATUS_BIND)
> > +		return -EOPNOTSUPP;
> > +
> > +	spin_lock(&bedata->pvcallss_lock);
> > +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> > +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
> 
> BUG_ON() again!

Yes, I'll remove it from every patch


> > +	if (RING_FULL(&bedata->ring) ||
> > +	    bedata->rsp[req_id].req_id != PVCALLS_INVALID_ID) {
> > +		spin_unlock(&bedata->pvcallss_lock);
> > +		return -EAGAIN;
> > +	}
> > +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> > +	req->req_id = req_id;
> > +	req->cmd = PVCALLS_LISTEN;
> > +	req->u.listen.id = (uint64_t) sock;
> > +	req->u.listen.backlog = backlog;
> > +
> > +	bedata->ring.req_prod_pvt++;
> > +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> > +	spin_unlock(&bedata->pvcallss_lock);
> > +	if (notify)
> > +		notify_remote_via_irq(bedata->irq);
> > +
> > +	wait_event(bedata->inflight_req,
> > +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> > +
> > +	map->passive.status = PVCALLS_STATUS_LISTEN;
> > +	ret = bedata->rsp[req_id].ret;
> > +	/* read ret, then set this rsp slot to be reused */
> > +	smp_mb();
> > +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> > +	return ret;
> > +}
> > +
> >  static const struct xenbus_device_id pvcalls_front_ids[] = {
> >  	{ "pvcalls" },
> >  	{ "" }
> > diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> > index 8b0a274..aa8fe10 100644
> > --- a/drivers/xen/pvcalls-front.h
> > +++ b/drivers/xen/pvcalls-front.h
> > @@ -9,5 +9,6 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
> >  int pvcalls_front_bind(struct socket *sock,
> >  		       struct sockaddr *addr,
> >  		       int addr_len);
> > +int pvcalls_front_listen(struct socket *sock, int backlog);
> >  
> >  #endif
> > 
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 07/13] xen/pvcalls: implement accept command
  2017-07-24 19:47     ` Juergen Gross
  2017-07-24 22:53       ` Stefano Stabellini
@ 2017-07-24 22:53       ` Stefano Stabellini
  1 sibling, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-24 22:53 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, xen-devel, linux-kernel, boris.ostrovsky,
	Stefano Stabellini

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:11, Stefano Stabellini wrote:
> > Send PVCALLS_ACCEPT to the backend. Allocate a new active socket. Make
> > sure that only one accept command is executed at any given time by
> > setting PVCALLS_FLAG_ACCEPT_INFLIGHT and waiting on the
> > inflight_accept_req waitqueue.
> > 
> > sock->sk->sk_send_head is not used for IP sockets: reuse the field to
> > store a pointer to the struct sock_mapping corresponding to the socket.
> > 
> > Convert the new struct socket pointer into a uint64_t and use it as id
> > for the new socket to pass to the backend.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 79 +++++++++++++++++++++++++++++++++++++++++++++
> >  drivers/xen/pvcalls-front.h |  3 ++
> >  2 files changed, 82 insertions(+)
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index 80fd5fb..f3a04a2 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -410,6 +410,85 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
> >  	return ret;
> >  }
> >  
> > +int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	struct sock_mapping *map;
> > +	struct sock_mapping *map2 = NULL;
> > +	struct xen_pvcalls_request *req;
> > +	int notify, req_id, ret, evtchn;
> > +
> > +	if (!pvcalls_front_dev)
> > +		return -ENOTCONN;
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +
> > +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> > +	if (!map)
> > +		return -ENOTSOCK;
> > +
> > +	if (map->passive.status != PVCALLS_STATUS_LISTEN)
> > +		return -EINVAL;
> > +
> > +	/*
> > +	 * Backend only supports 1 inflight accept request, will return
> > +	 * errors for the others
> > +	 */
> > +	if (test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> > +			     (void *)&map->passive.flags)) {
> > +		if (wait_event_interruptible(map->passive.inflight_accept_req,
> > +			!test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> > +					  (void *)&map->passive.flags))
> > +			!= 0)
> > +			return -EINTR;
> > +	}
> > +
> > +
> > +	newsock->sk = kzalloc(sizeof(*newsock->sk), GFP_KERNEL);
> > +	if (newsock->sk == NULL)
> > +		return -ENOMEM;
> > +
> > +	spin_lock(&bedata->pvcallss_lock);
> > +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> > +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
> 
> BUG_ON()?
> 
> > +	if (RING_FULL(&bedata->ring) ||
> > +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> > +		spin_unlock(&bedata->pvcallss_lock);
> > +		return -EAGAIN;
> 
> Leaking newsock->sk?

I'll fix
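
A minimal version of the fix (sketch), freeing the freshly allocated
sk before dropping the lock:

        if (RING_FULL(&bedata->ring) ||
            READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
                kfree(newsock->sk);
                newsock->sk = NULL;
                spin_unlock(&bedata->pvcallss_lock);
                return -EAGAIN;
        }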


> > +	}
> > +
> > +	map2 = create_active(&evtchn);
> > +
> > +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> > +	req->req_id = req_id;
> > +	req->cmd = PVCALLS_ACCEPT;
> > +	req->u.accept.id = (uint64_t) sock;
> > +	req->u.accept.ref = map2->active.ref;
> > +	req->u.accept.id_new = (uint64_t) newsock;
> > +	req->u.accept.evtchn = evtchn;
> > +
> > +	list_add_tail(&map2->list, &bedata->socket_mappings);
> > +	WRITE_ONCE(newsock->sk->sk_send_head, (void *)map2);
> > +	map2->sock = newsock;
> > +
> > +	bedata->ring.req_prod_pvt++;
> > +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> > +	spin_unlock(&bedata->pvcallss_lock);
> > +	if (notify)
> > +		notify_remote_via_irq(bedata->irq);
> > +
> > +	wait_event(bedata->inflight_req,
> > +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> > +
> > +	clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags);
> > +	wake_up(&map->passive.inflight_accept_req);
> > +
> > +	ret = bedata->rsp[req_id].ret;
> > +	/* read ret, then set this rsp slot to be reused */
> > +	smp_mb();
> > +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> > +	return ret;
> > +}
> > +
> >  static const struct xenbus_device_id pvcalls_front_ids[] = {
> >  	{ "pvcalls" },
> >  	{ "" }
> > diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> > index aa8fe10..ab4f1da 100644
> > --- a/drivers/xen/pvcalls-front.h
> > +++ b/drivers/xen/pvcalls-front.h
> > @@ -10,5 +10,8 @@ int pvcalls_front_bind(struct socket *sock,
> >  		       struct sockaddr *addr,
> >  		       int addr_len);
> >  int pvcalls_front_listen(struct socket *sock, int backlog);
> > +int pvcalls_front_accept(struct socket *sock,
> > +			 struct socket *newsock,
> > +			 int flags);
> >  
> >  #endif
> > 
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 02/13] xen/pvcalls: connect to the backend
  2017-07-24 19:23     ` Juergen Gross
  2017-07-25 20:10       ` Stefano Stabellini
@ 2017-07-25 20:10       ` Stefano Stabellini
  1 sibling, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-25 20:10 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, xen-devel, linux-kernel, boris.ostrovsky,
	Stefano Stabellini

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:11, Stefano Stabellini wrote:
> > Implement the probe function for the pvcalls frontend. Read the
> > supported versions, max-page-order and function-calls nodes from
> > xenstore.
> > 
> > Introduce a data structure named pvcalls_bedata. It contains pointers to
> > the command ring, the event channel, a list of active sockets and a list
> > of passive sockets. List accesses are protected by a spin_lock.
> > 
> > Introduce a waitqueue to allow waiting for a response on commands sent
> > to the backend.
> > 
> > Introduce an array of struct xen_pvcalls_response to store commands
> > responses.
> > 
> > Only one frontend<->backend connection is supported at any given time
> > for a guest. Store the active frontend device in a static pointer.
> > 
> > Introduce a stub function for the event handler.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 153 ++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 153 insertions(+)
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index 173e204..fb08ebf 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -20,6 +20,29 @@
> >  #include <xen/xenbus.h>
> >  #include <xen/interface/io/pvcalls.h>
> >  
> > +#define PVCALLS_INVALID_ID (UINT_MAX)
> > +#define RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
> > +#define PVCALLS_NR_REQ_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
> > +
> > +struct pvcalls_bedata {
> > +	struct xen_pvcalls_front_ring ring;
> > +	grant_ref_t ref;
> > +	int irq;
> > +
> > +	struct list_head socket_mappings;
> > +	struct list_head socketpass_mappings;
> > +	spinlock_t pvcallss_lock;
> > +
> > +	wait_queue_head_t inflight_req;
> > +	struct xen_pvcalls_response rsp[PVCALLS_NR_REQ_PER_RING];
> > +};
> > +struct xenbus_device *pvcalls_front_dev;
> > +
> > +static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
> > +{
> > +	return IRQ_HANDLED;
> > +}
> > +
> >  static const struct xenbus_device_id pvcalls_front_ids[] = {
> >  	{ "pvcalls" },
> >  	{ "" }
> > @@ -33,7 +56,114 @@ static int pvcalls_front_remove(struct xenbus_device *dev)
> >  static int pvcalls_front_probe(struct xenbus_device *dev,
> >  			  const struct xenbus_device_id *id)
> >  {
> > +	int ret = -EFAULT, evtchn, ref = -1, i;
> > +	unsigned int max_page_order, function_calls, len;
> > +	char *versions;
> > +	grant_ref_t gref_head = 0;
> > +	struct xenbus_transaction xbt;
> > +	struct pvcalls_bedata *bedata = NULL;
> > +	struct xen_pvcalls_sring *sring;
> > +
> > +	if (pvcalls_front_dev != NULL) {
> > +		dev_err(&dev->dev, "only one PV Calls connection supported\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
> > +	if (!len)
> > +		return -EINVAL;
> > +	if (strcmp(versions, "1")) {
> > +		kfree(versions);
> > +		return -EINVAL;
> > +	}
> > +	kfree(versions);
> > +	ret = xenbus_scanf(XBT_NIL, dev->otherend,
> > +			   "max-page-order", "%u", &max_page_order);
> 
> Use xenbus_read_unsigned() instead?

OK


> > +	if (ret <= 0)
> > +		return -ENODEV;
> > +	if (max_page_order < RING_ORDER)
> > +		return -ENODEV;
> > +	ret = xenbus_scanf(XBT_NIL, dev->otherend,
> > +			   "function-calls", "%u", &function_calls);
> 
> xenbus_read_unsigned() again?

OK
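
For reference, a sketch of both reads with xenbus_read_unsigned(),
which returns the supplied default when the node is absent or
malformed, so the error cases collapse into the value checks:

        max_page_order = xenbus_read_unsigned(dev->otherend,
                                              "max-page-order", 0);
        if (max_page_order < RING_ORDER)
                return -ENODEV;

        function_calls = xenbus_read_unsigned(dev->otherend,
                                              "function-calls", 0);
        if (function_calls != 1)
                return -ENODEV;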


> > +	if (ret <= 0 || function_calls != 1)
> > +		return -ENODEV;
> > +	pr_info("%s max-page-order is %u\n", __func__, max_page_order);
> > +
> > +	bedata = kzalloc(sizeof(struct pvcalls_bedata), GFP_KERNEL);
> > +	if (!bedata)
> > +		return -ENOMEM;
> > +
> 
> You should call dev_set_drvdata() here already, otherwise entering the
> error path will dereference a NULL pointer instead of bedata.

OK
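
I.e. (sketch), so that pvcalls_front_remove() sees a valid bedata on
the error path:

        bedata = kzalloc(sizeof(struct pvcalls_bedata), GFP_KERNEL);
        if (!bedata)
                return -ENOMEM;
        dev_set_drvdata(&dev->dev, bedata);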


> > +	init_waitqueue_head(&bedata->inflight_req);
> > +	for (i = 0; i < PVCALLS_NR_REQ_PER_RING; i++)
> > +		bedata->rsp[i].req_id = PVCALLS_INVALID_ID;
> > +
> > +	sring = (struct xen_pvcalls_sring *) __get_free_page(GFP_KERNEL |
> > +							     __GFP_ZERO);
> > +	if (!sring)
> > +		goto error;
> 
> ret will be 1 here. Shouldn't you set it to -ENOMEM?

Yes, I'll do that
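
Sketch:

        sring = (struct xen_pvcalls_sring *) __get_free_page(GFP_KERNEL |
                                                             __GFP_ZERO);
        if (!sring) {
                ret = -ENOMEM;
                goto error;
        }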


> 
> > +	SHARED_RING_INIT(sring);
> > +	FRONT_RING_INIT(&bedata->ring, sring, XEN_PAGE_SIZE);
> > +
> > +	ret = xenbus_alloc_evtchn(dev, &evtchn);
> > +	if (ret)
> > +		goto error;
> > +
> > +	bedata->irq = bind_evtchn_to_irqhandler(evtchn,
> > +						pvcalls_front_event_handler,
> > +						0, "pvcalls-frontend", dev);
> > +	if (bedata->irq < 0) {
> > +		ret = bedata->irq;
> > +		goto error;
> > +	}
> > +
> > +	ret = gnttab_alloc_grant_references(1, &gref_head);
> > +	if (ret < 0)
> > +		goto error;
> > +	bedata->ref = ref = gnttab_claim_grant_reference(&gref_head);
> > +	if (ref < 0)
> > +		goto error;
> 
> Setting ret?

OK
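
Sketch, propagating the claim failure:

        bedata->ref = ref = gnttab_claim_grant_reference(&gref_head);
        if (ref < 0) {
                ret = ref;
                goto error;
        }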

> 
> > +	gnttab_grant_foreign_access_ref(ref, dev->otherend_id,
> > +					virt_to_gfn((void *)sring), 0);
> > +
> > + again:
> > +	ret = xenbus_transaction_start(&xbt);
> > +	if (ret) {
> > +		xenbus_dev_fatal(dev, ret, "starting transaction");
> > +		goto error;
> > +	}
> > +	ret = xenbus_printf(xbt, dev->nodename, "version", "%u", 1);
> > +	if (ret)
> > +		goto error_xenbus;
> > +	ret = xenbus_printf(xbt, dev->nodename, "ring-ref", "%d", ref);
> > +	if (ret)
> > +		goto error_xenbus;
> > +	ret = xenbus_printf(xbt, dev->nodename, "port", "%u",
> > +			    evtchn);
> > +	if (ret)
> > +		goto error_xenbus;
> > +	ret = xenbus_transaction_end(xbt, 0);
> > +	if (ret) {
> > +		if (ret == -EAGAIN)
> > +			goto again;
> > +		xenbus_dev_fatal(dev, ret, "completing transaction");
> > +		goto error;
> > +	}
> > +
> > +	INIT_LIST_HEAD(&bedata->socket_mappings);
> > +	INIT_LIST_HEAD(&bedata->socketpass_mappings);
> > +	spin_lock_init(&bedata->pvcallss_lock);
> > +	dev_set_drvdata(&dev->dev, bedata);
> > +	pvcalls_front_dev = dev;
> > +	xenbus_switch_state(dev, XenbusStateInitialised);
> > +
> >  	return 0;
> > +
> > + error_xenbus:
> > +	xenbus_transaction_end(xbt, 1);
> > +	xenbus_dev_fatal(dev, ret, "writing xenstore");
> > + error:
> > +	pvcalls_front_remove(dev);
> > +	return ret;
> >  }
> >  
> >  static int pvcalls_front_resume(struct xenbus_device *dev)
> > @@ -45,6 +175,29 @@ static int pvcalls_front_resume(struct xenbus_device *dev)
> >  static void pvcalls_front_changed(struct xenbus_device *dev,
> >  			    enum xenbus_state backend_state)
> >  {
> > +	switch (backend_state) {
> > +	case XenbusStateReconfiguring:
> > +	case XenbusStateReconfigured:
> > +	case XenbusStateInitialising:
> > +	case XenbusStateInitialised:
> > +	case XenbusStateUnknown:
> > +		break;
> > +
> > +	case XenbusStateInitWait:
> > +		break;
> > +
> > +	case XenbusStateConnected:
> > +		xenbus_switch_state(dev, XenbusStateConnected);
> > +		break;
> > +
> > +	case XenbusStateClosed:
> > +		if (dev->state == XenbusStateClosed)
> > +			break;
> > +		/* Missed the backend's CLOSING state -- fallthrough */
> > +	case XenbusStateClosing:
> > +		xenbus_frontend_closed(dev);
> > +		break;
> > +	}
> >  }
> >  
> >  static struct xenbus_driver pvcalls_front_driver = {
> > 
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 02/13] xen/pvcalls: connect to the backend
  2017-07-24 19:23     ` Juergen Gross
@ 2017-07-25 20:10       ` Stefano Stabellini
  2017-07-25 20:10       ` Stefano Stabellini
  1 sibling, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-25 20:10 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, boris.ostrovsky, Stefano Stabellini,
	linux-kernel, xen-devel

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:11, Stefano Stabellini wrote:
> > Implement the probe function for the pvcalls frontend. Read the
> > supported versions, max-page-order and function-calls nodes from
> > xenstore.
> > 
> > Introduce a data structure named pvcalls_bedata. It contains pointers to
> > the command ring, the event channel, a list of active sockets and a list
> > of passive sockets. Lists accesses are protected by a spin_lock.
> > 
> > Introduce a waitqueue to allow waiting for a response on commands sent
> > to the backend.
> > 
> > Introduce an array of struct xen_pvcalls_response to store commands
> > responses.
> > 
> > Only one frontend<->backend connection is supported at any given time
> > for a guest. Store the active frontend device to a static pointer.
> > 
> > Introduce a stub functions for the event handler.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 153 ++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 153 insertions(+)
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index 173e204..fb08ebf 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -20,6 +20,29 @@
> >  #include <xen/xenbus.h>
> >  #include <xen/interface/io/pvcalls.h>
> >  
> > +#define PVCALLS_INVALID_ID (UINT_MAX)
> > +#define RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
> > +#define PVCALLS_NR_REQ_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
> > +
> > +struct pvcalls_bedata {
> > +	struct xen_pvcalls_front_ring ring;
> > +	grant_ref_t ref;
> > +	int irq;
> > +
> > +	struct list_head socket_mappings;
> > +	struct list_head socketpass_mappings;
> > +	spinlock_t pvcallss_lock;
> > +
> > +	wait_queue_head_t inflight_req;
> > +	struct xen_pvcalls_response rsp[PVCALLS_NR_REQ_PER_RING];
> > +};
> > +struct xenbus_device *pvcalls_front_dev;
> > +
> > +static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
> > +{
> > +	return IRQ_HANDLED;
> > +}
> > +
> >  static const struct xenbus_device_id pvcalls_front_ids[] = {
> >  	{ "pvcalls" },
> >  	{ "" }
> > @@ -33,7 +56,114 @@ static int pvcalls_front_remove(struct xenbus_device *dev)
> >  static int pvcalls_front_probe(struct xenbus_device *dev,
> >  			  const struct xenbus_device_id *id)
> >  {
> > +	int ret = -EFAULT, evtchn, ref = -1, i;
> > +	unsigned int max_page_order, function_calls, len;
> > +	char *versions;
> > +	grant_ref_t gref_head = 0;
> > +	struct xenbus_transaction xbt;
> > +	struct pvcalls_bedata *bedata = NULL;
> > +	struct xen_pvcalls_sring *sring;
> > +
> > +	if (pvcalls_front_dev != NULL) {
> > +		dev_err(&dev->dev, "only one PV Calls connection supported\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
> > +	if (!len)
> > +		return -EINVAL;
> > +	if (strcmp(versions, "1")) {
> > +		kfree(versions);
> > +		return -EINVAL;
> > +	}
> > +	kfree(versions);
> > +	ret = xenbus_scanf(XBT_NIL, dev->otherend,
> > +			   "max-page-order", "%u", &max_page_order);
> 
> Use xenbus_read_unsigned() instead?

OK


> > +	if (ret <= 0)
> > +		return -ENODEV;
> > +	if (max_page_order < RING_ORDER)
> > +		return -ENODEV;
> > +	ret = xenbus_scanf(XBT_NIL, dev->otherend,
> > +			   "function-calls", "%u", &function_calls);
> 
> xenbus_read_unsigned() again?

OK


> > +	if (ret <= 0 || function_calls != 1)
> > +		return -ENODEV;
> > +	pr_info("%s max-page-order is %u\n", __func__, max_page_order);
> > +
> > +	bedata = kzalloc(sizeof(struct pvcalls_bedata), GFP_KERNEL);
> > +	if (!bedata)
> > +		return -ENOMEM;
> > +
> 
> You should call dev_set_drvdata() here already, otherwise entering the
> error path will dereference a NULL pointer instead of bedata.

OK
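
I'll set it right after the allocation, along these lines (sketch):

	bedata = kzalloc(sizeof(struct pvcalls_bedata), GFP_KERNEL);
	if (!bedata)
		return -ENOMEM;
	dev_set_drvdata(&dev->dev, bedata);

so that pvcalls_front_remove() gets a valid pointer back from
dev_get_drvdata() when the error path calls it.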


> > +	init_waitqueue_head(&bedata->inflight_req);
> > +	for (i = 0; i < PVCALLS_NR_REQ_PER_RING; i++)
> > +		bedata->rsp[i].req_id = PVCALLS_INVALID_ID;
> > +
> > +	sring = (struct xen_pvcalls_sring *) __get_free_page(GFP_KERNEL |
> > +							     __GFP_ZERO);
> > +	if (!sring)
> > +		goto error;
> 
> ret will be 1 here. Shouldn't you set it to -ENOMEM?

Yes, I'll do that
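
I.e.:

	ret = -ENOMEM;
	sring = (struct xen_pvcalls_sring *)__get_free_page(GFP_KERNEL |
							    __GFP_ZERO);
	if (!sring)
		goto error;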


> 
> > +	SHARED_RING_INIT(sring);
> > +	FRONT_RING_INIT(&bedata->ring, sring, XEN_PAGE_SIZE);
> > +
> > +	ret = xenbus_alloc_evtchn(dev, &evtchn);
> > +	if (ret)
> > +		goto error;
> > +
> > +	bedata->irq = bind_evtchn_to_irqhandler(evtchn,
> > +						pvcalls_front_event_handler,
> > +						0, "pvcalls-frontend", dev);
> > +	if (bedata->irq < 0) {
> > +		ret = bedata->irq;
> > +		goto error;
> > +	}
> > +
> > +	ret = gnttab_alloc_grant_references(1, &gref_head);
> > +	if (ret < 0)
> > +		goto error;
> > +	bedata->ref = ref = gnttab_claim_grant_reference(&gref_head);
> > +	if (ref < 0)
> > +		goto error;
> 
> Setting ret?

OK
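
Something like the following; if I am not mistaken,
gnttab_claim_grant_reference() returns a negative errno on failure, so
it can be propagated directly:

	bedata->ref = ref = gnttab_claim_grant_reference(&gref_head);
	if (ref < 0) {
		ret = ref;
		goto error;
	}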

> 
> > +	gnttab_grant_foreign_access_ref(ref, dev->otherend_id,
> > +					virt_to_gfn((void *)sring), 0);
> > +
> > + again:
> > +	ret = xenbus_transaction_start(&xbt);
> > +	if (ret) {
> > +		xenbus_dev_fatal(dev, ret, "starting transaction");
> > +		goto error;
> > +	}
> > +	ret = xenbus_printf(xbt, dev->nodename, "version", "%u", 1);
> > +	if (ret)
> > +		goto error_xenbus;
> > +	ret = xenbus_printf(xbt, dev->nodename, "ring-ref", "%d", ref);
> > +	if (ret)
> > +		goto error_xenbus;
> > +	ret = xenbus_printf(xbt, dev->nodename, "port", "%u",
> > +			    evtchn);
> > +	if (ret)
> > +		goto error_xenbus;
> > +	ret = xenbus_transaction_end(xbt, 0);
> > +	if (ret) {
> > +		if (ret == -EAGAIN)
> > +			goto again;
> > +		xenbus_dev_fatal(dev, ret, "completing transaction");
> > +		goto error;
> > +	}
> > +
> > +	INIT_LIST_HEAD(&bedata->socket_mappings);
> > +	INIT_LIST_HEAD(&bedata->socketpass_mappings);
> > +	spin_lock_init(&bedata->pvcallss_lock);
> > +	dev_set_drvdata(&dev->dev, bedata);
> > +	pvcalls_front_dev = dev;
> > +	xenbus_switch_state(dev, XenbusStateInitialised);
> > +
> >  	return 0;
> > +
> > + error_xenbus:
> > +	xenbus_transaction_end(xbt, 1);
> > +	xenbus_dev_fatal(dev, ret, "writing xenstore");
> > + error:
> > +	pvcalls_front_remove(dev);
> > +	return ret;
> >  }
> >  
> >  static int pvcalls_front_resume(struct xenbus_device *dev)
> > @@ -45,6 +175,29 @@ static int pvcalls_front_resume(struct xenbus_device *dev)
> >  static void pvcalls_front_changed(struct xenbus_device *dev,
> >  			    enum xenbus_state backend_state)
> >  {
> > +	switch (backend_state) {
> > +	case XenbusStateReconfiguring:
> > +	case XenbusStateReconfigured:
> > +	case XenbusStateInitialising:
> > +	case XenbusStateInitialised:
> > +	case XenbusStateUnknown:
> > +		break;
> > +
> > +	case XenbusStateInitWait:
> > +		break;
> > +
> > +	case XenbusStateConnected:
> > +		xenbus_switch_state(dev, XenbusStateConnected);
> > +		break;
> > +
> > +	case XenbusStateClosed:
> > +		if (dev->state == XenbusStateClosed)
> > +			break;
> > +		/* Missed the backend's CLOSING state -- fallthrough */
> > +	case XenbusStateClosing:
> > +		xenbus_frontend_closed(dev);
> > +		break;
> > +	}
> >  }
> >  
> >  static struct xenbus_driver pvcalls_front_driver = {
> > 
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 03/13] xen/pvcalls: implement socket command and handle events
  2017-07-24 19:29     ` Juergen Gross
  2017-07-25 20:43       ` Stefano Stabellini
@ 2017-07-25 20:43       ` Stefano Stabellini
  1 sibling, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-25 20:43 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, xen-devel, linux-kernel, boris.ostrovsky,
	Stefano Stabellini

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:11, Stefano Stabellini wrote:
> > Send a PVCALLS_SOCKET command to the backend, use the masked
> > req_prod_pvt as req_id. This way, req_id is guaranteed to be between 0
> > and PVCALLS_NR_REQ_PER_RING. We already have a slot in the rsp array
> > ready for the response, and there cannot be two outstanding responses
> > with the same req_id.
> > 
> > Wait for the response by waiting on the inflight_req waitqueue and
> > check for the req_id field in rsp[req_id]. Use atomic accesses to
> > read the field. Once a response is received, clear the corresponding rsp
> > slot by setting req_id to PVCALLS_INVALID_ID. Note that
> > PVCALLS_INVALID_ID is invalid only from the frontend point of view. It
> > is not part of the PVCalls protocol.
> > 
> > pvcalls_front_event_handler is in charge of copying responses from the
> > ring to the appropriate rsp slot. It is done by copying the body of the
> > response first, then by copying req_id atomically. After the copies,
> > wake up anybody waiting on waitqueue.
> > 
> > pvcallss_lock protects accesses to the ring.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 91 +++++++++++++++++++++++++++++++++++++++++++++
> >  drivers/xen/pvcalls-front.h |  8 ++++
> >  2 files changed, 99 insertions(+)
> >  create mode 100644 drivers/xen/pvcalls-front.h
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index fb08ebf..7933c73 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> 
> Shouldn't you include pvcalls-front.h?

Yes


> > @@ -40,9 +40,100 @@ struct pvcalls_bedata {
> >  
> >  static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
> >  {
> > +	struct xenbus_device *dev = dev_id;
> > +	struct pvcalls_bedata *bedata;
> > +	struct xen_pvcalls_response *rsp;
> > +	uint8_t *src, *dst;
> > +	int req_id = 0, more = 0;
> > +
> > +	if (dev == NULL)
> > +		return IRQ_HANDLED;
> > +
> > +	bedata = dev_get_drvdata(&dev->dev);
> > +	if (bedata == NULL)
> > +		return IRQ_HANDLED;
> > +
> > +again:
> > +	while (RING_HAS_UNCONSUMED_RESPONSES(&bedata->ring)) {
> > +		rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
> > +
> > +		req_id = rsp->req_id;
> > +		src = (uint8_t *)&bedata->rsp[req_id];
> > +		src += sizeof(rsp->req_id);
> > +		dst = (uint8_t *)rsp;
> > +		dst += sizeof(rsp->req_id);
> > +		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> > +		/*
> > +		 * First copy the rest of the data, then req_id. It is
> > +		 * paired with the barrier when accessing bedata->rsp.
> > +		 */
> > +		smp_wmb();
> > +		WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> > +
> > +		bedata->ring.rsp_cons++;
> > +		wake_up(&bedata->inflight_req);
> > +	}
> > +
> > +	RING_FINAL_CHECK_FOR_RESPONSES(&bedata->ring, more);
> > +	if (more)
> > +		goto again;
> 
> Wouldn't it make more sense to use wake_up() just once if there is any
> response pending and do the consuming loop outside the irq handler?

You are definitely right: it's far better to call wake_up() just once
after the consuming loop if there is any response pending. I'll do that.

However, I am not sure there is much to gain in moving the consuming
loop out of the irq handler: it's pretty short and doesn't call any
long-running or sleeping functions.
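
Concretely, I am thinking of something along these lines (a sketch; the
"done" flag is my own naming):

	int done = 0;

again:
	while (RING_HAS_UNCONSUMED_RESPONSES(&bedata->ring)) {
		rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);

		/* copy the response into bedata->rsp[req_id] as before */

		bedata->ring.rsp_cons++;
		done = 1;
	}

	RING_FINAL_CHECK_FOR_RESPONSES(&bedata->ring, more);
	if (more)
		goto again;

	if (done)
		wake_up(&bedata->inflight_req);
	return IRQ_HANDLED;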



 
> >  	return IRQ_HANDLED;
> >  }
> >  
> > +int pvcalls_front_socket(struct socket *sock)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	struct xen_pvcalls_request *req;
> > +	int notify, req_id, ret;
> > +
> > +	if (!pvcalls_front_dev)
> > +		return -EACCES;
> > +	/*
> > +	 * PVCalls only supports domain AF_INET,
> > +	 * type SOCK_STREAM and protocol 0 sockets for now.
> > +	 *
> > +	 * Check socket type here, AF_INET and protocol checks are done
> > +	 * by the caller.
> > +	 */
> > +	if (sock->type != SOCK_STREAM)
> > +	    return -ENOTSUPP;
> > +
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +
> > +	spin_lock(&bedata->pvcallss_lock);
> > +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> > +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
> > +	if (RING_FULL(&bedata->ring) ||
> > +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> > +		spin_unlock(&bedata->pvcallss_lock);
> > +		return -EAGAIN;
> > +	}
> > +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> > +	req->req_id = req_id;
> > +	req->cmd = PVCALLS_SOCKET;
> > +	req->u.socket.id = (uint64_t) sock;
> > +	req->u.socket.domain = AF_INET;
> > +	req->u.socket.type = SOCK_STREAM;
> > +	req->u.socket.protocol = 0;
> > +
> > +	bedata->ring.req_prod_pvt++;
> > +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> > +	spin_unlock(&bedata->pvcallss_lock);
> > +	if (notify)
> > +		notify_remote_via_irq(bedata->irq);
> > +
> > +	if (wait_event_interruptible(bedata->inflight_req,
> > +		READ_ONCE(bedata->rsp[req_id].req_id) == req_id) != 0)
> > +		return -EINTR;
> > +
> > +	ret = bedata->rsp[req_id].ret;
> > +	/* read ret, then set this rsp slot to be reused */
> > +	smp_mb();
> > +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> > +
> > +	return ret;
> > +}
> > +
> >  static const struct xenbus_device_id pvcalls_front_ids[] = {
> >  	{ "pvcalls" },
> >  	{ "" }
> > diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> > new file mode 100644
> > index 0000000..b7dabed
> > --- /dev/null
> > +++ b/drivers/xen/pvcalls-front.h
> > @@ -0,0 +1,8 @@
> > +#ifndef __PVCALLS_FRONT_H__
> > +#define __PVCALLS_FRONT_H__
> > +
> > +#include <linux/net.h>
> > +
> > +int pvcalls_front_socket(struct socket *sock);
> > +
> > +#endif
> > 
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 10/13] xen/pvcalls: implement poll command
  2017-07-24 20:08       ` Juergen Gross
  (?)
  (?)
@ 2017-07-25 20:59       ` Stefano Stabellini
  -1 siblings, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-25 20:59 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, xen-devel, linux-kernel, boris.ostrovsky,
	Stefano Stabellini

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:12, Stefano Stabellini wrote:
> > For active sockets, check the indexes and use the inflight_conn_req
> > waitqueue to wait.
> > 
> > For passive sockets, send PVCALLS_POLL to the backend. Use the
> > inflight_accept_req waitqueue if an accept is outstanding. Otherwise use
> > the inflight_req waitqueue: inflight_req is woken when a new response
> > is received; on wakeup we check whether the POLL response has arrived by
> > looking at the PVCALLS_FLAG_POLL_RET flag. We set the flag from
> > pvcalls_front_event_handler, if the response was for a POLL command.
> > 
> > In pvcalls_front_event_handler, get the struct socket pointer from the
> > poll id (we previously converted struct socket* to uint64_t and used it
> > as id).
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 123 ++++++++++++++++++++++++++++++++++++++++----
> >  drivers/xen/pvcalls-front.h |   3 ++
> >  2 files changed, 115 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index 3d1041a..b6cfb7d 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -128,17 +128,29 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
> >  		rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
> >  
> >  		req_id = rsp->req_id;
> > -		src = (uint8_t *)&bedata->rsp[req_id];
> > -		src += sizeof(rsp->req_id);
> > -		dst = (uint8_t *)rsp;
> > -		dst += sizeof(rsp->req_id);
> > -		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> > -		/*
> > -		 * First copy the rest of the data, then req_id. It is
> > -		 * paired with the barrier when accessing bedata->rsp.
> > -		 */
> > -		smp_wmb();
> > -		WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> > +		if (rsp->cmd == PVCALLS_POLL) {
> > +			struct socket *sock = (struct socket *) rsp->u.poll.id;
> > +			struct sock_mapping *map =
> > +				(struct sock_mapping *)
> > +				READ_ONCE(sock->sk->sk_send_head);
> > +
> > +			set_bit(PVCALLS_FLAG_POLL_RET,
> > +				(void *)&map->passive.flags);
> 
> Add a barrier here to make sure PVCALLS_FLAG_POLL_INFLIGHT is cleared
> _after_ setting PVCALLS_FLAG_POLL_RET?

Yes, good point, I'll add an smp_wmb() here. A barrier is unnecessary at
the other end (the beginning of pvcalls_front_poll_passive) because of
the conditional instructions creating control dependencies. I'll add a
comment.
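
I.e. something like:

			set_bit(PVCALLS_FLAG_POLL_RET,
				(void *)&map->passive.flags);
			/*
			 * PVCALLS_FLAG_POLL_RET must be visible before
			 * PVCALLS_FLAG_POLL_INFLIGHT is cleared; no barrier
			 * is needed on the reader side because the
			 * conditionals in pvcalls_front_poll_passive create
			 * control dependencies.
			 */
			smp_wmb();
			clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
				  (void *)&map->passive.flags);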


> > +			clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> > +				  (void *)&map->passive.flags);
> > +		} else {
> > +			src = (uint8_t *)&bedata->rsp[req_id];
> > +			src += sizeof(rsp->req_id);
> > +			dst = (uint8_t *)rsp;
> > +			dst += sizeof(rsp->req_id);
> > +			memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> > +			/*
> > +			 * First copy the rest of the data, then req_id. It is
> > +			 * paired with the barrier when accessing bedata->rsp.
> > +			 */
> > +			smp_wmb();
> > +			WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> > +		}
> >  
> >  		bedata->ring.rsp_cons++;
> >  		wake_up(&bedata->inflight_req);
> > @@ -704,6 +716,95 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
> >  	return ret;
> >  }
> >  
> > +static unsigned int pvcalls_front_poll_passive(struct file *file,
> > +					       struct pvcalls_bedata *bedata,
> > +					       struct sock_mapping *map,
> > +					       poll_table *wait)
> > +{
> > +	int notify, req_id;
> > +	struct xen_pvcalls_request *req;
> > +
> > +	if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> > +		     (void *)&map->passive.flags)) {
> > +		poll_wait(file, &map->passive.inflight_accept_req, wait);
> > +		return 0;
> > +	}
> > +
> > +	if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
> > +			       (void *)&map->passive.flags))
> > +		return POLLIN;
> > +
> > +	if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> > +			     (void *)&map->passive.flags)) {
> > +		poll_wait(file, &bedata->inflight_req, wait);
> > +		return 0;
> > +	}
> > +
> > +	spin_lock(&bedata->pvcallss_lock);
> > +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> > +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
> > +	if (RING_FULL(&bedata->ring) ||
> > +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> > +		spin_unlock(&bedata->pvcallss_lock);
> > +		return -EAGAIN;
> > +	}
> > +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> > +	req->req_id = req_id;
> > +	req->cmd = PVCALLS_POLL;
> > +	req->u.poll.id = (uint64_t) map->sock;
> > +
> > +	bedata->ring.req_prod_pvt++;
> > +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> > +	spin_unlock(&bedata->pvcallss_lock);
> > +	if (notify)
> > +		notify_remote_via_irq(bedata->irq);
> > +
> > +	poll_wait(file, &bedata->inflight_req, wait);
> > +	return 0;
> > +}
> > +
> > +static unsigned int pvcalls_front_poll_active(struct file *file,
> > +					      struct pvcalls_bedata *bedata,
> > +					      struct sock_mapping *map,
> > +					      poll_table *wait)
> > +{
> > +	unsigned int mask = 0;
> > +	int32_t in_error, out_error;
> > +	struct pvcalls_data_intf *intf = map->active.ring;
> > +
> > +	out_error = intf->out_error;
> > +	in_error = intf->in_error;
> > +
> > +	poll_wait(file, &map->active.inflight_conn_req, wait);
> > +	if (pvcalls_front_write_todo(map))
> > +		mask |= POLLOUT | POLLWRNORM;
> > +	if (pvcalls_front_read_todo(map))
> > +		mask |= POLLIN | POLLRDNORM;
> > +	if (in_error != 0 || out_error != 0)
> > +		mask |= POLLERR;
> > +
> > +	return mask;
> > +}
> > +
> > +unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
> > +			       poll_table *wait)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	struct sock_mapping *map;
> > +
> > +	if (!pvcalls_front_dev)
> > +		return POLLNVAL;
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +
> > +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> > +	if (!map)
> > +		return POLLNVAL;
> > +	if (map->active_socket)
> > +		return pvcalls_front_poll_active(file, bedata, map, wait);
> > +	else
> > +		return pvcalls_front_poll_passive(file, bedata, map, wait);
> > +}
> > +
> >  static const struct xenbus_device_id pvcalls_front_ids[] = {
> >  	{ "pvcalls" },
> >  	{ "" }
> > diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> > index de24041..25e05b8 100644
> > --- a/drivers/xen/pvcalls-front.h
> > +++ b/drivers/xen/pvcalls-front.h
> > @@ -20,5 +20,8 @@ int pvcalls_front_recvmsg(struct socket *sock,
> >  			  struct msghdr *msg,
> >  			  size_t len,
> >  			  int flags);
> > +unsigned int pvcalls_front_poll(struct file *file,
> > +				struct socket *sock,
> > +				poll_table *wait);
> >  
> >  #endif
> > 
> 

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH v1 11/13] xen/pvcalls: implement release command
  2017-07-24 20:14     ` Juergen Gross
  2017-07-25 21:07       ` Stefano Stabellini
@ 2017-07-25 21:07       ` Stefano Stabellini
  1 sibling, 0 replies; 77+ messages in thread
From: Stefano Stabellini @ 2017-07-25 21:07 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, xen-devel, linux-kernel, boris.ostrovsky,
	Stefano Stabellini

On Mon, 24 Jul 2017, Juergen Gross wrote:
> On 22/07/17 02:12, Stefano Stabellini wrote:
> > Send PVCALLS_RELEASE to the backend and wait for a reply. Take both
> > in_mutex and out_mutex to avoid concurrent accesses. Then, free the
> > socket.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 86 +++++++++++++++++++++++++++++++++++++++++++++
> >  drivers/xen/pvcalls-front.h |  1 +
> >  2 files changed, 87 insertions(+)
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index b6cfb7d..bd3dfac 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -174,6 +174,24 @@ static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map)
> >  	return IRQ_HANDLED;
> >  }
> >  
> > +static void pvcalls_front_free_map(struct pvcalls_bedata *bedata,
> > +				   struct sock_mapping *map)
> > +{
> > +	int i;
> > +
> > +	spin_lock(&bedata->pvcallss_lock);
> > +	if (!list_empty(&map->list))
> > +		list_del_init(&map->list);
> > +	spin_unlock(&bedata->pvcallss_lock);
> > +
> > +	/* what if the thread waiting still need access? */
> 
> Is this handled? If not, why is it no problem?

Yes, sorry. This is a left-over from earlier versions of the code.

This scenario is handled because waiting threads will have already been
woken by the wake_up_interruptible call in pvcalls_front_release, and
the code is also protected by both in_mutex and out_mutex. I hadn't
introduced in_mutex and out_mutex yet when I wrote this comment; it no
longer applies.



> > +	for (i = 0; i < (1 << map->active.ring->ring_order); i++)
> > +		gnttab_end_foreign_access(map->active.ring->ref[i], 0, 0);
> > +	gnttab_end_foreign_access(map->active.ref, 0, 0);
> > +	free_page((unsigned long)map->active.ring);
> > +	unbind_from_irqhandler(map->active.irq, map);
> > +}
> > +
> >  int pvcalls_front_socket(struct socket *sock)
> >  {
> >  	struct pvcalls_bedata *bedata;
> > @@ -805,6 +823,74 @@ unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
> >  		return pvcalls_front_poll_passive(file, bedata, map, wait);
> >  }
> >  
> > +int pvcalls_front_release(struct socket *sock)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	struct sock_mapping *map;
> > +	int req_id, notify;
> > +	struct xen_pvcalls_request *req;
> > +
> > +	if (!pvcalls_front_dev)
> > +		return -EIO;
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +	if (!bedata)
> > +		return -EIO;
> > +
> > +	if (sock->sk == NULL)
> > +		return 0;
> > +
> > +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> > +	if (map == NULL)
> > +		return 0;
> > +	WRITE_ONCE(sock->sk->sk_send_head, NULL);
> > +
> > +	spin_lock(&bedata->pvcallss_lock);
> > +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> > +	BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING);
> > +	if (RING_FULL(&bedata->ring) ||
> > +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> > +		spin_unlock(&bedata->pvcallss_lock);
> > +		return -EAGAIN;
> 
> Isn't it a problem you already cleared sock->sk->sk_send_head?

Yes, you are right. It would effectively leak the socket. I'll move the
clearing of sk_send_head after this check.
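
I.e. (sketch):

	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
	if (map == NULL)
		return 0;

	spin_lock(&bedata->pvcallss_lock);
	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
	if (RING_FULL(&bedata->ring) ||
	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
		spin_unlock(&bedata->pvcallss_lock);
		return -EAGAIN;
	}
	/* only detach the socket once the request is sure to be issued */
	WRITE_ONCE(sock->sk->sk_send_head, NULL);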



> > +	}
> > +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> > +	req->req_id = req_id;
> > +	req->cmd = PVCALLS_RELEASE;
> > +	req->u.release.id = (uint64_t)sock;
> > +
> > +	bedata->ring.req_prod_pvt++;
> > +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> > +	spin_unlock(&bedata->pvcallss_lock);
> > +	if (notify)
> > +		notify_remote_via_irq(bedata->irq);
> > +
> > +	wait_event(bedata->inflight_req,
> > +		READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> > +
> > +	if (map->active_socket) {
> > +		/* 
> > +		 * Set in_error and wake up inflight_conn_req to force
> > +		 * recvmsg waiters to exit.
> > +		 */
> > +		map->active.ring->in_error = -EBADF;
> > +		wake_up_interruptible(&map->active.inflight_conn_req);
> > +
> > +		mutex_lock(&map->active.in_mutex);
> > +		mutex_lock(&map->active.out_mutex);
> > +		pvcalls_front_free_map(bedata, map);
> > +		mutex_unlock(&map->active.out_mutex);
> > +		mutex_unlock(&map->active.in_mutex);
> > +		kfree(map);
> > +	} else {
> > +		spin_lock(&bedata->pvcallss_lock);
> > +		list_del_init(&map->list);
> > +		kfree(map);
> > +		spin_unlock(&bedata->pvcallss_lock);
> > +	}
> > +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> > +
> > +	return 0;
> > +}
> > +
> >  static const struct xenbus_device_id pvcalls_front_ids[] = {
> >  	{ "pvcalls" },
> >  	{ "" }
> > diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> > index 25e05b8..3332978 100644
> > --- a/drivers/xen/pvcalls-front.h
> > +++ b/drivers/xen/pvcalls-front.h
> > @@ -23,5 +23,6 @@ int pvcalls_front_recvmsg(struct socket *sock,
> >  unsigned int pvcalls_front_poll(struct file *file,
> >  				struct socket *sock,
> >  				poll_table *wait);
> > +int pvcalls_front_release(struct socket *sock);
> >  
> >  #endif

^ permalink raw reply	[flat|nested] 77+ messages in thread
