All of lore.kernel.org
 help / color / mirror / Atom feed
From: Saurabh Sengar <ssengar@linux.microsoft.com>
To: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
	decui@microsoft.com, gregkh@linuxfoundation.org,
	linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: ssengar@microsoft.com
Subject: [PATCH 4/6] tools: hv: Add vmbus_bufring
Date: Sat, 17 Feb 2024 10:03:38 -0800	[thread overview]
Message-ID: <1708193020-14740-5-git-send-email-ssengar@linux.microsoft.com> (raw)
In-Reply-To: <1708193020-14740-1-git-send-email-ssengar@linux.microsoft.com>

Common userspace interface for read/write from VMBus ringbuffer.
This implementation is open for use by any userspace driver or
application seeking direct control over VMBus ring buffers.
A significant part of this code is borrowed from DPDK.
Link: https://github.com/DPDK/dpdk/

Signed-off-by: Mary Hardy <maryhardy@microsoft.com>
Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com>
---
 tools/hv/vmbus_bufring.c | 316 +++++++++++++++++++++++++++++++++++++++
 tools/hv/vmbus_bufring.h | 158 ++++++++++++++++++++
 2 files changed, 474 insertions(+)
 create mode 100644 tools/hv/vmbus_bufring.c
 create mode 100644 tools/hv/vmbus_bufring.h

diff --git a/tools/hv/vmbus_bufring.c b/tools/hv/vmbus_bufring.c
new file mode 100644
index 000000000000..b74b56283bc5
--- /dev/null
+++ b/tools/hv/vmbus_bufring.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2009-2012,2016,2023 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <emmintrin.h>
+#include <linux/limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include "vmbus_bufring.h"
+
+#define	rte_compiler_barrier()	({ asm volatile ("" : : : "memory"); })	/* empty asm with a "memory" clobber: constrains the compiler only */
+
+#define	rte_smp_rwmb()		({ asm volatile ("" : : : "memory"); })	/* same expansion as rte_compiler_barrier(); no fence instruction is emitted */
+
+#define VMBUS_RQST_ERROR	0xFFFFFFFFFFFFFFFF	/* value stored in the xactid field by rte_vmbus_chan_send() */
+#define ALIGN(val, align)	((typeof(val))((val) & (~((typeof(val))((align) - 1)))))	/* NOTE(review): clears the low bits, i.e. rounds val DOWN to a multiple of align (power of two); it never rounds up */
+
+/*
+ * Map ring buffer memory from an already opened file descriptor.
+ *
+ * @fd:   pointer to the descriptor to map; the mapping starts at offset 0
+ * @size: half of the length to map, in bytes; 2 * size bytes are mapped
+ *        read/write and shared
+ *
+ * Return: address of the mapping, or NULL if fd is NULL, size is not
+ * positive, or mmap() fails.
+ */
+void *vmbus_uio_map(int *fd, int size)
+{
+	void *map;
+
+	if (!fd || size <= 0)
+		return NULL;
+
+	/* Widen before doubling so a large size cannot overflow int. */
+	map = mmap(NULL, (size_t)size * 2, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
+	if (map == MAP_FAILED)
+		return NULL;
+
+	return map;
+}
+
+/*
+ * Advance a ring index by inc bytes, wrapping once past the end.
+ *
+ * Only a single subtraction of sz is applied, so the result is below sz
+ * only when idx + inc < 2 * sz.
+ */
+static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz)
+{
+	const uint32_t next = idx + inc;
+
+	return next >= sz ? next - sz : next;
+}
+
+/*
+ * Initialise a ring view over a mapped buffer.
+ *
+ * @br:   view to fill in
+ * @buf:  start of the buffer; it begins with a struct vmbus_bufring header
+ * @blen: length of the whole buffer in bytes, header included
+ *
+ * The cached write index is seeded from the header's current windex and
+ * the data size is what remains of blen after the header.
+ */
+void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen)
+{
+	struct vmbus_bufring *ring = buf;
+
+	br->vbr = ring;
+	br->windex = ring->windex;
+	br->dsize = blen - sizeof(*ring);
+}
+
+static inline __always_inline void
+rte_smp_mb(void)
+{
+	asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");	/* x86-64 only: locked add of 0 to a stack slot below %rsp; presumably the full SMP barrier the name suggests */
+}
+
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+	uint8_t res;	/* filled by sete: 1 when the compare matched, else 0 */
+
+	asm volatile("lock ; "
+		     "cmpxchgl %[src], %[dst];"	/* if *dst equals exp (held in eax), store src into *dst */
+		     "sete %[res];"
+		     : [res] "=a" (res),     /* output */
+		     [dst] "=m" (*dst)
+		     : [src] "r" (src),      /* input */
+		     "a" (exp),
+		     "m" (*dst)
+		     : "memory");            /* clobber list */
+	return res;	/* non-zero means the swap happened */
+}
+
+/*
+ * Copy cplen bytes from src0 into the TX ring data area starting at
+ * windex, splitting the copy in two when it runs past the end of the
+ * data area.  Returns the ring index that follows the copied bytes.
+ */
+static inline uint32_t
+vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex,
+		  const void *src0, uint32_t cplen)
+{
+	const uint8_t *from = src0;
+	uint8_t *ring = tbr->vbr->data;
+	const uint32_t ring_len = tbr->dsize;
+	const uint32_t to_end = ring_len - windex;
+
+	/* XXX use double mapping like Linux kernel? */
+	if (cplen <= to_end) {
+		memcpy(ring + windex, from, cplen);
+	} else {
+		/* Wrap-around: fill up to the end, then continue at offset 0 */
+		memcpy(ring + windex, from, to_end);
+		memcpy(ring, from + to_end, cplen - to_end);
+	}
+
+	return vmbus_br_idxinc(windex, cplen, ring_len);
+}
+
+/*
+ * Write scattered channel packet to TX bufring.
+ *
+ * The offset of this channel packet is written as a 64bits value
+ * immediately after this channel packet.
+ *
+ * The write goes through three stages:
+ *  1. Reserve space in ring buffer for the new data.
+ *     Writer atomically moves the cached index tbr->windex.
+ *  2. Copy the new data into the ring.
+ *  3. Publish the data: move the shared index vbr->windex from the
+ *     start of this reservation to its end, spinning until the shared
+ *     index has reached the start of this reservation.
+ *
+ * @tbr:      TX ring view
+ * @iov:      buffers that are copied back to back into the ring
+ * @iovlen:   number of entries in iov
+ * @need_sig: neither read nor written by this function
+ *
+ * Return: 0 on success, -EAGAIN when the free space is not strictly
+ * larger than the iov bytes plus the 8 byte trailer, -EINVAL when the
+ * bytes copied do not end at the reserved index.
+ */
+static int
+vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen,
+		 bool *need_sig)
+{
+	struct vmbus_bufring *vbr = tbr->vbr;
+	uint32_t ring_size = tbr->dsize;
+	uint32_t old_windex, next_windex, windex, total;
+	uint64_t save_windex;
+	int i;
+
+	/* NOTE(review): total is 32 bits wide; the sum of iov_len is not checked for overflow */
+	total = 0;
+	for (i = 0; i < iovlen; i++)
+		total += iov[i].iov_len;
+	total += sizeof(save_windex);
+
+	/* Reserve space in ring */
+	do {
+		uint32_t avail;
+
+		/* Get current free location */
+		old_windex = tbr->windex;
+
+		/* Prevent compiler reordering this with calculation */
+		rte_compiler_barrier();
+
+		avail = vmbus_br_availwrite(tbr, old_windex);
+
+		/* If not enough space in ring, then tell caller. */
+		if (avail <= total)
+			return -EAGAIN;
+
+		next_windex = vmbus_br_idxinc(old_windex, total, ring_size);
+
+		/* Atomic update of next write_index for other threads */
+	} while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));
+
+	/* Space from old..new is now reserved */
+	windex = old_windex;
+	for (i = 0; i < iovlen; i++)
+		windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len);
+
+	/* Set the offset of the current channel packet (start index in the upper 32 bits). */
+	save_windex = ((uint64_t)old_windex) << 32;
+	windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
+				   sizeof(save_windex));
+
+	/* The region reserved should match region used */
+	if (windex != next_windex)
+		return -EINVAL;
+
+	/* Ensure that data is available before updating host index */
+	rte_smp_rwmb();
+
+	/* Checkin for our reservation. wait for our turn to update host */
+	while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
+		_mm_pause();
+
+	return 0;
+}
+
+/*
+ * Send one channel packet on the TX ring.
+ *
+ * The packet is written as header + payload + zero padding, where the
+ * padding rounds the total length UP to a multiple of 8 bytes because
+ * the header stores lengths in 8 byte units.
+ *
+ * @txbr:  TX ring view
+ * @type:  VMBUS_CHANPKT_TYPE_ value stored in the header
+ * @data:  payload to copy after the header
+ * @dlen:  payload length in bytes
+ * @flags: VMBUS_CHANPKT_FLAG_ value; stored in a 16 bit header field
+ *
+ * Return: 0 on success, -EINVAL if the padded packet length cannot be
+ * expressed in the 16 bit tlen field, otherwise the error returned by
+ * vmbus_txbr_write().
+ */
+int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data,
+			uint32_t dlen, uint32_t flags)
+{
+	struct vmbus_chanpkt pkt;
+	unsigned int pktlen, pad_pktlen;
+	const uint32_t hlen = sizeof(pkt);
+	const uint32_t align_mask = sizeof(uint64_t) - 1;
+	/* Largest byte length representable by tlen (multiple of 8) */
+	const uint32_t max_pktlen = (uint32_t)UINT16_MAX << VMBUS_CHANPKT_SIZE_SHIFT;
+	bool send_evt = false;
+	uint64_t pad = 0;
+	struct iovec iov[3];
+
+	/* Also keeps hlen + dlen from wrapping around */
+	if (dlen > max_pktlen - hlen)
+		return -EINVAL;
+
+	pktlen = hlen + dlen;
+	/*
+	 * Round up, not down: with a rounded-down value the padding length
+	 * below would underflow whenever dlen is not a multiple of 8.
+	 */
+	pad_pktlen = (pktlen + align_mask) & ~align_mask;
+
+	pkt.hdr.type = type;
+	pkt.hdr.flags = flags;
+	pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
+	pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
+	pkt.hdr.xactid = VMBUS_RQST_ERROR;
+
+	iov[0].iov_base = &pkt;
+	iov[0].iov_len = hlen;
+	iov[1].iov_base = data;
+	iov[1].iov_len = dlen;
+	iov[2].iov_base = &pad;
+	iov[2].iov_len = pad_pktlen - pktlen;	/* 0..7 bytes of zero padding */
+
+	return vmbus_txbr_write(txbr, iov, 3, &send_evt);
+}
+
+/*
+ * Copy cplen bytes out of the RX ring data area starting at rindex into
+ * dst0, splitting the copy in two when it runs past the end of the data
+ * area.  Returns the ring index that follows the copied bytes; the
+ * shared read index is not modified here.
+ */
+static inline uint32_t
+vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex,
+		    void *dst0, size_t cplen)
+{
+	uint8_t *to = dst0;
+	const uint8_t *ring = rbr->vbr->data;
+	const uint32_t ring_len = rbr->dsize;
+	const uint32_t to_end = ring_len - rindex;
+
+	if (cplen <= to_end) {
+		memcpy(to, ring + rindex, cplen);
+	} else {
+		/* Wrap-around: take the tail first, then continue at offset 0 */
+		memcpy(to, ring + rindex, to_end);
+		memcpy(to + to_end, ring, cplen - to_end);
+	}
+
+	return vmbus_br_idxinc(rindex, cplen, ring_len);
+}
+
+/*
+ * Copy dlen bytes from the current read position of the receive ring
+ * without moving the read index.
+ *
+ * Return: 0 on success, -EAGAIN unless at least dlen bytes plus the
+ * 64bits channel packet offset are readable.
+ */
+static int
+vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen)
+{
+	const size_t needed = dlen + sizeof(uint64_t);
+
+	if (vmbus_br_availread(rbr) < needed)
+		return -EAGAIN;
+
+	(void)vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);
+
+	return 0;
+}
+
+/*
+ * Consume one channel packet from the receive ring.
+ *
+ * Skips the first skip bytes at the read position, copies the next dlen
+ * bytes into data, steps over the trailing 64bits packet offset and
+ * then stores the new read index in the shared ring header.
+ *
+ * NOTE:
+ * We assume (dlen + skip) == sizeof(channel packet).
+ *
+ * Return: 0 on success, -EAGAIN when fewer than
+ * dlen + skip + 8 bytes are readable.
+ */
+static int
+vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip)
+{
+	struct vmbus_bufring *ring = rbr->vbr;
+	const uint32_t ring_len = rbr->dsize;
+	const size_t wanted = dlen + skip + sizeof(uint64_t);
+	uint32_t pos;
+
+	if (vmbus_br_availread(rbr) < wanted)
+		return -EAGAIN;
+
+	/* Snapshot of the host's write index at the start of the read (for debug) */
+	rbr->windex = ring->windex;
+
+	/* Fetch the packet body, then step over its useless 64bits offset */
+	pos = vmbus_br_idxinc(ring->rindex, skip, ring_len);
+	pos = vmbus_rxbr_copyfrom(rbr, pos, data, dlen);
+	pos = vmbus_br_idxinc(pos, sizeof(uint64_t), ring_len);
+
+	/* The read index must only move _after_ the packet has been fetched */
+	rte_compiler_barrier();
+	ring->rindex = pos;
+
+	return 0;
+}
+
+/*
+ * Receive one whole channel packet, header included, from the RX ring.
+ *
+ * @rxbr: RX ring view
+ * @data: destination buffer
+ * @len:  in: capacity of data in bytes; out: packet length in bytes
+ *        (also written when -ENOBUFS is returned)
+ *
+ * Return: packet length plus the 8 byte trailing offset on success,
+ * -EAGAIN when no complete packet is readable, -EIO when the header
+ * lengths are inconsistent, -ENOBUFS when data is too small.
+ */
+int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr,
+			    void *data, uint32_t *len)
+{
+	const uint32_t capacity = *len;
+	struct vmbus_chanpkt_hdr hdr;
+	uint32_t pkt_bytes;
+	int rc;
+
+	rc = vmbus_rxbr_peek(rxbr, &hdr, sizeof(hdr));
+	if (rc)
+		return rc;
+
+	/* XXX a too short header means this channel is dead actually. */
+	if (unlikely(hdr.hlen < VMBUS_CHANPKT_HLEN_MIN || hdr.hlen > hdr.tlen))
+		return -EIO;
+
+	/* Header lengths are in quad words */
+	pkt_bytes = hdr.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
+	*len = pkt_bytes;
+
+	if (unlikely(pkt_bytes > capacity))
+		return -ENOBUFS;
+
+	/* Nothing is skipped: the caller gets the header as well */
+	rc = vmbus_rxbr_read(rxbr, data, pkt_bytes, 0);
+	if (rc)
+		return rc;
+
+	/* Number of ring bytes consumed */
+	return pkt_bytes + sizeof(uint64_t);
+}
diff --git a/tools/hv/vmbus_bufring.h b/tools/hv/vmbus_bufring.h
new file mode 100644
index 000000000000..6e7caacfff57
--- /dev/null
+++ b/tools/hv/vmbus_bufring.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+#ifndef _VMBUS_BUF_H_
+#define _VMBUS_BUF_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#define __packed   __attribute__((__packed__))
+#define unlikely(x)	__builtin_expect(!!(x), 0)	/* branch hint: x is expected to be false */
+
+#define ICMSGHDRFLAG_TRANSACTION	1
+#define ICMSGHDRFLAG_REQUEST		2
+#define ICMSGHDRFLAG_RESPONSE		4
+
+#define IC_VERSION_NEGOTIATION_MAX_VER_COUNT 100
+#define ICMSG_HDR (sizeof(struct vmbuspipe_hdr) + sizeof(struct icmsg_hdr))	/* combined size of both headers, in bytes */
+#define ICMSG_NEGOTIATE_PKT_SIZE(icframe_vercnt, icmsg_vercnt) \
+	(ICMSG_HDR + sizeof(struct icmsg_negotiate) + \
+	 (((icframe_vercnt) + (icmsg_vercnt)) * sizeof(struct ic_version)))	/* headers + negotiate struct + one ic_version per counted entry */
+
+/*
+ * Channel packets
+ */
+
+/* Channel packet types */
+#define VMBUS_CHANPKT_TYPE_INBAND	0x0006
+#define VMBUS_CHANPKT_TYPE_RXBUF	0x0007
+#define VMBUS_CHANPKT_TYPE_GPA		0x0009
+#define VMBUS_CHANPKT_TYPE_COMP		0x000b
+
+/* Channel packet flags */
+#define VMBUS_CHANPKT_FLAG_NONE		0
+#define VMBUS_CHANPKT_FLAG_RC		0x0001  /* report completion */
+
+/* Header lengths are stored in units of (1 << VMBUS_CHANPKT_SIZE_SHIFT) bytes */
+#define VMBUS_CHANPKT_SIZE_SHIFT	3
+/* Spelled out with a shift: BIT() is not defined for this userspace header */
+#define VMBUS_CHANPKT_SIZE_ALIGN	(1U << VMBUS_CHANPKT_SIZE_SHIFT)
+/* Smallest valid hlen: the fixed header size expressed in those units */
+#define VMBUS_CHANPKT_HLEN_MIN		\
+	(sizeof(struct vmbus_chanpkt_hdr) >> VMBUS_CHANPKT_SIZE_SHIFT)
+
+/*
+ * Buffer ring
+ *
+ * Header placed in front of the ring data.  The members before data[]
+ * add up to 4096 bytes (4 * 4 + 12 * 4 + 4 + 4028), so with the packed
+ * layout data[] begins at byte offset 4096.
+ */
+struct vmbus_bufring {
+	volatile uint32_t windex;	/* write index: byte offset into data[] */
+	volatile uint32_t rindex;	/* read index: byte offset into data[] */
+
+	/*
+	 * Interrupt mask {0,1}
+	 *
+	 * For TX bufring, host set this to 1, when it is processing
+	 * the TX bufring, so that we can safely skip the TX event
+	 * notification to host.
+	 *
+	 * For RX bufring, once this is set to 1 by us, host will not
+	 * further dispatch interrupts to us, even if there are data
+	 * pending on the RX bufring.  This effectively disables the
+	 * interrupt of the channel to which this RX bufring is attached.
+	 */
+	volatile uint32_t imask;
+
+	/*
+	 * Win8 uses some of the reserved bits to implement
+	 * interrupt driven flow management. On the send side
+	 * we can request that the receiver interrupt the sender
+	 * when the ring transitions from being full to being able
+	 * to handle a message of size "pending_send_sz".
+	 *
+	 * Add necessary state for this enhancement.
+	 */
+	volatile uint32_t pending_send;
+	uint32_t reserved1[12];
+
+	union {
+		struct {
+			uint32_t feat_pending_send_sz:1;
+		};
+		uint32_t value;
+	} feature_bits;
+
+	/*
+	 * Pad the header to 4096 bytes so that data starts on a page
+	 * boundary (assuming 4 KiB pages and a page aligned mapping).
+	 */
+	uint8_t	reserved2[4028];
+
+	/*
+	 * Ring data starts here + RingDataStartOffset
+	 * !!! DO NOT place any fields below this !!!
+	 */
+	uint8_t data[];
+} __packed;
+
+struct vmbus_br {
+	struct vmbus_bufring *vbr;	/* ring header followed by the data area */
+	uint32_t	dsize;		/* bytes in vbr->data (buffer length minus the header) */
+	uint32_t	windex; /* next available location; on the RX path it holds a snapshot of the shared write index */
+};
+
+struct vmbus_chanpkt_hdr {
+	uint16_t	type;	/* VMBUS_CHANPKT_TYPE_ */
+	uint16_t	hlen;	/* header len, in 8 bytes */
+	uint16_t	tlen;	/* total len, in 8 bytes */
+	uint16_t	flags;	/* VMBUS_CHANPKT_FLAG_ */
+	uint64_t	xactid;	/* rte_vmbus_chan_send() stores VMBUS_RQST_ERROR here */
+} __packed;
+
+struct vmbus_chanpkt {
+	struct vmbus_chanpkt_hdr hdr;	/* only member; the payload is passed separately to the ring writer */
+} __packed;
+
+struct vmbuspipe_hdr {	/* first of the two headers counted by ICMSG_HDR */
+	unsigned int flags;
+	unsigned int msgsize;	/* presumably the size of the message that follows - TODO confirm */
+} __packed;
+
+struct ic_version {	/* major/minor pair; element type of icmsg_negotiate.icversion_data[] */
+	unsigned short major;
+	unsigned short minor;
+} __packed;
+
+struct icmsg_negotiate {
+	unsigned short icframe_vercnt;	/* ICMSG_NEGOTIATE_PKT_SIZE() adds this count ... */
+	unsigned short icmsg_vercnt;	/* ... and this one to size icversion_data[] */
+	unsigned int reserved;
+	struct ic_version icversion_data[]; /* any size array */
+} __packed;
+
+struct icmsg_hdr {	/* second of the two headers counted by ICMSG_HDR */
+	struct ic_version icverframe;
+	unsigned short icmsgtype;
+	struct ic_version icvermsg;
+	unsigned short icmsgsize;
+	unsigned int status;
+	unsigned char ictransaction_id;
+	unsigned char icflags;	/* presumably a combination of ICMSGHDRFLAG_ values - TODO confirm */
+	unsigned char reserved[2];
+} __packed;
+
+int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr, void *data, uint32_t *len);	/* >0: ring bytes consumed, <0: negative errno; *len in: buffer size, out: packet size */
+int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data,
+			uint32_t dlen, uint32_t flags);	/* 0 on success, negative errno otherwise */
+void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen);	/* blen counts the struct vmbus_bufring header too */
+void *vmbus_uio_map(int *fd, int size);	/* maps 2 * size bytes from *fd; NULL on failure */
+
+/*
+ * Bytes that may be written when the write index is at windex.
+ *
+ * The shared read index is sampled once.  Equal indices are treated as
+ * an empty ring, so the whole data size is reported as available.
+ */
+static inline uint32_t vmbus_br_availwrite(const struct vmbus_br *br, uint32_t windex)
+{
+	const uint32_t rindex = br->vbr->rindex;
+
+	return windex >= rindex ? br->dsize - (windex - rindex)
+				: rindex - windex;
+}
+
+/* Bytes ready to be read: the data size minus what is free at the shared write index */
+static inline uint32_t vmbus_br_availread(const struct vmbus_br *br)
+{
+	const uint32_t writable = vmbus_br_availwrite(br, br->vbr->windex);
+
+	return br->dsize - writable;
+}
+
+#endif	/* !_VMBUS_BUF_H_ */
-- 
2.34.1


  parent reply	other threads:[~2024-02-17 18:04 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-17 18:03 [PATCH 0/6] Low speed Hyper-V devices support Saurabh Sengar
2024-02-17 18:03 ` [PATCH 1/6] Drivers: hv: vmbus: Add utility function for querying ring size Saurabh Sengar
2024-02-18  7:11   ` Greg KH
2024-02-18  8:03     ` Saurabh Singh Sengar
2024-02-18  9:11       ` Greg KH
2024-03-12 20:57   ` Long Li
2024-02-17 18:03 ` [PATCH 2/6] uio_hv_generic: Query the ringbuffer size for device Saurabh Sengar
2024-02-19  8:50   ` Greg KH
2024-02-19  9:40     ` Saurabh Singh Sengar
2024-02-19 10:02       ` Greg KH
2024-02-19 10:21         ` Saurabh Singh Sengar
2024-02-17 18:03 ` [PATCH 3/6] uio_hv_generic: Enable interrupt for low speed VMBus devices Saurabh Sengar
2024-03-12 20:59   ` Long Li
2024-02-17 18:03 ` Saurabh Sengar [this message]
2024-03-13 19:12   ` [PATCH 4/6] tools: hv: Add vmbus_bufring Long Li
2024-02-17 18:03 ` [PATCH 5/6] tools: hv: Add new fcopy application based on uio driver Saurabh Sengar
2024-02-19  8:53   ` Greg KH
2024-02-19  9:24     ` Saurabh Singh Sengar
2024-02-19  9:52       ` Greg KH
2024-02-19 10:23         ` Saurabh Singh Sengar
2024-03-13 19:23   ` Long Li
2024-02-17 18:03 ` [PATCH 6/6] Drivers: hv: Remove fcopy driver Saurabh Sengar
2024-03-13 19:25   ` Long Li
2024-02-18  7:10 ` [PATCH 0/6] Low speed Hyper-V devices support Greg KH
2024-02-18  7:51   ` Saurabh Singh Sengar
2024-02-18  9:09     ` Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1708193020-14740-5-git-send-email-ssengar@linux.microsoft.com \
    --to=ssengar@linux.microsoft.com \
    --cc=decui@microsoft.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=haiyangz@microsoft.com \
    --cc=kys@microsoft.com \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ssengar@microsoft.com \
    --cc=wei.liu@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.