From: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
To: roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	liranl-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org,
	yevgenyp-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org,
	Jack Morgenstein
	<jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
Subject: [PATCH for-next V1 17/29] IB/mlx4: SRIOV multiplex and demultiplex MADs
Date: Tue, 19 Jun 2012 11:21:49 +0300
Message-ID: <1340094121-14858-18-git-send-email-jackm@dev.mellanox.co.il>
In-Reply-To: <1340094121-14858-1-git-send-email-jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>

Special QPs are para-virtualized.

vHCAs are not given direct access to QP0/1. Rather, these QPs are operated by a
special context hosted by the PF, which mediates access to/from vHCAs.
This is done by opening a "tunnel" per vHCA port per QP0/1. A tunnel comprises
a pair of UD QPs: a "Tunnel QP" in the PF context and a "Proxy QP" in the vHCA.
All vHCA MAD traffic must pass through the corresponding tunnel.
vHCA QPs cannot be assigned to VL15 and are denied the well-known QKey.
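
To make the tunnel pairing concrete, here is a minimal standalone C
sketch (a model, not the driver code; the type and field names are
illustrative assumptions). In the patch below, the PF-side state for
this actually lives in dev->sriov.demux[port - 1].tun[slave]:

    /* sketch: one tunnel per vHCA, per port, per special QP (QP0/QP1) */
    #include <stdio.h>

    #define MAX_VFS   8
    #define NUM_PORTS 2

    struct tunnel {
        int proxy_qpn;   /* in the vHCA: the slave posts its MADs here */
        int tunnel_qpn;  /* in the PF context: receives the slave MADs */
    };

    static struct tunnel tun[MAX_VFS][NUM_PORTS][2];

    static struct tunnel *find_tunnel(int slave, int port, int is_gsi)
    {
        return &tun[slave][port - 1][is_gsi];
    }

    int main(void)
    {
        struct tunnel *t = find_tunnel(3, 1, 1); /* vHCA 3, port 1, QP1 */
        printf("proxy=%d tunnel=%d\n", t->proxy_qpn, t->tunnel_qpn);
        return 0;
    }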

Outgoing messages are "multiplexed" (i.e., directed to the wire via the real
special QP, which serves all vHCAs; see mlx4_ib_multiplex_mad() below).

Incoming messages are "demultiplexed" (i.e., steered by the PPF to the correct
VF or to the PF; see mlx4_ib_demux_mad() below).

QP0 access is restricted to the PF vHCA. VF vHCAs also have (virtual) QP0s,
but they never receive SMPs, and any SMPs they send are discarded.
QP1 traffic is allowed for all vHCAs, but special care is required to bridge
the gap between the host and network views.

Specifically:
- Transaction IDs are mapped to guarantee uniqueness among vHCAs
  (a sketch of this remapping follows the list)
- CM para-virtualization
  o   Incoming requests are steered to the correct vHCA according to the embedded GID
  o   Local communication IDs are mapped to ensure uniqueness among vHCAs
  (see the patch that adds CM paravirtualization.)
- Multicast para-virtualization
  o   The PF context aggregates membership state from all vHCAs
  o   The SA is contacted only when the aggregate membership changes
  o   If the aggregate does not change, the PF context will provide the
       requesting vHCA with the proper response
  (see the patch that adds multicast group paravirtualization.)
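
Of the items above, only the transaction-ID mapping is implemented in
this patch (see mlx4_ib_demux_mad()/mlx4_ib_multiplex_mad() below); CM
and multicast handling arrive in later patches. A minimal standalone
sketch of the TID idea, assuming a host-order 64-bit TID for clarity
(the driver does the equivalent byte-wise on the big-endian tid field,
and leaves the value 255, which marks dom0, unmapped):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t stamp_tid(uint64_t tid, uint8_t slave)
    {
        /* egress: the msb must currently be 0, or the MAD is rejected */
        return (tid & 0x00FFFFFFFFFFFFFFULL) | ((uint64_t)slave << 56);
    }

    static uint8_t unstamp_tid(uint64_t *tid)
    {
        /* ingress (response): recover the slave, then restore the tid */
        uint8_t slave = (uint8_t)(*tid >> 56);

        *tid &= 0x00FFFFFFFFFFFFFFULL;
        return slave;
    }

    int main(void)
    {
        uint64_t tid = stamp_tid(0x1234, 5);
        uint8_t slave = unstamp_tid(&tid);

        printf("slave=%u tid=0x%llx\n", slave, (unsigned long long)tid);
        return 0;
    }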

Incoming MADs are steered according to (a condensed sketch follows this list):
- the DGID, if a GRH is present
- the mapped transaction ID, for response MADs
- the embedded GID, in CM requests
- the remote communication ID, in other CM messages
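
A condensed, standalone C sketch of that steering order as implemented
by mlx4_ib_demux_mad() below (the CM-based rules come from a later
patch in the series; the helper names here are illustrative stand-ins
for the driver's GID-cache lookup and master-function query):

    #include <stdio.h>

    struct mad_hdr {
        unsigned char method;
        unsigned long long tid;      /* host order, for this sketch */
    };

    /* stand-ins: the driver uses mlx4_ib_find_real_gid() and
     * mlx4_master_func_num() */
    static int gid_to_slave(unsigned long long iface_id)
    {
        return iface_id == 0xabcd ? 2 : -1;
    }

    static int pf_slave_num(void) { return 0; }

    static int steer(const struct mad_hdr *hdr, int has_grh,
                     unsigned long long dgid_iface_id)
    {
        int slave = pf_slave_num();  /* default: the MAD is for the PF */

        if (hdr->method & 0x80)      /* response: originating slave is */
            slave = (int)(hdr->tid >> 56); /* in the TID's top byte */

        if (has_grh)                 /* a GRH overrides: match the DGID */
            slave = gid_to_slave(dgid_iface_id);

        return slave;                /* negative means drop the MAD */
    }

    int main(void)
    {
        struct mad_hdr h = { .method = 0x81, .tid = 3ULL << 56 };

        printf("%d %d\n", steer(&h, 0, 0), steer(&h, 1, 0xabcd));
        return 0;
    }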

Signed-off-by: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
---
 drivers/infiniband/hw/mlx4/mad.c |  567 +++++++++++++++++++++++++++++++++++++-
 1 files changed, 565 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index d508597..819fa5c 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -32,6 +32,8 @@
 
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_cache.h>
 
 #include <linux/mlx4/cmd.h>
 #include <linux/gfp.h>
@@ -297,6 +299,254 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
 	}
 }
 
+static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
+							     struct ib_sa_mad *sa_mad)
+{
+	return 0;
+}
+
+int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibdev);
+	int i;
+
+	for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+		if (dev->sriov.demux[port - 1].guid_cache[i] == guid)
+			return i;
+	}
+	return -1;
+}
+
+
+static int get_pkey_phys_indices(struct mlx4_ib_dev *ibdev, u8 port, u8 ph_pkey_ix,
+				 u8 *full_pk_ix, u8 *partial_pk_ix,
+				 int *is_full_member)
+{
+	u16 search_pkey;
+	int fm;
+	int err = 0;
+	u16 pk;
+
+	err = ib_get_cached_pkey(&ibdev->ib_dev, port, ph_pkey_ix, &search_pkey);
+	if (err)
+		return err;
+
+	fm = (search_pkey & 0x8000) ? 1 : 0;
+	if (fm) {
+		*full_pk_ix = ph_pkey_ix;
+		search_pkey &= 0x7FFF;
+	} else {
+		*partial_pk_ix = ph_pkey_ix;
+		search_pkey |= 0x8000;
+	}
+
+	if (ib_find_exact_cached_pkey(&ibdev->ib_dev, port, search_pkey, &pk))
+		pk = 0xFFFF;
+
+	if (fm)
+		*partial_pk_ix = (pk & 0xFF);
+	else
+		*full_pk_ix = (pk & 0xFF);
+
+	*is_full_member = fm;
+	return err;
+}
+
+int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
+			  enum ib_qp_type dest_qpt, struct ib_wc *wc,
+			  struct ib_grh *grh, struct ib_mad *mad)
+{
+	struct ib_sge list;
+	struct ib_send_wr wr, *bad_wr;
+	struct mlx4_ib_demux_pv_ctx *tun_ctx;
+	struct mlx4_ib_demux_pv_qp *tun_qp;
+	struct mlx4_rcv_tunnel_mad *tun_mad;
+	struct ib_ah_attr attr;
+	struct ib_ah *ah;
+	struct ib_qp *src_qp = NULL;
+	unsigned tun_tx_ix = 0;
+	int dqpn;
+	int ret = 0;
+	int i;
+	int is_full_member = 0;
+	u16 tun_pkey_ix;
+	u8 ph_pkey_ix, full_pk_ix = 0, partial_pk_ix = 0;
+
+	if (dest_qpt > IB_QPT_GSI)
+		return -EINVAL;
+
+	tun_ctx = dev->sriov.demux[port-1].tun[slave];
+
+	/* check if proxy qp created */
+	if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
+		return -EAGAIN;
+
+	/* QP0 forwarding only for Dom0 */
+	if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
+		return -EINVAL;
+
+	if (!dest_qpt)
+		tun_qp = &tun_ctx->qp[0];
+	else
+		tun_qp = &tun_ctx->qp[1];
+
+	/* compute pkey index for slave */
+	/* get physical pkey -- virtualized Dom0 pkey to phys */
+	if (dest_qpt) {
+		ph_pkey_ix =
+			dev->pkeys.virt2phys_pkey[mlx4_master_func_num(dev->dev)][port - 1][wc->pkey_index];
+
+		/* now, translate this to the slave pkey index */
+		ret = get_pkey_phys_indices(dev, port, ph_pkey_ix, &full_pk_ix,
+					    &partial_pk_ix, &is_full_member);
+		if (ret)
+			return -EINVAL;
+
+		for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
+			if ((dev->pkeys.virt2phys_pkey[slave][port - 1][i] == full_pk_ix) ||
+			    (is_full_member &&
+			     (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == partial_pk_ix)))
+				break;
+		}
+		if (i == dev->dev->caps.pkey_table_len[port])
+			return -EINVAL;
+		tun_pkey_ix = i;
+	} else
+		tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
+
+	dqpn = dev->dev->caps.sqp_start + 8 * slave + port + (dest_qpt * 2) - 1;
+
+	/* get tunnel tx data buf for slave */
+	src_qp = tun_qp->qp;
+
+	/* create ah. Just need an empty one with the port num for the post send.
+	 * The driver will set the force loopback bit in post_send */
+	memset(&attr, 0, sizeof attr);
+	attr.port_num = port;
+	ah = ib_create_ah(tun_ctx->pd, &attr);
+	if (IS_ERR(ah))
+		return -ENOMEM;
+
+	/* allocate the tunnel tx buf only after all simple-return failure checks */
+	spin_lock(&tun_qp->tx_lock);
+	if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >=
+	    (MLX4_NUM_TUNNEL_BUFS - 1))
+		ret = -EAGAIN;
+	else
+		tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+	spin_unlock(&tun_qp->tx_lock);
+	if (ret)
+		goto out;
+
+	tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
+	if (tun_qp->tx_ring[tun_tx_ix].ah)
+		ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
+	tun_qp->tx_ring[tun_tx_ix].ah = ah;
+	ib_dma_sync_single_for_cpu(&dev->ib_dev,
+				   tun_qp->tx_ring[tun_tx_ix].buf.map,
+				   sizeof(struct mlx4_rcv_tunnel_mad),
+				   DMA_TO_DEVICE);
+
+	/* copy over to tunnel buffer */
+	if (grh)
+		memcpy(&tun_mad->grh, grh, sizeof(*grh));
+	memcpy(&tun_mad->mad, mad, sizeof(*mad));
+
+	/* adjust tunnel data */
+	tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
+	tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
+	tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
+	tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
+	tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
+
+	ib_dma_sync_single_for_device(&dev->ib_dev,
+				      tun_qp->tx_ring[tun_tx_ix].buf.map,
+				      sizeof(struct mlx4_rcv_tunnel_mad),
+				      DMA_TO_DEVICE);
+
+	list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
+	list.length = sizeof(struct mlx4_rcv_tunnel_mad);
+	list.lkey = tun_ctx->mr->lkey;
+
+	wr.wr.ud.ah = ah;
+	wr.wr.ud.port_num = port;
+	wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
+	wr.wr.ud.remote_qpn = dqpn;
+	wr.next = NULL;
+	wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
+	wr.sg_list = &list;
+	wr.num_sge = 1;
+	wr.opcode = IB_WR_SEND;
+	wr.send_flags = IB_SEND_SIGNALED;
+
+	ret = ib_post_send(src_qp, &wr, &bad_wr);
+out:
+	if (ret)
+		ib_destroy_ah(ah);
+	return ret;
+}
+
+static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
+			struct ib_wc *wc, struct ib_grh *grh,
+			struct ib_mad *mad)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibdev);
+	int err;
+	int slave;
+	u8 *slave_id;
+
+	/* Initially assume that this mad is for us */
+	slave = mlx4_master_func_num(dev->dev);
+
+	/* See if the slave id is encoded in a response mad */
+	if (mad->mad_hdr.method & 0x80) {
+		slave_id = (u8 *) &mad->mad_hdr.tid;
+		slave = *slave_id;
+		if (slave != 255) /* 255 indicates the dom0 */
+			*slave_id = 0; /* remap tid */
+	}
+
+	/* If a grh is present, we demux according to it */
+	if (wc->wc_flags & IB_WC_GRH) {
+		slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
+		if (slave < 0) {
+			mlx4_ib_warn(ibdev, "failed matching grh\n");
+			return -ENOENT;
+		}
+	}
+	/* Class-specific handling */
+	switch (mad->mad_hdr.mgmt_class) {
+	case IB_MGMT_CLASS_SUBN_ADM:
+		if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
+					     (struct ib_sa_mad *) mad))
+			return 0;
+		break;
+	case IB_MGMT_CLASS_DEVICE_MGMT:
+		if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP)
+			return 0;
+		break;
+	default:
+		/* Drop unsupported classes for slaves in tunnel mode */
+		if (slave != mlx4_master_func_num(dev->dev)) {
+			pr_debug("dropping unsupported ingress mad from class:%d "
+				 "for slave:%d\n", mad->mad_hdr.mgmt_class, slave);
+			return 0;
+		}
+	}
+	/* make sure no out-of-range slave id (including an unmapped 255) slips through */
+	if (slave >= dev->dev->caps.sqp_demux) {
+		mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+			     slave, dev->dev->caps.sqp_demux);
+		return -ENOENT;
+	}
+
+	err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+	if (err)
+		pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+			 slave, err);
+	return 0;
+}
+
 static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 			struct ib_wc *in_wc, struct ib_grh *in_grh,
 			struct ib_mad *in_mad, struct ib_mad *out_mad)
@@ -608,6 +858,216 @@ static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
 	return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
 }
 
+static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
+		int slave, struct ib_sa_mad *sa_mad)
+{
+	return 0;
+}
+
+static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
+{
+	int slave_start = dev->dev->caps.sqp_start + 8 * slave;
+
+	return (qpn >= slave_start && qpn <= slave_start + 1);
+}
+
+
+int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
+			 enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
+			 u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
+{
+	struct ib_sge list;
+	struct ib_send_wr wr, *bad_wr;
+	struct mlx4_ib_demux_pv_ctx *sqp_ctx;
+	struct mlx4_ib_demux_pv_qp *sqp;
+	struct mlx4_mad_snd_buf *sqp_mad;
+	struct ib_ah *ah;
+	struct ib_qp *send_qp = NULL;
+	unsigned wire_tx_ix = 0;
+	int ret = 0;
+	u16 wire_pkey_ix;
+	int src_qpnum;
+	u8 sgid_index;
+
+
+	sqp_ctx = dev->sriov.sqps[port-1];
+
+	/* check if proxy qp created */
+	if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
+		return -EAGAIN;
+
+	/* QP0 forwarding only for Dom0 */
+	if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
+		return -EINVAL;
+
+	if (dest_qpt == IB_QPT_SMI) {
+		src_qpnum = 0;
+		sqp = &sqp_ctx->qp[0];
+		wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
+	} else {
+		src_qpnum = 1;
+		sqp = &sqp_ctx->qp[1];
+		wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index];
+	}
+
+	send_qp = sqp->qp;
+
+	/* create ah */
+	sgid_index = attr->grh.sgid_index;
+	attr->grh.sgid_index = 0;
+	ah = ib_create_ah(sqp_ctx->pd, attr);
+	if (IS_ERR(ah))
+		return -ENOMEM;
+	attr->grh.sgid_index = sgid_index;
+	to_mah(ah)->av.ib.gid_index = sgid_index;
+	/* get rid of force-loopback bit */
+	to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
+	spin_lock(&sqp->tx_lock);
+	if (sqp->tx_ix_head - sqp->tx_ix_tail >=
+	    (MLX4_NUM_TUNNEL_BUFS - 1))
+		ret = -EAGAIN;
+	else
+		wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+	spin_unlock(&sqp->tx_lock);
+	if (ret)
+		goto out;
+
+	sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
+	if (sqp->tx_ring[wire_tx_ix].ah)
+		ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
+	sqp->tx_ring[wire_tx_ix].ah = ah;
+	ib_dma_sync_single_for_cpu(&dev->ib_dev,
+				   sqp->tx_ring[wire_tx_ix].buf.map,
+				   sizeof(struct mlx4_mad_snd_buf),
+				   DMA_TO_DEVICE);
+
+	memcpy(&sqp_mad->payload, mad, sizeof(*mad));
+
+	ib_dma_sync_single_for_device(&dev->ib_dev,
+				      sqp->tx_ring[wire_tx_ix].buf.map,
+				      sizeof(struct mlx4_mad_snd_buf),
+				      DMA_TO_DEVICE);
+
+	list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
+	list.length = sizeof(struct mlx4_mad_snd_buf);
+	list.lkey = sqp_ctx->mr->lkey;
+
+	wr.wr.ud.ah = ah;
+	wr.wr.ud.port_num = port;
+	wr.wr.ud.pkey_index = wire_pkey_ix;
+	wr.wr.ud.remote_qkey = qkey;
+	wr.wr.ud.remote_qpn = remote_qpn;
+	wr.next = NULL;
+	wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
+	wr.sg_list = &list;
+	wr.num_sge = 1;
+	wr.opcode = IB_WR_SEND;
+	wr.send_flags = IB_SEND_SIGNALED;
+
+	ret = ib_post_send(send_qp, &wr, &bad_wr);
+out:
+	if (ret)
+		ib_destroy_ah(ah);
+	return ret;
+}
+
+static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+	struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)];
+	int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
+	struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
+	struct mlx4_ib_ah ah;
+	struct ib_ah_attr ah_attr;
+	u8 *slave_id;
+	int slave;
+
+	/* Get slave that sent this packet */
+	if (wc->src_qp < dev->dev->caps.sqp_start ||
+	    wc->src_qp >= dev->dev->caps.base_tunnel_sqpn ||
+	    (wc->src_qp & 0x1) != ctx->port - 1 ||
+	    wc->src_qp & 0x4) {
+		mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
+		return;
+	}
+	slave = ((wc->src_qp & ~0x7) - dev->dev->caps.sqp_start) / 8;
+	if (slave != ctx->slave) {
+		mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
+			     "belongs to another slave\n", wc->src_qp);
+		return;
+	}
+	if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
+		mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
+			     "non-master trying to send QP0 packets\n", wc->src_qp);
+		return;
+	}
+
+	/* Map transaction ID */
+	ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
+				   sizeof(struct mlx4_tunnel_mad),
+				   DMA_FROM_DEVICE);
+	switch (tunnel->mad.mad_hdr.method) {
+	case IB_MGMT_METHOD_SET:
+	case IB_MGMT_METHOD_GET:
+	case IB_MGMT_METHOD_REPORT:
+	case IB_SA_METHOD_GET_TABLE:
+	case IB_SA_METHOD_DELETE:
+	case IB_SA_METHOD_GET_MULTI:
+	case IB_SA_METHOD_GET_TRACE_TBL:
+		slave_id = (u8 *) &tunnel->mad.mad_hdr.tid;
+		if (*slave_id) {
+			mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d "
+				     "class:%d slave:%d\n", *slave_id,
+				     tunnel->mad.mad_hdr.mgmt_class, slave);
+			return;
+		} else
+			*slave_id = slave;
+	default:
+		/* nothing */;
+	}
+
+	/* Class-specific handling */
+	switch (tunnel->mad.mad_hdr.mgmt_class) {
+	case IB_MGMT_CLASS_SUBN_ADM:
+		if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
+			      (struct ib_sa_mad *) &tunnel->mad))
+			return;
+		break;
+	case IB_MGMT_CLASS_DEVICE_MGMT:
+		if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET &&
+		    tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET)
+			return;
+		break;
+	default:
+		/* Drop unsupported classes for slaves in tunnel mode */
+		if (slave != mlx4_master_func_num(dev->dev)) {
+			mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d "
+				     "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave);
+			return;
+		}
+	}
+
+	/* We are using standard ib_core services to send the mad, so generate a
+	 * standard address handle by decoding the tunnelled mlx4_ah fields */
+	memcpy(&ah.av, &tunnel->hdr.av, sizeof(struct mlx4_av));
+	ah.ibah.device = ctx->ib_dev;
+	mlx4_ib_query_ah(&ah.ibah, &ah_attr);
+	if ((ah_attr.ah_flags & IB_AH_GRH) &&
+	    (ah_attr.grh.sgid_index != slave)) {
+		mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
+			     slave, ah_attr.grh.sgid_index);
+		return;
+	}
+
+	mlx4_ib_send_to_wire(dev, slave, ctx->port,
+			     is_proxy_qp0(dev, wc->src_qp, slave) ?
+			     IB_QPT_SMI : IB_QPT_GSI,
+			     be16_to_cpu(tunnel->hdr.pkey_index),
+			     be32_to_cpu(tunnel->hdr.remote_qpn),
+			     be32_to_cpu(tunnel->hdr.qkey),
+			     &ah_attr, &tunnel->mad);
+}
+
 static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
 				 enum ib_qp_type qp_type, int is_tun)
 {
@@ -732,7 +1192,57 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
 
 static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
 {
-	/* dummy until next patch in series */
+	struct mlx4_ib_demux_pv_ctx *ctx;
+	struct mlx4_ib_demux_pv_qp *tun_qp;
+	struct ib_wc wc;
+	int ret;
+	ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
+	ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+
+	while (ib_poll_cq(ctx->cq, 1, &wc) == 1) {
+		tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
+		if (wc.status == IB_WC_SUCCESS) {
+			switch (wc.opcode) {
+			case IB_WC_RECV:
+				mlx4_ib_multiplex_mad(ctx, &wc);
+				ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp,
+							     wc.wr_id &
+							     (MLX4_NUM_TUNNEL_BUFS - 1));
+				if (ret)
+					pr_err("Failed reposting tunnel "
+					       "buf:%lld\n", wc.wr_id);
+				break;
+			case IB_WC_SEND:
+				pr_debug("received tunnel send completion:"
+					 "wrid=0x%llx, status=0x%x\n",
+					 wc.wr_id, wc.status);
+				ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+					      (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+				tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+					= NULL;
+				spin_lock(&tun_qp->tx_lock);
+				tun_qp->tx_ix_tail++;
+				spin_unlock(&tun_qp->tx_lock);
+
+				break;
+			default:
+				break;
+			}
+		} else  {
+			pr_debug("mlx4_ib: completion error in tunnel: %d."
+				 " status = %d, wrid = 0x%llx\n",
+				 ctx->slave, wc.status, wc.wr_id);
+			if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
+				ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+					      (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+				tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+					= NULL;
+				spin_lock(&tun_qp->tx_lock);
+				tun_qp->tx_ix_tail++;
+				spin_unlock(&tun_qp->tx_lock);
+			}
+		}
+	}
 }
 
 static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
@@ -840,7 +1350,60 @@ err_qp:
  */
 static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
 {
-	/* dummy until next patch in series */
+	struct mlx4_ib_demux_pv_ctx *ctx;
+	struct mlx4_ib_demux_pv_qp *sqp;
+	struct ib_wc wc;
+	struct ib_grh *grh;
+	struct ib_mad *mad;
+
+	ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
+	ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+
+	while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) {
+		sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
+		if (wc.status == IB_WC_SUCCESS) {
+			switch (wc.opcode) {
+			case IB_WC_SEND:
+				ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+					      (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+				sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+					= NULL;
+				spin_lock(&sqp->tx_lock);
+				sqp->tx_ix_tail++;
+				spin_unlock(&sqp->tx_lock);
+				break;
+			case IB_WC_RECV:
+				mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
+						(sqp->ring[wc.wr_id &
+						(MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
+				grh = &(((struct mlx4_mad_rcv_buf *)
+						(sqp->ring[wc.wr_id &
+						(MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
+				mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
+				if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
+							   (MLX4_NUM_TUNNEL_BUFS - 1)))
+					pr_err("Failed reposting SQP "
+					       "buf:%lld\n", wc.wr_id);
+				break;
+			default:
+				BUG_ON(1);
+				break;
+			}
+		} else  {
+			pr_debug("mlx4_ib: completion error in tunnel: %d."
+				 " status = %d, wrid = 0x%llx\n",
+				 ctx->slave, wc.status, wc.wr_id);
+			if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
+				ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+					      (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+				sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+					= NULL;
+				spin_lock(&sqp->tx_lock);
+				sqp->tx_ix_tail++;
+				spin_unlock(&sqp->tx_lock);
+			}
+		}
+	}
 }
 
 static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
-- 
1.7.1
