From: Bernard Metzler <bmt-OA+xvbQnYDHMbYB6QlFGEg@public.gmane.org>
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: Bernard Metzler <bmt-OA+xvbQnYDHMbYB6QlFGEg@public.gmane.org>
Subject: [PATCH 08/14] SIWv3: Queue Pair management: siw_qp.c
Date: Fri, 22 Jul 2011 20:47:53 +0200
Message-ID: <1311360473-15238-1-git-send-email-bmt@zurich.ibm.com>
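
Add Queue Pair management for the softiwarp driver: QP state
transitions (siw_qp_modify), association of TCP socket callbacks
with a QP, setup of the inbound RDMA read queue and of MPA CRC32c
processing, memory/SGE/SGL access checking, and flushing of send
and receive queues to the completion queues.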

---
 drivers/infiniband/hw/siw/siw_qp.c | 1007 ++++++++++++++++++++++++++++++++++++
 1 files changed, 1007 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/siw/siw_qp.c

diff --git a/drivers/infiniband/hw/siw/siw_qp.c b/drivers/infiniband/hw/siw/siw_qp.c
new file mode 100644
index 0000000..ef124eb
--- /dev/null
+++ b/drivers/infiniband/hw/siw/siw_qp.c
@@ -0,0 +1,1007 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt-OA+xvbQnYDHMbYB6QlFGEg@public.gmane.org>
+ *          Fredy Neeser <nfd-OA+xvbQnYDHMbYB6QlFGEg@public.gmane.org>
+ *
+ * Copyright (c) 2008-2011, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/file.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/tcp.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+#include "siw_cm.h"
+
+
+#if DPRINT_MASK > 0
+static char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = {
+	[SIW_QP_STATE_IDLE]		= "IDLE",
+	[SIW_QP_STATE_RTR]		= "RTR",
+	[SIW_QP_STATE_RTS]		= "RTS",
+	[SIW_QP_STATE_CLOSING]		= "CLOSING",
+	[SIW_QP_STATE_TERMINATE]	= "TERMINATE",
+	[SIW_QP_STATE_ERROR]		= "ERROR",
+	[SIW_QP_STATE_MORIBUND]		= "MORIBUND",
+	[SIW_QP_STATE_UNDEF]		= "UNDEF"
+};
+#endif
+
+/*
+ * iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings,
+ * on a per-RDMAP message basis. Please keep the initializers in order.
+ * The MPA length of each message is initialized to the minimum
+ * (header-only) packet size.
+ */
+struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] = { {
+	.hdr_len = sizeof(struct iwarp_rdma_write),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2),
+	.ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST
+		| cpu_to_be16(DDP_VERSION << 8)
+		| cpu_to_be16(RDMAP_VERSION << 6)
+		| cpu_to_be16(RDMAP_RDMA_WRITE),
+	.proc_data = siw_proc_write
+},
+{
+	.hdr_len = sizeof(struct iwarp_rdma_rreq),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2),
+	.ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST
+		| cpu_to_be16(DDP_VERSION << 8)
+		| cpu_to_be16(RDMAP_VERSION << 6)
+		| cpu_to_be16(RDMAP_RDMA_READ_REQ),
+	.proc_data = siw_proc_rreq
+},
+{
+	.hdr_len = sizeof(struct iwarp_rdma_rresp),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2),
+	.ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST
+		| cpu_to_be16(DDP_VERSION << 8)
+		| cpu_to_be16(RDMAP_VERSION << 6)
+		| cpu_to_be16(RDMAP_RDMA_READ_RESP),
+	.proc_data = siw_proc_rresp
+},
+{
+	.hdr_len = sizeof(struct iwarp_send),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
+	.ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST
+		| cpu_to_be16(DDP_VERSION << 8)
+		| cpu_to_be16(RDMAP_VERSION << 6)
+		| cpu_to_be16(RDMAP_SEND),
+	.proc_data = siw_proc_send
+},
+{
+	.hdr_len = sizeof(struct iwarp_send_inv),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
+	.ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST
+		| cpu_to_be16(DDP_VERSION << 8)
+		| cpu_to_be16(RDMAP_VERSION << 6)
+		| cpu_to_be16(RDMAP_SEND_INVAL),
+	.proc_data = siw_proc_unsupp
+},
+{
+	.hdr_len = sizeof(struct iwarp_send),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
+	.ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST
+		| cpu_to_be16(DDP_VERSION << 8)
+		| cpu_to_be16(RDMAP_VERSION << 6)
+		| cpu_to_be16(RDMAP_SEND_SE),
+	.proc_data = siw_proc_send
+},
+{
+	.hdr_len = sizeof(struct iwarp_send_inv),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
+	.ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST
+		| cpu_to_be16(DDP_VERSION << 8)
+		| cpu_to_be16(RDMAP_VERSION << 6)
+		| cpu_to_be16(RDMAP_SEND_SE_INVAL),
+	.proc_data = siw_proc_unsupp
+},
+{
+	.hdr_len = sizeof(struct iwarp_terminate),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2),
+	.ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST
+		| cpu_to_be16(DDP_VERSION << 8)
+		| cpu_to_be16(RDMAP_VERSION << 6)
+		| cpu_to_be16(RDMAP_TERMINATE),
+	.proc_data = siw_proc_terminate
+} };
+
+
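+/*
+ * Socket data-ready callback: while the QP is in RTS and its state
+ * lock can be taken without blocking, read available TCP data into
+ * the iWARP receive path via tcp_read_sock()/siw_tcp_rx_data().
+ */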
+static void siw_qp_llp_data_ready(struct sock *sk, int flags)
+{
+	struct siw_qp		*qp;
+
+	read_lock(&sk->sk_callback_lock);
+
+	if (unlikely(!sk->sk_user_data || !sk_to_qp(sk))) {
+		dprint(DBG_ON, " No QP: %p\n", sk->sk_user_data);
+		goto done;
+	}
+	qp = sk_to_qp(sk);
+
+	if (down_read_trylock(&qp->state_lock)) {
+		read_descriptor_t	rd_desc = {.arg.data = qp, .count = 1};
+
+		dprint(DBG_SK|DBG_RX, "(QP%d): "
+			"state (before tcp_read_sock)=%d, flags=%x\n",
+			QP_ID(qp), qp->attrs.state, flags);
+
+		if (likely(qp->attrs.state == SIW_QP_STATE_RTS))
+			/*
+			 * Receive data during the socket callback. TCP
+			 * gracefully handles the case where there is
+			 * nothing to receive (siw_tcp_rx_data() is then
+			 * not called).
+			 */
+			tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);
+
+		dprint(DBG_SK|DBG_RX, "(QP%d): "
+			"state (after tcp_read_sock)=%d, flags=%x\n",
+			QP_ID(qp), qp->attrs.state, flags);
+
+		up_read(&qp->state_lock);
+	} else {
+		dprint(DBG_SK|DBG_RX, "(QP%d): "
+			"Unable to acquire state_lock\n", QP_ID(qp));
+	}
+done:
+	read_unlock(&sk->sk_callback_lock);
+}
+
+
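+/*
+ * Handle a close of the LLP (TCP) connection: suspend RX and TX
+ * processing, detach the socket from the QP, move the QP to the
+ * resulting state and flush its send and receive queues.
+ */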
+void siw_qp_llp_close(struct siw_qp *qp)
+{
+	dprint(DBG_CM, "(QP%d): Enter: SIW QP state = %s, cep=0x%p\n",
+		QP_ID(qp), siw_qp_state_to_string[qp->attrs.state],
+		qp->cep);
+
+	down_write(&qp->state_lock);
+
+	qp->rx_ctx.rx_suspend = 1;
+	qp->tx_ctx.tx_suspend = 1;
+	qp->attrs.llp_stream_handle = NULL;
+
+	switch (qp->attrs.state) {
+
+	case SIW_QP_STATE_RTS:
+	case SIW_QP_STATE_RTR:
+	case SIW_QP_STATE_IDLE:
+	case SIW_QP_STATE_TERMINATE:
+
+		qp->attrs.state = SIW_QP_STATE_ERROR;
+
+		break;
+	/*
+	 * SIW_QP_STATE_CLOSING:
+	 *
+	 * This is a forced close. Shall the QP be moved to
+	 * ERROR or IDLE?
+	 */
+	case SIW_QP_STATE_CLOSING:
+		if (!TX_IDLE(qp))
+			qp->attrs.state = SIW_QP_STATE_ERROR;
+		else
+			qp->attrs.state = SIW_QP_STATE_IDLE;
+
+		break;
+
+	default:
+		dprint(DBG_CM, " No state transition needed: %d\n",
+			qp->attrs.state);
+		break;
+	}
+	siw_sq_flush(qp);
+	siw_rq_flush(qp);
+
+	up_write(&qp->state_lock);
+
+	dprint(DBG_CM, "(QP%d): Exit: SIW QP state = %s\n",
+		QP_ID(qp), siw_qp_state_to_string[qp->attrs.state]);
+}
+
+
+/*
+ * Socket callback informing about newly available send space.
+ * Schedules SQ work to process pending SQ elements.
+ */
+static void siw_qp_llp_write_space(struct sock *sk)
+{
+	struct siw_qp	*qp = sk_to_qp(sk);
+
+	/*
+	 * TODO:
+	 * Mirror the sk_stream_write_space() logic under iWARP constraints:
+	 * clear SOCK_NOSPACE only if the send space can hold a reasonably
+	 * sized FPDU.
+	 */
+#ifdef SIW_TX_FULLSEGS
+	struct socket *sock = sk->sk_socket;
+	if (sk_stream_wspace(sk) >= (int)qp->tx_ctx.fpdu_len && sock) {
+		clear_bit(SOCK_NOSPACE, &sock->flags);
+		siw_sq_queue_work(qp);
+	}
+#else
+	sk_stream_write_space(sk);
+
+	if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+		siw_sq_queue_work(qp);
+#endif
+}
+
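+/*
+ * Associate a TCP socket with a QP: under sk_callback_lock, install
+ * the QP's data_ready and write_space callbacks and store the stream
+ * handle in the QP attributes.
+ */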
+static void siw_qp_socket_assoc(struct socket *s, struct siw_qp *qp)
+{
+	struct sock *sk = s->sk;
+
+	write_lock_bh(&sk->sk_callback_lock);
+
+	qp->attrs.llp_stream_handle = s;
+	s->sk->sk_data_ready = siw_qp_llp_data_ready;
+	s->sk->sk_write_space = siw_qp_llp_write_space;
+
+	write_unlock_bh(&sk->sk_callback_lock);
+}
+
+
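+/*
+ * Pre-allocate 'size' work queue elements for serving inbound RDMA
+ * read requests and place them on the QP's free queue. On allocation
+ * failure, all elements allocated so far are released again.
+ */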
+static int siw_qp_irq_init(struct siw_qp *qp, int size)
+{
+	struct siw_wqe *wqe = NULL;
+	int i = 0;
+
+	dprint(DBG_CM|DBG_WR, "(QP%d): irq size: %d\n", QP_ID(qp), size);
+	if (size <= 0)
+		return 0;
+
+	atomic_set(&qp->irq_space, size);
+
+	while (size--) {
+		wqe = kzalloc(sizeof(struct siw_wqe), GFP_KERNEL);
+		if (!wqe)
+			break;
+
+		INIT_LIST_HEAD(&wqe->list);
+		list_add(&wqe->list, &qp->freeq);
+		i++;
+		SIW_INC_STAT_WQE;
+	}
+	if (!wqe) {
+		dprint(DBG_ON, "(QP%d): Failed\n", QP_ID(qp));
+		while (i--) {
+			wqe = list_first_wqe(&qp->freeq);
+			list_del(&wqe->list);
+			kfree(wqe);
+			SIW_DEC_STAT_WQE;
+		}
+		atomic_set(&qp->irq_space, 0);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+
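+/*
+ * Placeholder for sending an RDMAP TERMINATE message; actual
+ * transmission is not yet implemented (see TODO below).
+ */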
+static void siw_send_terminate(struct siw_qp *qp)
+{
+	struct iwarp_terminate	pkt;
+
+	memset(&pkt, 0, sizeof pkt);
+	/*
+	 * TODO: send TERMINATE
+	 */
+	dprint(DBG_CM, "(QP%d): Todo\n", QP_ID(qp));
+}
+
+
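+/*
+ * Allocate crc32c transforms for MPA CRC computation on the transmit
+ * and the receive path. CRC gets enabled only if both allocations
+ * succeed.
+ */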
+static int siw_qp_enable_crc(struct siw_qp *qp)
+{
+	struct siw_iwarp_rx *c_rx = &qp->rx_ctx;
+	struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
+	int rv = 0;
+
+	c_tx->mpa_crc_hd.tfm = crypto_alloc_hash("crc32c", 0,
+						 CRYPTO_ALG_ASYNC);
+	if (IS_ERR(c_tx->mpa_crc_hd.tfm)) {
+		rv = PTR_ERR(c_tx->mpa_crc_hd.tfm);
+		goto out;
+	}
+	c_rx->mpa_crc_hd.tfm = crypto_alloc_hash("crc32c", 0,
+						 CRYPTO_ALG_ASYNC);
+	if (IS_ERR(c_rx->mpa_crc_hd.tfm)) {
+		rv = PTR_ERR(c_rx->mpa_crc_hd.tfm);
+		crypto_free_hash(c_tx->mpa_crc_hd.tfm);
+	}
+out:
+	if (rv)
+		dprint(DBG_ON, "(QP%d): Failed loading crc32c: error=%d.",
+			QP_ID(qp), rv);
+	else
+		c_tx->crc_enabled = c_rx->crc_enabled = 1;
+
+	return rv;
+}
+
+
+/*
+ * siw_qp_modify()
+ *
+ * Apply attribute changes and state transitions to a QP.
+ * The caller holds qp->state_lock.
+ */
+int
+siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs,
+	      enum siw_qp_attr_mask mask)
+{
+	int	drop_conn = 0, rv = 0;
+
+	if (!mask)
+		return 0;
+
+	dprint(DBG_CM, "(QP%d)\n", QP_ID(qp));
+
+	if (mask != SIW_QP_ATTR_STATE) {
+		/*
+		 * changes of qp attributes (maybe state, too)
+		 */
+		if (mask & SIW_QP_ATTR_ACCESS_FLAGS) {
+
+			if (attrs->flags & SIW_RDMA_BIND_ENABLED)
+				qp->attrs.flags |= SIW_RDMA_BIND_ENABLED;
+			else
+				qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED;
+
+			if (attrs->flags & SIW_RDMA_WRITE_ENABLED)
+				qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED;
+			else
+				qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED;
+
+			if (attrs->flags & SIW_RDMA_READ_ENABLED)
+				qp->attrs.flags |= SIW_RDMA_READ_ENABLED;
+			else
+				qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED;
+
+		}
+		/*
+		 * TODO: what else ??
+		 */
+	}
+	if (!(mask & SIW_QP_ATTR_STATE))
+		return 0;
+
+	dprint(DBG_CM, "(QP%d): SIW QP state: %s => %s\n", QP_ID(qp),
+		siw_qp_state_to_string[qp->attrs.state],
+		siw_qp_state_to_string[attrs->state]);
+
+
+	switch (qp->attrs.state) {
+
+	case SIW_QP_STATE_IDLE:
+	case SIW_QP_STATE_RTR:
+
+		switch (attrs->state) {
+
+		case SIW_QP_STATE_RTS:
+
+			if (attrs->mpa.crc) {
+				rv = siw_qp_enable_crc(qp);
+				if (rv)
+					break;
+			}
+			if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
+				dprint(DBG_ON, "(QP%d): socket?\n", QP_ID(qp));
+				rv = -EINVAL;
+				break;
+			}
+			if (!(mask & SIW_QP_ATTR_MPA)) {
+				dprint(DBG_ON, "(QP%d): MPA?\n", QP_ID(qp));
+				rv = -EINVAL;
+				break;
+			}
+			dprint(DBG_CM, "(QP%d): Enter RTS: "
+				"peer 0x%08x, local 0x%08x\n", QP_ID(qp),
+				qp->cep->llp.raddr.sin_addr.s_addr,
+				qp->cep->llp.laddr.sin_addr.s_addr);
+			/*
+			 * Initialize global iWARP TX state
+			 */
+			qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0;
+			qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0;
+			qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0;
+
+			/*
+			 * Initialize global iWARP RX state
+			 */
+			qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1;
+			qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1;
+			qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1;
+
+			/*
+			 * Initialize the IRD free queue; the caller has
+			 * already checked the limits. Add one extra entry,
+			 * since after sending an RResponse the peer may
+			 * issue another RRequest before that RResponse's
+			 * WQE has returned to the free queue.
+			 */
+			++attrs->ird;
+			rv = siw_qp_irq_init(qp, attrs->ird);
+			if (rv)
+				break;
+
+			atomic_set(&qp->orq_space, attrs->ord);
+
+			qp->attrs.ord = attrs->ord;
+			qp->attrs.ird = attrs->ird;
+			qp->attrs.mpa = attrs->mpa;
+			/*
+			 * move socket rx and tx under qp's control
+			 */
+			siw_qp_socket_assoc(attrs->llp_stream_handle, qp);
+
+			qp->attrs.state = SIW_QP_STATE_RTS;
+			/*
+			 * set initial mss
+			 */
+			qp->tx_ctx.tcp_seglen =
+				get_tcp_mss(attrs->llp_stream_handle->sk);
+
+			break;
+
+		case SIW_QP_STATE_ERROR:
+			siw_rq_flush(qp);
+			qp->attrs.state = SIW_QP_STATE_ERROR;
+			if (qp->cep) {
+				siw_cep_put(qp->cep);
+				qp->cep = NULL;
+			}
+			break;
+
+		case SIW_QP_STATE_RTR:
+			/* ignore */
+			break;
+
+		default:
+			dprint(DBG_CM,
+				" QP state transition undefined: %s => %s\n",
+				siw_qp_state_to_string[qp->attrs.state],
+				siw_qp_state_to_string[attrs->state]);
+			break;
+		}
+		break;
+
+	case SIW_QP_STATE_RTS:
+
+		switch (attrs->state) {
+
+		case SIW_QP_STATE_CLOSING:
+			/*
+			 * Verbs: move to IDLE if SQ and ORQ are empty.
+			 * Move to ERROR otherwise. But first of all we must
+			 * close the connection. So we keep CLOSING or ERROR
+			 * as a transient state, schedule the connection drop
+			 * work and wait for the socket state change upcall
+			 * to report the connection as closed.
+			 */
+			if (TX_IDLE(qp))
+				qp->attrs.state = SIW_QP_STATE_CLOSING;
+			else {
+				qp->attrs.state = SIW_QP_STATE_ERROR;
+				siw_sq_flush(qp);
+			}
+			siw_rq_flush(qp);
+
+			drop_conn = 1;
+			break;
+
+		case SIW_QP_STATE_TERMINATE:
+			qp->attrs.state = SIW_QP_STATE_TERMINATE;
+			siw_send_terminate(qp);
+			drop_conn = 1;
+
+			break;
+
+		case SIW_QP_STATE_ERROR:
+			/*
+			 * This is an emergency close.
+			 *
+			 * Any in-progress transmit operation gets
+			 * cancelled, which will likely result in a
+			 * protocol failure if a TX operation is in
+			 * transit. The caller could unconditionally wait
+			 * to give the current operation a chance to
+			 * complete.
+			 * In particular, how should a non-empty IRQ be
+			 * handled? The peer was asking for data transfer
+			 * at a valid point in time.
+			 */
+			siw_sq_flush(qp);
+			siw_rq_flush(qp);
+			qp->attrs.state = SIW_QP_STATE_ERROR;
+			drop_conn = 1;
+
+			break;
+
+		default:
+			dprint(DBG_ON,
+				" QP state transition undefined: %s => %s\n",
+				siw_qp_state_to_string[qp->attrs.state],
+				siw_qp_state_to_string[attrs->state]);
+			break;
+		}
+		break;
+
+	case SIW_QP_STATE_TERMINATE:
+
+		switch (attrs->state) {
+
+		case SIW_QP_STATE_ERROR:
+			siw_rq_flush(qp);
+			qp->attrs.state = SIW_QP_STATE_ERROR;
+
+			if (!TX_IDLE(qp))
+				siw_sq_flush(qp);
+
+			break;
+
+		default:
+			dprint(DBG_ON,
+				" QP state transition undefined: %s => %s\n",
+				siw_qp_state_to_string[qp->attrs.state],
+				siw_qp_state_to_string[attrs->state]);
+		}
+		break;
+
+	case SIW_QP_STATE_CLOSING:
+
+		switch (attrs->state) {
+
+		case SIW_QP_STATE_IDLE:
+			BUG_ON(!TX_IDLE(qp));
+			qp->attrs.state = SIW_QP_STATE_IDLE;
+
+			break;
+
+		case SIW_QP_STATE_CLOSING:
+			/*
+			 * The LLP may already have moved the QP to CLOSING
+			 * due to a graceful peer close initiation.
+			 */
+			break;
+
+		case SIW_QP_STATE_ERROR:
+			/*
+			 * QP was moved to CLOSING by LLP event
+			 * not yet seen by user.
+			 */
+			qp->attrs.state = SIW_QP_STATE_ERROR;
+
+			if (!TX_IDLE(qp))
+				siw_sq_flush(qp);
+
+			siw_rq_flush(qp);
+
+			break;
+
+		default:
+			dprint(DBG_CM,
+				" QP state transition undefined: %s => %s\n",
+				siw_qp_state_to_string[qp->attrs.state],
+				siw_qp_state_to_string[attrs->state]);
+			return -ECONNABORTED;
+		}
+		break;
+
+	default:
+		dprint(DBG_CM, " NOP: State: %d\n", qp->attrs.state);
+		break;
+	}
+	if (drop_conn)
+		siw_qp_cm_drop(qp, 0);
+
+	return rv;
+}
+
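+/*
+ * Map an OFA (ib_verbs) QP id to the corresponding siw QP and return
+ * a pointer to its ib_qp, or NULL if no such QP exists.
+ */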
+struct ib_qp *siw_get_ofaqp(struct ib_device *ofa_dev, int id)
+{
+	struct siw_qp *qp =  siw_qp_id2obj(siw_dev_ofa2siw(ofa_dev), id);
+
+	dprint(DBG_OBJ, ": dev_name: %s, OFA QPID: %d, QP: %p\n",
+		ofa_dev->name, id, qp);
+	if (qp) {
+		/*
+		 * siw_qp_id2obj() increments the object reference count;
+		 * the reference is dropped again here, since only the
+		 * QP pointer is returned.
+		 */
+		siw_qp_put(qp);
+		dprint(DBG_OBJ, " QPID: %d\n", QP_ID(qp));
+		return &qp->ofa_qp;
+	}
+	return (struct ib_qp *)NULL;
+}
+
+/*
+ * siw_check_mem()
+ *
+ * Check protection domain, STAG state, access permissions and
+ * address range for memory object.
+ *
+ * @pd:		Protection Domain memory should belong to
+ * @mem:	memory to be checked
+ * @addr:	starting addr of mem
+ * @perms:	requested access permissions
+ * @len:	len of memory interval to be checked
+ *
+ */
+int siw_check_mem(struct siw_pd *pd, struct siw_mem *mem, u64 addr,
+		  enum siw_access_flags perms, int len)
+{
+	if (siw_mem2mr(mem)->pd != pd) {
+		dprint(DBG_WR|DBG_ON, "(PD%d): PD mismatch %p : %p\n",
+			OBJ_ID(pd),
+			siw_mem2mr(mem)->pd, pd);
+
+		return -EINVAL;
+	}
+	if (mem->stag_state == STAG_INVALID) {
+		dprint(DBG_WR|DBG_ON, "(PD%d): STAG 0x%08x invalid\n",
+			OBJ_ID(pd), OBJ_ID(mem));
+		return -EPERM;
+	}
+	/*
+	 * check access permissions
+	 */
+	if ((mem->perms & perms) < perms) {
+		dprint(DBG_WR|DBG_ON, "(PD%d): "
+			"INSUFFICIENT permissions 0x%08x : 0x%08x\n",
+			OBJ_ID(pd), mem->perms, perms);
+		return -EPERM;
+	}
+	/*
+	 * Check the address interval: the check is relaxed to allow
+	 * memory to be shrunk from the start address _after_ placing or
+	 * fetching len bytes.
+	 * TODO: this relaxation is probably overdone
+	 */
+	if (addr < mem->va || addr + len > mem->va + mem->len) {
+		dprint(DBG_WR|DBG_ON, "(PD%d): MEM interval len %d "
+			"[0x%016llx, 0x%016llx) out of bounds "
+			"[0x%016llx, 0x%016llx) for LKey=0x%08x\n",
+			OBJ_ID(pd), len, (unsigned long long)addr,
+			(unsigned long long)(addr + len),
+			(unsigned long long)mem->va,
+			(unsigned long long)(mem->va + mem->len),
+			OBJ_ID(mem));
+
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * siw_check_sge()
+ *
+ * Check SGE for access rights in given interval
+ *
+ * @pd:		Protection Domain memory should belong to
+ * @sge:	SGE to be checked
+ * @perms:	requested access permissions
+ * @off:	starting offset in SGE
+ * @len:	len of memory interval to be checked
+ *
+ * NOTE: The function references each SGE's memory object (sge->mem)
+ * if not yet done. The new reference is kept if the check succeeds
+ * and released if it fails. If sge->mem is already valid, no new
+ * lookup is done and mem is not released if the check fails.
+ */
+int
+siw_check_sge(struct siw_pd *pd, struct siw_sge *sge,
+	      enum siw_access_flags perms, u32 off, int len)
+{
+	struct siw_dev	*sdev = pd->hdr.sdev;
+	struct siw_mem	*mem;
+	int		new_ref = 0, rv = 0;
+
+	if (len + off > sge->len) {
+		rv = -EPERM;
+		goto fail;
+	}
+	if (sge->mem.obj == NULL) {
+		mem = siw_mem_id2obj(sdev, sge->lkey >> 8);
+		if (!mem) {
+			rv = -EINVAL;
+			goto fail;
+		}
+		sge->mem.obj = mem;
+		new_ref = 1;
+	} else {
+		mem = sge->mem.obj;
+		new_ref = 0;
+	}
+	rv = siw_check_mem(pd, mem, sge->addr + off, perms, len);
+	if (rv)
+		goto fail;
+
+	return 0;
+
+fail:
+	if (new_ref) {
+		siw_mem_put(mem);
+		sge->mem.obj = NULL;
+	}
+	return rv;
+}
+
+
+/*
+ * siw_check_sgl()
+ *
+ * Check permissions for a list of SGE's (SGL)
+ *
+ * @pd:		Protection Domain SGL should belong to
+ * @sge:	List of SGE to be checked
+ * @perms:	requested access permissions
+ * @off:	starting offset in SGL
+ * @len:	len of memory interval to be checked
+ *
+ * The function checks only the subinterval of the SGL described by
+ * byte length @len; the check starts at byte offset @off, which must
+ * lie within the length of the first SGE.
+ *
+ * The caller is responsible for keeping @len + @off within
+ * the total byte length of the SGL.
+ */
+
+int siw_check_sgl(struct siw_pd *pd, struct siw_sge *sge,
+		  enum siw_access_flags perms, u32 off, int len)
+{
+	int	rv = 0;
+
+	dprint(DBG_WR, "(PD%d): Enter\n", OBJ_ID(pd));
+
+	BUG_ON(off >= sge->len);
+
+	while (len > 0) {
+		dprint(DBG_WR, "(PD%d): sge=%p, perms=0x%x, "
+			"len=%d, off=%u, sge->len=%d\n",
+			OBJ_ID(pd), sge, perms, len, off, sge->len);
+		/*
+		 * rdma verbs: do not check stag for a zero length sge
+		 */
+		if (sge->len == 0) {
+			sge++;
+			continue;
+		}
+
+		rv = siw_check_sge(pd, sge, perms, off, sge->len - off);
+		if (rv)
+			break;
+
+		len -= sge->len - off;
+		off = 0;
+		sge++;
+	}
+	return rv;
+}
+
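+/*
+ * CRC helpers: feed a plain buffer (siw_crc_array) or a page fragment
+ * given by a scatterlist entry plus offset (siw_crc_sg) into the MPA
+ * CRC hash state.
+ */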
+int siw_crc_array(struct hash_desc *desc, u8 *start, size_t len)
+{
+	struct scatterlist sg;
+
+	sg_init_one(&sg, start, len);
+	return crypto_hash_update(desc, &sg, len);
+}
+
+int siw_crc_sg(struct hash_desc *desc, struct scatterlist *sg,
+	       int off, int len)
+{
+	int rv;
+
+	if (off == 0)
+		rv = crypto_hash_update(desc, sg, len);
+	else {
+		struct scatterlist t_sg;
+
+		sg_init_table(&t_sg, 1);
+		sg_set_page(&t_sg, sg_page(sg), len, off);
+		rv = crypto_hash_update(desc, &t_sg, len);
+	}
+	return rv;
+}
+
+
+/*
+ * siw_sq_flush()
+ *
+ * Flush SQ and ORQ entries to the CQ.
+ * IRQ entries are silently dropped.
+ *
+ * TODO: Add termination code for an in-progress WQE.
+ * TODO: An in-progress WQE may have been partially
+ *       processed. It should be enforced that transmission
+ *       of a started DDP segment is completed whenever
+ *       possible.
+ *
+ * Must be called with the qp state write lock held.
+ * Therefore, the SQ and ORQ locks must not be taken.
+ */
+void siw_sq_flush(struct siw_qp *qp)
+{
+	struct list_head	*pos, *n;
+	struct siw_wqe		*wqe = tx_wqe(qp);
+	struct siw_cq		*cq = qp->scq;
+	int			async_event = 0;
+
+	dprint(DBG_OBJ|DBG_CM|DBG_WR, "(QP%d): Enter\n", QP_ID(qp));
+
+	/*
+	 * Flush the in-progress WQE, if any.
+	 */
+	if (wqe) {
+		/*
+		 * TODO: Add iWARP Termination code
+		 */
+		tx_wqe(qp) = NULL;
+
+		dprint(DBG_WR,
+			" (QP%d): Flush current WQE %p, type %d\n",
+			QP_ID(qp), wqe, wr_type(wqe));
+
+		if (wr_type(wqe) == SIW_WR_RDMA_READ_RESP) {
+			siw_wqe_put(wqe);
+			wqe = NULL;
+		} else if (wr_type(wqe) != SIW_WR_RDMA_READ_REQ)
+			/*
+			 *  An RREQUEST is already on the ORQ
+			 */
+			list_add_tail(&wqe->list, &qp->orq);
+	}
+	if (!list_empty(&qp->irq))
+		list_for_each_safe(pos, n, &qp->irq) {
+			wqe = list_entry_wqe(pos);
+			dprint(DBG_WR,
+				" (QP%d): Flush IRQ WQE %p, status %d\n",
+				QP_ID(qp), wqe, wqe->wr_status);
+			list_del(&wqe->list);
+			siw_wqe_put(wqe);
+		}
+
+	if (!list_empty(&qp->orq))
+		list_for_each_safe(pos, n, &qp->orq) {
+			wqe = list_entry_wqe(pos);
+			dprint(DBG_WR,
+				" (QP%d): Flush ORQ WQE %p, type %d,"
+				" status %d\n", QP_ID(qp), wqe, wr_type(wqe),
+				wqe->wr_status);
+			if (wqe->wr_status != SR_WR_DONE) {
+				async_event = 1;
+				wqe->wc_status = IB_WC_WR_FLUSH_ERR;
+				wqe->wr_status = SR_WR_DONE;
+			}
+			if (cq) {
+				lock_cq(cq);
+				list_move_tail(&wqe->list, &cq->queue);
+				/* TODO: enforce CQ limits */
+				atomic_inc(&cq->qlen);
+				unlock_cq(cq);
+			} else {
+				list_del(&wqe->list);
+				siw_wqe_put(wqe);
+			}
+		}
+	if (!list_empty(&qp->sq)) {
+		async_event = 1;
+
+		list_for_each_safe(pos, n, &qp->sq) {
+			wqe = list_entry_wqe(pos);
+			dprint(DBG_WR,
+				" (QP%d): Flush SQ WQE %p, type %d\n",
+				QP_ID(qp), wqe, wr_type(wqe));
+			if (cq) {
+				wqe->wc_status = IB_WC_WR_FLUSH_ERR;
+				wqe->wr_status = SR_WR_DONE;
+				lock_cq(cq);
+				list_move_tail(&wqe->list, &cq->queue);
+				/* TODO: enforce CQ limits */
+				atomic_inc(&cq->qlen);
+				unlock_cq(cq);
+			} else {
+				list_del(&wqe->list);
+				siw_wqe_put(wqe);
+			}
+		}
+	}
+	atomic_set(&qp->sq_space, qp->attrs.sq_size);
+
+	if (wqe != NULL && cq != NULL && cq->ofa_cq.comp_handler != NULL)
+		(*cq->ofa_cq.comp_handler)(&cq->ofa_cq, cq->ofa_cq.cq_context);
+
+	if (async_event)
+		siw_qp_event(qp, IB_EVENT_SQ_DRAINED);
+}
+
+/*
+ * siw_rq_flush()
+ *
+ * Flush receive queue entries to the CQ. An in-progress WQE may have
+ * some bytes processed already (wqe->processed).
+ *
+ * Must be called with the qp state write lock held.
+ * Therefore, the RQ lock must not be taken.
+ */
+void siw_rq_flush(struct siw_qp *qp)
+{
+	struct list_head	*pos, *n;
+	struct siw_wqe		*wqe;
+	struct siw_cq		*cq;
+
+	dprint(DBG_OBJ|DBG_CM|DBG_WR, "(QP%d): Enter\n", QP_ID(qp));
+
+	/*
+	 * Flush an in-progress WQE, if present
+	 */
+	if (rx_wqe(qp)) {
+		if (__rdmap_opcode(&qp->rx_ctx.hdr.ctrl) != RDMAP_RDMA_WRITE)
+			list_add(&rx_wqe(qp)->list, &qp->rq);
+		else
+			siw_mem_put(rx_mem(qp));
+
+		rx_wqe(qp) = NULL;
+	}
+	if (list_empty(&qp->rq))
+		return;
+
+	cq = qp->rcq;
+
+	list_for_each_safe(pos, n, &qp->rq) {
+		wqe = list_entry_wqe(pos);
+		list_del_init(&wqe->list);
+		if (cq) {
+			wqe->wc_status = IB_WC_WR_FLUSH_ERR;
+			lock_cq(cq);
+			list_add_tail(&wqe->list, &cq->queue);
+			/* TODO: enforce CQ limits */
+			atomic_inc(&cq->qlen);
+			unlock_cq(cq);
+		} else
+			siw_wqe_put(wqe);
+
+		if (!qp->srq)
+			atomic_inc(&qp->rq_space);
+		else
+			atomic_inc(&qp->srq->space);
+
+	}
+	if (cq != NULL && cq->ofa_cq.comp_handler != NULL)
+		(*cq->ofa_cq.comp_handler)(&cq->ofa_cq, cq->ofa_cq.cq_context);
+}
-- 
1.5.4.3
