From: Frank Huang <tigerinxm@gmail.com>
To: Leon Romanovsky <leon@kernel.org>
Cc: linux-rdma@vger.kernel.org
Subject: Re: rxe panic
Date: Wed, 25 Dec 2019 15:43:13 +0800 [thread overview]
Message-ID: <CAKC_zStPUCpjaF3mJb01A9sPjB0t0GxfBqB=8zsto96dsaD5qQ@mail.gmail.com> (raw)
In-Reply-To: <CAKC_zSts5zdbM4LhUaPBWk8=uKGAKWX6vgd85cdKjOrZViiEJg@mail.gmail.com>
Here is the patch that I used. :)
rdma_rxe (4.14.97) has problems dealing with out-of-order messages;
this patch transplants the rdma_rxe module from linux-5.2.9 to fix these problems.
The fix only touches linux-4.14.97/drivers/infiniband/sw/rxe. At
present, no impact on other modules has been found.
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe_comp.c
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_comp.c
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe_comp.c 2019-01-31
15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_comp.c 2019-09-17
16:00:39.168896560 +0800
@@ -191,6 +191,7 @@
{
qp->comp.retry_cnt = qp->attr.retry_cnt;
qp->comp.rnr_retry = qp->attr.rnr_retry;
+ qp->comp.started_retry = 0;
}
static inline enum comp_state check_psn(struct rxe_qp *qp,
@@ -253,6 +254,17 @@
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
+ /* read retries of partial data may restart from
+ * read response first or response only.
+ */
+ if ((pkt->psn == wqe->first_psn &&
+ pkt->opcode ==
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) ||
+ (wqe->first_psn == wqe->last_psn &&
+ pkt->opcode ==
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY))
+ break;
+
return COMPST_ERROR;
}
break;
@@ -270,8 +282,8 @@
if ((syn & AETH_TYPE_MASK) != AETH_ACK)
return COMPST_ERROR;
- /* Fall through (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
- * doesn't have an AETH)
+ /* fall through */
+ /* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH)
*/
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
if (wqe->wr.opcode != IB_WR_RDMA_READ &&
@@ -501,11 +513,11 @@
struct rxe_pkt_info *pkt,
struct rxe_send_wqe *wqe)
{
- qp->comp.opcode = -1;
-
- if (pkt) {
- if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
- qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ if (pkt && wqe->state == wqe_state_pending) {
+ if (psn_compare(wqe->last_psn, qp->comp.psn) >= 0) {
+ qp->comp.psn = (wqe->last_psn + 1) & BTH_PSN_MASK;
+ qp->comp.opcode = -1;
+ }
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
@@ -662,7 +674,6 @@
qp->qp_timeout_jiffies)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
- WARN_ON_ONCE(skb);
goto exit;
case COMPST_ERROR_RETRY:
@@ -676,10 +687,23 @@
/* there is nothing to retry in this case */
if (!wqe || (wqe->state == wqe_state_posted)) {
- WARN_ON_ONCE(skb);
goto exit;
}
+ /* if we've started a retry, don't start another
+ * retry sequence, unless this is a timeout.
+ */
+ if (qp->comp.started_retry &&
+ !qp->comp.timeout_retry) {
+ if (pkt) {
+ rxe_drop_ref(pkt->qp);
+ kfree_skb(skb);
+ skb = NULL;
+ }
+
+ goto done;
+ }
+
if (qp->comp.retry_cnt > 0) {
if (qp->comp.retry_cnt != 7)
qp->comp.retry_cnt--;
@@ -696,6 +720,7 @@
rxe_counter_inc(rxe,
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
+ qp->comp.started_retry = 1;
rxe_run_task(&qp->req.task, 1);
}
@@ -705,8 +730,7 @@
skb = NULL;
}
- WARN_ON_ONCE(skb);
- goto exit;
+ goto done;
} else {
rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED);
@@ -749,7 +773,6 @@
skb = NULL;
}
- WARN_ON_ONCE(skb);
goto exit;
}
}
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe.h
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe.h
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe.h 2019-01-31
15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe.h 2019-09-17
16:00:39.169896565 +0800
@@ -74,7 +74,6 @@
SHASH_DESC_ON_STACK(shash, rxe->tfm);
shash->tfm = rxe->tfm;
- shash->flags = 0;
*(u32 *)shash_desc_ctx(shash) = crc;
err = crypto_shash_update(shash, next, len);
if (unlikely(err)) {
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe_hdr.h
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_hdr.h
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe_hdr.h 2019-01-31
15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_hdr.h 2019-09-17
16:00:39.169896565 +0800
@@ -643,7 +643,7 @@
__be32 rkey;
__be64 swap_add;
__be64 comp;
-} __attribute__((__packed__));
+} __packed;
static inline u64 __atmeth_va(void *arg)
{
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe_hw_counters.c
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_hw_counters.c
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe_hw_counters.c
2019-01-31 15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_hw_counters.c
2019-09-17 16:00:39.169896565 +0800
@@ -37,11 +37,11 @@
[RXE_CNT_SENT_PKTS] = "sent_pkts",
[RXE_CNT_RCVD_PKTS] = "rcvd_pkts",
[RXE_CNT_DUP_REQ] = "duplicate_request",
- [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_sequence",
+ [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_seq_request",
[RXE_CNT_RCV_RNR] = "rcvd_rnr_err",
[RXE_CNT_SND_RNR] = "send_rnr_err",
[RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err",
- [RXE_CNT_COMPLETER_SCHED] = "ack_deffered",
+ [RXE_CNT_COMPLETER_SCHED] = "ack_deferred",
[RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err",
[RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err",
[RXE_CNT_COMP_RETRY] = "completer_retry_err",
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe_loc.h
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_loc.h
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe_loc.h 2019-01-31
15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_loc.h 2019-09-17
16:00:39.170896570 +0800
@@ -268,7 +268,8 @@
if (pkt->mask & RXE_LOOPBACK_MASK) {
memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
- err = rxe_loopback(skb);
+ rxe_loopback(skb);
+ err = 0;
} else {
err = rxe_send(rxe, pkt, skb);
}
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe_mmap.c
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_mmap.c
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe_mmap.c 2019-01-31
15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_mmap.c 2019-09-17
16:00:39.170896570 +0800
@@ -146,6 +146,8 @@
void *obj)
{
struct rxe_mmap_info *ip;
+ if (!context)
+ return ERR_PTR(-EINVAL);
ip = kmalloc(sizeof(*ip), GFP_KERNEL);
if (!ip)
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe_pool.c
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_pool.c
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe_pool.c 2019-01-31
15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_pool.c 2019-09-17
16:00:39.171896575 +0800
@@ -112,6 +112,20 @@
return rxe_type_info[pool->type].cache;
}
+static void rxe_cache_clean(size_t cnt)
+{
+ int i;
+ struct rxe_type_info *type;
+
+ for (i = 0; i < cnt; i++) {
+ type = &rxe_type_info[i];
+ if (!(type->flags & RXE_POOL_NO_ALLOC)) {
+ kmem_cache_destroy(type->cache);
+ type->cache = NULL;
+ }
+ }
+}
+
int rxe_cache_init(void)
{
int err;
@@ -136,24 +150,14 @@
return 0;
err1:
- while (--i >= 0) {
- kmem_cache_destroy(type->cache);
- type->cache = NULL;
- }
+ rxe_cache_clean(i);
return err;
}
void rxe_cache_exit(void)
{
- int i;
- struct rxe_type_info *type;
-
- for (i = 0; i < RXE_NUM_TYPES; i++) {
- type = &rxe_type_info[i];
- kmem_cache_destroy(type->cache);
- type->cache = NULL;
- }
+ rxe_cache_clean(RXE_NUM_TYPES);
}
static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
@@ -207,7 +211,7 @@
kref_init(&pool->ref_cnt);
- spin_lock_init(&pool->pool_lock);
+ rwlock_init(&pool->pool_lock);
if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
err = rxe_pool_init_index(pool,
@@ -222,7 +226,7 @@
pool->key_size = rxe_type_info[type].key_size;
}
- pool->state = rxe_pool_valid;
+ pool->state = RXE_POOL_STATE_VALID;
out:
return err;
@@ -232,7 +236,7 @@
{
struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
- pool->state = rxe_pool_invalid;
+ pool->state = RXE_POOL_STATE_INVALID;
kfree(pool->table);
}
@@ -245,12 +249,12 @@
{
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
- pool->state = rxe_pool_invalid;
+ write_lock_irqsave(&pool->pool_lock, flags);
+ pool->state = RXE_POOL_STATE_INVALID;
if (atomic_read(&pool->num_elem) > 0)
pr_warn("%s pool destroyed with unfree'd elem\n",
pool_name(pool));
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
rxe_pool_put(pool);
@@ -336,10 +340,10 @@
struct rxe_pool *pool = elem->pool;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ write_lock_irqsave(&pool->pool_lock, flags);
memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
insert_key(pool, elem);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
}
void rxe_drop_key(void *arg)
@@ -348,9 +352,9 @@
struct rxe_pool *pool = elem->pool;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ write_lock_irqsave(&pool->pool_lock, flags);
rb_erase(&elem->node, &pool->tree);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
}
void rxe_add_index(void *arg)
@@ -359,10 +363,10 @@
struct rxe_pool *pool = elem->pool;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ write_lock_irqsave(&pool->pool_lock, flags);
elem->index = alloc_index(pool);
insert_index(pool, elem);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
}
void rxe_drop_index(void *arg)
@@ -371,10 +375,10 @@
struct rxe_pool *pool = elem->pool;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ write_lock_irqsave(&pool->pool_lock, flags);
clear_bit(elem->index - pool->min_index, pool->table);
rb_erase(&elem->node, &pool->tree);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
}
void *rxe_alloc(struct rxe_pool *pool)
@@ -384,13 +388,13 @@
might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
- spin_lock_irqsave(&pool->pool_lock, flags);
- if (pool->state != rxe_pool_valid) {
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ read_lock_irqsave(&pool->pool_lock, flags);
+ if (pool->state != RXE_POOL_STATE_VALID) {
+ read_unlock_irqrestore(&pool->pool_lock, flags);
return NULL;
}
kref_get(&pool->ref_cnt);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ read_unlock_irqrestore(&pool->pool_lock, flags);
kref_get(&pool->rxe->ref_cnt);
@@ -436,9 +440,9 @@
struct rxe_pool_entry *elem = NULL;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ read_lock_irqsave(&pool->pool_lock, flags);
- if (pool->state != rxe_pool_valid)
+ if (pool->state != RXE_POOL_STATE_VALID)
goto out;
node = pool->tree.rb_node;
@@ -450,15 +454,14 @@
node = node->rb_left;
else if (elem->index < index)
node = node->rb_right;
- else
+ else {
+ kref_get(&elem->ref_cnt);
break;
+ }
}
- if (node)
- kref_get(&elem->ref_cnt);
-
out:
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ read_unlock_irqrestore(&pool->pool_lock, flags);
return node ? elem : NULL;
}
@@ -469,9 +472,9 @@
int cmp;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ read_lock_irqsave(&pool->pool_lock, flags);
- if (pool->state != rxe_pool_valid)
+ if (pool->state != RXE_POOL_STATE_VALID)
goto out;
node = pool->tree.rb_node;
@@ -494,6 +497,6 @@
kref_get(&elem->ref_cnt);
out:
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ read_unlock_irqrestore(&pool->pool_lock, flags);
return node ? elem : NULL;
}
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe_pool.h
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_pool.h
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe_pool.h 2019-01-31
15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_pool.h 2019-09-17
16:00:39.171896575 +0800
@@ -41,6 +41,7 @@
RXE_POOL_ATOMIC = BIT(0),
RXE_POOL_INDEX = BIT(1),
RXE_POOL_KEY = BIT(2),
+ RXE_POOL_NO_ALLOC = BIT(4),
};
enum rxe_elem_type {
@@ -74,8 +75,8 @@
extern struct rxe_type_info rxe_type_info[];
enum rxe_pool_state {
- rxe_pool_invalid,
- rxe_pool_valid,
+ RXE_POOL_STATE_INVALID,
+ RXE_POOL_STATE_VALID,
};
struct rxe_pool_entry {
@@ -90,7 +91,7 @@
struct rxe_pool {
struct rxe_dev *rxe;
- spinlock_t pool_lock; /* pool spinlock */
+ rwlock_t pool_lock; /* protects pool add/del/search */
size_t elem_size;
struct kref ref_cnt;
void (*cleanup)(struct rxe_pool_entry *obj);
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe_qp.c
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_qp.c
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe_qp.c 2019-01-31
15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_qp.c 2019-09-17
16:00:39.172896580 +0800
@@ -235,6 +235,16 @@
return err;
qp->sk->sk->sk_user_data = qp;
+ /* pick a source UDP port number for this QP based on
+ * the source QPN. this spreads traffic for different QPs
+ * across different NIC RX queues (while using a single
+ * flow for a given QP to maintain packet order).
+ * the port number must be in the Dynamic Ports range
+ * (0xc000 - 0xffff).
+ */
+ qp->src_port = RXE_ROCE_V2_SPORT +
+ (hash_32_generic(qp_num(qp), 14) & 0x3fff);
+
qp->sq.max_wr = init->cap.max_send_wr;
qp->sq.max_sge = init->cap.max_send_sge;
qp->sq.max_inline = init->cap.max_inline_data;
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe_req.c
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_req.c
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe_req.c 2019-01-31
15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_req.c 2019-09-17
16:00:39.172896580 +0800
@@ -73,9 +73,6 @@
int npsn;
int first = 1;
- wqe = queue_head(qp->sq.queue);
- npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK;
-
qp->req.wqe_index = consumer_index(qp->sq.queue);
qp->req.psn = qp->comp.psn;
qp->req.opcode = -1;
@@ -107,11 +104,17 @@
if (first) {
first = 0;
- if (mask & WR_WRITE_OR_SEND_MASK)
+ if (mask & WR_WRITE_OR_SEND_MASK) {
+ npsn = (qp->comp.psn - wqe->first_psn) &
+ BTH_PSN_MASK;
retry_first_write_send(qp, wqe, mask, npsn);
+ }
- if (mask & WR_READ_MASK)
+ if (mask & WR_READ_MASK) {
+ npsn = (wqe->dma.length - wqe->dma.resid) /
+ qp->mtu;
wqe->iova += npsn * qp->mtu;
+ }
}
wqe->state = wqe_state_posted;
@@ -435,7 +438,7 @@
if (pkt->mask & RXE_RETH_MASK) {
reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
reth_set_va(pkt, wqe->iova);
- reth_set_len(pkt, wqe->dma.length);
+ reth_set_len(pkt, wqe->dma.resid);
}
if (pkt->mask & RXE_IMMDT_MASK)
@@ -713,6 +716,7 @@
if (fill_packet(qp, wqe, &pkt, skb, payload)) {
pr_debug("qp#%d Error during fill packet\n", qp_num(qp));
+ kfree_skb(skb);
goto err;
}
@@ -744,7 +748,6 @@
goto next_wqe;
err:
- kfree_skb(skb);
wqe->status = IB_WC_LOC_PROT_ERR;
wqe->state = wqe_state_error;
__rxe_do_task(&qp->comp.task);
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe_resp.c
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_resp.c
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe_resp.c 2019-01-31
15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_resp.c 2019-09-17
16:00:39.173896585 +0800
@@ -124,12 +124,9 @@
struct sk_buff *skb;
if (qp->resp.state == QP_STATE_ERROR) {
- skb = skb_dequeue(&qp->req_pkts);
- if (skb) {
- /* drain request packet queue */
+ while ((skb = skb_dequeue(&qp->req_pkts))) {
rxe_drop_ref(qp);
kfree_skb(skb);
- return RESPST_GET_REQ;
}
/* go drain recv wr queue */
@@ -435,6 +432,7 @@
qp->resp.va = reth_va(pkt);
qp->resp.rkey = reth_rkey(pkt);
qp->resp.resid = reth_len(pkt);
+ qp->resp.length = reth_len(pkt);
}
access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
: IB_ACCESS_REMOTE_WRITE;
@@ -860,7 +858,9 @@
pkt->mask & RXE_WRITE_MASK) ?
IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
wc->vendor_err = 0;
- wc->byte_len = wqe->dma.length - wqe->dma.resid;
+ wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
+ pkt->mask & RXE_WRITE_MASK) ?
+ qp->resp.length : wqe->dma.length - wqe->dma.resid;
/* fields after byte_len are different between kernel and user
* space
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe_verbs.c
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_verbs.c
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe_verbs.c 2019-01-31
15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_verbs.c 2019-09-17
16:00:39.174896590 +0800
@@ -644,6 +644,7 @@
switch (wr->opcode) {
case IB_WR_RDMA_WRITE_WITH_IMM:
wr->ex.imm_data = ibwr->ex.imm_data;
+ /* fall through */
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
@@ -774,7 +775,6 @@
unsigned int mask;
unsigned int length = 0;
int i;
- int must_sched;
while (wr) {
mask = wr_opcode_mask(wr->opcode, qp);
@@ -804,14 +804,7 @@
wr = wr->next;
}
- /*
- * Must sched in case of GSI QP because ib_send_mad() hold irq lock,
- * and the requester call ip_local_out_sk() that takes spin_lock_bh.
- */
- must_sched = (qp_type(qp) == IB_QPT_GSI) ||
- (queue_count(qp->sq.queue) > 1);
-
- rxe_run_task(&qp->req.task, must_sched);
+ rxe_run_task(&qp->req.task, 1);
if (unlikely(qp->req.state == QP_STATE_ERROR))
rxe_run_task(&qp->comp.task, 1);
diff -ur linux-4.14.97/drivers/infiniband/sw/rxe/rxe_verbs.h
linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_verbs.h
--- linux-4.14.97/drivers/infiniband/sw/rxe/rxe_verbs.h 2019-01-31
15:13:48.000000000 +0800
+++ linux-4.14.97-rxe/drivers/infiniband/sw/rxe/rxe_verbs.h 2019-09-17
16:00:39.174896590 +0800
@@ -160,6 +160,7 @@
int opcode;
int timeout;
int timeout_retry;
+ int started_retry;
u32 retry_cnt;
u32 rnr_retry;
struct rxe_task task;
@@ -214,6 +215,7 @@
struct rxe_mem *mr;
u32 resid;
u32 rkey;
+ u32 length;
u64 atomic_orig;
/* SRQ only */
@@ -252,6 +254,7 @@
struct socket *sk;
u32 dst_cookie;
+ u16 src_port;
struct rxe_av pri_av;
struct rxe_av alt_av;
On Wed, Dec 25, 2019 at 3:23 PM Frank Huang <tigerinxm@gmail.com> wrote:
>
> hi leon
>
> I can not get what you means, do you say the rxe_add_ref(qp) is not needed?
> My kernel is old, and I found some bugs of rxe on 4.14.97, especially
> the rnr errors.
> I can not upgrade whole kernel because there are many dependencies.
> Finally , I sync the fixed from newest kernel version to the 4.14.97.
>
> When I compare my rxe_resp.c with kernel 5.2.9 , I found the snippet
> of duplicate_request is changed.
> and rxe_xmit_packet will call rxe_send,enter the log "rdma_rxe:
> Unknown layer 3 protocol: 0"
>
> 1137 } else {
> 1138 struct resp_res *res;
> 1139
> 1140 /* Find the operation in our list of responder resources. */
> 1141 res = find_resource(qp, pkt->psn);
> 1142 if (res) {
> 1143 struct sk_buff *skb_copy;
> 1144
> 1145 skb_copy = skb_clone(res->atomic.skb, GFP_ATOMIC);
> 1146 if (skb_copy) {
> 1147 rxe_add_ref(qp); /* for the new SKB */
> 1148 } else {
> 1149 pr_warn("Couldn't clone atomic resp\n");
> 1150 rc = RESPST_CLEANUP;
> 1151 goto out;
> 1152 }
> 1153
> 1154 /* Resend the result. */
> 1155 rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
> 1156 pkt, skb_copy);
> 1157 if (rc) {
> 1158 pr_err("Failed resending result. This flow is not handled - skb
> ignored\n");
> 1159 rxe_drop_ref(qp);
> 1160 rc = RESPST_CLEANUP;
> 1161 goto out;
> 1162 }
> 1163 }
> 1164
> 1165 /* Resource not found. Class D error. Drop the request. */
> 1166 rc = RESPST_CLEANUP;
> 1167 goto out;
> 1168 }
> 1169 out:
> 1170 return rc;
> 1171 }
>
> On Wed, Dec 25, 2019 at 2:33 PM Leon Romanovsky <leon@kernel.org> wrote:
> >
> > On Wed, Dec 25, 2019 at 12:55:35PM +0800, Frank Huang wrote:
> > > hi, there is a panic on rdma_rxe module when the restart
> > > network.service or shutdown the switch.
> > >
> > > it looks like a use-after-free error.
> > >
> > > everytime it happens, there is the log "rdma_rxe: Unknown layer 3 protocol: 0"
> >
> > The error print itself is harmless.
> > >
> > > is it a known error?
> > >
> > > my kernel version is 4.14.97
> >
> > Your kernel is old enough and doesn't include refcount,
> > so I can't say for sure that it is the case, but the
> > following code is not correct and with refcount debug
> > it will be seen immediately.
> >
> > 1213 int rxe_responder(void *arg)
> > 1214 {
> > 1215 struct rxe_qp *qp = (struct rxe_qp *)arg;
> > 1216 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
> > 1217 enum resp_states state;
> > 1218 struct rxe_pkt_info *pkt = NULL;
> > 1219 int ret = 0;
> > 1220
> > 1221 rxe_add_ref(qp); <------ USE-AFTER-FREE
> > 1222
> > 1223 qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
> > 1224
> > 1225 if (!qp->valid) {
> > 1226 ret = -EINVAL;
> > 1227 goto done;
> > 1228 }
> >
> > Thanks
next prev parent reply other threads:[~2019-12-25 7:43 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-12-25 4:55 rxe panic Frank Huang
2019-12-25 5:27 ` Zhu Yanjun
2019-12-25 6:01 ` Frank Huang
2019-12-25 6:34 ` Zhu Yanjun
2019-12-25 7:10 ` Frank Huang
2019-12-25 6:32 ` Leon Romanovsky
2019-12-25 7:23 ` Frank Huang
2019-12-25 7:43 ` Frank Huang [this message]
2019-12-25 9:23 ` Leon Romanovsky
2019-12-26 1:08 ` Zhu Yanjun
2019-12-26 1:39 ` Frank Huang
2019-12-26 2:35 ` Zhu Yanjun
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAKC_zStPUCpjaF3mJb01A9sPjB0t0GxfBqB=8zsto96dsaD5qQ@mail.gmail.com' \
--to=tigerinxm@gmail.com \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).