From mboxrd@z Thu Jan 1 00:00:00 1970
From: swise@opengridcomputing.com (Steve Wise)
Date: Wed, 10 Aug 2016 11:00:40 -0500
Subject: nvmf/rdma host crash during heavy load and keep alive recovery
In-Reply-To: <010f01d1f31e$50c8cb40$f25a61c0$@opengridcomputing.com>
References: <018301d1e9e1$da3b2e40$8eb18ac0$@opengridcomputing.com>
 <20160801110658.GF16141@lst.de>
 <008801d1ec00$a0bcfbf0$e236f3d0$@opengridcomputing.com>
 <015801d1ec3d$0ca07ea0$25e17be0$@opengridcomputing.com>
 <010f01d1f31e$50c8cb40$f25a61c0$@opengridcomputing.com>
Message-ID: <013601d1f320$57a9e4b0$06fdae10$@opengridcomputing.com>

> Hey guys, I've rebased the nvmf-4.8-rc branch on top of 4.8-rc2 so I have
> the latest/greatest, and continued debugging this crash.  I see:
>
> 0) 10 ram disks attached via nvmf/iw_cxgb4, and fio started on all 10
> disks.  This node has 8 cores, so that is 80 connections.
> 1) the cxgb4 interface is brought down a few seconds later
> 2) kato fires on all connections
> 3) the interface is brought back up 8 seconds after #1
> 4) 10 seconds after #2 all the qps are destroyed
> 5) reconnects start happening
> 6) a blk request is executed and the nvme_rdma_request struct still has a
> pointer to one of the qps destroyed in #4 and whamo...
>
> I'm digging into the request cancel logic.  Any ideas/help is greatly
> appreciated...
>
> Thanks,
>
> Steve.

Here is the stack that crashed processing a blk request:

crash> bt
PID: 402    TASK: ffff880397968040  CPU: 0   COMMAND: "kworker/0:1H"
 #0 [ffff8803970f7800] machine_kexec at ffffffff8105fc40
 #1 [ffff8803970f7870] __crash_kexec at ffffffff81116908
 #2 [ffff8803970f7940] crash_kexec at ffffffff811169dd
 #3 [ffff8803970f7970] oops_end at ffffffff81032be6
 #4 [ffff8803970f79a0] die at ffffffff810330db
 #5 [ffff8803970f79d0] do_general_protection at ffffffff81030144
 #6 [ffff8803970f7a00] general_protection at ffffffff816e4ca8
    [exception RIP: nvme_rdma_post_send+131]
    RIP: ffffffffa0414083  RSP: ffff8803970f7ab8  RFLAGS: 00010246
    RAX: 6b6b6b6b6b6b6b6b  RBX: ffff8802dd923598  RCX: 0000000000000002
    RDX: ffff8803970f7ae0  RSI: ffff8803970f7ab8  RDI: ffff8802dd9fc518
    RBP: ffff8803970f7af8   R8: ffff8803970f7ab8   R9: 0000000000000000
    R10: 0000000000000000  R11: ffff8802dde6ef58  R12: ffff8802dd923598
    R13: ffff8802dde6eeb0  R14: ffff880399f4c548  R15: ffff8802dde59db8
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #7 [ffff8803970f7b00] nvme_rdma_queue_rq at ffffffffa0415c72 [nvme_rdma]
 #8 [ffff8803970f7b50] __blk_mq_run_hw_queue at ffffffff81338324
 #9 [ffff8803970f7ca0] blk_mq_run_work_fn at ffffffff81338552
#10 [ffff8803970f7cb0] process_one_work at ffffffff810a1593
#11 [ffff8803970f7d90] worker_thread at ffffffff810a222d
#12 [ffff8803970f7ec0] kthread at ffffffff810a6d6c
#13 [ffff8803970f7f50] ret_from_fork at ffffffff816e2cbf

Here is the nvme_rdma_request:

crash> nvme_rdma_request ffff8802dde6eeb0
struct nvme_rdma_request {
  mr = 0xffff8802dde5c008,
  sqe = {
    cqe = {
      done = 0xffffffffa0414320
    },
    data = 0xffff8802dde59db8,
    dma = 12312747448
  },
  sge = {{
      addr = 12312747448,
      length = 64,
      lkey = 0
    }, {
      addr = 12138727424,
      length = 2048,
      lkey = 0
    }},
  num_sge = 2,
  nents = 1,
  inline_data = true,
  need_inval = false,
  reg_wr = {
    wr = {
      next = 0x0,
      {
        wr_id = 0,
        wr_cqe = 0x0
      },
      sg_list = 0x0,
      num_sge = 0,
      opcode = IB_WR_RDMA_WRITE,
      send_flags = 0,
      ex = {
        imm_data = 0,
        invalidate_rkey = 0
      }
    },
    mr = 0x0,
    key = 0,
    access = 0
  },
  reg_cqe = {
    done = 0x0
  },
  queue = 0xffff8802dd923598,
  sg_table = {
    sgl = 0xffff8802dde6ef58,
    nents = 1,
    orig_nents = 1
  },
  first_sgl = 0xffff8802dde6ef58
}
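Note that the request's queue pointer (queue = 0xffff8802dd923598) matches RBX/R12
in the exception frame, and RDI (ffff8802dd9fc518) matches the qp field of the
nvme_rdma_queue dumped below.  So the fault is the send path dereferencing
queue->qp after that QP's memory was freed.  ib_post_send() is a static inline
that immediately dereferences the qp, which is why the GPF lands inside
nvme_rdma_post_send.  Roughly, the faulting code is doing something like the
sketch below; this is a paraphrase for illustration, not the actual nvme-rdma
source, and the helper name/arguments are simplified:

#include <rdma/ib_verbs.h>

/*
 * Paraphrased sketch of the post path implied by the backtrace, NOT the
 * real driver code.  The point: queue->qp is read here, and in this crash
 * that memory has already been freed and poisoned (0x6b6b...), so the
 * inlined ib_post_send() faults on garbage.
 */
static int post_send_sketch(struct nvme_rdma_queue *queue,	/* driver-internal struct, dumped above/below */
			    struct ib_sge *sge, u32 num_sge)
{
	struct ib_send_wr wr = {}, *bad_wr;

	wr.sg_list    = sge;
	wr.num_sge    = num_sge;
	wr.opcode     = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	/* queue->qp == 0xffff8802dd9fc518 here, which is freed memory */
	return ib_post_send(queue->qp, &wr, &bad_wr);
}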
And here is the nvme_rdma_queue:

crash> nvme_rdma_queue 0xffff8802dd923598
struct nvme_rdma_queue {
  rsp_ring = 0xffff8802dd968008,
  sig_count = 200 '\310',
  queue_size = 128,
  cmnd_capsule_len = 4160,
  ctrl = 0xffff8802dbd5d3d8,
  device = 0xffff880384ceb5e8,
  ib_cq = 0xffff8802dd9d2e68,
  qp = 0xffff8802dd9fc518,
  flags = 0,
  cm_id = 0xffff8802dd9f8008,
  cm_error = 0,
  cm_done = {
    done = 0,
    wait = {
      lock = {
        {
          rlock = {
            raw_lock = {
              val = {
                counter = 0
              }
            }
          }
        }
      },
      task_list = {
        next = 0xffff8802dd9235f8,
        prev = 0xffff8802dd9235f8
      }
    }
  }
}

And see here the ib_qp has been freed:

crash> gdb x/8g 0xffff8802dd9fc518
0xffff8802dd9fc518:     0x6b6b6b6b6b6b6b6b      0x6b6b6b6b6b6b6b6b
0xffff8802dd9fc528:     0x6b6b6b6b6b6b6b6b      0x6b6b6b6b6b6b6b6b
0xffff8802dd9fc538:     0x6b6b6b6b6b6b6b6b      0x6b6b6b6b6b6b6b6b
0xffff8802dd9fc548:     0x6b6b6b6b6b6b6b6b      0x6b6b6b6b6b6b6b6b
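The 0x6b pattern is the slab POISON_FREE byte, so the qp object really was freed
and poisoned before this request went out, which matches steps 4-6 above: blk-mq
is still dispatching requests onto a queue whose QP was destroyed during recovery.
Purely as an illustration of the kind of check that would avoid posting on a dead
queue (not a tested fix, and the flag name is an assumption on my part), something
in queue_rq along these lines, though it obviously still leaves a window between
the check and the actual post:

/*
 * Purely illustrative guard, not a tested fix: bounce requests that
 * arrive on a queue whose RDMA resources were torn down during
 * keep-alive recovery, instead of posting to a freed QP.
 */
static int queue_rq_guard_sketch(struct nvme_rdma_queue *queue)
{
	/* NVME_RDMA_Q_CONNECTED is assumed here as the "queue usable" flag */
	if (!test_bit(NVME_RDMA_Q_CONNECTED, &queue->flags))
		return BLK_MQ_RQ_QUEUE_BUSY;	/* requeue until reconnect completes */

	return BLK_MQ_RQ_QUEUE_OK;
}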