linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Harald Freudenberger <freude@linux.ibm.com>,
	Heiko Carstens <hca@linux.ibm.com>,
	Sasha Levin <sashal@kernel.org>,
	linux-s390@vger.kernel.org
Subject: [PATCH AUTOSEL 5.14 46/47] s390/ap: fix state machine hang after failure to enable irq
Date: Sun,  5 Sep 2021 21:19:50 -0400	[thread overview]
Message-ID: <20210906011951.928679-46-sashal@kernel.org> (raw)
In-Reply-To: <20210906011951.928679-1-sashal@kernel.org>

From: Harald Freudenberger <freude@linux.ibm.com>

[ Upstream commit cabebb697c98fb1f05cc950a747a9b6ec61a5b01 ]

If for any reason the interrupt enable for an ap queue fails, the
state machine run for the queue returned wrong return codes to the
caller. So the caller assumed interrupt support for this queue is
enabled and thus did not re-establish the high resolution timer used
for polling. In the end this led to a hang for the user space process
waiting "forever" for the reply.

This patch reworks these return codes to return correct indications
for the caller to re-establish the timer when a queue runs without
interrupt support.

Please note that this fixes wrong behavior following a first
failure (enabling interrupt support for the queue failed). However,
it looks like this occasionally happens on KVM systems.

Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/s390/crypto/ap_bus.c   | 25 ++++++++-----------------
 drivers/s390/crypto/ap_bus.h   | 10 ++--------
 drivers/s390/crypto/ap_queue.c | 20 +++++++++++---------
 3 files changed, 21 insertions(+), 34 deletions(-)

diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 8d3a1d84a757..9c4f3c388934 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -127,22 +127,13 @@ static struct bus_type ap_bus_type;
 /* Adapter interrupt definitions */
 static void ap_interrupt_handler(struct airq_struct *airq, bool floating);
 
-static int ap_airq_flag;
+static bool ap_irq_flag;
 
 static struct airq_struct ap_airq = {
 	.handler = ap_interrupt_handler,
 	.isc = AP_ISC,
 };
 
-/**
- * ap_using_interrupts() - Returns non-zero if interrupt support is
- * available.
- */
-static inline int ap_using_interrupts(void)
-{
-	return ap_airq_flag;
-}
-
 /**
  * ap_airq_ptr() - Get the address of the adapter interrupt indicator
  *
@@ -152,7 +143,7 @@ static inline int ap_using_interrupts(void)
  */
 void *ap_airq_ptr(void)
 {
-	if (ap_using_interrupts())
+	if (ap_irq_flag)
 		return ap_airq.lsi_ptr;
 	return NULL;
 }
@@ -396,7 +387,7 @@ void ap_wait(enum ap_sm_wait wait)
 	switch (wait) {
 	case AP_SM_WAIT_AGAIN:
 	case AP_SM_WAIT_INTERRUPT:
-		if (ap_using_interrupts())
+		if (ap_irq_flag)
 			break;
 		if (ap_poll_kthread) {
 			wake_up(&ap_poll_wait);
@@ -471,7 +462,7 @@ static void ap_tasklet_fn(unsigned long dummy)
 	 * be received. Doing it in the beginning of the tasklet is therefor
 	 * important that no requests on any AP get lost.
 	 */
-	if (ap_using_interrupts())
+	if (ap_irq_flag)
 		xchg(ap_airq.lsi_ptr, 0);
 
 	spin_lock_bh(&ap_queues_lock);
@@ -541,7 +532,7 @@ static int ap_poll_thread_start(void)
 {
 	int rc;
 
-	if (ap_using_interrupts() || ap_poll_kthread)
+	if (ap_irq_flag || ap_poll_kthread)
 		return 0;
 	mutex_lock(&ap_poll_thread_mutex);
 	ap_poll_kthread = kthread_run(ap_poll_thread, NULL, "appoll");
@@ -1187,7 +1178,7 @@ static BUS_ATTR_RO(ap_adapter_mask);
 static ssize_t ap_interrupts_show(struct bus_type *bus, char *buf)
 {
 	return scnprintf(buf, PAGE_SIZE, "%d\n",
-			 ap_using_interrupts() ? 1 : 0);
+			 ap_irq_flag ? 1 : 0);
 }
 
 static BUS_ATTR_RO(ap_interrupts);
@@ -1912,7 +1903,7 @@ static int __init ap_module_init(void)
 	/* enable interrupts if available */
 	if (ap_interrupts_available()) {
 		rc = register_adapter_interrupt(&ap_airq);
-		ap_airq_flag = (rc == 0);
+		ap_irq_flag = (rc == 0);
 	}
 
 	/* Create /sys/bus/ap. */
@@ -1956,7 +1947,7 @@ static int __init ap_module_init(void)
 out_bus:
 	bus_unregister(&ap_bus_type);
 out:
-	if (ap_using_interrupts())
+	if (ap_irq_flag)
 		unregister_adapter_interrupt(&ap_airq);
 	kfree(ap_qci_info);
 	return rc;
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 8f18abdbbc2b..6dd5e8f0380c 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -80,12 +80,6 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
 #define AP_FUNC_EP11  5
 #define AP_FUNC_APXA  6
 
-/*
- * AP interrupt states
- */
-#define AP_INTR_DISABLED	0	/* AP interrupt disabled */
-#define AP_INTR_ENABLED		1	/* AP interrupt enabled */
-
 /*
  * AP queue state machine states
  */
@@ -112,7 +106,7 @@ enum ap_sm_event {
  * AP queue state wait behaviour
  */
 enum ap_sm_wait {
-	AP_SM_WAIT_AGAIN,	/* retry immediately */
+	AP_SM_WAIT_AGAIN = 0,	/* retry immediately */
 	AP_SM_WAIT_TIMEOUT,	/* wait for timeout */
 	AP_SM_WAIT_INTERRUPT,	/* wait for thin interrupt (if available) */
 	AP_SM_WAIT_NONE,	/* no wait */
@@ -186,7 +180,7 @@ struct ap_queue {
 	enum ap_dev_state dev_state;	/* queue device state */
 	bool config;			/* configured state */
 	ap_qid_t qid;			/* AP queue id. */
-	int interrupt;			/* indicate if interrupts are enabled */
+	bool interrupt;			/* indicate if interrupts are enabled */
 	int queue_count;		/* # messages currently on AP queue. */
 	int pendingq_count;		/* # requests on pendingq list. */
 	int requestq_count;		/* # requests on requestq list. */
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index 669f96fddad6..d70c4d3d0907 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -19,7 +19,7 @@
 static void __ap_flush_queue(struct ap_queue *aq);
 
 /**
- * ap_queue_enable_interruption(): Enable interruption on an AP queue.
+ * ap_queue_enable_irq(): Enable interrupt support on this AP queue.
  * @qid: The AP queue number
  * @ind: the notification indicator byte
  *
@@ -27,7 +27,7 @@ static void __ap_flush_queue(struct ap_queue *aq);
  * value it waits a while and tests the AP queue if interrupts
  * have been switched on using ap_test_queue().
  */
-static int ap_queue_enable_interruption(struct ap_queue *aq, void *ind)
+static int ap_queue_enable_irq(struct ap_queue *aq, void *ind)
 {
 	struct ap_queue_status status;
 	struct ap_qirq_ctrl qirqctrl = { 0 };
@@ -218,7 +218,8 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq)
 		return AP_SM_WAIT_NONE;
 	case AP_RESPONSE_NO_PENDING_REPLY:
 		if (aq->queue_count > 0)
-			return AP_SM_WAIT_INTERRUPT;
+			return aq->interrupt ?
+				AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_TIMEOUT;
 		aq->sm_state = AP_SM_STATE_IDLE;
 		return AP_SM_WAIT_NONE;
 	default:
@@ -272,7 +273,8 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
 		fallthrough;
 	case AP_RESPONSE_Q_FULL:
 		aq->sm_state = AP_SM_STATE_QUEUE_FULL;
-		return AP_SM_WAIT_INTERRUPT;
+		return aq->interrupt ?
+			AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_TIMEOUT;
 	case AP_RESPONSE_RESET_IN_PROGRESS:
 		aq->sm_state = AP_SM_STATE_RESET_WAIT;
 		return AP_SM_WAIT_TIMEOUT;
@@ -322,7 +324,7 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq)
 	case AP_RESPONSE_NORMAL:
 	case AP_RESPONSE_RESET_IN_PROGRESS:
 		aq->sm_state = AP_SM_STATE_RESET_WAIT;
-		aq->interrupt = AP_INTR_DISABLED;
+		aq->interrupt = false;
 		return AP_SM_WAIT_TIMEOUT;
 	default:
 		aq->dev_state = AP_DEV_STATE_ERROR;
@@ -355,7 +357,7 @@ static enum ap_sm_wait ap_sm_reset_wait(struct ap_queue *aq)
 	switch (status.response_code) {
 	case AP_RESPONSE_NORMAL:
 		lsi_ptr = ap_airq_ptr();
-		if (lsi_ptr && ap_queue_enable_interruption(aq, lsi_ptr) == 0)
+		if (lsi_ptr && ap_queue_enable_irq(aq, lsi_ptr) == 0)
 			aq->sm_state = AP_SM_STATE_SETIRQ_WAIT;
 		else
 			aq->sm_state = (aq->queue_count > 0) ?
@@ -396,7 +398,7 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq)
 
 	if (status.irq_enabled == 1) {
 		/* Irqs are now enabled */
-		aq->interrupt = AP_INTR_ENABLED;
+		aq->interrupt = true;
 		aq->sm_state = (aq->queue_count > 0) ?
 			AP_SM_STATE_WORKING : AP_SM_STATE_IDLE;
 	}
@@ -586,7 +588,7 @@ static ssize_t interrupt_show(struct device *dev,
 	spin_lock_bh(&aq->lock);
 	if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT)
 		rc = scnprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n");
-	else if (aq->interrupt == AP_INTR_ENABLED)
+	else if (aq->interrupt)
 		rc = scnprintf(buf, PAGE_SIZE, "Interrupts enabled.\n");
 	else
 		rc = scnprintf(buf, PAGE_SIZE, "Interrupts disabled.\n");
@@ -767,7 +769,7 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
 	aq->ap_dev.device.type = &ap_queue_type;
 	aq->ap_dev.device_type = device_type;
 	aq->qid = qid;
-	aq->interrupt = AP_INTR_DISABLED;
+	aq->interrupt = false;
 	spin_lock_init(&aq->lock);
 	INIT_LIST_HEAD(&aq->pendingq);
 	INIT_LIST_HEAD(&aq->requestq);
-- 
2.30.2


  parent reply	other threads:[~2021-09-06  1:22 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-06  1:19 [PATCH AUTOSEL 5.14 01/47] locking/mutex: Fix HANDOFF condition Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 02/47] regmap: fix the offset of register error log Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 03/47] regulator: tps65910: Silence deferred probe error Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 04/47] crypto: mxs-dcp - Check for DMA mapping errors Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 05/47] sched/deadline: Fix reset_on_fork reporting of DL tasks Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 06/47] power: supply: axp288_fuel_gauge: Report register-address on readb / writeb errors Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 07/47] crypto: omap-sham - clear dma flags only after omap_sham_update_dma_stop() Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 08/47] sched/deadline: Fix missing clock update in migrate_task_rq_dl() Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 09/47] rcu/tree: Handle VM stoppage in stall detection Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 10/47] EDAC/mce_amd: Do not load edac_mce_amd module on guests Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 11/47] posix-cpu-timers: Force next expiration recalc after itimer reset Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 12/47] hrtimer: Avoid double reprogramming in __hrtimer_start_range_ns() Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 13/47] hrtimer: Ensure timerfd notification for HIGHRES=n Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 14/47] udf: Check LVID earlier Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 15/47] udf: Fix iocharset=utf8 mount option Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 16/47] isofs: joliet: " Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 17/47] bcache: add proper error unwinding in bcache_device_init Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 18/47] nbd: add the check to prevent overflow in __nbd_ioctl() Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 19/47] blk-throtl: optimize IOPS throttle for large IO scenarios Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 20/47] nvme-tcp: don't update queue count when failing to set io queues Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 21/47] nvme-rdma: " Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 22/47] nvmet: pass back cntlid on successful completion Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 23/47] power: supply: smb347-charger: Add missing pin control activation Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 24/47] power: supply: max17042_battery: fix typo in MAx17042_TOFF Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 25/47] s390/cio: add dev_busid sysfs entry for each subchannel Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 26/47] s390/zcrypt: fix wrong offset index for APKA master key valid state Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 27/47] libata: fix ata_host_start() Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 28/47] sched/topology: Skip updating masks for non-online nodes Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 29/47] crypto: omap - Fix inconsistent locking of device lists Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 30/47] crypto: qat - do not ignore errors from enable_vf2pf_comms() Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 31/47] crypto: qat - handle both source of interrupt in VF ISR Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 32/47] crypto: qat - fix reuse of completion variable Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 33/47] crypto: qat - fix naming for init/shutdown VF to PF notifications Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 34/47] crypto: qat - do not export adf_iov_putmsg() Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 35/47] crypto: hisilicon/sec - fix the abnormal exiting process Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 36/47] crypto: hisilicon/sec - modify the hardware endian configuration Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 37/47] crypto: tcrypt - Fix missing return value check Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 38/47] fcntl: fix potential deadlocks for &fown_struct.lock Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 39/47] fcntl: fix potential deadlock for &fasync_struct.fa_lock Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 40/47] udf_get_extendedattr() had no boundary checks Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 41/47] io-wq: remove GFP_ATOMIC allocation off schedule out path Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 42/47] s390/kasan: fix large PMD pages address alignment check Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 43/47] s390/pci: fix misleading rc in clp_set_pci_fn() Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 44/47] s390/debug: keep debug data on resize Sasha Levin
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 45/47] s390/debug: fix debug area life cycle Sasha Levin
2021-09-06  1:19 ` Sasha Levin [this message]
2021-09-06  1:19 ` [PATCH AUTOSEL 5.14 47/47] s390/smp: enable DAT before CPU restart callback is called Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210906011951.928679-46-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=freude@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).