From mboxrd@z Thu Jan 1 00:00:00 1970 From: Christoph Hellwig Subject: [PATCH 1/9] move blk_iopoll to limit and make it generally available Date: Fri, 13 Nov 2015 14:46:42 +0100 Message-ID: <1447422410-20891-2-git-send-email-hch@lst.de> References: <1447422410-20891-1-git-send-email-hch@lst.de> Return-path: In-Reply-To: <1447422410-20891-1-git-send-email-hch@lst.de> Sender: linux-kernel-owner@vger.kernel.org To: linux-rdma@vger.kernel.org Cc: sagig@dev.mellanox.co.il, bart.vanassche@sandisk.com, axboe@fb.com, linux-scsi@vger.kernel.org, linux-kernel@vger.kernel.org List-Id: linux-rdma@vger.kernel.org The new name is irq_poll as iopoll is already taken. Better suggestions welcome. Signed-off-by: Christoph Hellwig --- Documentation/kernel-per-CPU-kthreads.txt | 2 +- block/Makefile | 2 +- block/blk-iopoll.c | 224 ------------------------------ drivers/scsi/Kconfig | 1 + drivers/scsi/be2iscsi/Kconfig | 1 + drivers/scsi/be2iscsi/be.h | 4 +- drivers/scsi/be2iscsi/be_iscsi.c | 4 +- drivers/scsi/be2iscsi/be_main.c | 24 ++-- drivers/scsi/ipr.c | 28 ++-- drivers/scsi/ipr.h | 4 +- include/linux/blk-iopoll.h | 46 ------ include/linux/interrupt.h | 2 +- include/linux/irq_poll.h | 46 ++++++ include/trace/events/irq.h | 2 +- lib/Kconfig | 5 + lib/Makefile | 1 + lib/irq_poll.c | 221 +++++++++++++++++++++++++++++ tools/lib/traceevent/event-parse.c | 2 +- tools/perf/util/trace-event-parse.c | 2 +- 19 files changed, 313 insertions(+), 308 deletions(-) delete mode 100644 block/blk-iopoll.c delete mode 100644 include/linux/blk-iopoll.h create mode 100644 include/linux/irq_poll.h create mode 100644 lib/irq_poll.c diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt index f4cbfe0..edec3a3 100644 --- a/Documentation/kernel-per-CPU-kthreads.txt +++ b/Documentation/kernel-per-CPU-kthreads.txt @@ -90,7 +90,7 @@ BLOCK_SOFTIRQ: Do all of the following: from being initiated from tasks that might run on the CPU to be de-jittered. (It is OK to force this CPU offline and then bring it back online before you start your application.) -BLOCK_IOPOLL_SOFTIRQ: Do all of the following: +IRQ_POLL_SOFTIRQ: Do all of the following: 1. Force block-device interrupts onto some other CPU. 2. Initiate any block I/O and block-I/O polling on other CPUs. 3. Once your application has started, prevent CPU-hotplug operations diff --git a/block/Makefile b/block/Makefile index 00ecc97..e850474 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \ + blk-lib.o blk-mq.o blk-mq-tag.o \ blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ partitions/ diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c deleted file mode 100644 index 0736729..0000000 --- a/block/blk-iopoll.c +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Functions related to interrupt-poll handling in the block layer. This - * is similar to NAPI for network devices. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "blk.h" - -static unsigned int blk_iopoll_budget __read_mostly = 256; - -static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); - -/** - * blk_iopoll_sched - Schedule a run of the iopoll handler - * @iop: The parent iopoll structure - * - * Description: - * Add this blk_iopoll structure to the pending poll list and trigger the - * raise of the blk iopoll softirq. The driver must already have gotten a - * successful return from blk_iopoll_sched_prep() before calling this. - **/ -void blk_iopoll_sched(struct blk_iopoll *iop) -{ - unsigned long flags; - - local_irq_save(flags); - list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); - __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); - local_irq_restore(flags); -} -EXPORT_SYMBOL(blk_iopoll_sched); - -/** - * __blk_iopoll_complete - Mark this @iop as un-polled again - * @iop: The parent iopoll structure - * - * Description: - * See blk_iopoll_complete(). This function must be called with interrupts - * disabled. - **/ -void __blk_iopoll_complete(struct blk_iopoll *iop) -{ - list_del(&iop->list); - smp_mb__before_atomic(); - clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); -} -EXPORT_SYMBOL(__blk_iopoll_complete); - -/** - * blk_iopoll_complete - Mark this @iop as un-polled again - * @iop: The parent iopoll structure - * - * Description: - * If a driver consumes less than the assigned budget in its run of the - * iopoll handler, it'll end the polled mode by calling this function. The - * iopoll handler will not be invoked again before blk_iopoll_sched_prep() - * is called. - **/ -void blk_iopoll_complete(struct blk_iopoll *iop) -{ - unsigned long flags; - - local_irq_save(flags); - __blk_iopoll_complete(iop); - local_irq_restore(flags); -} -EXPORT_SYMBOL(blk_iopoll_complete); - -static void blk_iopoll_softirq(struct softirq_action *h) -{ - struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); - int rearm = 0, budget = blk_iopoll_budget; - unsigned long start_time = jiffies; - - local_irq_disable(); - - while (!list_empty(list)) { - struct blk_iopoll *iop; - int work, weight; - - /* - * If softirq window is exhausted then punt. - */ - if (budget <= 0 || time_after(jiffies, start_time)) { - rearm = 1; - break; - } - - local_irq_enable(); - - /* Even though interrupts have been re-enabled, this - * access is safe because interrupts can only add new - * entries to the tail of this list, and only ->poll() - * calls can remove this head entry from the list. - */ - iop = list_entry(list->next, struct blk_iopoll, list); - - weight = iop->weight; - work = 0; - if (test_bit(IOPOLL_F_SCHED, &iop->state)) - work = iop->poll(iop, weight); - - budget -= work; - - local_irq_disable(); - - /* - * Drivers must not modify the iopoll state, if they - * consume their assigned weight (or more, some drivers can't - * easily just stop processing, they have to complete an - * entire mask of commands).In such cases this code - * still "owns" the iopoll instance and therefore can - * move the instance around on the list at-will. - */ - if (work >= weight) { - if (blk_iopoll_disable_pending(iop)) - __blk_iopoll_complete(iop); - else - list_move_tail(&iop->list, list); - } - } - - if (rearm) - __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); - - local_irq_enable(); -} - -/** - * blk_iopoll_disable - Disable iopoll on this @iop - * @iop: The parent iopoll structure - * - * Description: - * Disable io polling and wait for any pending callbacks to have completed. - **/ -void blk_iopoll_disable(struct blk_iopoll *iop) -{ - set_bit(IOPOLL_F_DISABLE, &iop->state); - while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state)) - msleep(1); - clear_bit(IOPOLL_F_DISABLE, &iop->state); -} -EXPORT_SYMBOL(blk_iopoll_disable); - -/** - * blk_iopoll_enable - Enable iopoll on this @iop - * @iop: The parent iopoll structure - * - * Description: - * Enable iopoll on this @iop. Note that the handler run will not be - * scheduled, it will only mark it as active. - **/ -void blk_iopoll_enable(struct blk_iopoll *iop) -{ - BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state)); - smp_mb__before_atomic(); - clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); -} -EXPORT_SYMBOL(blk_iopoll_enable); - -/** - * blk_iopoll_init - Initialize this @iop - * @iop: The parent iopoll structure - * @weight: The default weight (or command completion budget) - * @poll_fn: The handler to invoke - * - * Description: - * Initialize this blk_iopoll structure. Before being actively used, the - * driver must call blk_iopoll_enable(). - **/ -void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn) -{ - memset(iop, 0, sizeof(*iop)); - INIT_LIST_HEAD(&iop->list); - iop->weight = weight; - iop->poll = poll_fn; - set_bit(IOPOLL_F_SCHED, &iop->state); -} -EXPORT_SYMBOL(blk_iopoll_init); - -static int blk_iopoll_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - /* - * If a CPU goes away, splice its entries to the current CPU - * and trigger a run of the softirq - */ - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - int cpu = (unsigned long) hcpu; - - local_irq_disable(); - list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), - this_cpu_ptr(&blk_cpu_iopoll)); - __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); - local_irq_enable(); - } - - return NOTIFY_OK; -} - -static struct notifier_block blk_iopoll_cpu_notifier = { - .notifier_call = blk_iopoll_cpu_notify, -}; - -static __init int blk_iopoll_setup(void) -{ - int i; - - for_each_possible_cpu(i) - INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); - - open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq); - register_hotcpu_notifier(&blk_iopoll_cpu_notifier); - return 0; -} -subsys_initcall(blk_iopoll_setup); diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index d2f480b..cea683e 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1103,6 +1103,7 @@ config SCSI_IPR tristate "IBM Power Linux RAID adapter support" depends on PCI && SCSI && ATA select FW_LOADER + select IRQ_POLL ---help--- This driver supports the IBM Power Linux family RAID adapters. This includes IBM pSeries 5712, 5703, 5709, and 570A, as well diff --git a/drivers/scsi/be2iscsi/Kconfig b/drivers/scsi/be2iscsi/Kconfig index 4e7cad2..bad5f32 100644 --- a/drivers/scsi/be2iscsi/Kconfig +++ b/drivers/scsi/be2iscsi/Kconfig @@ -3,6 +3,7 @@ config BE2ISCSI depends on PCI && SCSI && NET select SCSI_ISCSI_ATTRS select ISCSI_BOOT_SYSFS + select IRQ_POLL help This driver implements the iSCSI functionality for Emulex diff --git a/drivers/scsi/be2iscsi/be.h b/drivers/scsi/be2iscsi/be.h index 77f992e..a41c643 100644 --- a/drivers/scsi/be2iscsi/be.h +++ b/drivers/scsi/be2iscsi/be.h @@ -20,7 +20,7 @@ #include #include -#include +#include #define FW_VER_LEN 32 #define MCC_Q_LEN 128 #define MCC_CQ_LEN 256 @@ -101,7 +101,7 @@ struct be_eq_obj { struct beiscsi_hba *phba; struct be_queue_info *cq; struct work_struct work_cqs; /* Work Item */ - struct blk_iopoll iopoll; + struct irq_poll iopoll; }; struct be_mcc_obj { diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c index b7087ba..022e87b 100644 --- a/drivers/scsi/be2iscsi/be_iscsi.c +++ b/drivers/scsi/be2iscsi/be_iscsi.c @@ -1292,9 +1292,9 @@ static void beiscsi_flush_cq(struct beiscsi_hba *phba) for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_disable(&pbe_eq->iopoll); + irq_poll_disable(&pbe_eq->iopoll); beiscsi_process_cq(pbe_eq); - blk_iopoll_enable(&pbe_eq->iopoll); + irq_poll_enable(&pbe_eq->iopoll); } } diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 2e6abe7..7f0fdbe 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -910,8 +910,8 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id) num_eq_processed = 0; while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] & EQE_VALID_MASK) { - if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) - blk_iopoll_sched(&pbe_eq->iopoll); + if (!irq_poll_sched_prep(&pbe_eq->iopoll)) + irq_poll_sched(&pbe_eq->iopoll); AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); queue_tail_inc(eq); @@ -972,8 +972,8 @@ static irqreturn_t be_isr(int irq, void *dev_id) spin_unlock_irqrestore(&phba->isr_lock, flags); num_mcceq_processed++; } else { - if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) - blk_iopoll_sched(&pbe_eq->iopoll); + if (!irq_poll_sched_prep(&pbe_eq->iopoll)) + irq_poll_sched(&pbe_eq->iopoll); num_ioeq_processed++; } AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); @@ -2293,7 +2293,7 @@ void beiscsi_process_all_cqs(struct work_struct *work) hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1); } -static int be_iopoll(struct blk_iopoll *iop, int budget) +static int be_iopoll(struct irq_poll *iop, int budget) { unsigned int ret; struct beiscsi_hba *phba; @@ -2304,7 +2304,7 @@ static int be_iopoll(struct blk_iopoll *iop, int budget) pbe_eq->cq_count += ret; if (ret < budget) { phba = pbe_eq->phba; - blk_iopoll_complete(iop); + irq_poll_complete(iop); beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO, "BM_%d : rearm pbe_eq->q.id =%d\n", @@ -5261,7 +5261,7 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba, for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_disable(&pbe_eq->iopoll); + irq_poll_disable(&pbe_eq->iopoll); } if (unload_state == BEISCSI_CLEAN_UNLOAD) { @@ -5547,9 +5547,9 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev) for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, + irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget, be_iopoll); - blk_iopoll_enable(&pbe_eq->iopoll); + irq_poll_enable(&pbe_eq->iopoll); } i = (phba->msix_enabled) ? i : 0; @@ -5720,9 +5720,9 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev, for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, + irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget, be_iopoll); - blk_iopoll_enable(&pbe_eq->iopoll); + irq_poll_enable(&pbe_eq->iopoll); } i = (phba->msix_enabled) ? i : 0; @@ -5763,7 +5763,7 @@ free_blkenbld: destroy_workqueue(phba->wq); for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_disable(&pbe_eq->iopoll); + irq_poll_disable(&pbe_eq->iopoll); } free_twq: beiscsi_clean_port(phba); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index b62836d..6b98e75 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3638,7 +3638,7 @@ static struct device_attribute ipr_ioa_reset_attr = { .store = ipr_store_reset_adapter }; -static int ipr_iopoll(struct blk_iopoll *iop, int budget); +static int ipr_iopoll(struct irq_poll *iop, int budget); /** * ipr_show_iopoll_weight - Show ipr polling mode * @dev: class device struct @@ -3681,34 +3681,34 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev, int i; if (!ioa_cfg->sis64) { - dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n"); + dev_info(&ioa_cfg->pdev->dev, "irq_poll not supported on this adapter\n"); return -EINVAL; } if (kstrtoul(buf, 10, &user_iopoll_weight)) return -EINVAL; if (user_iopoll_weight > 256) { - dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n"); + dev_info(&ioa_cfg->pdev->dev, "Invalid irq_poll weight. It must be less than 256\n"); return -EINVAL; } if (user_iopoll_weight == ioa_cfg->iopoll_weight) { - dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n"); + dev_info(&ioa_cfg->pdev->dev, "Current irq_poll weight has the same weight\n"); return strlen(buf); } if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { for (i = 1; i < ioa_cfg->hrrq_num; i++) - blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + irq_poll_disable(&ioa_cfg->hrrq[i].iopoll); } spin_lock_irqsave(shost->host_lock, lock_flags); ioa_cfg->iopoll_weight = user_iopoll_weight; if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { for (i = 1; i < ioa_cfg->hrrq_num; i++) { - blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + irq_poll_init(&ioa_cfg->hrrq[i].iopoll, ioa_cfg->iopoll_weight, ipr_iopoll); - blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + irq_poll_enable(&ioa_cfg->hrrq[i].iopoll); } } spin_unlock_irqrestore(shost->host_lock, lock_flags); @@ -5569,7 +5569,7 @@ static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget, return num_hrrq; } -static int ipr_iopoll(struct blk_iopoll *iop, int budget) +static int ipr_iopoll(struct irq_poll *iop, int budget) { struct ipr_ioa_cfg *ioa_cfg; struct ipr_hrr_queue *hrrq; @@ -5585,7 +5585,7 @@ static int ipr_iopoll(struct blk_iopoll *iop, int budget) completed_ops = ipr_process_hrrq(hrrq, budget, &doneq); if (completed_ops < budget) - blk_iopoll_complete(iop); + irq_poll_complete(iop); spin_unlock_irqrestore(hrrq->lock, hrrq_flags); list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { @@ -5693,8 +5693,8 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp) if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == hrrq->toggle_bit) { - if (!blk_iopoll_sched_prep(&hrrq->iopoll)) - blk_iopoll_sched(&hrrq->iopoll); + if (!irq_poll_sched_prep(&hrrq->iopoll)) + irq_poll_sched(&hrrq->iopoll); spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return IRQ_HANDLED; } @@ -10285,9 +10285,9 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { for (i = 1; i < ioa_cfg->hrrq_num; i++) { - blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + irq_poll_init(&ioa_cfg->hrrq[i].iopoll, ioa_cfg->iopoll_weight, ipr_iopoll); - blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + irq_poll_enable(&ioa_cfg->hrrq[i].iopoll); } } @@ -10316,7 +10316,7 @@ static void ipr_shutdown(struct pci_dev *pdev) if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { ioa_cfg->iopoll_weight = 0; for (i = 1; i < ioa_cfg->hrrq_num; i++) - blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + irq_poll_disable(&ioa_cfg->hrrq[i].iopoll); } while (ioa_cfg->in_reset_reload) { diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index e4fb17a..022fc3c 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include @@ -510,7 +510,7 @@ struct ipr_hrr_queue { u8 allow_cmds:1; u8 removing_ioa:1; - struct blk_iopoll iopoll; + struct irq_poll iopoll; }; /* Command packet structure */ diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h deleted file mode 100644 index 77ae77c..0000000 --- a/include/linux/blk-iopoll.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef BLK_IOPOLL_H -#define BLK_IOPOLL_H - -struct blk_iopoll; -typedef int (blk_iopoll_fn)(struct blk_iopoll *, int); - -struct blk_iopoll { - struct list_head list; - unsigned long state; - unsigned long data; - int weight; - int max; - blk_iopoll_fn *poll; -}; - -enum { - IOPOLL_F_SCHED = 0, - IOPOLL_F_DISABLE = 1, -}; - -/* - * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating - * that we were the first to acquire this iop for scheduling. If this iop - * is currently disabled, return "failure". - */ -static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop) -{ - if (!test_bit(IOPOLL_F_DISABLE, &iop->state)) - return test_and_set_bit(IOPOLL_F_SCHED, &iop->state); - - return 1; -} - -static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop) -{ - return test_bit(IOPOLL_F_DISABLE, &iop->state); -} - -extern void blk_iopoll_sched(struct blk_iopoll *); -extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *); -extern void blk_iopoll_complete(struct blk_iopoll *); -extern void __blk_iopoll_complete(struct blk_iopoll *); -extern void blk_iopoll_enable(struct blk_iopoll *); -extern void blk_iopoll_disable(struct blk_iopoll *); - -#endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index ad16809..7ff98c2 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -412,7 +412,7 @@ enum NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, - BLOCK_IOPOLL_SOFTIRQ, + IRQ_POLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h new file mode 100644 index 0000000..0cf7c26 --- /dev/null +++ b/include/linux/irq_poll.h @@ -0,0 +1,46 @@ +#ifndef IRQ_POLL_H +#define IRQ_POLL_H + +struct irq_poll; +typedef int (irq_poll_fn)(struct irq_poll *, int); + +struct irq_poll { + struct list_head list; + unsigned long state; + unsigned long data; + int weight; + int max; + irq_poll_fn *poll; +}; + +enum { + IRQ_POLL_F_SCHED = 0, + IRQ_POLL_F_DISABLE = 1, +}; + +/* + * Returns 0 if we successfully set the IRQ_POLL_F_SCHED bit, indicating + * that we were the first to acquire this iop for scheduling. If this iop + * is currently disabled, return "failure". + */ +static inline int irq_poll_sched_prep(struct irq_poll *iop) +{ + if (!test_bit(IRQ_POLL_F_DISABLE, &iop->state)) + return test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state); + + return 1; +} + +static inline int irq_poll_disable_pending(struct irq_poll *iop) +{ + return test_bit(IRQ_POLL_F_DISABLE, &iop->state); +} + +extern void irq_poll_sched(struct irq_poll *); +extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *); +extern void irq_poll_complete(struct irq_poll *); +extern void __irq_poll_complete(struct irq_poll *); +extern void irq_poll_enable(struct irq_poll *); +extern void irq_poll_disable(struct irq_poll *); + +#endif diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index ff8f6c0..f95f25e 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -15,7 +15,7 @@ struct softirq_action; softirq_name(NET_TX) \ softirq_name(NET_RX) \ softirq_name(BLOCK) \ - softirq_name(BLOCK_IOPOLL) \ + softirq_name(IRQ_POLL) \ softirq_name(TASKLET) \ softirq_name(SCHED) \ softirq_name(HRTIMER) \ diff --git a/lib/Kconfig b/lib/Kconfig index f0df318..e00e196 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -475,6 +475,11 @@ config DDR information. This data is useful for drivers handling DDR SDRAM controllers. +config IRQ_POLL + bool "IRQ polling library" + help + Helper library to poll interrupt mitigation using polling. + config MPILIB tristate select CLZ_TAB diff --git a/lib/Makefile b/lib/Makefile index 7f1de26..1478ae2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -164,6 +164,7 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o obj-$(CONFIG_SG_SPLIT) += sg_split.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o +obj-$(CONFIG_IRQ_POLL) += irq_poll.o libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ fdt_empty_tree.o diff --git a/lib/irq_poll.c b/lib/irq_poll.c new file mode 100644 index 0000000..e6fd1dc --- /dev/null +++ b/lib/irq_poll.c @@ -0,0 +1,221 @@ +/* + * Functions related to interrupt-poll handling in the block layer. This + * is similar to NAPI for network devices. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int irq_poll_budget __read_mostly = 256; + +static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); + +/** + * irq_poll_sched - Schedule a run of the iopoll handler + * @iop: The parent iopoll structure + * + * Description: + * Add this irq_poll structure to the pending poll list and trigger the + * raise of the blk iopoll softirq. The driver must already have gotten a + * successful return from irq_poll_sched_prep() before calling this. + **/ +void irq_poll_sched(struct irq_poll *iop) +{ + unsigned long flags; + + local_irq_save(flags); + list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); + __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); + local_irq_restore(flags); +} +EXPORT_SYMBOL(irq_poll_sched); + +/** + * __irq_poll_complete - Mark this @iop as un-polled again + * @iop: The parent iopoll structure + * + * Description: + * See irq_poll_complete(). This function must be called with interrupts + * disabled. + **/ +void __irq_poll_complete(struct irq_poll *iop) +{ + list_del(&iop->list); + smp_mb__before_atomic(); + clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(__irq_poll_complete); + +/** + * irq_poll_complete - Mark this @iop as un-polled again + * @iop: The parent iopoll structure + * + * Description: + * If a driver consumes less than the assigned budget in its run of the + * iopoll handler, it'll end the polled mode by calling this function. The + * iopoll handler will not be invoked again before irq_poll_sched_prep() + * is called. + **/ +void irq_poll_complete(struct irq_poll *iop) +{ + unsigned long flags; + + local_irq_save(flags); + __irq_poll_complete(iop); + local_irq_restore(flags); +} +EXPORT_SYMBOL(irq_poll_complete); + +static void irq_poll_softirq(struct softirq_action *h) +{ + struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); + int rearm = 0, budget = irq_poll_budget; + unsigned long start_time = jiffies; + + local_irq_disable(); + + while (!list_empty(list)) { + struct irq_poll *iop; + int work, weight; + + /* + * If softirq window is exhausted then punt. + */ + if (budget <= 0 || time_after(jiffies, start_time)) { + rearm = 1; + break; + } + + local_irq_enable(); + + /* Even though interrupts have been re-enabled, this + * access is safe because interrupts can only add new + * entries to the tail of this list, and only ->poll() + * calls can remove this head entry from the list. + */ + iop = list_entry(list->next, struct irq_poll, list); + + weight = iop->weight; + work = 0; + if (test_bit(IRQ_POLL_F_SCHED, &iop->state)) + work = iop->poll(iop, weight); + + budget -= work; + + local_irq_disable(); + + /* + * Drivers must not modify the iopoll state, if they + * consume their assigned weight (or more, some drivers can't + * easily just stop processing, they have to complete an + * entire mask of commands).In such cases this code + * still "owns" the iopoll instance and therefore can + * move the instance around on the list at-will. + */ + if (work >= weight) { + if (irq_poll_disable_pending(iop)) + __irq_poll_complete(iop); + else + list_move_tail(&iop->list, list); + } + } + + if (rearm) + __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); + + local_irq_enable(); +} + +/** + * irq_poll_disable - Disable iopoll on this @iop + * @iop: The parent iopoll structure + * + * Description: + * Disable io polling and wait for any pending callbacks to have completed. + **/ +void irq_poll_disable(struct irq_poll *iop) +{ + set_bit(IRQ_POLL_F_DISABLE, &iop->state); + while (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state)) + msleep(1); + clear_bit(IRQ_POLL_F_DISABLE, &iop->state); +} +EXPORT_SYMBOL(irq_poll_disable); + +/** + * irq_poll_enable - Enable iopoll on this @iop + * @iop: The parent iopoll structure + * + * Description: + * Enable iopoll on this @iop. Note that the handler run will not be + * scheduled, it will only mark it as active. + **/ +void irq_poll_enable(struct irq_poll *iop) +{ + BUG_ON(!test_bit(IRQ_POLL_F_SCHED, &iop->state)); + smp_mb__before_atomic(); + clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(irq_poll_enable); + +/** + * irq_poll_init - Initialize this @iop + * @iop: The parent iopoll structure + * @weight: The default weight (or command completion budget) + * @poll_fn: The handler to invoke + * + * Description: + * Initialize this irq_poll structure. Before being actively used, the + * driver must call irq_poll_enable(). + **/ +void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn) +{ + memset(iop, 0, sizeof(*iop)); + INIT_LIST_HEAD(&iop->list); + iop->weight = weight; + iop->poll = poll_fn; + set_bit(IRQ_POLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(irq_poll_init); + +static int irq_poll_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + /* + * If a CPU goes away, splice its entries to the current CPU + * and trigger a run of the softirq + */ + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { + int cpu = (unsigned long) hcpu; + + local_irq_disable(); + list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), + this_cpu_ptr(&blk_cpu_iopoll)); + __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); + local_irq_enable(); + } + + return NOTIFY_OK; +} + +static struct notifier_block irq_poll_cpu_notifier = { + .notifier_call = irq_poll_cpu_notify, +}; + +static __init int irq_poll_setup(void) +{ + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); + + open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq); + register_hotcpu_notifier(&irq_poll_cpu_notifier); + return 0; +} +subsys_initcall(irq_poll_setup); diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 2a912df..af5a316 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3746,7 +3746,7 @@ static const struct flag flags[] = { { "NET_TX_SOFTIRQ", 2 }, { "NET_RX_SOFTIRQ", 3 }, { "BLOCK_SOFTIRQ", 4 }, - { "BLOCK_IOPOLL_SOFTIRQ", 5 }, + { "IRQ_POLL_SOFTIRQ", 5 }, { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 8ff7d62..33b52ea 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -209,7 +209,7 @@ static const struct flag flags[] = { { "NET_TX_SOFTIRQ", 2 }, { "NET_RX_SOFTIRQ", 3 }, { "BLOCK_SOFTIRQ", 4 }, - { "BLOCK_IOPOLL_SOFTIRQ", 5 }, + { "IRQ_POLL_SOFTIRQ", 5 }, { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, -- 1.9.1