From b0bec1de8a7dba69b223dd30dd2a734401f335aa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 25 Aug 2017 13:25:43 -0700 Subject: [PATCH 1/2] Split page bit waiting functions into their own file It turns out that the page-bit waiting logic is very special indeed, and will get even more so. Split it up into its own file to make this clear. I'll rewrite the logic of the page wait queue completely, but this is pure prep-work with no actual code changes, just code movement. Signed-off-by: Linus Torvalds --- mm/Makefile | 2 +- mm/filemap.c | 260 -------------------------------------------------- mm/page_wait_bit.c | 274 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 275 insertions(+), 261 deletions(-) create mode 100644 mm/page_wait_bit.c diff --git a/mm/Makefile b/mm/Makefile index 411bd24d4a7c..8bc0194207a3 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -32,7 +32,7 @@ ifdef CONFIG_CROSS_MEMORY_ATTACH mmu-$(CONFIG_MMU) += process_vm_access.o endif -obj-y := filemap.o mempool.o oom_kill.o \ +obj-y := filemap.o page_wait_bit.o mempool.o oom_kill.o \ maccess.o page_alloc.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ diff --git a/mm/filemap.c b/mm/filemap.c index a49702445ce0..f9b710e8f76e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -856,195 +856,6 @@ struct page *__page_cache_alloc(gfp_t gfp) EXPORT_SYMBOL(__page_cache_alloc); #endif -/* - * In order to wait for pages to become available there must be - * waitqueues associated with pages. By using a hash table of - * waitqueues where the bucket discipline is to maintain all - * waiters on the same queue and wake all when any of the pages - * become available, and for the woken contexts to check to be - * sure the appropriate page became available, this saves space - * at a cost of "thundering herd" phenomena during rare hash - * collisions. - */ -#define PAGE_WAIT_TABLE_BITS 8 -#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS) -static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; - -static wait_queue_head_t *page_waitqueue(struct page *page) -{ - return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)]; -} - -void __init pagecache_init(void) -{ - int i; - - for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++) - init_waitqueue_head(&page_wait_table[i]); - - page_writeback_init(); -} - -struct wait_page_key { - struct page *page; - int bit_nr; - int page_match; -}; - -struct wait_page_queue { - struct page *page; - int bit_nr; - wait_queue_entry_t wait; -}; - -static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg) -{ - struct wait_page_key *key = arg; - struct wait_page_queue *wait_page - = container_of(wait, struct wait_page_queue, wait); - - if (wait_page->page != key->page) - return 0; - key->page_match = 1; - - if (wait_page->bit_nr != key->bit_nr) - return 0; - if (test_bit(key->bit_nr, &key->page->flags)) - return 0; - - return autoremove_wake_function(wait, mode, sync, key); -} - -static void wake_up_page_bit(struct page *page, int bit_nr) -{ - wait_queue_head_t *q = page_waitqueue(page); - struct wait_page_key key; - unsigned long flags; - - key.page = page; - key.bit_nr = bit_nr; - key.page_match = 0; - - spin_lock_irqsave(&q->lock, flags); - __wake_up_locked_key(q, TASK_NORMAL, &key); - /* - * It is possible for other pages to have collided on the waitqueue - * hash, so in that case check for a page match. 
That prevents a long- - * term waiter - * - * It is still possible to miss a case here, when we woke page waiters - * and removed them from the waitqueue, but there are still other - * page waiters. - */ - if (!waitqueue_active(q) || !key.page_match) { - ClearPageWaiters(page); - /* - * It's possible to miss clearing Waiters here, when we woke - * our page waiters, but the hashed waitqueue has waiters for - * other pages on it. - * - * That's okay, it's a rare case. The next waker will clear it. - */ - } - spin_unlock_irqrestore(&q->lock, flags); -} - -static void wake_up_page(struct page *page, int bit) -{ - if (!PageWaiters(page)) - return; - wake_up_page_bit(page, bit); -} - -static inline int wait_on_page_bit_common(wait_queue_head_t *q, - struct page *page, int bit_nr, int state, bool lock) -{ - struct wait_page_queue wait_page; - wait_queue_entry_t *wait = &wait_page.wait; - int ret = 0; - - init_wait(wait); - wait->func = wake_page_function; - wait_page.page = page; - wait_page.bit_nr = bit_nr; - - for (;;) { - spin_lock_irq(&q->lock); - - if (likely(list_empty(&wait->entry))) { - if (lock) - __add_wait_queue_entry_tail_exclusive(q, wait); - else - __add_wait_queue(q, wait); - SetPageWaiters(page); - } - - set_current_state(state); - - spin_unlock_irq(&q->lock); - - if (likely(test_bit(bit_nr, &page->flags))) { - io_schedule(); - if (unlikely(signal_pending_state(state, current))) { - ret = -EINTR; - break; - } - } - - if (lock) { - if (!test_and_set_bit_lock(bit_nr, &page->flags)) - break; - } else { - if (!test_bit(bit_nr, &page->flags)) - break; - } - } - - finish_wait(q, wait); - - /* - * A signal could leave PageWaiters set. Clearing it here if - * !waitqueue_active would be possible (by open-coding finish_wait), - * but still fail to catch it in the case of wait hash collision. We - * already can fail to clear wait hash collision cases, so don't - * bother with signals either. - */ - - return ret; -} - -void wait_on_page_bit(struct page *page, int bit_nr) -{ - wait_queue_head_t *q = page_waitqueue(page); - wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false); -} -EXPORT_SYMBOL(wait_on_page_bit); - -int wait_on_page_bit_killable(struct page *page, int bit_nr) -{ - wait_queue_head_t *q = page_waitqueue(page); - return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false); -} - -/** - * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue - * @page: Page defining the wait queue of interest - * @waiter: Waiter to add to the queue - * - * Add an arbitrary @waiter to the wait queue for the nominated @page. - */ -void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter) -{ - wait_queue_head_t *q = page_waitqueue(page); - unsigned long flags; - - spin_lock_irqsave(&q->lock, flags); - __add_wait_queue(q, waiter); - SetPageWaiters(page); - spin_unlock_irqrestore(&q->lock, flags); -} -EXPORT_SYMBOL_GPL(add_page_wait_queue); - #ifndef clear_bit_unlock_is_negative_byte /* @@ -1068,57 +879,6 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem #endif -/** - * unlock_page - unlock a locked page - * @page: the page - * - * Unlocks the page and wakes up sleepers in ___wait_on_page_locked(). - * Also wakes sleepers in wait_on_page_writeback() because the wakeup - * mechanism between PageLocked pages and PageWriteback pages is shared. - * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. 
- * - * Note that this depends on PG_waiters being the sign bit in the byte - * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to - * clear the PG_locked bit and test PG_waiters at the same time fairly - * portably (architectures that do LL/SC can test any bit, while x86 can - * test the sign bit). - */ -void unlock_page(struct page *page) -{ - BUILD_BUG_ON(PG_waiters != 7); - page = compound_head(page); - VM_BUG_ON_PAGE(!PageLocked(page), page); - if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) - wake_up_page_bit(page, PG_locked); -} -EXPORT_SYMBOL(unlock_page); - -/** - * end_page_writeback - end writeback against a page - * @page: the page - */ -void end_page_writeback(struct page *page) -{ - /* - * TestClearPageReclaim could be used here but it is an atomic - * operation and overkill in this particular case. Failing to - * shuffle a page marked for immediate reclaim is too mild to - * justify taking an atomic operation penalty at the end of - * ever page writeback. - */ - if (PageReclaim(page)) { - ClearPageReclaim(page); - rotate_reclaimable_page(page); - } - - if (!test_clear_page_writeback(page)) - BUG(); - - smp_mb__after_atomic(); - wake_up_page(page, PG_writeback); -} -EXPORT_SYMBOL(end_page_writeback); - /* * After completing I/O on a page, call this routine to update the page * flags appropriately @@ -1147,26 +907,6 @@ void page_endio(struct page *page, bool is_write, int err) } EXPORT_SYMBOL_GPL(page_endio); -/** - * __lock_page - get a lock on the page, assuming we need to sleep to get it - * @__page: the page to lock - */ -void __lock_page(struct page *__page) -{ - struct page *page = compound_head(__page); - wait_queue_head_t *q = page_waitqueue(page); - wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true); -} -EXPORT_SYMBOL(__lock_page); - -int __lock_page_killable(struct page *__page) -{ - struct page *page = compound_head(__page); - wait_queue_head_t *q = page_waitqueue(page); - return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true); -} -EXPORT_SYMBOL_GPL(__lock_page_killable); - /* * Return values: * 1 - page is locked; mmap_sem is still held. diff --git a/mm/page_wait_bit.c b/mm/page_wait_bit.c new file mode 100644 index 000000000000..7550b6d2715a --- /dev/null +++ b/mm/page_wait_bit.c @@ -0,0 +1,274 @@ +/* + * linux/mm/page_wait_bit.c + * + * Copyright (C) 2017 Linus Torvalds + */ + +#include +#include +#include +#include +#include + +#include "internal.h" + +/* + * In order to wait for pages to become available there must be + * waitqueues associated with pages. By using a hash table of + * waitqueues where the bucket discipline is to maintain all + * waiters on the same queue and wake all when any of the pages + * become available, and for the woken contexts to check to be + * sure the appropriate page became available, this saves space + * at a cost of "thundering herd" phenomena during rare hash + * collisions. 
+ */ +#define PAGE_WAIT_TABLE_BITS 8 +#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS) +static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; + +static wait_queue_head_t *page_waitqueue(struct page *page) +{ + return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)]; +} + +void __init pagecache_init(void) +{ + int i; + + for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++) + init_waitqueue_head(&page_wait_table[i]); + + page_writeback_init(); +} + +struct wait_page_key { + struct page *page; + int bit_nr; + int page_match; +}; + +struct wait_page_queue { + struct page *page; + int bit_nr; + wait_queue_entry_t wait; +}; + +static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg) +{ + struct wait_page_key *key = arg; + struct wait_page_queue *wait_page + = container_of(wait, struct wait_page_queue, wait); + + if (wait_page->page != key->page) + return 0; + key->page_match = 1; + + if (wait_page->bit_nr != key->bit_nr) + return 0; + if (test_bit(key->bit_nr, &key->page->flags)) + return 0; + + return autoremove_wake_function(wait, mode, sync, key); +} + +static void wake_up_page_bit(struct page *page, int bit_nr) +{ + wait_queue_head_t *q = page_waitqueue(page); + struct wait_page_key key; + unsigned long flags; + + key.page = page; + key.bit_nr = bit_nr; + key.page_match = 0; + + spin_lock_irqsave(&q->lock, flags); + __wake_up_locked_key(q, TASK_NORMAL, &key); + /* + * It is possible for other pages to have collided on the waitqueue + * hash, so in that case check for a page match. That prevents a long- + * term waiter + * + * It is still possible to miss a case here, when we woke page waiters + * and removed them from the waitqueue, but there are still other + * page waiters. + */ + if (!waitqueue_active(q) || !key.page_match) { + ClearPageWaiters(page); + /* + * It's possible to miss clearing Waiters here, when we woke + * our page waiters, but the hashed waitqueue has waiters for + * other pages on it. + * + * That's okay, it's a rare case. The next waker will clear it. + */ + } + spin_unlock_irqrestore(&q->lock, flags); +} + +static void wake_up_page(struct page *page, int bit) +{ + if (!PageWaiters(page)) + return; + wake_up_page_bit(page, bit); +} + +static inline int wait_on_page_bit_common(wait_queue_head_t *q, + struct page *page, int bit_nr, int state, bool lock) +{ + struct wait_page_queue wait_page; + wait_queue_entry_t *wait = &wait_page.wait; + int ret = 0; + + init_wait(wait); + wait->func = wake_page_function; + wait_page.page = page; + wait_page.bit_nr = bit_nr; + + for (;;) { + spin_lock_irq(&q->lock); + + if (likely(list_empty(&wait->entry))) { + if (lock) + __add_wait_queue_entry_tail_exclusive(q, wait); + else + __add_wait_queue(q, wait); + SetPageWaiters(page); + } + + set_current_state(state); + + spin_unlock_irq(&q->lock); + + if (likely(test_bit(bit_nr, &page->flags))) { + io_schedule(); + if (unlikely(signal_pending_state(state, current))) { + ret = -EINTR; + break; + } + } + + if (lock) { + if (!test_and_set_bit_lock(bit_nr, &page->flags)) + break; + } else { + if (!test_bit(bit_nr, &page->flags)) + break; + } + } + + finish_wait(q, wait); + + /* + * A signal could leave PageWaiters set. Clearing it here if + * !waitqueue_active would be possible (by open-coding finish_wait), + * but still fail to catch it in the case of wait hash collision. We + * already can fail to clear wait hash collision cases, so don't + * bother with signals either. 
+ */ + + return ret; +} + +void wait_on_page_bit(struct page *page, int bit_nr) +{ + wait_queue_head_t *q = page_waitqueue(page); + wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false); +} +EXPORT_SYMBOL(wait_on_page_bit); + +int wait_on_page_bit_killable(struct page *page, int bit_nr) +{ + wait_queue_head_t *q = page_waitqueue(page); + return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false); +} + +/** + * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue + * @page: Page defining the wait queue of interest + * @waiter: Waiter to add to the queue + * + * Add an arbitrary @waiter to the wait queue for the nominated @page. + */ +void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter) +{ + wait_queue_head_t *q = page_waitqueue(page); + unsigned long flags; + + spin_lock_irqsave(&q->lock, flags); + __add_wait_queue(q, waiter); + SetPageWaiters(page); + spin_unlock_irqrestore(&q->lock, flags); +} +EXPORT_SYMBOL_GPL(add_page_wait_queue); + + +/** + * __lock_page - get a lock on the page, assuming we need to sleep to get it + * @__page: the page to lock + */ +void __lock_page(struct page *__page) +{ + struct page *page = compound_head(__page); + wait_queue_head_t *q = page_waitqueue(page); + wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true); +} +EXPORT_SYMBOL(__lock_page); + +int __lock_page_killable(struct page *__page) +{ + struct page *page = compound_head(__page); + wait_queue_head_t *q = page_waitqueue(page); + return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true); +} +EXPORT_SYMBOL_GPL(__lock_page_killable); + +/** + * unlock_page - unlock a locked page + * @page: the page + * + * Unlocks the page and wakes up sleepers in ___wait_on_page_locked(). + * Also wakes sleepers in wait_on_page_writeback() because the wakeup + * mechanism between PageLocked pages and PageWriteback pages is shared. + * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. + * + * Note that this depends on PG_waiters being the sign bit in the byte + * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to + * clear the PG_locked bit and test PG_waiters at the same time fairly + * portably (architectures that do LL/SC can test any bit, while x86 can + * test the sign bit). + */ +void unlock_page(struct page *page) +{ + BUILD_BUG_ON(PG_waiters != 7); + page = compound_head(page); + VM_BUG_ON_PAGE(!PageLocked(page), page); + if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) + wake_up_page_bit(page, PG_locked); +} +EXPORT_SYMBOL(unlock_page); + +/** + * end_page_writeback - end writeback against a page + * @page: the page + */ +void end_page_writeback(struct page *page) +{ + /* + * TestClearPageReclaim could be used here but it is an atomic + * operation and overkill in this particular case. Failing to + * shuffle a page marked for immediate reclaim is too mild to + * justify taking an atomic operation penalty at the end of + * ever page writeback. + */ + if (PageReclaim(page)) { + ClearPageReclaim(page); + rotate_reclaimable_page(page); + } + + if (!test_clear_page_writeback(page)) + BUG(); + + smp_mb__after_atomic(); + wake_up_page(page, PG_writeback); +} +EXPORT_SYMBOL(end_page_writeback); -- 2.14.0.rc1.2.g4c8247ec3
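
A minimal userspace sketch of the hashed wait-table scheme described in the comment at the top of page_wait_bit.c, for readers who want to see the bucket discipline in isolation: every page hashes to one of 1 << 8 buckets, waiters for colliding pages share a bucket, and the wake path filters each entry on an exact (page, bit) match. This is an illustration only, not part of the patch; bucket_of() below is a made-up stand-in for the kernel's hash_ptr(), the list handling stands in for the wait-queue machinery, and the real code additionally rechecks the flag bit before waking a waiter.

/*
 * Illustrative userspace model of the hashed page wait table.
 * Not kernel code: names and the hash function are stand-ins.
 */
#include <stdio.h>
#include <stdint.h>

#define TABLE_BITS	8
#define TABLE_SIZE	(1 << TABLE_BITS)

struct waiter {
	const void *page;	/* the page this entry waits on */
	int bit_nr;		/* which page flag bit it waits for */
	struct waiter *next;	/* next waiter in the same bucket */
};

static struct waiter *table[TABLE_SIZE];

/* Stand-in for the kernel's hash_ptr(): fold a pointer down to TABLE_BITS. */
static unsigned bucket_of(const void *page)
{
	uintptr_t v = (uintptr_t)page;

	v *= 0x9E3779B97F4A7C15ull;		/* Fibonacci hashing constant */
	return (unsigned)(v >> (sizeof(v) * 8 - TABLE_BITS));
}

static void add_waiter(struct waiter *w)
{
	unsigned b = bucket_of(w->page);

	w->next = table[b];
	table[b] = w;
}

/* Walk one bucket and report which waiters a wake-up for (page, bit) hits. */
static void wake_page_bit(const void *page, int bit_nr)
{
	struct waiter *w;

	for (w = table[bucket_of(page)]; w; w = w->next) {
		if (w->page != page || w->bit_nr != bit_nr)
			continue;	/* hash collision or different bit: skip */
		printf("waking waiter %p for page %p bit %d\n",
		       (void *)w, page, bit_nr);
	}
}

int main(void)
{
	int page_a, page_b;
	struct waiter w1 = { &page_a, 0 }, w2 = { &page_b, 0 };

	add_waiter(&w1);
	add_waiter(&w2);
	/* Only w1 matches, even if both pages landed in the same bucket. */
	wake_page_bit(&page_a, 0);
	return 0;
}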