From: Mike Snitzer <snitzer@kernel.org>
To: dm-devel@redhat.com
Cc: axboe@kernel.dk, ebiggers@kernel.org, keescook@chromium.org,
heinzm@redhat.com, Mike Snitzer <snitzer@kernel.org>,
nhuck@google.com, linux-block@vger.kernel.org, ejt@redhat.com,
mpatocka@redhat.com, luomeng12@huawei.com
Subject: [dm-devel] [dm-6.4 PATCH v3 05/20] dm bufio: add LRU abstraction
Date: Mon, 27 Mar 2023 16:11:28 -0400 [thread overview]
Message-ID: <20230327201143.51026-6-snitzer@kernel.org> (raw)
In-Reply-To: <20230327201143.51026-1-snitzer@kernel.org>
From: Joe Thornber <ejt@redhat.com>
A CLOCK algorithm is used in this LRU abstraction. This avoids
relinking list nodes, which would require a write lock protecting it.
None of the LRU methods are threadsafe; locking must be done at a
higher level.
Code that uses this new LRU will be introduced in the next 2 commits.
As such, this commit will cause "defined but not used" compiler warnings
that will be resolved by the next 2 commits.
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
drivers/md/dm-bufio.c | 235 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 235 insertions(+)
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 64fb5fd39bd9..a0bb0de0d4e7 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -66,6 +66,241 @@
#define LIST_DIRTY 1
#define LIST_SIZE 2
+/*--------------------------------------------------------------*/
+
+/*
+ * Rather than use an LRU list, we use a clock algorithm where entries
+ * are held in a circular list. When an entry is 'hit' a reference bit
+ * is set. The least recently used entry is approximated by running a
+ * cursor around the list selecting unreferenced entries. Referenced
+ * entries have their reference bit cleared as the cursor passes them.
+ */
+struct lru_entry {
+ struct list_head list;
+ atomic_t referenced;
+};
+
+struct lru_iter {
+ struct lru *lru;
+ struct list_head list;
+ struct lru_entry *stop;
+ struct lru_entry *e;
+};
+
+struct lru {
+ struct list_head *cursor;
+ unsigned long count;
+
+ struct list_head iterators;
+};
+
+/*--------------*/
+
+static void lru_init(struct lru *lru)
+{
+ lru->cursor = NULL;
+ lru->count = 0;
+ INIT_LIST_HEAD(&lru->iterators);
+}
+
+static void lru_destroy(struct lru *lru)
+{
+ WARN_ON_ONCE(lru->cursor);
+ WARN_ON_ONCE(!list_empty(&lru->iterators));
+}
+
+/*
+ * Insert a new entry into the lru.
+ */
+static void lru_insert(struct lru *lru, struct lru_entry *le)
+{
+ /*
+ * Don't be tempted to set to 1, makes the lru aspect
+ * perform poorly.
+ */
+ atomic_set(&le->referenced, 0);
+
+ if (lru->cursor) {
+ list_add_tail(&le->list, lru->cursor);
+ } else {
+ INIT_LIST_HEAD(&le->list);
+ lru->cursor = &le->list;
+ }
+ lru->count++;
+}
+
+/*--------------*/
+
+/*
+ * Convert a list_head pointer to an lru_entry pointer.
+ */
+static inline struct lru_entry *to_le(struct list_head *l)
+{
+ return container_of(l, struct lru_entry, list);
+}
+
+/*
+ * Initialize an lru_iter and add it to the list of cursors in the lru.
+ */
+static void lru_iter_begin(struct lru *lru, struct lru_iter *it)
+{
+ it->lru = lru;
+ it->stop = lru->cursor ? to_le(lru->cursor->prev) : NULL;
+ it->e = lru->cursor ? to_le(lru->cursor) : NULL;
+ list_add(&it->list, &lru->iterators);
+}
+
+/*
+ * Remove an lru_iter from the list of cursors in the lru.
+ */
+static inline void lru_iter_end(struct lru_iter *it)
+{
+ list_del(&it->list);
+}
+
+/* Predicate function type to be used with lru_iter_next */
+typedef bool (*iter_predicate)(struct lru_entry *le, void *context);
+
+/*
+ * Advance the cursor to the next entry that passes the
+ * predicate, and return that entry. Returns NULL if the
+ * iteration is complete.
+ */
+static struct lru_entry *lru_iter_next(struct lru_iter *it,
+ iter_predicate pred, void *context)
+{
+ struct lru_entry *e;
+
+ while (it->e) {
+ e = it->e;
+
+ /* advance the cursor */
+ if (it->e == it->stop)
+ it->e = NULL;
+ else
+ it->e = to_le(it->e->list.next);
+
+ if (pred(e, context))
+ return e;
+ }
+
+ return NULL;
+}
+
+/*
+ * Invalidate a specific lru_entry and update all cursors in
+ * the lru accordingly.
+ */
+static void lru_iter_invalidate(struct lru *lru, struct lru_entry *e)
+{
+ struct lru_iter *it;
+
+ list_for_each_entry(it, &lru->iterators, list) {
+ /* Move c->e forwards if necc. */
+ if (it->e == e) {
+ it->e = to_le(it->e->list.next);
+ if (it->e == e)
+ it->e = NULL;
+ }
+
+ /* Move it->stop backwards if necc. */
+ if (it->stop == e) {
+ it->stop = to_le(it->stop->list.prev);
+ if (it->stop == e)
+ it->stop = NULL;
+ }
+ }
+}
+
+/*--------------*/
+
+/*
+ * Remove a specific entry from the lru.
+ */
+static void lru_remove(struct lru *lru, struct lru_entry *le)
+{
+ lru_iter_invalidate(lru, le);
+ if (lru->count == 1) {
+ lru->cursor = NULL;
+ } else {
+ if (lru->cursor == &le->list)
+ lru->cursor = lru->cursor->next;
+ list_del(&le->list);
+ }
+ lru->count--;
+}
+
+/*
+ * Mark as referenced.
+ */
+static inline void lru_reference(struct lru_entry *le)
+{
+ atomic_set(&le->referenced, 1);
+}
+
+/*--------------*/
+
+/*
+ * Remove the least recently used entry (approx), that passes the predicate.
+ * Returns NULL on failure.
+ */
+enum evict_result {
+ ER_EVICT,
+ ER_DONT_EVICT,
+ ER_STOP, /* stop looking for something to evict */
+};
+
+typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
+
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
+{
+ unsigned long tested = 0;
+ struct list_head *h = lru->cursor;
+ struct lru_entry *le;
+
+ if (!h)
+ return NULL;
+ /*
+ * In the worst case we have to loop around twice. Once to clear
+ * the reference flags, and then again to discover the predicate
+ * fails for all entries.
+ */
+ while (tested < lru->count) {
+ le = container_of(h, struct lru_entry, list);
+
+ if (atomic_read(&le->referenced)) {
+ atomic_set(&le->referenced, 0);
+ } else {
+ tested++;
+ switch (pred(le, context)) {
+ case ER_EVICT:
+ /*
+ * Adjust the cursor, so we start the next
+ * search from here.
+ */
+ lru->cursor = le->list.next;
+ lru_remove(lru, le);
+ return le;
+
+ case ER_DONT_EVICT:
+ break;
+
+ case ER_STOP:
+ lru->cursor = le->list.next;
+ return NULL;
+ }
+ }
+
+ h = h->next;
+
+ cond_resched();
+ }
+
+ return NULL;
+}
+
+/*--------------------------------------------------------------*/
+
/*
* Linking of buffers:
* All buffers are linked to buffer_tree with their node field.
--
2.40.0
--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
next prev parent reply other threads:[~2023-03-27 20:13 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-27 20:11 [dm-devel] [dm-6.4 PATCH v3 00/20] dm bufio, thin: improve concurrent IO performance Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 01/20] dm bufio: remove unused dm_bufio_release_move interface Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 02/20] dm bufio: use WARN_ON in dm_bufio_client_destroy and dm_bufio_exit Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 03/20] dm bufio: never crash if dm_bufio_in_request() Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 04/20] dm bufio: don't bug for clear developer oversight Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer [this message]
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 06/20] dm bufio: add dm_buffer_cache abstraction Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 07/20] dm bufio: improve concurrent IO performance Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 08/20] dm bufio: add lock_history optimization for cache iterators Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 09/20] dm bufio: move dm_bufio_client members to avoid spanning cachelines Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 10/20] dm bufio: use waitqueue_active in __free_buffer_wake Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 11/20] dm bufio: use multi-page bio vector Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 12/20] dm thin: speed up cell_defer_no_holder() Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 13/20] dm: split discards further if target sets max_discard_granularity Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 14/20] dm bio prison v1: improve concurrent IO performance Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 15/20] dm bio prison v1: add dm_cell_key_has_valid_range Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 16/20] dm: add dm_num_sharded_locks() Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 17/20] dm bufio: prepare to intelligently size dm_buffer_cache's buffer_trees Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 18/20] dm bufio: " Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 19/20] dm bio prison v1: prepare to intelligently size dm_bio_prison's prison_regions Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 20/20] dm bio prison v1: " Mike Snitzer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230327201143.51026-6-snitzer@kernel.org \
--to=snitzer@kernel.org \
--cc=axboe@kernel.dk \
--cc=dm-devel@redhat.com \
--cc=ebiggers@kernel.org \
--cc=ejt@redhat.com \
--cc=heinzm@redhat.com \
--cc=keescook@chromium.org \
--cc=linux-block@vger.kernel.org \
--cc=luomeng12@huawei.com \
--cc=mpatocka@redhat.com \
--cc=nhuck@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).