All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: djwong@kernel.org
Cc: Kent Overstreet <kent.overstreet@linux.dev>,
	Dave Chinner <dchinner@redhat.com>,
	linux-xfs@vger.kernel.org, willy@infradead.org,
	linux-fsdevel@vger.kernel.org
Subject: [PATCH 5/7] xfs: speed up xfarray sort by sorting xfile page contents directly
Date: Thu, 27 Jul 2023 15:26:37 -0700	[thread overview]
Message-ID: <169049623643.921478.6377149280402650711.stgit@frogsfrogsfrogs> (raw)
In-Reply-To: <169049623563.921478.13811535720302490179.stgit@frogsfrogsfrogs>

From: Darrick J. Wong <djwong@kernel.org>

If all the records in an xfarray subset live within the same memory
page, we can short-circuit even more quicksort recursion by mapping that
page into the local CPU and using the kernel's heapsort function to sort
the subset.  On the author's computer, this reduces the runtime by
another 15% on a 500,000 element array.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Kent Overstreet <kent.overstreet@linux.dev>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/trace.h   |   20 ++++++++++
 fs/xfs/scrub/xfarray.c |   97 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/xfarray.h |    4 ++
 3 files changed, 121 insertions(+)


diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index f8c814e07587f..e9d7159461428 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -869,6 +869,26 @@ TRACE_EVENT(xfarray_isort,
 		  __entry->hi - __entry->lo)
 );
 
+TRACE_EVENT(xfarray_pagesort,
+	TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
+	TP_ARGS(si, lo, hi),
+	TP_STRUCT__entry(
+		__field(unsigned long, ino)
+		__field(unsigned long long, lo)
+		__field(unsigned long long, hi)
+	),
+	TP_fast_assign(
+		__entry->ino = file_inode(si->array->xfile->file)->i_ino;
+		__entry->lo = lo;
+		__entry->hi = hi;
+	),
+	TP_printk("xfino 0x%lx lo %llu hi %llu elts %llu",
+		  __entry->ino,
+		  __entry->lo,
+		  __entry->hi,
+		  __entry->hi - __entry->lo)
+);
+
 TRACE_EVENT(xfarray_qsort,
 	TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
 	TP_ARGS(si, lo, hi),
diff --git a/fs/xfs/scrub/xfarray.c b/fs/xfs/scrub/xfarray.c
index 2a0599f660d7b..457e56eac5e15 100644
--- a/fs/xfs/scrub/xfarray.c
+++ b/fs/xfs/scrub/xfarray.c
@@ -545,6 +545,87 @@ xfarray_isort(
 	return xfile_obj_store(si->array->xfile, scratch, len, lo_pos);
 }
 
+/* Grab a page for sorting records. */
+static inline int
+xfarray_sort_get_page(
+	struct xfarray_sortinfo	*si,
+	loff_t			pos,
+	uint64_t		len)
+{
+	int			error;
+
+	error = xfile_get_page(si->array->xfile, pos, len, &si->xfpage);
+	if (error)
+		return error;
+
+	/*
+	 * xfile pages must never be mapped into userspace, so we skip the
+	 * dcache flush when mapping the page.
+	 */
+	si->page_kaddr = kmap_local_page(si->xfpage.page);
+	return 0;
+}
+
+/* Release a page we grabbed for sorting records. */
+static inline int
+xfarray_sort_put_page(
+	struct xfarray_sortinfo	*si)
+{
+	if (!si->page_kaddr)
+		return 0;
+
+	kunmap_local(si->page_kaddr);
+	si->page_kaddr = NULL;
+
+	return xfile_put_page(si->array->xfile, &si->xfpage);
+}
+
+/* Decide if these records are eligible for in-page sorting. */
+static inline bool
+xfarray_want_pagesort(
+	struct xfarray_sortinfo	*si,
+	xfarray_idx_t		lo,
+	xfarray_idx_t		hi)
+{
+	pgoff_t			lo_page;
+	pgoff_t			hi_page;
+	loff_t			end_pos;
+
+	/* We can only map one page at a time. */
+	lo_page = xfarray_pos(si->array, lo) >> PAGE_SHIFT;
+	end_pos = xfarray_pos(si->array, hi) + si->array->obj_size - 1;
+	hi_page = end_pos >> PAGE_SHIFT;
+
+	return lo_page == hi_page;
+}
+
+/* Sort a bunch of records that all live in the same memory page. */
+STATIC int
+xfarray_pagesort(
+	struct xfarray_sortinfo	*si,
+	xfarray_idx_t		lo,
+	xfarray_idx_t		hi)
+{
+	void			*startp;
+	loff_t			lo_pos = xfarray_pos(si->array, lo);
+	uint64_t		len = xfarray_pos(si->array, hi - lo);
+	int			error = 0;
+
+	trace_xfarray_pagesort(si, lo, hi);
+
+	xfarray_sort_bump_loads(si);
+	error = xfarray_sort_get_page(si, lo_pos, len);
+	if (error)
+		return error;
+
+	xfarray_sort_bump_heapsorts(si);
+	startp = si->page_kaddr + offset_in_page(lo_pos);
+	sort(startp, hi - lo + 1, si->array->obj_size, si->cmp_fn, NULL);
+
+	xfarray_sort_bump_stores(si);
+	return xfarray_sort_put_page(si);
+}
+
 /* Return a pointer to the xfarray pivot record within the sortinfo struct. */
 static inline void *xfarray_sortinfo_pivot(struct xfarray_sortinfo *si)
 {
@@ -699,6 +780,10 @@ xfarray_qsort_push(
  * 4. For small sets, load the records into the scratchpad and run heapsort on
  *    them because that is very fast.  In the author's experience, this yields
  *    a ~10% reduction in runtime.
+ *
+ *    If a small set is contained entirely within a single xfile memory page,
+ *    map the page directly and run heap sort directly on the xfile page
+ *    instead of using the load/store interface.  This halves the runtime.
  */
 
 /*
@@ -744,6 +829,18 @@ xfarray_sort(
 			continue;
 		}
 
+		/*
+		 * If directly mapping the page and sorting can solve our
+		 * problems, we're done.
+		 */
+		if (xfarray_want_pagesort(si, lo, hi)) {
+			error = xfarray_pagesort(si, lo, hi);
+			if (error)
+				goto out_free;
+			si->stack_depth--;
+			continue;
+		}
+
 		/* If insertion sort can solve our problems, we're done. */
 		if (xfarray_want_isort(si, lo, hi)) {
 			error = xfarray_isort(si, lo, hi);
diff --git a/fs/xfs/scrub/xfarray.h b/fs/xfs/scrub/xfarray.h
index 3661c98272cd5..091614e7f6836 100644
--- a/fs/xfs/scrub/xfarray.h
+++ b/fs/xfs/scrub/xfarray.h
@@ -80,6 +80,10 @@ struct xfarray_sortinfo {
 	/* XFARRAY_SORT_* flags; see below. */
 	unsigned int		flags;
 
+	/* Cache a page here for faster access. */
+	struct xfile_page	xfpage;
+	void			*page_kaddr;
+
 #ifdef DEBUG
 	/* Performance statistics. */
 	uint64_t		loads;


  parent reply	other threads:[~2023-07-27 22:26 UTC|newest]

Thread overview: 90+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-27 22:11 [MEGAPATCHSET v26] xfs: online repair, part of part 1 Darrick J. Wong
2023-07-27 22:18 ` [PATCHSET v26.0 0/9] xfs: fix online repair block reaping Darrick J. Wong
2023-07-27 22:21   ` [PATCH 1/9] xfs: cull repair code that will never get used Darrick J. Wong
2023-07-27 22:21   ` [PATCH 2/9] xfs: move the post-repair block reaping code to a separate file Darrick J. Wong
2023-07-27 22:22   ` [PATCH 3/9] xfs: only invalidate blocks if we're going to free them Darrick J. Wong
2023-07-27 22:22   ` [PATCH 4/9] xfs: only allow reaping of per-AG blocks in xrep_reap_extents Darrick J. Wong
2023-07-27 22:22   ` [PATCH 5/9] xfs: use deferred frees to reap old btree blocks Darrick J. Wong
2023-07-27 22:22   ` [PATCH 6/9] xfs: rearrange xrep_reap_block to make future code flow easier Darrick J. Wong
2023-07-27 22:23   ` [PATCH 7/9] xfs: allow scanning ranges of the buffer cache for live buffers Darrick J. Wong
2023-07-27 22:23   ` [PATCH 8/9] xfs: reap large AG metadata extents when possible Darrick J. Wong
2023-07-27 22:23   ` [PATCH 9/9] xfs: use per-AG bitmaps to reap unused AG metadata blocks during repair Darrick J. Wong
2023-08-07  6:19   ` [PATCHSET v26.0 0/9] xfs: fix online repair block reaping Dave Chinner
2023-08-08  0:40     ` Darrick J. Wong
2023-08-08  5:17       ` Dave Chinner
2023-08-09 23:17         ` Darrick J. Wong
2023-07-27 22:18 ` [PATCHSET v26.0 0/6] xfs: prepare repair for bulk loading Darrick J. Wong
2023-07-27 22:24   ` [PATCH 1/6] xfs: force all buffers to be written during btree bulk load Darrick J. Wong
2023-07-27 22:24   ` [PATCH 2/6] xfs: implement block reservation accounting for btrees we're staging Darrick J. Wong
2023-08-07  6:58     ` Dave Chinner
2023-08-08  1:08       ` Darrick J. Wong
2023-07-27 22:24   ` [PATCH 3/6] xfs: log EFIs for all btree blocks being used to stage a btree Darrick J. Wong
2023-08-07  8:41     ` Dave Chinner
2023-08-08  0:54       ` Darrick J. Wong
2023-08-08  6:11         ` Dave Chinner
2023-08-09 23:52           ` Darrick J. Wong
2023-08-10 20:36             ` Darrick J. Wong
2023-09-08 23:34       ` Darrick J. Wong
2023-07-27 22:24   ` [PATCH 4/6] xfs: add debug knobs to control btree bulk load slack factors Darrick J. Wong
2023-07-27 22:25   ` [PATCH 5/6] xfs: move btree bulkload record initialization to ->get_record implementations Darrick J. Wong
2023-07-27 22:25   ` [PATCH 6/6] xfs: constrain dirty buffers while formatting a staged btree Darrick J. Wong
2023-07-27 22:19 ` [PATCHSET v26.0 0/7] xfs: stage repair information in pageable memory Darrick J. Wong
2023-07-27 22:25   ` [PATCH 1/7] xfs: create a big array data structure Darrick J. Wong
2023-07-28  3:10     ` Matthew Wilcox
2023-07-28  4:39       ` Darrick J. Wong
2023-07-27 22:25   ` [PATCH 2/7] xfs: enable sorting of xfile-backed arrays Darrick J. Wong
2023-07-27 22:26   ` [PATCH 3/7] xfs: convert xfarray insertion sort to heapsort using scratchpad memory Darrick J. Wong
2023-07-27 22:26   ` [PATCH 4/7] xfs: teach xfile to pass back direct-map pages to caller Darrick J. Wong
2023-07-27 22:26   ` Darrick J. Wong [this message]
2023-07-27 22:26   ` [PATCH 6/7] xfs: cache pages used for xfarray quicksort convergence Darrick J. Wong
2023-07-27 22:27   ` [PATCH 7/7] xfs: improve xfarray quicksort pivot Darrick J. Wong
2023-07-27 22:19 ` [PATCHSET v26.0 0/2] xfs: add usage counters for scrub Darrick J. Wong
2023-07-27 22:27   ` [PATCH 1/2] xfs: create scaffolding for creating debugfs entries Darrick J. Wong
2023-07-27 22:27   ` [PATCH 2/2] xfs: track usage statistics of online fsck Darrick J. Wong
2023-08-08  7:09   ` [PATCHSET v26.0 0/2] xfs: add usage counters for scrub Dave Chinner
2023-07-27 22:19 ` [PATCHSET v26.0 0/4] xfs: online scrubbing of realtime summary files Darrick J. Wong
2023-07-27 22:27   ` [PATCH 1/4] xfs: get our own reference to inodes that we want to scrub Darrick J. Wong
2023-07-27 22:28   ` [PATCH 2/4] xfs: wrap ilock/iunlock operations on sc->ip Darrick J. Wong
2023-07-27 22:28   ` [PATCH 3/4] xfs: move the realtime summary file scrubber to a separate source file Darrick J. Wong
2023-07-27 22:28   ` [PATCH 4/4] xfs: implement online scrubbing of rtsummary info Darrick J. Wong
2023-07-27 22:19 ` [PATCHSET v26.0 0/2] xfs: miscellaneous repair tweaks Darrick J. Wong
2023-07-27 22:28   ` [PATCH 1/2] xfs: always rescan allegedly healthy per-ag metadata after repair Darrick J. Wong
2023-07-27 22:29   ` [PATCH 2/2] xfs: allow the user to cancel repairs before we start writing Darrick J. Wong
2023-07-27 22:20 ` [PATCHSET v26.0 0/2] xfs: force rebuilding of metadata Darrick J. Wong
2023-07-27 22:29   ` [PATCH 1/2] xfs: don't complain about unfixed metadata when repairs were injected Darrick J. Wong
2023-07-27 22:29   ` [PATCH 2/2] xfs: allow userspace to rebuild metadata structures Darrick J. Wong
2023-07-27 22:20 ` [PATCHSET v26.0 0/2] xfs: fixes to the AGFL repair code Darrick J. Wong
2023-07-27 22:30   ` [PATCH 1/2] xfs: clear pagf_agflreset when repairing the AGFL Darrick J. Wong
2023-07-27 22:30   ` [PATCH 2/2] xfs: fix agf_fllast when repairing an empty AGFL Darrick J. Wong
2023-08-08  7:10     ` Dave Chinner
2023-07-27 22:20 ` [PATCHSET v26.0 0/5] xfs: online repair of AG btrees Darrick J. Wong
2023-07-27 22:30   ` [PATCH 1/5] xfs: repair free space btrees Darrick J. Wong
2023-07-27 22:30   ` [PATCH 2/5] xfs: hide xfs_inode_is_allocated in scrub common code Darrick J. Wong
2023-08-08  7:13     ` Dave Chinner
2023-07-27 22:31   ` [PATCH 3/5] xfs: rewrite xchk_inode_is_allocated to work properly Darrick J. Wong
2023-08-08  7:14     ` Dave Chinner
2023-07-27 22:31   ` [PATCH 4/5] xfs: repair inode btrees Darrick J. Wong
2023-07-27 22:31   ` [PATCH 5/5] xfs: repair refcount btrees Darrick J. Wong
2023-07-27 22:20 ` [PATCHSET v26.0 0/2] xfs: fixes for the block mapping checker Darrick J. Wong
2023-07-27 22:31   ` [PATCH 1/2] xfs: simplify returns in xchk_bmap Darrick J. Wong
2023-07-27 22:32   ` [PATCH 2/2] xfs: don't check reflink iflag state when checking cow fork Darrick J. Wong
2023-08-08  7:16   ` [PATCHSET v26.0 0/2] xfs: fixes for the block mapping checker Dave Chinner
2023-07-27 22:21 ` [PATCHSET v26.0 0/6] xfs: online repair of inodes and forks Darrick J. Wong
2023-07-27 22:32   ` [PATCH 1/6] xfs: disable online repair quota helpers when quota not enabled Darrick J. Wong
2023-07-27 22:32   ` [PATCH 2/6] xfs: try to attach dquots to files before repairing them Darrick J. Wong
2023-07-27 22:32   ` [PATCH 3/6] xfs: repair inode records Darrick J. Wong
2023-08-09  8:42     ` Dave Chinner
2023-08-10  0:43       ` Darrick J. Wong
2023-07-27 22:33   ` [PATCH 4/6] xfs: zap broken inode forks Darrick J. Wong
2023-07-27 22:33   ` [PATCH 5/6] xfs: abort directory parent scrub scans if we encounter a zapped directory Darrick J. Wong
2023-07-27 22:33   ` [PATCH 6/6] xfs: repair obviously broken inode modes Darrick J. Wong
2023-08-09  9:44   ` [PATCHSET v26.0 0/6] xfs: online repair of inodes and forks Dave Chinner
2023-08-10  0:45     ` Darrick J. Wong
2023-07-27 22:21 ` [PATCHSET v26.0 0/5] xfs: online repair of file fork mappings Darrick J. Wong
2023-07-27 22:33   ` [PATCH 1/5] xfs: reintroduce reaping of file metadata blocks to xrep_reap_extents Darrick J. Wong
2023-07-27 22:34   ` [PATCH 2/5] xfs: repair inode fork block mapping data structures Darrick J. Wong
2023-07-27 22:34   ` [PATCH 3/5] xfs: refactor repair forcing tests into a repair.c helper Darrick J. Wong
2023-07-27 22:34   ` [PATCH 4/5] xfs: create a ranged query function for refcount btrees Darrick J. Wong
2023-07-27 22:34   ` [PATCH 5/5] xfs: repair problems in CoW forks Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2023-05-26  0:28 [PATCHSET v25.0 0/7] xfs: stage repair information in pageable memory Darrick J. Wong
2023-05-26  0:48 ` [PATCH 5/7] xfs: speed up xfarray sort by sorting xfile page contents directly Darrick J. Wong
2022-12-30 22:12 [PATCHSET v24.0 0/7] xfs: stage repair information in pageable memory Darrick J. Wong
2022-12-30 22:12 ` [PATCH 5/7] xfs: speed up xfarray sort by sorting xfile page contents directly Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=169049623643.921478.6377149280402650711.stgit@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=dchinner@redhat.com \
    --cc=kent.overstreet@linux.dev \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.