* [PATCH] btrfs: reorder extent buffer members for better packing
@ 2020-11-03 21:11 David Sterba
2020-11-03 21:25 ` Amy Parker
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: David Sterba @ 2020-11-03 21:11 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
After the rwsem replaced the tree lock implementation, the extent buffer
got smaller but left some holes behind. By changing the log_index type
and reordering, we can squeeze the size further to 240 bytes, measured on
release config on x86_64. Log_index spans only 3 values and needs to be
signed.
Before:
struct extent_buffer {
u64 start; /* 0 8 */
long unsigned int len; /* 8 8 */
long unsigned int bflags; /* 16 8 */
struct btrfs_fs_info * fs_info; /* 24 8 */
spinlock_t refs_lock; /* 32 4 */
atomic_t refs; /* 36 4 */
atomic_t io_pages; /* 40 4 */
int read_mirror; /* 44 4 */
struct callback_head callback_head __attribute__((__aligned__(8))); /* 48 16 */
/* --- cacheline 1 boundary (64 bytes) --- */
pid_t lock_owner; /* 64 4 */
bool lock_recursed; /* 68 1 */
/* XXX 3 bytes hole, try to pack */
struct rw_semaphore lock; /* 72 40 */
short int log_index; /* 112 2 */
/* XXX 6 bytes hole, try to pack */
struct page * pages[16]; /* 120 128 */
/* size: 248, cachelines: 4, members: 14 */
/* sum members: 239, holes: 2, sum holes: 9 */
/* forced alignments: 1 */
/* last cacheline: 56 bytes */
} __attribute__((__aligned__(8)));
After:
struct extent_buffer {
u64 start; /* 0 8 */
long unsigned int len; /* 8 8 */
long unsigned int bflags; /* 16 8 */
struct btrfs_fs_info * fs_info; /* 24 8 */
spinlock_t refs_lock; /* 32 4 */
atomic_t refs; /* 36 4 */
atomic_t io_pages; /* 40 4 */
int read_mirror; /* 44 4 */
struct callback_head callback_head __attribute__((__aligned__(8))); /* 48 16 */
/* --- cacheline 1 boundary (64 bytes) --- */
pid_t lock_owner; /* 64 4 */
bool lock_recursed; /* 68 1 */
s8 log_index; /* 69 1 */
/* XXX 2 bytes hole, try to pack */
struct rw_semaphore lock; /* 72 40 */
struct page * pages[16]; /* 112 128 */
/* size: 240, cachelines: 4, members: 14 */
/* sum members: 238, holes: 1, sum holes: 2 */
/* forced alignments: 1 */
/* last cacheline: 48 bytes */
} __attribute__((__aligned__(8)));
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/extent_io.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5403354de0e1..3c2bf21c54eb 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -88,10 +88,10 @@ struct extent_buffer {
struct rcu_head rcu_head;
pid_t lock_owner;
bool lock_recursed;
- struct rw_semaphore lock;
-
/* >= 0 if eb belongs to a log tree, -1 otherwise */
- short log_index;
+ s8 log_index;
+
+ struct rw_semaphore lock;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
#ifdef CONFIG_BTRFS_DEBUG
--
2.25.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] btrfs: reorder extent buffer members for better packing
2020-11-03 21:11 [PATCH] btrfs: reorder extent buffer members for better packing David Sterba
@ 2020-11-03 21:25 ` Amy Parker
2020-11-03 23:44 ` Qu Wenruo
2020-11-05 18:12 ` Josef Bacik
2 siblings, 0 replies; 7+ messages in thread
From: Amy Parker @ 2020-11-03 21:25 UTC (permalink / raw)
To: David Sterba; +Cc: linux-btrfs
On Tue, Nov 3, 2020 at 1:18 PM David Sterba <dsterba@suse.com> wrote:
>
> After the rwsem replaced the tree lock implementation, the extent buffer
> got smaller but leaving some holes behind. By changing log_index type
> and reordering, we can squeeze the size further to 240 bytes, measured on
> release config on x86_64. Log_index spans only 3 values and needs to be
> signed.
>
Sounds great!
> Before:
>
> struct extent_buffer {
> u64 start; /* 0 8 */
> long unsigned int len; /* 8 8 */
> long unsigned int bflags; /* 16 8 */
> struct btrfs_fs_info * fs_info; /* 24 8 */
> spinlock_t refs_lock; /* 32 4 */
> atomic_t refs; /* 36 4 */
> atomic_t io_pages; /* 40 4 */
> int read_mirror; /* 44 4 */
> struct callback_head callback_head __attribute__((__aligned__(8))); /* 48 16 */
> /* --- cacheline 1 boundary (64 bytes) --- */
> pid_t lock_owner; /* 64 4 */
> bool lock_recursed; /* 68 1 */
>
> /* XXX 3 bytes hole, try to pack */
>
> struct rw_semaphore lock; /* 72 40 */
> short int log_index; /* 112 2 */
>
> /* XXX 6 bytes hole, try to pack */
>
> struct page * pages[16]; /* 120 128 */
>
> /* size: 248, cachelines: 4, members: 14 */
> /* sum members: 239, holes: 2, sum holes: 9 */
> /* forced alignments: 1 */
> /* last cacheline: 56 bytes */
> } __attribute__((__aligned__(8)));
>
> After:
>
> struct extent_buffer {
> u64 start; /* 0 8 */
> long unsigned int len; /* 8 8 */
> long unsigned int bflags; /* 16 8 */
> struct btrfs_fs_info * fs_info; /* 24 8 */
> spinlock_t refs_lock; /* 32 4 */
> atomic_t refs; /* 36 4 */
> atomic_t io_pages; /* 40 4 */
> int read_mirror; /* 44 4 */
> struct callback_head callback_head __attribute__((__aligned__(8))); /* 48 16 */
> /* --- cacheline 1 boundary (64 bytes) --- */
> pid_t lock_owner; /* 64 4 */
> bool lock_recursed; /* 68 1 */
> s8 log_index; /* 69 1 */
>
> /* XXX 2 bytes hole, try to pack */
>
> struct rw_semaphore lock; /* 72 40 */
> struct page * pages[16]; /* 112 128 */
>
> /* size: 240, cachelines: 4, members: 14 */
> /* sum members: 238, holes: 1, sum holes: 2 */
> /* forced alignments: 1 */
> /* last cacheline: 48 bytes */
> } __attribute__((__aligned__(8)));
Looks alright, although based on the rest of the value types,
you may want to leave a comment for new btrfs devs about
the change, as well as a reference to this thread.
>
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
> fs/btrfs/extent_io.h | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> index 5403354de0e1..3c2bf21c54eb 100644
> --- a/fs/btrfs/extent_io.h
> +++ b/fs/btrfs/extent_io.h
> @@ -88,10 +88,10 @@ struct extent_buffer {
> struct rcu_head rcu_head;
> pid_t lock_owner;
> bool lock_recursed;
> - struct rw_semaphore lock;
> -
> /* >= 0 if eb belongs to a log tree, -1 otherwise */
> - short log_index;
> + s8 log_index;
> +
> + struct rw_semaphore lock;
>
> struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
> #ifdef CONFIG_BTRFS_DEBUG
> --
> 2.25.0
>
Functional-wise, everything seems great here. Again, as mentioned,
you may want to include a comment explaining the change and the
divergence from other types used in the code above.
Best regards,
Amy Parker
(they/them)
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] btrfs: reorder extent buffer members for better packing
2020-11-03 21:11 [PATCH] btrfs: reorder extent buffer members for better packing David Sterba
2020-11-03 21:25 ` Amy Parker
@ 2020-11-03 23:44 ` Qu Wenruo
2020-11-04 3:55 ` Amy Parker
2020-11-04 15:53 ` David Sterba
2020-11-05 18:12 ` Josef Bacik
2 siblings, 2 replies; 7+ messages in thread
From: Qu Wenruo @ 2020-11-03 23:44 UTC (permalink / raw)
To: David Sterba, linux-btrfs
[-- Attachment #1.1: Type: text/plain, Size: 4420 bytes --]
On 2020/11/4 上午5:11, David Sterba wrote:
> After the rwsem replaced the tree lock implementation, the extent buffer
> got smaller but leaving some holes behind. By changing log_index type
> and reordering, we can squeeze the size further to 240 bytes, measured on
> release config on x86_64. Log_index spans only 3 values and needs to be
> signed.
>
> Before:
>
> struct extent_buffer {
> u64 start; /* 0 8 */
> long unsigned int len; /* 8 8 */
> long unsigned int bflags; /* 16 8 */
> struct btrfs_fs_info * fs_info; /* 24 8 */
> spinlock_t refs_lock; /* 32 4 */
> atomic_t refs; /* 36 4 */
> atomic_t io_pages; /* 40 4 */
> int read_mirror; /* 44 4 */
> struct callback_head callback_head __attribute__((__aligned__(8))); /* 48 16 */
> /* --- cacheline 1 boundary (64 bytes) --- */
> pid_t lock_owner; /* 64 4 */
> bool lock_recursed; /* 68 1 */
>
> /* XXX 3 bytes hole, try to pack */
>
> struct rw_semaphore lock; /* 72 40 */
An off-topic question, for things like atomic_t/spinlock_t and
rw_semaphore, wouldn't various DEBUG options change their size?
Do we need to consider such a case, by moving them to the end of the
structure, or do we only consider the production build for pahole?
Thanks,
Qu
> short int log_index; /* 112 2 */
>
> /* XXX 6 bytes hole, try to pack */
>
> struct page * pages[16]; /* 120 128 */
>
> /* size: 248, cachelines: 4, members: 14 */
> /* sum members: 239, holes: 2, sum holes: 9 */
> /* forced alignments: 1 */
> /* last cacheline: 56 bytes */
> } __attribute__((__aligned__(8)));
>
> After:
>
> struct extent_buffer {
> u64 start; /* 0 8 */
> long unsigned int len; /* 8 8 */
> long unsigned int bflags; /* 16 8 */
> struct btrfs_fs_info * fs_info; /* 24 8 */
> spinlock_t refs_lock; /* 32 4 */
> atomic_t refs; /* 36 4 */
> atomic_t io_pages; /* 40 4 */
> int read_mirror; /* 44 4 */
> struct callback_head callback_head __attribute__((__aligned__(8))); /* 48 16 */
> /* --- cacheline 1 boundary (64 bytes) --- */
> pid_t lock_owner; /* 64 4 */
> bool lock_recursed; /* 68 1 */
> s8 log_index; /* 69 1 */
>
> /* XXX 2 bytes hole, try to pack */
>
> struct rw_semaphore lock; /* 72 40 */
> struct page * pages[16]; /* 112 128 */
>
> /* size: 240, cachelines: 4, members: 14 */
> /* sum members: 238, holes: 1, sum holes: 2 */
> /* forced alignments: 1 */
> /* last cacheline: 48 bytes */
> } __attribute__((__aligned__(8)));
>
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
> fs/btrfs/extent_io.h | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> index 5403354de0e1..3c2bf21c54eb 100644
> --- a/fs/btrfs/extent_io.h
> +++ b/fs/btrfs/extent_io.h
> @@ -88,10 +88,10 @@ struct extent_buffer {
> struct rcu_head rcu_head;
> pid_t lock_owner;
> bool lock_recursed;
> - struct rw_semaphore lock;
> -
> /* >= 0 if eb belongs to a log tree, -1 otherwise */
> - short log_index;
> + s8 log_index;
> +
> + struct rw_semaphore lock;
>
> struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
> #ifdef CONFIG_BTRFS_DEBUG
>
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] btrfs: reorder extent buffer members for better packing
2020-11-03 23:44 ` Qu Wenruo
@ 2020-11-04 3:55 ` Amy Parker
2020-11-04 15:53 ` David Sterba
1 sibling, 0 replies; 7+ messages in thread
From: Amy Parker @ 2020-11-04 3:55 UTC (permalink / raw)
To: Qu Wenruo; +Cc: David Sterba, Btrfs BTRFS
On Tue, Nov 3, 2020 at 3:45 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>
>
>
> On 2020/11/4 上午5:11, David Sterba wrote:
> > After the rwsem replaced the tree lock implementation, the extent buffer
> > got smaller but leaving some holes behind. By changing log_index type
> > and reordering, we can squeeze the size further to 240 bytes, measured on
> > release config on x86_64. Log_index spans only 3 values and needs to be
> > signed.
> >
> > Before:
> >
> > struct extent_buffer {
> > u64 start; /* 0 8 */
> > long unsigned int len; /* 8 8 */
> > long unsigned int bflags; /* 16 8 */
> > struct btrfs_fs_info * fs_info; /* 24 8 */
> > spinlock_t refs_lock; /* 32 4 */
> > atomic_t refs; /* 36 4 */
> > atomic_t io_pages; /* 40 4 */
> > int read_mirror; /* 44 4 */
> > struct callback_head callback_head __attribute__((__aligned__(8))); /* 48 16 */
> > /* --- cacheline 1 boundary (64 bytes) --- */
> > pid_t lock_owner; /* 64 4 */
> > bool lock_recursed; /* 68 1 */
> >
> > /* XXX 3 bytes hole, try to pack */
> >
> > struct rw_semaphore lock; /* 72 40 */
>
> An off-topic question, for things like atomic_t/spinlock_t and
> rw_semaphore, wouldn't various DEBUG options change their size?
Yes, I believe they could.
>
> Do we need to consider such a case, by moving them to the end of the
> structure, or do we only consider the production build for pahole?
>
I'd say we could go either way on this, but personally, if it builds, it builds.
Let's just focus on pahole for now and potentially revisit this later.
> Thanks,
> Qu
>
> > short int log_index; /* 112 2 */
> >
> > /* XXX 6 bytes hole, try to pack */
> >
> > struct page * pages[16]; /* 120 128 */
> >
> > /* size: 248, cachelines: 4, members: 14 */
> > /* sum members: 239, holes: 2, sum holes: 9 */
> > /* forced alignments: 1 */
> > /* last cacheline: 56 bytes */
> > } __attribute__((__aligned__(8)));
> >
> > After:
> >
> > struct extent_buffer {
> > u64 start; /* 0 8 */
> > long unsigned int len; /* 8 8 */
> > long unsigned int bflags; /* 16 8 */
> > struct btrfs_fs_info * fs_info; /* 24 8 */
> > spinlock_t refs_lock; /* 32 4 */
> > atomic_t refs; /* 36 4 */
> > atomic_t io_pages; /* 40 4 */
> > int read_mirror; /* 44 4 */
> > struct callback_head callback_head __attribute__((__aligned__(8))); /* 48 16 */
> > /* --- cacheline 1 boundary (64 bytes) --- */
> > pid_t lock_owner; /* 64 4 */
> > bool lock_recursed; /* 68 1 */
> > s8 log_index; /* 69 1 */
> >
> > /* XXX 2 bytes hole, try to pack */
> >
> > struct rw_semaphore lock; /* 72 40 */
> > struct page * pages[16]; /* 112 128 */
> >
> > /* size: 240, cachelines: 4, members: 14 */
> > /* sum members: 238, holes: 1, sum holes: 2 */
> > /* forced alignments: 1 */
> > /* last cacheline: 48 bytes */
> > } __attribute__((__aligned__(8)));
> >
> > Signed-off-by: David Sterba <dsterba@suse.com>
> > ---
> > fs/btrfs/extent_io.h | 6 +++---
> > 1 file changed, 3 insertions(+), 3 deletions(-)
> >
> > diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> > index 5403354de0e1..3c2bf21c54eb 100644
> > --- a/fs/btrfs/extent_io.h
> > +++ b/fs/btrfs/extent_io.h
> > @@ -88,10 +88,10 @@ struct extent_buffer {
> > struct rcu_head rcu_head;
> > pid_t lock_owner;
> > bool lock_recursed;
> > - struct rw_semaphore lock;
> > -
> > /* >= 0 if eb belongs to a log tree, -1 otherwise */
> > - short log_index;
> > + s8 log_index;
> > +
> > + struct rw_semaphore lock;
> >
> > struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
> > #ifdef CONFIG_BTRFS_DEBUG
> >
>
Best regards,
Amy Parker
(they/them)
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] btrfs: reorder extent buffer members for better packing
2020-11-03 23:44 ` Qu Wenruo
2020-11-04 3:55 ` Amy Parker
@ 2020-11-04 15:53 ` David Sterba
2020-11-04 17:42 ` Amy Parker
1 sibling, 1 reply; 7+ messages in thread
From: David Sterba @ 2020-11-04 15:53 UTC (permalink / raw)
To: Qu Wenruo; +Cc: David Sterba, linux-btrfs
On Wed, Nov 04, 2020 at 07:44:33AM +0800, Qu Wenruo wrote:
> On 2020/11/4 上午5:11, David Sterba wrote:
> > After the rwsem replaced the tree lock implementation, the extent buffer
> > got smaller but leaving some holes behind. By changing log_index type
> > and reordering, we can squeeze the size further to 240 bytes, measured on
> > release config on x86_64. Log_index spans only 3 values and needs to be
> > signed.
> >
> > Before:
> >
> > struct extent_buffer {
> > u64 start; /* 0 8 */
> > long unsigned int len; /* 8 8 */
> > long unsigned int bflags; /* 16 8 */
> > struct btrfs_fs_info * fs_info; /* 24 8 */
> > spinlock_t refs_lock; /* 32 4 */
> > atomic_t refs; /* 36 4 */
> > atomic_t io_pages; /* 40 4 */
> > int read_mirror; /* 44 4 */
> > struct callback_head callback_head __attribute__((__aligned__(8))); /* 48 16 */
> > /* --- cacheline 1 boundary (64 bytes) --- */
> > pid_t lock_owner; /* 64 4 */
> > bool lock_recursed; /* 68 1 */
> >
> > /* XXX 3 bytes hole, try to pack */
> >
> > struct rw_semaphore lock; /* 72 40 */
>
> An off-topic question, for things like atomic_t/spinlock_t and
> rw_semaphore, wouldn't various DEBUG options change their size?
Yes they do. For example spinlock_t is 4 bytes on release config and 72
on debug. Semaphore is 40 vs 168. Atomic_t is 4 bytes always, it's just
an int.
> Do we need to consider such a case, by moving them to the end of the
> structure, or do we only consider the production build for pahole?
We should optimize for the release build for structure layout or
cacheline occupation, the debugging options make it unpredictable and it
affects only development. There are way more deployments without
debugging options enabled anyway.
The resulting size of the structures is also bigger so this has
completely different slab allocation pattern and performance
characteristics.
Here's the layout of eb on the debug config I use:
struct extent_buffer {
u64 start; /* 0 8 */
long unsigned int len; /* 8 8 */
long unsigned int bflags; /* 16 8 */
struct btrfs_fs_info * fs_info; /* 24 8 */
spinlock_t refs_lock; /* 32 72 */
/* --- cacheline 1 boundary (64 bytes) was 40 bytes ago --- */
atomic_t refs; /* 104 4 */
atomic_t io_pages; /* 108 4 */
int read_mirror; /* 112 4 */
/* XXX 4 bytes hole, try to pack */
struct callback_head callback_head __attribute__((__aligned__(8))); /* 120 16 */
/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
pid_t lock_owner; /* 136 4 */
bool lock_recursed; /* 140 1 */
s8 log_index; /* 141 1 */
/* XXX 2 bytes hole, try to pack */
struct rw_semaphore lock; /* 144 168 */
/* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */
struct page * pages[16]; /* 312 128 */
/* --- cacheline 6 boundary (384 bytes) was 56 bytes ago --- */
struct list_head leak_list; /* 440 16 */
/* size: 456, cachelines: 8, members: 15 */
/* sum members: 450, holes: 2, sum holes: 6 */
/* forced alignments: 1, forced holes: 1, sum forced holes: 4 */
/* last cacheline: 8 bytes */
} __attribute__((__aligned__(8)));
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] btrfs: reorder extent buffer members for better packing
2020-11-04 15:53 ` David Sterba
@ 2020-11-04 17:42 ` Amy Parker
0 siblings, 0 replies; 7+ messages in thread
From: Amy Parker @ 2020-11-04 17:42 UTC (permalink / raw)
To: dsterba, Qu Wenruo, David Sterba, Btrfs BTRFS
On Wed, Nov 4, 2020 at 7:59 AM David Sterba <dsterba@suse.cz> wrote:
>
> On Wed, Nov 04, 2020 at 07:44:33AM +0800, Qu Wenruo wrote:
> > On 2020/11/4 上午5:11, David Sterba wrote:
> > > After the rwsem replaced the tree lock implementation, the extent buffer
> > > got smaller but leaving some holes behind. By changing log_index type
> > > and reordering, we can squeeze the size further to 240 bytes, measured on
> > > release config on x86_64. Log_index spans only 3 values and needs to be
> > > signed.
> > >
> > > Before:
> > >
> > > struct extent_buffer {
> > > u64 start; /* 0 8 */
> > > long unsigned int len; /* 8 8 */
> > > long unsigned int bflags; /* 16 8 */
> > > struct btrfs_fs_info * fs_info; /* 24 8 */
> > > spinlock_t refs_lock; /* 32 4 */
> > > atomic_t refs; /* 36 4 */
> > > atomic_t io_pages; /* 40 4 */
> > > int read_mirror; /* 44 4 */
> > > struct callback_head callback_head __attribute__((__aligned__(8))); /* 48 16 */
> > > /* --- cacheline 1 boundary (64 bytes) --- */
> > > pid_t lock_owner; /* 64 4 */
> > > bool lock_recursed; /* 68 1 */
> > >
> > > /* XXX 3 bytes hole, try to pack */
> > >
> > > struct rw_semaphore lock; /* 72 40 */
> >
> > An off-topic question, for things like atomic_t/spinlock_t and
> > rw_semaphore, wouldn't various DEBUG options change their size?
>
> Yes they do. For example spinlock_t is 4 bytes on release config and 72
> on debug. Semaphore is 40 vs 168. Atomic_t is 4 bytes always, it's just
> an int.
These are pretty big differences in byte size. Probably not worth
shifting everything
just for debug configs.
>
> > Do we need to consider such a case, by moving them to the end of the
> > structure, or do we only consider the production build for pahole?
>
> We should optimize for the release build for structure layout or
> cacheline occupation, the debugging options make it unpredictable and it
> affects only development. There are way more deployments without
> debugging options enabled anyway.
We could always just leave a comment there noting that it's unpredictable
under debug, and that debugging will require a temporary user-end shift.
Optimizing for production is best, yep.
>
> The resulting size of the structures is also bigger so this has
> completely different slab allocation pattern and performance
> characteristics.
Yeah, another reason we should just focus on production.
>
> Here's the layout of eb on the debug config I use:
>
> struct extent_buffer {
> u64 start; /* 0 8 */
> long unsigned int len; /* 8 8 */
> long unsigned int bflags; /* 16 8 */
> struct btrfs_fs_info * fs_info; /* 24 8 */
> spinlock_t refs_lock; /* 32 72 */
> /* --- cacheline 1 boundary (64 bytes) was 40 bytes ago --- */
> atomic_t refs; /* 104 4 */
> atomic_t io_pages; /* 108 4 */
> int read_mirror; /* 112 4 */
>
> /* XXX 4 bytes hole, try to pack */
>
> struct callback_head callback_head __attribute__((__aligned__(8))); /* 120 16 */
> /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
> pid_t lock_owner; /* 136 4 */
> bool lock_recursed; /* 140 1 */
> s8 log_index; /* 141 1 */
>
> /* XXX 2 bytes hole, try to pack */
>
> struct rw_semaphore lock; /* 144 168 */
> /* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */
> struct page * pages[16]; /* 312 128 */
> /* --- cacheline 6 boundary (384 bytes) was 56 bytes ago --- */
> struct list_head leak_list; /* 440 16 */
>
> /* size: 456, cachelines: 8, members: 15 */
> /* sum members: 450, holes: 2, sum holes: 6 */
> /* forced alignments: 1, forced holes: 1, sum forced holes: 4 */
> /* last cacheline: 8 bytes */
> } __attribute__((__aligned__(8)));
Best regards,
Amy Parker
(they/them)
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] btrfs: reorder extent buffer members for better packing
2020-11-03 21:11 [PATCH] btrfs: reorder extent buffer members for better packing David Sterba
2020-11-03 21:25 ` Amy Parker
2020-11-03 23:44 ` Qu Wenruo
@ 2020-11-05 18:12 ` Josef Bacik
2 siblings, 0 replies; 7+ messages in thread
From: Josef Bacik @ 2020-11-05 18:12 UTC (permalink / raw)
To: David Sterba, linux-btrfs
On 11/3/20 4:11 PM, David Sterba wrote:
> After the rwsem replaced the tree lock implementation, the extent buffer
> got smaller but leaving some holes behind. By changing log_index type
> and reordering, we can squeeze the size further to 240 bytes, measured on
> release config on x86_64. Log_index spans only 3 values and needs to be
> signed.
>
> Before:
>
> struct extent_buffer {
> u64 start; /* 0 8 */
> long unsigned int len; /* 8 8 */
> long unsigned int bflags; /* 16 8 */
> struct btrfs_fs_info * fs_info; /* 24 8 */
> spinlock_t refs_lock; /* 32 4 */
> atomic_t refs; /* 36 4 */
> atomic_t io_pages; /* 40 4 */
> int read_mirror; /* 44 4 */
> struct callback_head callback_head __attribute__((__aligned__(8))); /* 48 16 */
> /* --- cacheline 1 boundary (64 bytes) --- */
> pid_t lock_owner; /* 64 4 */
> bool lock_recursed; /* 68 1 */
>
> /* XXX 3 bytes hole, try to pack */
>
> struct rw_semaphore lock; /* 72 40 */
> short int log_index; /* 112 2 */
>
> /* XXX 6 bytes hole, try to pack */
>
> struct page * pages[16]; /* 120 128 */
>
> /* size: 248, cachelines: 4, members: 14 */
> /* sum members: 239, holes: 2, sum holes: 9 */
> /* forced alignments: 1 */
> /* last cacheline: 56 bytes */
> } __attribute__((__aligned__(8)));
>
> After:
>
> struct extent_buffer {
> u64 start; /* 0 8 */
> long unsigned int len; /* 8 8 */
> long unsigned int bflags; /* 16 8 */
> struct btrfs_fs_info * fs_info; /* 24 8 */
> spinlock_t refs_lock; /* 32 4 */
> atomic_t refs; /* 36 4 */
> atomic_t io_pages; /* 40 4 */
> int read_mirror; /* 44 4 */
> struct callback_head callback_head __attribute__((__aligned__(8))); /* 48 16 */
> /* --- cacheline 1 boundary (64 bytes) --- */
> pid_t lock_owner; /* 64 4 */
> bool lock_recursed; /* 68 1 */
> s8 log_index; /* 69 1 */
>
> /* XXX 2 bytes hole, try to pack */
>
> struct rw_semaphore lock; /* 72 40 */
> struct page * pages[16]; /* 112 128 */
>
> /* size: 240, cachelines: 4, members: 14 */
> /* sum members: 238, holes: 1, sum holes: 2 */
> /* forced alignments: 1 */
> /* last cacheline: 48 bytes */
> } __attribute__((__aligned__(8)));
>
> Signed-off-by: David Sterba <dsterba@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Thanks,
Josef
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2020-11-05 18:12 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-03 21:11 [PATCH] btrfs: reorder extent buffer members for better packing David Sterba
2020-11-03 21:25 ` Amy Parker
2020-11-03 23:44 ` Qu Wenruo
2020-11-04 3:55 ` Amy Parker
2020-11-04 15:53 ` David Sterba
2020-11-04 17:42 ` Amy Parker
2020-11-05 18:12 ` Josef Bacik
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).