All of lore.kernel.org
 help / color / mirror / Atom feed
* Is it a good idea to export certain internal structure declaration to make bcc/ebpf happier?
@ 2019-04-02  7:41 Qu Wenruo
  2019-04-02  7:59 ` Nikolay Borisov
  0 siblings, 1 reply; 6+ messages in thread
From: Qu Wenruo @ 2019-04-02  7:41 UTC (permalink / raw)
  To: linux-btrfs, iovisor-dev


[-- Attachment #1.1: Type: text/plain, Size: 5654 bytes --]

Hi,

I'm recently working on crafting a small bcc based script to show how
btrfs tree block concurrency.

The prototype is here:
https://gist.github.com/adam900710/63db6cdc7d26c4744a53dfc7755fafda

However it has some problems, and cannot be used directly on latest kernel.

- No way to pass btrfs headers into eBPF code.
  This is because the declaration of extent_buffer is in
  `fs/btrfs/extent_io.h`.
  No in the common kernel headers search path.

- eBPF program can't access btrfs_header_owner() helper.
  Even we make eBPF code understand the headers, we can't
  access a lot of kernel functions, like btrfs_header_owner().

So I'm wondering is it possible for us to export extent_buffer
declaration just for possible eBPF usage.

Or is bcc suit strong enough to handle such case?

The diff would be something like this to make above bcc script work:

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a3e3e95c632e..ba7628434ac9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -13,6 +13,7 @@
 #include <linux/pagevec.h>
 #include <linux/prefetch.h>
 #include <linux/cleancache.h>
+#include <linux/btrfs.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "ctree.h"
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index c4ec104ac157..5bc0df2c1c64 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -146,48 +146,6 @@ struct extent_state {
 #endif
 };

-#define INLINE_EXTENT_BUFFER_PAGES 16
-#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES *
PAGE_SIZE)
-struct extent_buffer {
-       u64 start;
-       unsigned long len;
-       unsigned long bflags;
-       struct btrfs_fs_info *fs_info;
-       spinlock_t refs_lock;
-       atomic_t refs;
-       atomic_t io_pages;
-       int read_mirror;
-       struct rcu_head rcu_head;
-       pid_t lock_owner;
-
-       atomic_t blocking_writers;
-       atomic_t blocking_readers;
-       bool lock_nested;
-       /* >= 0 if eb belongs to a log tree, -1 otherwise */
-       short log_index;
-
-       /* protects write locks */
-       rwlock_t lock;
-
-       /* readers use lock_wq while they wait for the write
-        * lock holders to unlock
-        */
-       wait_queue_head_t write_lock_wq;
-
-       /* writers use read_lock_wq while they wait for readers
-        * to unlock
-        */
-       wait_queue_head_t read_lock_wq;
-       struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
-#ifdef CONFIG_BTRFS_DEBUG
-       atomic_t spinning_writers;
-       atomic_t spinning_readers;
-       atomic_t read_locks;
-       atomic_t write_locks;
-       struct list_head leak_list;
-#endif
-};
-
 /*
  * Structure to record how many bytes and which ranges are set/cleared
  */
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 6df03ba36026..5cf075d02b1b 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -174,6 +174,8 @@ void btrfs_tree_read_lock(struct extent_buffer *eb)
                BUG_ON(eb->lock_nested);
                eb->lock_nested = true;
                read_unlock(&eb->lock);
+               if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
+                       eb->owner = btrfs_header_owner(eb);
                return;
        }
        if (atomic_read(&eb->blocking_writers)) {
@@ -184,6 +186,8 @@ void btrfs_tree_read_lock(struct extent_buffer *eb)
        }
        btrfs_assert_tree_read_locks_get(eb);
        btrfs_assert_spinning_readers_get(eb);
+       if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
+               eb->owner = btrfs_header_owner(eb);
 }

 /*
@@ -312,6 +316,8 @@ void btrfs_tree_lock(struct extent_buffer *eb)
        btrfs_assert_spinning_writers_get(eb);
        btrfs_assert_tree_write_locks_get(eb);
        eb->lock_owner = current->pid;
+       if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
+               eb->owner = btrfs_header_owner(eb);
 }

 /*
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index c195896d478f..3aeaffdadabb 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -944,4 +944,47 @@ enum btrfs_err_code {
 #define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \
                                struct btrfs_ioctl_ino_lookup_user_args)

+#define INLINE_EXTENT_BUFFER_PAGES 16
+#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES *
PAGE_SIZE)
+struct extent_buffer {
+       u64 start;
+       unsigned long len;
+       unsigned long bflags;
+       struct btrfs_fs_info *fs_info;
+       spinlock_t refs_lock;
+       atomic_t refs;
+       atomic_t io_pages;
+       int read_mirror;
+       struct rcu_head rcu_head;
+       pid_t lock_owner;
+
+       atomic_t blocking_writers;
+       atomic_t blocking_readers;
+       bool lock_nested;
+       /* >= 0 if eb belongs to a log tree, -1 otherwise */
+       short log_index;
+
+       /* protects write locks */
+       rwlock_t lock;
+
+       /* readers use lock_wq while they wait for the write
+        * lock holders to unlock
+        */
+       wait_queue_head_t write_lock_wq;
+
+       /* writers use read_lock_wq while they wait for readers
+        * to unlock
+        */
+       wait_queue_head_t read_lock_wq;
+       struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
+#ifdef CONFIG_BTRFS_DEBUG
+       atomic_t spinning_writers;
+       atomic_t spinning_readers;
+       atomic_t read_locks;
+       atomic_t write_locks;
+       u64 owner;
+       struct list_head leak_list;
+#endif
+};
+
 #endif /* _UAPI_LINUX_BTRFS_H */

Thanks,
Qu


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: Is it a good idea to export certain internal structure declaration to make bcc/ebpf happier?
  2019-04-02  7:41 Is it a good idea to export certain internal structure declaration to make bcc/ebpf happier? Qu Wenruo
@ 2019-04-02  7:59 ` Nikolay Borisov
  2019-04-02  8:11   ` Qu Wenruo
  2019-04-02  8:45   ` Qu Wenruo
  0 siblings, 2 replies; 6+ messages in thread
From: Nikolay Borisov @ 2019-04-02  7:59 UTC (permalink / raw)
  To: Qu Wenruo, linux-btrfs, iovisor-dev



On 2.04.19 г. 10:41 ч., Qu Wenruo wrote:
> Hi,
> 
> I'm recently working on crafting a small bcc based script to show how
> btrfs tree block concurrency.
> 
> The prototype is here:
> https://gist.github.com/adam900710/63db6cdc7d26c4744a53dfc7755fafda
> 
> However it has some problems, and cannot be used directly on latest kernel.
> 
> - No way to pass btrfs headers into eBPF code.
>   This is because the declaration of extent_buffer is in
>   `fs/btrfs/extent_io.h`.
>   No in the common kernel headers search path.
> 
> - eBPF program can't access btrfs_header_owner() helper.
>   Even we make eBPF code understand the headers, we can't
>   access a lot of kernel functions, like btrfs_header_owner().
> 
> So I'm wondering is it possible for us to export extent_buffer
> declaration just for possible eBPF usage.
 >
> Or is bcc suit strong enough to handle such case?
> 
> The diff would be something like this to make above bcc script work:
> 


Hell no, that is _very_ ugly as hell. If you want to access such
information why not introduce tracepoints which have enough context so
that when they are used with ebpf you can query the data from the
tracepoint? I *think* this ought to be made to work with ebpf?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Is it a good idea to export certain internal structure declaration to make bcc/ebpf happier?
  2019-04-02  7:59 ` Nikolay Borisov
@ 2019-04-02  8:11   ` Qu Wenruo
  2019-04-02  8:45   ` Qu Wenruo
  1 sibling, 0 replies; 6+ messages in thread
From: Qu Wenruo @ 2019-04-02  8:11 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs, iovisor-dev



On 2019/4/2 下午3:59, Nikolay Borisov wrote:
>
>
> On 2.04.19 г. 10:41 ч., Qu Wenruo wrote:
>> Hi,
>>
>> I'm recently working on crafting a small bcc based script to show how
>> btrfs tree block concurrency.
>>
>> The prototype is here:
>> https://gist.github.com/adam900710/63db6cdc7d26c4744a53dfc7755fafda
>>
>> However it has some problems, and cannot be used directly on latest kernel.
>>
>> - No way to pass btrfs headers into eBPF code.
>>   This is because the declaration of extent_buffer is in
>>   `fs/btrfs/extent_io.h`.
>>   No in the common kernel headers search path.
>>
>> - eBPF program can't access btrfs_header_owner() helper.
>>   Even we make eBPF code understand the headers, we can't
>>   access a lot of kernel functions, like btrfs_header_owner().
>>
>> So I'm wondering is it possible for us to export extent_buffer
>> declaration just for possible eBPF usage.
>  >
>> Or is bcc suit strong enough to handle such case?
>>
>> The diff would be something like this to make above bcc script work:
>>
>
>
> Hell no, that is _very_ ugly as hell. If you want to access such
> information why not introduce tracepoints which have enough context so
> that when they are used with ebpf you can query the data from the
> tracepoint? I *think* this ought to be made to work with ebpf?

The problem is, for kprobe/kretprobe, the only thing they can access is
limited C language.
I'm not sure if external function call is possible.

Another problem is, if using kprobe/kretprobe, entry and return point
are 2 separate functions, which means they are independent. Normally we
use a hash table and use pid+tgid as index to match return with entry
and get the execution time.

If using tracepoint, it's pretty static and either we record execution
time inside btrfs and export it using regular ftrace events, then the
start/end time overhead can't be skipped, it will always be there.

Any good idea on that?

Or bcc guys have better ideas on this?

Thanks,
Qu

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Is it a good idea to export certain internal structure declaration to make bcc/ebpf happier?
  2019-04-02  7:59 ` Nikolay Borisov
  2019-04-02  8:11   ` Qu Wenruo
@ 2019-04-02  8:45   ` Qu Wenruo
  2019-04-02  9:34     ` Johannes Thumshirn
  1 sibling, 1 reply; 6+ messages in thread
From: Qu Wenruo @ 2019-04-02  8:45 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs, iovisor-dev



On 2019/4/2 下午3:59, Nikolay Borisov wrote:
>
>
> On 2.04.19 г. 10:41 ч., Qu Wenruo wrote:
>> Hi,
>>
>> I'm recently working on crafting a small bcc based script to show how
>> btrfs tree block concurrency.
>>
>> The prototype is here:
>> https://gist.github.com/adam900710/63db6cdc7d26c4744a53dfc7755fafda
>>
>> However it has some problems, and cannot be used directly on latest kernel.
>>
>> - No way to pass btrfs headers into eBPF code.
>>   This is because the declaration of extent_buffer is in
>>   `fs/btrfs/extent_io.h`.
>>   No in the common kernel headers search path.
>>
>> - eBPF program can't access btrfs_header_owner() helper.
>>   Even we make eBPF code understand the headers, we can't
>>   access a lot of kernel functions, like btrfs_header_owner().
>>
>> So I'm wondering is it possible for us to export extent_buffer
>> declaration just for possible eBPF usage.
>  >
>> Or is bcc suit strong enough to handle such case?
>>
>> The diff would be something like this to make above bcc script work:
>>
>
>
> Hell no, that is _very_ ugly as hell. If you want to access such
> information why not introduce tracepoints which have enough context so
> that when they are used with ebpf you can query the data from the
> tracepoint? I *think* this ought to be made to work with ebpf?

OK, after some though, the overhead for a new tracepoint is pretty small.
The only overhead when the tracepoint is disabled is, a ktimer_get_ns()
call at the beginning of btrfs_tree_lock() and btrfs_tree_read_lock().

The pairing ktimer_get_ns() call can be embedded into the trace events,
so it won't get triggered until that event is enabled.

In that case, I think the ebpf can handle it pretty well.


Any comment on this new idea?

Thanks,
Qu

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Is it a good idea to export certain internal structure declaration to make bcc/ebpf happier?
  2019-04-02  8:45   ` Qu Wenruo
@ 2019-04-02  9:34     ` Johannes Thumshirn
  2019-04-02  9:38       ` Qu Wenruo
  0 siblings, 1 reply; 6+ messages in thread
From: Johannes Thumshirn @ 2019-04-02  9:34 UTC (permalink / raw)
  To: Qu Wenruo, Nikolay Borisov, linux-btrfs, iovisor-dev

On 02/04/2019 10:45, Qu Wenruo wrote:
> OK, after some though, the overhead for a new tracepoint is pretty small.
> The only overhead when the tracepoint is disabled is, a ktimer_get_ns()
> call at the beginning of btrfs_tree_lock() and btrfs_tree_read_lock().

If you only need the ktime_get_ns() call for the tracepoint, you can
wrap it in sth. like this IIRC:

if (trace_btrfs_tree_lock_enabled())
	start_time = ktime_get_ns();

[...]

trace_btrfs_tree_lock(start_time, end_time);

Byte,
	Johannes
-- 
Johannes Thumshirn                            SUSE Labs Filesystems
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Is it a good idea to export certain internal structure declaration to make bcc/ebpf happier?
  2019-04-02  9:34     ` Johannes Thumshirn
@ 2019-04-02  9:38       ` Qu Wenruo
  0 siblings, 0 replies; 6+ messages in thread
From: Qu Wenruo @ 2019-04-02  9:38 UTC (permalink / raw)
  To: Johannes Thumshirn, Nikolay Borisov, linux-btrfs, iovisor-dev



On 2019/4/2 下午5:34, Johannes Thumshirn wrote:
> On 02/04/2019 10:45, Qu Wenruo wrote:
>> OK, after some though, the overhead for a new tracepoint is pretty small.
>> The only overhead when the tracepoint is disabled is, a ktimer_get_ns()
>> call at the beginning of btrfs_tree_lock() and btrfs_tree_read_lock().
>
> If you only need the ktime_get_ns() call for the tracepoint, you can
> wrap it in sth. like this IIRC:
>
> if (trace_btrfs_tree_lock_enabled())
> 	start_time = ktime_get_ns();
>
> [...]
>
> trace_btrfs_tree_lock(start_time, end_time);

Oh, I just get your email seconds after I send out my initial patch.

This helps a lot!

I'll update the patch.

Thanks,
Qu

>
> Byte,
> 	Johannes
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-04-02  9:38 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-02  7:41 Is it a good idea to export certain internal structure declaration to make bcc/ebpf happier? Qu Wenruo
2019-04-02  7:59 ` Nikolay Borisov
2019-04-02  8:11   ` Qu Wenruo
2019-04-02  8:45   ` Qu Wenruo
2019-04-02  9:34     ` Johannes Thumshirn
2019-04-02  9:38       ` Qu Wenruo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.