All of lore.kernel.org
 help / color / mirror / Atom feed
* fiemap is slow on btrfs on files with multiple extents
@ 2022-08-04 16:30 Pavel Tikhomirov
  2022-08-04 18:49 ` Josef Bacik
  2022-08-05  7:38 ` Dominique MARTINET
  0 siblings, 2 replies; 9+ messages in thread
From: Pavel Tikhomirov @ 2022-08-04 16:30 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba
  Cc: linux-btrfs, lkml, Chen Liang-Chun, Alexander Mikhalitsyn,
	kernel, Dominique MARTINET, Yu Kuai, Theodore Ts'o

I ran the below test on Fedora 36 (the test basically creates "very" 
sparse file, with 4k data followed by 4k hole again and again for the 
specified length and uses fiemap to count extents in this file) and faced 
the problem that fiemap hangs for too long (for instance compared to 
the ext4 version). Fiemap with 32768 extents takes ~37264 us and with 65536 
extents it takes ~34123954 us, which is ~1000 times more when the file only 
doubled in size:

256Mb:

./fiemap-reproduce /testfile $((1<<28))
size: 268435456
actual size: 134217728
fiemap: fm_mapped_extents = 32768
time = 37264 us

./fiemap-reproduce /testfile $((1<<28))
size: 268435456
actual size: 134217728
fiemap: fm_mapped_extents = 32768
time = 37285 us

512Mb:

./fiemap-reproduce /testfile $((1<<29))
size: 536870912
actual size: 268435456
fiemap: fm_mapped_extents = 65536
time = 34123954 us

./fiemap-reproduce /testfile $((1<<29))
size: 536870912
actual size: 268435456
fiemap: fm_mapped_extents = 65536
time = 60404334 us

1Gb (the whole Fedora hangs sometimes when I measure it):

./fiemap-reproduce /testfile $((1<<30))
size: 1073741824
actual size: 536870912
fiemap: fm_mapped_extents = 131072
time = 231194793 us

./fiemap-reproduce /testfile $((1<<30))
size: 1073741824
actual size: 536870912
fiemap: fm_mapped_extents = 131072
time = 347867789 us

I see a similar problem here 
https://lore.kernel.org/linux-btrfs/Yr4nEoNLkXPKcOBi@atmark-techno.com/#r , 
but in my case I have "5.18.6-200.fc36.x86_64" fedora kernel which does 
not have 5ccc944dce3d ("filemap: Correct the conditions for marking a 
folio as accessed") commit, so it should be something else.

Some more info:

cat /proc/self/mountinfo | grep btrfs
106 1 0:47 /root / rw,relatime shared:1 - btrfs /dev/nvme0n1p3 
rw,compress=zstd:1,ssd,space_cache,subvolid=257,subvol=/root

perf top -ag
Samples: 268K of event 'cycles', 4000 Hz, Event count (approx.): 
77250404934 lost: 0/0 drop: 0/0
   Children      Self  Shared Object                       Symbol
+   74,25%     1,16%  [kernel]                            [k] 
entry_SYSCALL_64_after_hwframe
+   73,14%     0,65%  [kernel]                            [k] do_syscall_64
+   53,05%     3,30%  libc.so.6                           [.] __poll
+   39,53%     0,76%  [kernel]                            [k] __x64_sys_poll
+   34,91%     6,44%  [kernel]                            [k] do_sys_poll
+   29,37%     0,00%  [kernel]                            [k] 
__x64_sys_ioctl
+   29,08%     7,65%  [kernel]                            [k] 
count_range_bits
+   28,44%     0,00%  [kernel]                            [k] do_vfs_ioctl
+   28,43%     0,00%  [kernel]                            [k] extent_fiemap
+   28,43%     0,00%  [kernel]                            [k] 
btrfs_get_extent_fiemap
+   27,87%     0,00%  libc.so.6                           [.] __GI___ioctl
+   25,89%     0,00%  [kernel]                            [k] 
get_extent_skip_holes
+   21,76%    21,29%  [kernel]                            [k] rb_next
+    9,50%     0,48%  [kernel]                            [k] perf_poll
+    8,04%     0,00%  libc.so.6                           [.] 
__libc_start_call_main
+    6,93%     3,26%  [kernel]                            [k] 
select_estimate_accuracy
+    6,69%     2,15%  [kernel]                            [k] ktime_get_ts64
+    5,60%     3,99%  [kernel]                            [k] 
_raw_spin_lock_irqsave
+    5,16%     0,40%  [kernel]                            [k] poll_freewait

Here is a fiemap-reproduce.c code:

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>

#include <sys/stat.h>
#include <sys/time.h>
#include <sys/ioctl.h>

#include <linux/fs.h>
#include <linux/fiemap.h>

#define FILE_INTERVAL (1<<13) /* 8Kb */

/*
 * Return the elapsed time from t1 to t2 in microseconds.
 *
 * The seconds delta is widened to long long *before* the multiplication:
 * on platforms with a 32-bit time_t the product (delta * 1000 * 1000)
 * would otherwise be computed in 32 bits and overflow for intervals
 * longer than ~35 minutes.
 */
long long interval(struct timeval t1, struct timeval t2)
{
         long long val = 0;
         val += (t2.tv_usec - t1.tv_usec);
         val += (long long)(t2.tv_sec - t1.tv_sec) * 1000 * 1000;
         return val;
}

/*
 * Reproducer: create a sparse file (1 data byte every FILE_INTERVAL bytes,
 * leaving a hole in between), then time a single FS_IOC_FIEMAP ioctl with
 * fm_extent_count == 0, i.e. only counting extents (fm_mapped_extents).
 *
 * usage: prog <path> [size]   (size defaults to FILE_INTERVAL, is clamped
 * to at least FILE_INTERVAL and rounded down to a multiple of it)
 */
int main(int argc, char **argv) {
         struct fiemap fiemap = {};
         struct timeval t1, t2;
         char data = 'a';
         struct stat st;
         int fd;
         /*
          * Use long long for sizes/offsets: the test is meant to be run
          * with multi-gigabyte sizes (e.g. $((1<<31)) and up), which would
          * overflow an int both in atoi() and in the write-offset loop.
          */
         long long off, file_size = FILE_INTERVAL;

         if (argc != 3 && argc != 2) {
                 printf("usage: %s <path> [size]\n", argv[0]);
                 return 1;
         }

         if (argc == 3)
                 file_size = atoll(argv[2]);
         if (file_size < FILE_INTERVAL)
                 file_size = FILE_INTERVAL;
         file_size -= file_size % FILE_INTERVAL;

         fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0644);
         if (fd < 0) {
                 perror("open");
                 return 1;
         }

         /* One byte of data per interval; the rest of each interval is a hole. */
         for (off = 0; off < file_size; off += FILE_INTERVAL) {
                 if (pwrite(fd, &data, 1, off) != 1) {
                         perror("pwrite");
                         close(fd);
                         return 1;
                 }
         }

         /* Extend the file so the final interval's hole is included. */
         if (ftruncate(fd, file_size)) {
                 perror("ftruncate");
                 close(fd);
                 return 1;
         }

         if (fstat(fd, &st) < 0) {
                 perror("fstat");
                 close(fd);
                 return 1;
         }

         /* Cast explicitly: off_t/blkcnt_t width varies across platforms. */
         printf("size: %lld\n", (long long)st.st_size);
         printf("actual size: %lld\n", (long long)st.st_blocks * 512);

         /* fm_extent_count stays 0: the kernel only counts extents. */
         fiemap.fm_length = FIEMAP_MAX_OFFSET;
         gettimeofday(&t1, NULL);
         if (ioctl(fd, FS_IOC_FIEMAP, &fiemap) < 0) {
                 perror("fiemap");
                 close(fd);
                 return 1;
         }
         gettimeofday(&t2, NULL);

         printf("fiemap: fm_mapped_extents = %d\n", 
fiemap.fm_mapped_extents);
         printf("time = %lld us\n", interval(t1, t2));

         close(fd);
         return 0;
}

-- 
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fiemap is slow on btrfs on files with multiple extents
  2022-08-04 16:30 fiemap is slow on btrfs on files with multiple extents Pavel Tikhomirov
@ 2022-08-04 18:49 ` Josef Bacik
  2022-08-05  4:52   ` Wang Yugui
  2022-08-05  7:38 ` Dominique MARTINET
  1 sibling, 1 reply; 9+ messages in thread
From: Josef Bacik @ 2022-08-04 18:49 UTC (permalink / raw)
  To: Pavel Tikhomirov
  Cc: Chris Mason, David Sterba, linux-btrfs, lkml, Chen Liang-Chun,
	Alexander Mikhalitsyn, kernel, Dominique MARTINET, Yu Kuai,
	Theodore Ts'o

On Thu, Aug 04, 2022 at 07:30:52PM +0300, Pavel Tikhomirov wrote:
> I ran the below test on Fedora 36 (the test basically creates "very" sparse
> file, with 4k data followed by 4k hole again and again for the specified
> length and uses fiemap to count extents in this file) and face the problem
> that fiemap hangs for too long (for instance comparing to ext4 version).
> Fiemap with 32768 extents takes ~37264 us and with 65536 extents it takes
> ~34123954 us, which is x1000 times more when file only increased twice the
> size:
>

Ah that was helpful, thank you.  I think I've spotted the problem, please give
this a whirl to make sure we're seeing the same thing.  Thanks,

Josef
 
From 1133d5ebf952ebf334bc7be21a575b1f52eb71d4 Mon Sep 17 00:00:00 2001
Message-Id: <1133d5ebf952ebf334bc7be21a575b1f52eb71d4.1659638886.git.josef@toxicpanda.com>
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 4 Aug 2022 14:45:53 -0400
Subject: [PATCH] btrfs: don't search entire range for delalloc with fiemap

For the case where we have

[EXTENT1][HOLE][EXTENT2]

If we fiemap from [HOLE] we will search to len (which could be -1) to
see if there's any delalloc extents in the range, however in the above
case btrfs_get_extent() returns a hole em for just the range of the
hole, as it will find EXTENT2, so all we need to do is search for
delalloc in the hole range, not the entire rest of the requested fiemap
range.

This fixes the extremely bad fiemap performance with very large sparse
files.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8fc1e3b6e00c..b7ad8f7a7b53 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7095,7 +7095,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
 		hole_em = em;
 
 	/* check to see if we've wrapped (len == -1 or similar) */
-	end = start + len;
+	end = em->start + em->len;
 	if (end < start)
 		end = (u64)-1;
 	else
-- 
2.36.1

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: fiemap is slow on btrfs on files with multiple extents
  2022-08-04 18:49 ` Josef Bacik
@ 2022-08-05  4:52   ` Wang Yugui
  0 siblings, 0 replies; 9+ messages in thread
From: Wang Yugui @ 2022-08-05  4:52 UTC (permalink / raw)
  To: Josef Bacik
  Cc: Pavel Tikhomirov, Chris Mason, David Sterba, linux-btrfs, lkml,
	Chen Liang-Chun, Alexander Mikhalitsyn, kernel,
	Dominique MARTINET, Yu Kuai, Theodore Ts'o

Hi,

> On Thu, Aug 04, 2022 at 07:30:52PM +0300, Pavel Tikhomirov wrote:
> > I ran the below test on Fedora 36 (the test basically creates "very" sparse
> > file, with 4k data followed by 4k hole again and again for the specified
> > length and uses fiemap to count extents in this file) and face the problem
> > that fiemap hangs for too long (for instance comparing to ext4 version).
> > Fiemap with 32768 extents takes ~37264 us and with 65536 extents it takes
> > ~34123954 us, which is x1000 times more when file only increased twice the
> > size:
> >
> 
> Ah that was helpful, thank you.  I think I've spotted the problem, please give
> this a whirl to make sure we're seeing the same thing.  Thanks,
> 
> Josef

This patch improves the performance very well, but it seems to break
xfstest generic/285.

xfstest generic/285:
06.11 SEEK_HOLE expected 8192 or 16384, got 8191.                 FAIL
06.12 SEEK_DATA expected 8191 or 8191, got 12288.                 FAIL
06.23 SEEK_HOLE expected 16384 or 16384, got 16383.               FAIL
06.24 SEEK_DATA expected 16383 or 16383, got -1.                  FAIL

Best Regards
Wang Yugui (wangyugui@e16-tech.com)
2022/08/05


>  
> From 1133d5ebf952ebf334bc7be21a575b1f52eb71d4 Mon Sep 17 00:00:00 2001
> Message-Id: <1133d5ebf952ebf334bc7be21a575b1f52eb71d4.1659638886.git.josef@toxicpanda.com>
> From: Josef Bacik <josef@toxicpanda.com>
> Date: Thu, 4 Aug 2022 14:45:53 -0400
> Subject: [PATCH] btrfs: don't search entire range for delalloc with fiemap
> 
> For the case where we have
> 
> [EXTENT1][HOLE][EXTENT2]
> 
> If we fiemap from [HOLE] we will search to len (which could be -1) to
> see if there's any delalloc extents in the range, however in the above
> case btrfs_get_extent() returns a hole em for just the range of the
> hole, as it will find EXTENT2, so all we need to do is search for
> delalloc in the hole range, not the entire rest of the requested fiemap
> range.
> 
> This fixes the extremely bad fiemap performance with very large sparse
> files.
> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> ---
>  fs/btrfs/inode.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 8fc1e3b6e00c..b7ad8f7a7b53 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -7095,7 +7095,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
>  		hole_em = em;
>  
>  	/* check to see if we've wrapped (len == -1 or similar) */
> -	end = start + len;
> +	end = em->start + em->len;
>  	if (end < start)
>  		end = (u64)-1;
>  	else
> -- 
> 2.36.1



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fiemap is slow on btrfs on files with multiple extents
  2022-08-04 16:30 fiemap is slow on btrfs on files with multiple extents Pavel Tikhomirov
  2022-08-04 18:49 ` Josef Bacik
@ 2022-08-05  7:38 ` Dominique MARTINET
  2022-08-05  9:54   ` Filipe Manana
  1 sibling, 1 reply; 9+ messages in thread
From: Dominique MARTINET @ 2022-08-05  7:38 UTC (permalink / raw)
  To: Pavel Tikhomirov, Josef Bacik
  Cc: Chris Mason, David Sterba, linux-btrfs, lkml, Chen Liang-Chun,
	Alexander Mikhalitsyn, kernel, Yu Kuai, Theodore Ts'o

Pavel Tikhomirov wrote on Thu, Aug 04, 2022 at 07:30:52PM +0300:
> I see a similar problem here
> https://lore.kernel.org/linux-btrfs/Yr4nEoNLkXPKcOBi@atmark-techno.com/#r ,
> but in my case I have "5.18.6-200.fc36.x86_64" fedora kernel which does not
> have 5ccc944dce3d ("filemap: Correct the conditions for marking a folio as
> accessed") commit, so it should be something else.

The root cause might be different but I guess they're related enough: if
fiemap gets faster enough even when the whole file is in cache I guess
that works for me :)

Josef Bacik wrote on Thu, Aug 04, 2022 at 02:49:39PM -0400:
> On Thu, Aug 04, 2022 at 07:30:52PM +0300, Pavel Tikhomirov wrote:
> > I ran the below test on Fedora 36 (the test basically creates "very" sparse
> > file, with 4k data followed by 4k hole again and again for the specified
> > length and uses fiemap to count extents in this file) and face the problem
> > that fiemap hangs for too long (for instance comparing to ext4 version).
> > Fiemap with 32768 extents takes ~37264 us and with 65536 extents it takes
> > ~34123954 us, which is x1000 times more when file only increased twice the
> > size:
> >
> 
> Ah that was helpful, thank you.  I think I've spotted the problem, please give
> this a whirl to make sure we're seeing the same thing.  Thanks,

FWIW this patch does help a tiny bit, but I'm still seeing a huge
slowdown: with patch cp goes from ~600MB/s (55s) to 136MB/s (3m55s) on
the second run; and without the patch I'm getting 47s and 5m35
respectively so this has gotten a bit better but these must still be
cases running through the whole list (e.g. when not hitting a hole?)


My reproducer is just running 'cp file /dev/null' twice on a file with
194955 extents (same file with mixed compressed & non-compressed extents
as last time), so should be close enough to what Pavel was describing in
just much worse.

-- 
Dominique

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fiemap is slow on btrfs on files with multiple extents
  2022-08-05  7:38 ` Dominique MARTINET
@ 2022-08-05  9:54   ` Filipe Manana
  2022-09-01 13:25     ` Filipe Manana
  0 siblings, 1 reply; 9+ messages in thread
From: Filipe Manana @ 2022-08-05  9:54 UTC (permalink / raw)
  To: Dominique MARTINET
  Cc: Pavel Tikhomirov, Josef Bacik, Chris Mason, David Sterba,
	linux-btrfs, lkml, Chen Liang-Chun, Alexander Mikhalitsyn,
	kernel, Yu Kuai, Theodore Ts'o

On Fri, Aug 05, 2022 at 04:38:21PM +0900, Dominique MARTINET wrote:
> Pavel Tikhomirov wrote on Thu, Aug 04, 2022 at 07:30:52PM +0300:
> > I see a similar problem here
> > https://lore.kernel.org/linux-btrfs/Yr4nEoNLkXPKcOBi@atmark-techno.com/#r ,
> > but in my case I have "5.18.6-200.fc36.x86_64" fedora kernel which does not
> > have 5ccc944dce3d ("filemap: Correct the conditions for marking a folio as
> > accessed") commit, so it should be something else.
> 
> The root cause might be different but I guess they're related enough: if
> fiemap gets faster enough even when the whole file is in cache I guess
> that works for me :)
> 
> Josef Bacik wrote on Thu, Aug 04, 2022 at 02:49:39PM -0400:
> > On Thu, Aug 04, 2022 at 07:30:52PM +0300, Pavel Tikhomirov wrote:
> > > I ran the below test on Fedora 36 (the test basically creates "very" sparse
> > > file, with 4k data followed by 4k hole again and again for the specified
> > > length and uses fiemap to count extents in this file) and face the problem
> > > that fiemap hangs for too long (for instance comparing to ext4 version).
> > > Fiemap with 32768 extents takes ~37264 us and with 65536 extents it takes
> > > ~34123954 us, which is x1000 times more when file only increased twice the
> > > size:
> > >
> > 
> > Ah that was helpful, thank you.  I think I've spotted the problem, please give
> > this a whirl to make sure we're seeing the same thing.  Thanks,
> 
> FWIW this patch does help a tiny bit, but I'm still seeing a huge
> slowdown: with patch cp goes from ~600MB/s (55s) to 136MB/s (3m55s) on
> the second run; and without the patch I'm getting 47s and 5m35
> respectively so this has gotten a bit better but these must still be
> cases running through the whole list (e.g. when not hitting a hole?)
> 
> 
> My reproducer is just running 'cp file /dev/null' twice on a file with
> 194955 extents (same file with mixed compressed & non-compressed extents
> as last time), so should be close enough to what Pavel was describing in
> just much worse.

I remember your original report Dominique, it came along with the short
reads issue when using using io_uring with qemu.

I had a quick look before going on vacations. In your post at:

https://lore.kernel.org/linux-btrfs/Ysace25wh5BbLd5f@atmark-techno.com/

you mentioned a lot of time spent on count_range_bits(), and I quickly
came with a testing patch for that specific area:

https://git.kernel.org/pub/scm/linux/kernel/git/fdmanana/linux.git/commit/?h=fiemap_speedup&id=6bdc02edbb52786df2d8c2405d790390d9a9443c

Basically whenever we call that, we start searching from the root of the
extent states rbtree - if the rbtree is large, that takes a lot of time.
The idea is to start the search from the last record instead.

I haven't actually made any performance tests, as vacations came in and
I noticed that such change will very likely make little or no difference
because algorithmically btrfs' fiemap implementation is very ineficient
for several reasons. It basically works like this:

1) We start the search for the first extent. First we go search the inode's
   extent map rbtree - if we can't find it, then we will search in the
   fs b+tree - after this we create an extent map based on the file extent
   item we found in the b+tree and add it to the extent map rbtree.

   We then pass to fiemap extent information based on the extent map
   (there's a few extra minor details, like merging, etc);

2) Then we search for the next extent, with a start offset based on the
   end offset of the previous one +1.

   Again, if we can't find it in the extent map rbtree, we go search the
   fs b+tree, then create an extent map based on the file extent item we
   found there and add it to extent map rbtree.

   This is silly. On each iteration the extent maps rbtree gets bigger and
   bigger, and we always search from the root node. We are spending time
   searching there and then allocating memory for the extent map and adding
   it to the rbtree, which is yet more cpu time spent.

   We should only create extent maps when we are doing IO against the range,
   i.e. for a data write or read operation - here we are just spending a lot of time on
   this and consuming memory too.

   Then it's silly again because we will search the fs b+tree again, starting
   from the root. So we end up visiting the same leaves over and over;

3) Whenever we find a hole, or a prealloc/unwritten extent, we have to check
   if there's pending delalloc for that region. That's where count_range_bits()
   is used - everytime it's called it starts from the root node of the extent
   states rbtree.

My idea to address this is to basically rewrite fiemap so that it works like
this:

1) Go over each leaf in the fs b+tree and for each file extent item emit the
   extent information for fiemap - like this we don't do many repeated b+tree
   searches to end up in the same leaf;

2) Never create extent maps, so that we don't grow the extent maps rbtree
   unnecessarily, saving cpu time and avoiding memory allocations;

3) Whenever we find a hole or prealloc/unwritten extent, then check if there's
   pending delalloc in the range by using count_range_bits() like we currently
   do (and maybe add that patch to avoid always starting the search from the
   root).

   If there's delalloc, then look up the corresponding extent maps and use
   their info to emit extent information for fiemap. And keep using rb_next()
   while an extent map ends before the hole/unwritten range;

4) Because emitting all the extent information for fiemap and doing other things
   like checking if an extent is shared, calling count_range_bits(), etc can
   take some time, to avoid holding a read lock for too long on the fs b+tree
   leaf and block other tasks, clone the leaf, release the lock on the leaf and
   use the private clone. This is fine since when we start fiemap we lock the file
   range, so no one else can go and create or drop extents in the range before
   fiemap finishes.

That's the high level idea.

There's another factor that can slowdown fiemap a lot, which is figuring out if
an extent is shared or not (reflinks, snapshots), but in your case you don't
have shared extents IIRC. I would have to look at that separately, we probably
have some room for improvement there as well.

I haven't had the time to work on that, as I've been working on other stuff
unrelated to fiemap, but maybe in a week or two I may start it.

> 
> -- 
> Dominique

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fiemap is slow on btrfs on files with multiple extents
  2022-08-05  9:54   ` Filipe Manana
@ 2022-09-01 13:25     ` Filipe Manana
  2022-09-01 15:06       ` Pavel Tikhomirov
  2022-09-21  7:30       ` Dominique MARTINET
  0 siblings, 2 replies; 9+ messages in thread
From: Filipe Manana @ 2022-09-01 13:25 UTC (permalink / raw)
  To: Dominique MARTINET
  Cc: Pavel Tikhomirov, Josef Bacik, Chris Mason, David Sterba,
	linux-btrfs, lkml, Chen Liang-Chun, Alexander Mikhalitsyn,
	kernel, Yu Kuai, Theodore Ts'o

On Fri, Aug 05, 2022 at 10:54:07AM +0100, Filipe Manana wrote:
> On Fri, Aug 05, 2022 at 04:38:21PM +0900, Dominique MARTINET wrote:
> > Pavel Tikhomirov wrote on Thu, Aug 04, 2022 at 07:30:52PM +0300:
> > > I see a similar problem here
> > > https://lore.kernel.org/linux-btrfs/Yr4nEoNLkXPKcOBi@atmark-techno.com/#r ,
> > > but in my case I have "5.18.6-200.fc36.x86_64" fedora kernel which does not
> > > have 5ccc944dce3d ("filemap: Correct the conditions for marking a folio as
> > > accessed") commit, so it should be something else.
> > 
> > The root cause might be different but I guess they're related enough: if
> > fiemap gets faster enough even when the whole file is in cache I guess
> > that works for me :)
> > 
> > Josef Bacik wrote on Thu, Aug 04, 2022 at 02:49:39PM -0400:
> > > On Thu, Aug 04, 2022 at 07:30:52PM +0300, Pavel Tikhomirov wrote:
> > > > I ran the below test on Fedora 36 (the test basically creates "very" sparse
> > > > file, with 4k data followed by 4k hole again and again for the specified
> > > > length and uses fiemap to count extents in this file) and face the problem
> > > > that fiemap hangs for too long (for instance comparing to ext4 version).
> > > > Fiemap with 32768 extents takes ~37264 us and with 65536 extents it takes
> > > > ~34123954 us, which is x1000 times more when file only increased twice the
> > > > size:
> > > >
> > > 
> > > Ah that was helpful, thank you.  I think I've spotted the problem, please give
> > > this a whirl to make sure we're seeing the same thing.  Thanks,
> > 
> > FWIW this patch does help a tiny bit, but I'm still seeing a huge
> > slowdown: with patch cp goes from ~600MB/s (55s) to 136MB/s (3m55s) on
> > the second run; and without the patch I'm getting 47s and 5m35
> > respectively so this has gotten a bit better but these must still be
> > cases running through the whole list (e.g. when not hitting a hole?)
> > 
> > 
> > My reproducer is just running 'cp file /dev/null' twice on a file with
> > 194955 extents (same file with mixed compressed & non-compressed extents
> > as last time), so should be close enough to what Pavel was describing in
> > just much worse.
> 
> I remember your original report Dominique, it came along with the short
> reads issue when using using io_uring with qemu.
> 
> I had a quick look before going on vacations. In your post at:
> 
> https://lore.kernel.org/linux-btrfs/Ysace25wh5BbLd5f@atmark-techno.com/
> 
> you mentioned a lot of time spent on count_range_bits(), and I quickly
> came with a testing patch for that specific area:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/fdmanana/linux.git/commit/?h=fiemap_speedup&id=6bdc02edbb52786df2d8c2405d790390d9a9443c
> 
> Basically whenever we call that, we start searching from the root of the
> extent states rbtree - if the rbtree is large, that takes a lot of time.
> The idea is to start the search from the last record instead.
> 
> I haven't actually made any performance tests, as vacations came in and
> I noticed that such change will very likely make little or no difference
> because algorithmically btrfs' fiemap implementation is very ineficient
> for several reasons. It basically works like this:
> 
> 1) We start the search for the first extent. First we go search the inode's
>    extent map rbtree - if we can't find it, then we will search in the
>    fs b+tree - after this we create an extent map based on the file extent
>    item we found in the b+tree and add it to the extent map rbtree.
> 
>    We then pass to fiemap extent information based on the extent map
>    (there's a few extra minor details, like merging, etc);
> 
> 2) Then we search for the next extent, with a start offset based on the
>    end offset of the previous one +1.
> 
>    Again, if we can't find it in the extent map rbtree, we go search the
>    fs b+tree, then create an extent map based on the file extent item we
>    found there and add it to extent map rbtree.
> 
>    This is silly. On each iteration the extent maps rbtree gets bigger and
>    bigger, and we always search from the root node. We are spending time
>    searching there and then allocating memory for the extent map and adding
>    it to the rbtree, which is yet more cpu time spent.
> 
>    We should only create extent maps when we are doing IO against, for a
>    data write or read operation, we are just spending a lot of time on
>    this and consuming memory too.
> 
>    Then it's silly again because we will search the fs b+tree again, starting
>    from the root. So we end up visting the same leaves over and over;
> 
> 3) Whenever we find a hole, or a prealloc/unwritten extent, we have to check
>    if there's pending dealloc for that region. That's where count_range_bits()
>    is used - everytime it's called it starts from the root node of the extent
>    states rbtree.
> 
> My idea to address this is to basically rewrite fiemap so that it works like
> this:
> 
> 1) Go over each leaf in the fs b+tree and for each file extent item emit the
>    extent information for fiemap - like this we don't do many repeated b+tree
>    searches to end up in the same leaf;
> 
> 2) Never create extent maps, so that we don't grow the extent maps rbtree
>    unnecessarily, saving cpu time and avoiding memory allocations;
> 
> 3) Whenever we find a hole or prealloc/unwritten extent, then check if there's
>    pending delalloc in the range by using count_range_bits() like we currently
>    do (and maybe add that patch to avoid always starting the search from the
>    root).
> 
>    If there's delalloc, then lookup for the correspond extent maps and use
>    their info to emit extent information for fiemap. And keep using rb_next()
>    while an extent map ends before the hole/unwritten range;
> 
> 4) Because emitting all the extent information for fiemap and doing other things
>    like checking if an extent is shared, calling count_range_bits(), etc can
>    take some time, to avoid holding a read lock for too long on the fs b+tree
>    leaf and block other tasks, clone the leaf, release the lock on the leaf and
>    use the private clone. This is fine since we start fiemap we lock the file
>    range, so no one else can go and create or drop extents in the range before
>    fiemap finishes.
> 
> That's the high level idea.
> 
> There's another factor that can slowdown fiemap a lot, which is figuring out if
> an extent is shared or not (reflinks, snapshots), but in your case you don't
> have shared extents IIRC. I would have to look at that separetely, we probably
> have some room for improvement there as well.
> 
> I haven't had the time to work on that, as I've been working on other stuff
> unrelated to fiemap, but maybe in a week or two I may start it.

It took me a bit more than I expected, but here is the patchset to make fiemap
(and lseek) much more efficient on btrfs:

https://lore.kernel.org/linux-btrfs/cover.1662022922.git.fdmanana@suse.com/

And also available in this git branch:

https://git.kernel.org/pub/scm/linux/kernel/git/fdmanana/linux.git/log/?h=lseek_fiemap_scalability

Running Pavel's test before applying the patchset:

    *********** 256M ***********

    size: 268435456
    actual size: 134217728
    fiemap: fm_mapped_extents = 32768
    time = 4003133 us

    size: 268435456
    actual size: 134217728
    fiemap: fm_mapped_extents = 32768
    time = 4895330 us

    *********** 512M ***********

    size: 536870912
    actual size: 268435456
    fiemap: fm_mapped_extents = 65536
    time = 30123675 us

    size: 536870912
    actual size: 268435456
    fiemap: fm_mapped_extents = 65536
    time = 33450934 us

    *********** 1G ***********

    size: 1073741824
    actual size: 536870912
    fiemap: fm_mapped_extents = 131072
    time = 224924074 us

    size: 1073741824
    actual size: 536870912
    fiemap: fm_mapped_extents = 131072
    time = 217239242 us

And running it after applying the patchset:

    *********** 256M ***********

    size: 268435456
    actual size: 134217728
    fiemap: fm_mapped_extents = 32768
    time = 29475 us

    size: 268435456
    actual size: 134217728
    fiemap: fm_mapped_extents = 32768
    time = 29307 us

    *********** 512M ***********

    size: 536870912
    actual size: 268435456
    fiemap: fm_mapped_extents = 65536
    time = 58996 us

    size: 536870912
    actual size: 268435456
    fiemap: fm_mapped_extents = 65536
    time = 59115 us

    *********** 1G ***********

    size: 1073741824
    actual size: 536870912
    fiemap: fm_mapped_extents = 116251
    time = 124141 us

    size: 1073741824
    actual size: 536870912
    fiemap: fm_mapped_extents = 131072
    time = 119387 us

There's a huge difference, so after it fiemap is a lot more usable on
btrfs.

It's still not as fast as ext4, but it's getting close to. On ext4 I
get:

    *********** 256M ***********

    size: 268435456
    actual size: 134217728
    fiemap: fm_mapped_extents = 32768
    time = 16877 us

    size: 268435456
    actual size: 134217728
    fiemap: fm_mapped_extents = 32768
    time = 17014 us

    *********** 512M ***********

    size: 536870912
    actual size: 268435456
    fiemap: fm_mapped_extents = 65536
    time = 33764 us

    size: 536870912
    actual size: 268435456
    fiemap: fm_mapped_extents = 65536
    time = 33849 us

    *********** 1G ***********

    size: 1073741824
    actual size: 536870912
    fiemap: fm_mapped_extents = 131072
    time = 69085 us

    size: 1073741824
    actual size: 536870912
    fiemap: fm_mapped_extents = 131072
    time = 68101 us

However we do have extra work to do on btrfs because we have reflinks
and snapshots, so it needs to check if extents are shared, while ext4
does not have those features, thus less work to do for fiemap.

Thanks for the report.

> 
> > 
> > -- 
> > Dominique

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fiemap is slow on btrfs on files with multiple extents
  2022-09-01 13:25     ` Filipe Manana
@ 2022-09-01 15:06       ` Pavel Tikhomirov
  2022-09-21  7:30       ` Dominique MARTINET
  1 sibling, 0 replies; 9+ messages in thread
From: Pavel Tikhomirov @ 2022-09-01 15:06 UTC (permalink / raw)
  To: Filipe Manana
  Cc: Josef Bacik, Chris Mason, David Sterba, linux-btrfs, lkml,
	Chen Liang-Chun, Alexander Mikhalitsyn, kernel, Yu Kuai,
	Theodore Ts'o, Dominique MARTINET



On 01.09.2022 16:25, Filipe Manana wrote:
> On Fri, Aug 05, 2022 at 10:54:07AM +0100, Filipe Manana wrote:
>> On Fri, Aug 05, 2022 at 04:38:21PM +0900, Dominique MARTINET wrote:
>>> Pavel Tikhomirov wrote on Thu, Aug 04, 2022 at 07:30:52PM +0300:
>>>> I see a similar problem here
>>>> https://lore.kernel.org/linux-btrfs/Yr4nEoNLkXPKcOBi@atmark-techno.com/#r ,
>>>> but in my case I have "5.18.6-200.fc36.x86_64" fedora kernel which does not
>>>> have 5ccc944dce3d ("filemap: Correct the conditions for marking a folio as
>>>> accessed") commit, so it should be something else.
>>>
>>> The root cause might be different but I guess they're related enough: if
>>> fiemap gets faster enough even when the whole file is in cache I guess
>>> that works for me :)
>>>
>>> Josef Bacik wrote on Thu, Aug 04, 2022 at 02:49:39PM -0400:
>>>> On Thu, Aug 04, 2022 at 07:30:52PM +0300, Pavel Tikhomirov wrote:
>>>>> I ran the below test on Fedora 36 (the test basically creates "very" sparse
>>>>> file, with 4k data followed by 4k hole again and again for the specified
>>>>> length and uses fiemap to count extents in this file) and face the problem
>>>>> that fiemap hangs for too long (for instance comparing to ext4 version).
>>>>> Fiemap with 32768 extents takes ~37264 us and with 65536 extents it takes
>>>>> ~34123954 us, which is x1000 times more when file only increased twice the
>>>>> size:
>>>>>
>>>>
>>>> Ah that was helpful, thank you.  I think I've spotted the problem, please give
>>>> this a whirl to make sure we're seeing the same thing.  Thanks,
>>>
>>> FWIW this patch does help a tiny bit, but I'm still seeing a huge
>>> slowdown: with patch cp goes from ~600MB/s (55s) to 136MB/s (3m55s) on
>>> the second run; and without the patch I'm getting 47s and 5m35
>>> respectively so this has gotten a bit better but these must still be
>>> cases running through the whole list (e.g. when not hitting a hole?)
>>>
>>>
>>> My reproducer is just running 'cp file /dev/null' twice on a file with
>>> 194955 extents (same file with mixed compressed & non-compressed extents
>>> as last time), so should be close enough to what Pavel was describing in
>>> just much worse.
>>
>> I remember your original report Dominique, it came along with the short
>> reads issue when using io_uring with qemu.
>>
>> I had a quick look before going on vacations. In your post at:
>>
>> https://lore.kernel.org/linux-btrfs/Ysace25wh5BbLd5f@atmark-techno.com/
>>
>> you mentioned a lot of time spent on count_range_bits(), and I quickly
>> came with a testing patch for that specific area:
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/fdmanana/linux.git/commit/?h=fiemap_speedup&id=6bdc02edbb52786df2d8c2405d790390d9a9443c
>>
>> Basically whenever we call that, we start searching from the root of the
>> extent states rbtree - if the rbtree is large, that takes a lot of time.
>> The idea is to start the search from the last record instead.
>>
>> I haven't actually made any performance tests, as vacations came in and
>> I noticed that such change will very likely make little or no difference
>> because algorithmically btrfs' fiemap implementation is very inefficient
>> for several reasons. It basically works like this:
>>
>> 1) We start the search for the first extent. First we go search the inode's
>>     extent map rbtree - if we can't find it, then we will search in the
>>     fs b+tree - after this we create an extent map based on the file extent
>>     item we found in the b+tree and add it to the extent map rbtree.
>>
>>     We then pass to fiemap extent information based on the extent map
>>     (there's a few extra minor details, like merging, etc);
>>
>> 2) Then we search for the next extent, with a start offset based on the
>>     end offset of the previous one +1.
>>
>>     Again, if we can't find it in the extent map rbtree, we go search the
>>     fs b+tree, then create an extent map based on the file extent item we
>>     found there and add it to extent map rbtree.
>>
>>     This is silly. On each iteration the extent maps rbtree gets bigger and
>>     bigger, and we always search from the root node. We are spending time
>>     searching there and then allocating memory for the extent map and adding
>>     it to the rbtree, which is yet more cpu time spent.
>>
>>     We should only create extent maps when we are doing IO against, for a
>>     data write or read operation, we are just spending a lot of time on
>>     this and consuming memory too.
>>
>>     Then it's silly again because we will search the fs b+tree again, starting
>>     from the root. So we end up visiting the same leaves over and over;
>>
>> 3) Whenever we find a hole, or a prealloc/unwritten extent, we have to check
>>     if there's pending delalloc for that region. That's where count_range_bits()
>>     is used - every time it's called it starts from the root node of the extent
>>     states rbtree.
>>
>> My idea to address this is to basically rewrite fiemap so that it works like
>> this:
>>
>> 1) Go over each leaf in the fs b+tree and for each file extent item emit the
>>     extent information for fiemap - like this we don't do many repeated b+tree
>>     searches to end up in the same leaf;
>>
>> 2) Never create extent maps, so that we don't grow the extent maps rbtree
>>     unnecessarily, saving cpu time and avoiding memory allocations;
>>
>> 3) Whenever we find a hole or prealloc/unwritten extent, then check if there's
>>     pending delalloc in the range by using count_range_bits() like we currently
>>     do (and maybe add that patch to avoid always starting the search from the
>>     root).
>>
>>     If there's delalloc, then look up the corresponding extent maps and use
>>     their info to emit extent information for fiemap. And keep using rb_next()
>>     while an extent map ends before the hole/unwritten range;
>>
>> 4) Because emitting all the extent information for fiemap and doing other things
>>     like checking if an extent is shared, calling count_range_bits(), etc can
>>     take some time, to avoid holding a read lock for too long on the fs b+tree
>>     leaf and block other tasks, clone the leaf, release the lock on the leaf and
>>     use the private clone. This is fine since when we start fiemap we lock the file
>>     range, so no one else can go and create or drop extents in the range before
>>     fiemap finishes.
>>
>> That's the high level idea.
>>
>> There's another factor that can slowdown fiemap a lot, which is figuring out if
>> an extent is shared or not (reflinks, snapshots), but in your case you don't
>> have shared extents IIRC. I would have to look at that separately, we probably
>> have some room for improvement there as well.
>>
>> I haven't had the time to work on that, as I've been working on other stuff
>> unrelated to fiemap, but maybe in a week or two I may start it.
> 
> It took me a bit more than I expected, but here is the patchset to make fiemap
> (and lseek) much more efficient on btrfs:
> 
> https://lore.kernel.org/linux-btrfs/cover.1662022922.git.fdmanana@suse.com/
> 
> And also available in this git branch:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/fdmanana/linux.git/log/?h=lseek_fiemap_scalability
> 
> Running Pavel's test before applying the patchset:
> 
>      *********** 256M ***********
> 
>      size: 268435456
>      actual size: 134217728
>      fiemap: fm_mapped_extents = 32768
>      time = 4003133 us
> 
>      size: 268435456
>      actual size: 134217728
>      fiemap: fm_mapped_extents = 32768
>      time = 4895330 us
> 
>      *********** 512M ***********
> 
>      size: 536870912
>      actual size: 268435456
>      fiemap: fm_mapped_extents = 65536
>      time = 30123675 us
> 
>      size: 536870912
>      actual size: 268435456
>      fiemap: fm_mapped_extents = 65536
>      time = 33450934 us
> 
>      *********** 1G ***********
> 
>      size: 1073741824
>      actual size: 536870912
>      fiemap: fm_mapped_extents = 131072
>      time = 224924074 us
> 
>      size: 1073741824
>      actual size: 536870912
>      fiemap: fm_mapped_extents = 131072
>      time = 217239242 us
> 
> And running it after applying the patchset:
> 
>      *********** 256M ***********
> 
>      size: 268435456
>      actual size: 134217728
>      fiemap: fm_mapped_extents = 32768
>      time = 29475 us
> 
>      size: 268435456
>      actual size: 134217728
>      fiemap: fm_mapped_extents = 32768
>      time = 29307 us
> 
>      *********** 512M ***********
> 
>      size: 536870912
>      actual size: 268435456
>      fiemap: fm_mapped_extents = 65536
>      time = 58996 us
> 
>      size: 536870912
>      actual size: 268435456
>      fiemap: fm_mapped_extents = 65536
>      time = 59115 us
> 
>      *********** 1G ***********
> 
>      size: 1073741824
>      actual size: 536870912
>      fiemap: fm_mapped_extents = 116251
>      time = 124141 us
> 
>      size: 1073741824
>      actual size: 536870912
>      fiemap: fm_mapped_extents = 131072
>      time = 119387 us
> 
> There's a huge difference, so after it fiemap is a lot more usable on
> btrfs.
> 
> It's still not as fast as ext4, but it's getting close to it. On ext4 I
> get:
> 
>      *********** 256M ***********
> 
>      size: 268435456
>      actual size: 134217728
>      fiemap: fm_mapped_extents = 32768
>      time = 16877 us
> 
>      size: 268435456
>      actual size: 134217728
>      fiemap: fm_mapped_extents = 32768
>      time = 17014 us
> 
>      *********** 512M ***********
> 
>      size: 536870912
>      actual size: 268435456
>      fiemap: fm_mapped_extents = 65536
>      time = 33764 us
> 
>      size: 536870912
>      actual size: 268435456
>      fiemap: fm_mapped_extents = 65536
>      time = 33849 us
> 
>      *********** 1G ***********
> 
>      size: 1073741824
>      actual size: 536870912
>      fiemap: fm_mapped_extents = 131072
>      time = 69085 us
> 
>      size: 1073741824
>      actual size: 536870912
>      fiemap: fm_mapped_extents = 131072
>      time = 68101 us
> 
> However we do have extra work to do on btrfs because we have reflinks
> and snapshots, so it needs to check if extents are shared, while ext4
> does not have those features, thus less work to do for fiemap.
> 
> Thanks for the report.

The results are amazing, I will try it on my system. Thanks a lot for 
the fixes!

> 
>>
>>>
>>> -- 
>>> Dominique

-- 
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fiemap is slow on btrfs on files with multiple extents
  2022-09-01 13:25     ` Filipe Manana
  2022-09-01 15:06       ` Pavel Tikhomirov
@ 2022-09-21  7:30       ` Dominique MARTINET
  2022-09-21  9:00         ` Filipe Manana
  1 sibling, 1 reply; 9+ messages in thread
From: Dominique MARTINET @ 2022-09-21  7:30 UTC (permalink / raw)
  To: Filipe Manana
  Cc: Pavel Tikhomirov, Josef Bacik, Chris Mason, David Sterba,
	linux-btrfs, lkml, Chen Liang-Chun, Alexander Mikhalitsyn,
	kernel, Yu Kuai, Theodore Ts'o

Filipe Manana wrote on Thu, Sep 01, 2022 at 02:25:12PM +0100:
> It took me a bit more than I expected, but here is the patchset to make fiemap
> (and lseek) much more efficient on btrfs:
> 
> https://lore.kernel.org/linux-btrfs/cover.1662022922.git.fdmanana@suse.com/
> 
> And also available in this git branch:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/fdmanana/linux.git/log/?h=lseek_fiemap_scalability

Thanks a lot!
Sorry for the slow reply, it took me a while to find time to get back to
my test setup.

There's still this weird behaviour that later calls to cp are slower
than the first, but the improvement is so good that it doesn't matter
quite as much -- I haven't been able to reproduce the rcu stalls in qemu
so I can't say for sure but they probably won't be a problem anymore.

From a quick look with perf record/report the difference still seems to
stem from fiemap (time spent there goes from 4.13 to 45.20%), so there
is still more processing once the file is (at least partially) in cache,
but it has gotten much better.


(tests run on a laptop so assume some inconsistency with thermal
throttling etc)

/mnt/t/t # compsize bigfile
Processed 1 file, 194955 regular extents (199583 refs), 0 inline.
Type       Perc     Disk Usage   Uncompressed Referenced  
TOTAL       15%      3.7G          23G          23G       
none       100%      477M         477M         514M       
zstd        14%      3.2G          23G          23G       
/mnt/t/t # time cp bigfile /dev/null
real	0m 44.52s
user	0m 0.49s
sys	0m 32.91s
/mnt/t/t # time cp bigfile /dev/null
real	0m 46.81s
user	0m 0.55s
sys	0m 35.63s
/mnt/t/t # time cp bigfile /dev/null
real	1m 13.63s
user	0m 0.55s
sys	1m 1.89s
/mnt/t/t # time cp bigfile /dev/null
real	1m 13.44s
user	0m 0.53s
sys	1m 2.08s


For comparison here's how it was on 6.0-rc2 your branch is based on:
/mnt/t/t # time cp atde-test /dev/null
real	0m 46.17s
user	0m 0.60s
sys	0m 33.21s
/mnt/t/t # time cp atde-test /dev/null
real	5m 35.92s
user	0m 0.57s
sys	5m 24.20s



If you're curious the report blames set_extent_bit and
clear_state_bit as follow; get_extent_skip_holes is completely gone; but
I wouldn't necessarily say this needs much more time spent on it.

45.20%--extent_fiemap
|
|--31.02%--lock_extent_bits
|          |          
|           --30.78%--set_extent_bit
|                     |          
|                     |--6.93%--insert_state
|                     |          |          
|                     |           --0.70%--set_state_bits
|                     |          
|                     |--4.25%--alloc_extent_state
|                     |          |          
|                     |           --3.86%--kmem_cache_alloc
|                     |          
|                     |--2.77%--_raw_spin_lock
|                     |          |          
|                     |           --1.23%--preempt_count_add
|                     |          
|                     |--2.48%--rb_next
|                     |          
|                     |--1.13%--_raw_spin_unlock
|                     |          |          
|                     |           --0.55%--preempt_count_sub
|                     |          
|                      --0.92%--set_state_bits
|          
 --13.80%--__clear_extent_bit
           |          
            --13.30%--clear_state_bit
                      |          
                      |           --3.48%--_raw_spin_unlock_irqrestore
                      |          
                      |--2.45%--merge_state.part.0
                      |          |          
                      |           --1.57%--rb_next
                      |          
                      |--2.14%--__slab_free
                      |          |          
                      |           --1.26%--cmpxchg_double_slab.constprop.0.isra.0
                      |          
                      |--0.74%--free_extent_state
                      |          
                      |--0.70%--kmem_cache_free
                      |          
                      |--0.69%--btrfs_clear_delalloc_extent
                      |          
                       --0.52%--rb_next



Thanks!
-- 
Dominique

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fiemap is slow on btrfs on files with multiple extents
  2022-09-21  7:30       ` Dominique MARTINET
@ 2022-09-21  9:00         ` Filipe Manana
  0 siblings, 0 replies; 9+ messages in thread
From: Filipe Manana @ 2022-09-21  9:00 UTC (permalink / raw)
  To: Dominique MARTINET
  Cc: Pavel Tikhomirov, Josef Bacik, Chris Mason, David Sterba,
	linux-btrfs, lkml, Chen Liang-Chun, Alexander Mikhalitsyn,
	kernel, Yu Kuai, Theodore Ts'o

On Wed, Sep 21, 2022 at 8:30 AM Dominique MARTINET
<dominique.martinet@atmark-techno.com> wrote:
>
> Filipe Manana wrote on Thu, Sep 01, 2022 at 02:25:12PM +0100:
> > It took me a bit more than I expected, but here is the patchset to make fiemap
> > (and lseek) much more efficient on btrfs:
> >
> > https://lore.kernel.org/linux-btrfs/cover.1662022922.git.fdmanana@suse.com/
> >
> > And also available in this git branch:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/fdmanana/linux.git/log/?h=lseek_fiemap_scalability
>
> Thanks a lot!
> Sorry for the slow reply, it took me a while to find time to get back to
> my test setup.
>
> There's still this weird behaviour that later calls to cp are slower
> than the first, but the improvement is so good that it doesn't matter
> quite as much -- I haven't been able to reproduce the rcu stalls in qemu
> so I can't say for sure but they probably won't be a problem anymore.
>
> From a quick look with perf record/report the difference still seems to
> stem from fiemap (time spent there goes from 4.13 to 45.20%), so there
> is still more processing once the file is (at least partially) in cache,
> but it has gotten much better.
>
>
> (tests run on a laptop so assume some inconsistency with thermal
> throttling etc)
>
> /mnt/t/t # compsize bigfile
> Processed 1 file, 194955 regular extents (199583 refs), 0 inline.
> Type       Perc     Disk Usage   Uncompressed Referenced
> TOTAL       15%      3.7G          23G          23G
> none       100%      477M         477M         514M
> zstd        14%      3.2G          23G          23G
> /mnt/t/t # time cp bigfile /dev/null
> real    0m 44.52s
> user    0m 0.49s
> sys     0m 32.91s
> /mnt/t/t # time cp bigfile /dev/null
> real    0m 46.81s
> user    0m 0.55s
> sys     0m 35.63s
> /mnt/t/t # time cp bigfile /dev/null
> real    1m 13.63s
> user    0m 0.55s
> sys     1m 1.89s
> /mnt/t/t # time cp bigfile /dev/null
> real    1m 13.44s
> user    0m 0.53s
> sys     1m 2.08s
>
>
> For comparison here's how it was on 6.0-rc2 your branch is based on:
> /mnt/t/t # time cp atde-test /dev/null
> real    0m 46.17s
> user    0m 0.60s
> sys     0m 33.21s
> /mnt/t/t # time cp atde-test /dev/null
> real    5m 35.92s
> user    0m 0.57s
> sys     5m 24.20s
>
>
>
> If you're curious the report blames set_extent_bit and
> clear_state_bit as follow; get_extent_skip_holes is completely gone; but
> I wouldn't necessarily say this needs much more time spent on it.

get_extent_skip_holes() no longer exists, so 0% of time spent there :)

Yes, I know. The reason you see so much time spent on
lock_extent_bits() is basically
because cp does too many fiemap calls with a very small extent buffer size.
I pointed that out here:

https://lore.kernel.org/linux-btrfs/CAL3q7H5NSVicm7nYBJ7x8fFkDpno8z3PYt5aPU43Bajc1H0h1Q@mail.gmail.com/

Making it use a larger buffer (say 500 or 1000 extents), would make it
a lot better.
But as I pointed out there, last year cp was changed to not use fiemap
to detect holes anymore,
now it uses lseek with SEEK_HOLE mode. So with time, everyone will get
a cp version that does
not use fiemap anymore.

Also, for the cp case, since it does many read and fiemap calls to the
source file, the following
patch probably helps too:

https://lore.kernel.org/linux-btrfs/20220819024408.9714-1-ethanlien@synology.com/

Because it will make the io tree smaller. That should land on 6.1 too.

Thanks for testing and the report.

>
> 45.20%--extent_fiemap
> |
> |--31.02%--lock_extent_bits
> |          |
> |           --30.78%--set_extent_bit
> |                     |
> |                     |--6.93%--insert_state
> |                     |          |
> |                     |           --0.70%--set_state_bits
> |                     |
> |                     |--4.25%--alloc_extent_state
> |                     |          |
> |                     |           --3.86%--kmem_cache_alloc
> |                     |
> |                     |--2.77%--_raw_spin_lock
> |                     |          |
> |                     |           --1.23%--preempt_count_add
> |                     |
> |                     |--2.48%--rb_next
> |                     |
> |                     |--1.13%--_raw_spin_unlock
> |                     |          |
> |                     |           --0.55%--preempt_count_sub
> |                     |
> |                      --0.92%--set_state_bits
> |
>  --13.80%--__clear_extent_bit
>            |
>             --13.30%--clear_state_bit
>                       |
>                       |           --3.48%--_raw_spin_unlock_irqrestore
>                       |
>                       |--2.45%--merge_state.part.0
>                       |          |
>                       |           --1.57%--rb_next
>                       |
>                       |--2.14%--__slab_free
>                       |          |
>                       |           --1.26%--cmpxchg_double_slab.constprop.0.isra.0
>                       |
>                       |--0.74%--free_extent_state
>                       |
>                       |--0.70%--kmem_cache_free
>                       |
>                       |--0.69%--btrfs_clear_delalloc_extent
>                       |
>                        --0.52%--rb_next
>
>
>
> Thanks!
> --
> Dominique

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-09-21  9:01 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-04 16:30 fiemap is slow on btrfs on files with multiple extents Pavel Tikhomirov
2022-08-04 18:49 ` Josef Bacik
2022-08-05  4:52   ` Wang Yugui
2022-08-05  7:38 ` Dominique MARTINET
2022-08-05  9:54   ` Filipe Manana
2022-09-01 13:25     ` Filipe Manana
2022-09-01 15:06       ` Pavel Tikhomirov
2022-09-21  7:30       ` Dominique MARTINET
2022-09-21  9:00         ` Filipe Manana

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.