* [PATCH v1 0/2] address romfs performance regression @ 2020-06-23 0:45 Sven Van Asbroeck 2020-06-23 0:45 ` [PATCH v1 1/2] romfs: use s_blocksize(_bits) if CONFIG_BLOCK Sven Van Asbroeck 2020-06-23 0:45 ` [PATCH v1 2/2] romfs: address performance regression since v3.10 Sven Van Asbroeck 0 siblings, 2 replies; 7+ messages in thread From: Sven Van Asbroeck @ 2020-06-23 0:45 UTC (permalink / raw) To: linux-kernel Cc: Al Viro, Deepa Dinamani, David Howells, Darrick J. Wong, Janos Farkas, Jeff Layton Tree: next-20200613 Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Deepa Dinamani <deepa.kernel@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: "Darrick J. Wong" <darrick.wong@oracle.com> Cc: Janos Farkas <chexum+dev@gmail.com> Cc: Jeff Layton <jlayton@kernel.org> To: linux-kernel@vger.kernel.org Sven Van Asbroeck (2): romfs: use s_blocksize(_bits) if CONFIG_BLOCK romfs: address performance regression since v3.10 fs/romfs/storage.c | 25 ++++++++-------- fs/romfs/super.c | 71 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 78 insertions(+), 18 deletions(-) -- 2.17.1 ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v1 1/2] romfs: use s_blocksize(_bits) if CONFIG_BLOCK 2020-06-23 0:45 [PATCH v1 0/2] address romfs performance regression Sven Van Asbroeck @ 2020-06-23 0:45 ` Sven Van Asbroeck 2020-06-23 0:45 ` [PATCH v1 2/2] romfs: address performance regression since v3.10 Sven Van Asbroeck 1 sibling, 0 replies; 7+ messages in thread From: Sven Van Asbroeck @ 2020-06-23 0:45 UTC (permalink / raw) To: linux-kernel Cc: Al Viro, Deepa Dinamani, David Howells, Darrick J. Wong, Janos Farkas, Jeff Layton The super_block fields s_blocksize and s_blocksize_bits always reflect the actual configured blocksize for a filesystem. Use these in all calculations where blocksize is required. This allows us to easily change the blocksize in a later patch. Note that I cannot determine what happens if !CONFIG_BLOCK, as I have no access to such a system. Out of an abundance of caution, I have left all !CONFIG_BLOCK codepaths in their original state. Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Deepa Dinamani <deepa.kernel@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: "Darrick J. 
Wong" <darrick.wong@oracle.com> Cc: Janos Farkas <chexum+dev@gmail.com> Cc: Jeff Layton <jlayton@kernel.org> To: linux-kernel@vger.kernel.org Signed-off-by: Sven Van Asbroeck <TheSven73@gmail.com> --- fs/romfs/storage.c | 25 +++++++++++++------------ fs/romfs/super.c | 9 ++++++++- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c index 6b2b4362089e..5e84efadac3f 100644 --- a/fs/romfs/storage.c +++ b/fs/romfs/storage.c @@ -109,9 +109,9 @@ static int romfs_blk_read(struct super_block *sb, unsigned long pos, /* copy the string up to blocksize bytes at a time */ while (buflen > 0) { - offset = pos & (ROMBSIZE - 1); - segment = min_t(size_t, buflen, ROMBSIZE - offset); - bh = sb_bread(sb, pos >> ROMBSBITS); + offset = pos & (sb->s_blocksize - 1); + segment = min_t(size_t, buflen, sb->s_blocksize - offset); + bh = sb_bread(sb, pos >> sb->s_blocksize_bits); if (!bh) return -EIO; memcpy(buf, bh->b_data + offset, segment); @@ -138,9 +138,9 @@ static ssize_t romfs_blk_strnlen(struct super_block *sb, /* scan the string up to blocksize bytes at a time */ while (limit > 0) { - offset = pos & (ROMBSIZE - 1); - segment = min_t(size_t, limit, ROMBSIZE - offset); - bh = sb_bread(sb, pos >> ROMBSBITS); + offset = pos & (sb->s_blocksize - 1); + segment = min_t(size_t, limit, sb->s_blocksize - offset); + bh = sb_bread(sb, pos >> sb->s_blocksize_bits); if (!bh) return -EIO; buf = bh->b_data + offset; @@ -170,9 +170,9 @@ static int romfs_blk_strcmp(struct super_block *sb, unsigned long pos, /* compare string up to a block at a time */ while (size > 0) { - offset = pos & (ROMBSIZE - 1); - segment = min_t(size_t, size, ROMBSIZE - offset); - bh = sb_bread(sb, pos >> ROMBSBITS); + offset = pos & (sb->s_blocksize - 1); + segment = min_t(size_t, size, sb->s_blocksize - offset); + bh = sb_bread(sb, pos >> sb->s_blocksize_bits); if (!bh) return -EIO; matched = (memcmp(bh->b_data + offset, str, segment) == 0); @@ -180,7 +180,8 @@ static int 
romfs_blk_strcmp(struct super_block *sb, unsigned long pos, size -= segment; pos += segment; str += segment; - if (matched && size == 0 && offset + segment < ROMBSIZE) { + if (matched && size == 0 && + offset + segment < sb->s_blocksize) { if (!bh->b_data[offset + segment]) terminated = true; else @@ -194,8 +195,8 @@ static int romfs_blk_strcmp(struct super_block *sb, unsigned long pos, if (!terminated) { /* the terminating NUL must be on the first byte of the next * block */ - BUG_ON((pos & (ROMBSIZE - 1)) != 0); - bh = sb_bread(sb, pos >> ROMBSBITS); + BUG_ON((pos & (sb->s_blocksize - 1)) != 0); + bh = sb_bread(sb, pos >> sb->s_blocksize_bits); if (!bh) return -EIO; matched = !bh->b_data[0]; diff --git a/fs/romfs/super.c b/fs/romfs/super.c index e582d001f792..6fecdea791f1 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -411,10 +411,17 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = ROMFS_MAGIC; buf->f_namelen = ROMFS_MAXFN; - buf->f_bsize = ROMBSIZE; buf->f_bfree = buf->f_bavail = buf->f_ffree; +#ifdef CONFIG_BLOCK + buf->f_bsize = sb->s_blocksize; + buf->f_blocks = + (romfs_maxsize(dentry->d_sb) + sb->s_blocksize - 1) >> + sb->s_blocksize_bits; +#else + buf->f_bsize = ROMBSIZE; buf->f_blocks = (romfs_maxsize(dentry->d_sb) + ROMBSIZE - 1) >> ROMBSBITS; +#endif buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); return 0; -- 2.17.1 ^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v1 2/2] romfs: address performance regression since v3.10 2020-06-23 0:45 [PATCH v1 0/2] address romfs performance regression Sven Van Asbroeck 2020-06-23 0:45 ` [PATCH v1 1/2] romfs: use s_blocksize(_bits) if CONFIG_BLOCK Sven Van Asbroeck @ 2020-06-23 0:45 ` Sven Van Asbroeck 2020-06-23 16:16 ` kernel test robot 2020-07-09 12:16 ` Sven Van Asbroeck 1 sibling, 2 replies; 7+ messages in thread From: Sven Van Asbroeck @ 2020-06-23 0:45 UTC (permalink / raw) To: linux-kernel Cc: Al Viro, Deepa Dinamani, David Howells, Darrick J. Wong, Janos Farkas, Jeff Layton Problem ------- romfs sequential read performance has regressed very badly since v3.10. Currently, reading a large file inside a romfs image is up to 12x slower compared to reading the romfs image directly. Benchmarks: - use a romfs image which contains a single 250M file - calculate the md5sum of the romfs image directly (test 1) $ time md5sum image.romfs - loop-mount the romfs image, and calc the md5sum of the file inside it (test 2) $ mount -o loop,ro image.romfs /mnt/romfs $ time md5sum /mnt/romfs/file - drop caches in between $ echo 3 > /proc/sys/vm/drop_caches imx6 (arm cortex a9) on emmc, running v5.7.2: (test 1) 5 seconds (test 2) 60 seconds (12x slower) Intel i7-3630QM on Samsung SSD 850 EVO (EMT02B6Q), running Ubuntu with v4.15.0-106-generic: (test 1) 1.3 seconds (test 2) 3.3 seconds (2.5x slower) To show that a regression has occurred since v3.10: imx6 on emmc, running v3.10.17: (test 1) 16 seconds (test 2) 18 seconds Proposed Solution ----------------- Increase the blocksize from 1K to PAGE_SIZE. This brings the sequential read performance close to where it was on v3.10: imx6 on emmc, running v5.7.2: (test 2 1K blocksize) 60 seconds (test 2 4K blocksize) 22 seconds Intel on Ubuntu running v4.15: (test 2 1K blocksize) 3.3 seconds (test 2 4K blocksize) 1.9 seconds There is a risk that this may increase latency on random- access workloads. 
But the test below suggests that this is not a concern: Benchmark: - use a 630M romfs image consisting of 9600 files - loop-mount the romfs image $ mount -o loop,ro image.romfs /mnt/romfs - drop all caches - list all files in the filesystem (test 3) $ time find /mnt/romfs > /dev/null imx6 on emmc, running v5.7.2: (test 3 1K blocksize) 9.5 seconds (test 3 4K blocksize) 9 seconds Intel on Ubuntu, running v4.15: (test 3 1K blocksize) 1.4 seconds (test 3 4K blocksize) 1.2 seconds Practical Solution ------------------ Introduce a mount-option called 'largeblocks'. If present, increase the blocksize for much better sequential performance. Note that the Linux block layer can only support n-K blocks if the underlying block device length is also aligned to n-K. This may not always be the case. Therefore, the driver will pick the largest blocksize which the underlying block device can support. Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Deepa Dinamani <deepa.kernel@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: "Darrick J. 
Wong" <darrick.wong@oracle.com> Cc: Janos Farkas <chexum+dev@gmail.com> Cc: Jeff Layton <jlayton@kernel.org> To: linux-kernel@vger.kernel.org Signed-off-by: Sven Van Asbroeck <TheSven73@gmail.com> --- fs/romfs/super.c | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 6fecdea791f1..93565aeaa43c 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -65,7 +65,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> -#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/statfs.h> @@ -460,6 +460,54 @@ static __u32 romfs_checksum(const void *data, int size) return sum; } +enum romfs_param { + Opt_largeblocks, +}; + +static const struct fs_parameter_spec romfs_fs_parameters[] = { + fsparam_flag("largeblocks", Opt_largeblocks), + {} +}; + +/* + * Parse a single mount parameter. + */ +static int romfs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, romfs_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_largeblocks: + fc->fs_private = (void *) 1; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* + * pick the largest blocksize which the underlying block device + * is a multiple of. Or fall back to legacy (ROMBSIZE). 
+ */ +static int romfs_largest_blocksize(struct super_block *sb) +{ + loff_t device_sz = i_size_read(sb->s_bdev->bd_inode); + int blksz; + + for (blksz = PAGE_SIZE; blksz > ROMBSIZE; blksz >>= 1) + if ((device_sz % blksz) == 0) + break; + + return blksz; +} + /* * fill in the superblock */ @@ -467,17 +515,19 @@ static int romfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct romfs_super_block *rsb; struct inode *root; - unsigned long pos, img_size; + unsigned long pos, img_size, dev_blocksize; const char *storage; size_t len; int ret; #ifdef CONFIG_BLOCK + dev_blocksize = fc->fs_private ? romfs_largest_blocksize(sb) : + ROMBSIZE; if (!sb->s_mtd) { - sb_set_blocksize(sb, ROMBSIZE); + sb_set_blocksize(sb, dev_blocksize); } else { - sb->s_blocksize = ROMBSIZE; - sb->s_blocksize_bits = blksize_bits(ROMBSIZE); + sb->s_blocksize = dev_blocksize; + sb->s_blocksize_bits = blksize_bits(dev_blocksize); } #endif @@ -573,6 +623,7 @@ static int romfs_get_tree(struct fs_context *fc) static const struct fs_context_operations romfs_context_ops = { .get_tree = romfs_get_tree, .reconfigure = romfs_reconfigure, + .parse_param = romfs_parse_param, }; /* @@ -607,6 +658,7 @@ static struct file_system_type romfs_fs_type = { .owner = THIS_MODULE, .name = "romfs", .init_fs_context = romfs_init_fs_context, + .parameters = romfs_fs_parameters, .kill_sb = romfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; -- 2.17.1 ^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH v1 2/2] romfs: address performance regression since v3.10 2020-06-23 0:45 ` [PATCH v1 2/2] romfs: address performance regression since v3.10 Sven Van Asbroeck @ 2020-06-23 16:16 ` kernel test robot 2020-06-23 17:05 ` Sven Van Asbroeck 2020-07-09 12:16 ` Sven Van Asbroeck 1 sibling, 1 reply; 7+ messages in thread From: kernel test robot @ 2020-06-23 16:16 UTC (permalink / raw) To: Sven Van Asbroeck, linux-kernel Cc: kbuild-all, Al Viro, Deepa Dinamani, David Howells, Darrick J. Wong, Janos Farkas, Jeff Layton [-- Attachment #1: Type: text/plain, Size: 1715 bytes --] Hi Sven, I love your patch! Yet something to improve: [auto build test ERROR on linux/master] [also build test ERROR on linus/master v5.8-rc2 next-20200623] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Sven-Van-Asbroeck/address-romfs-performance-regression/20200623-085856 base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 5e857ce6eae7ca21b2055cca4885545e29228fe2 config: i386-randconfig-r015-20200623 (attached as .config) compiler: gcc-5 (Ubuntu 5.5.0-12ubuntu1) 5.5.0 20171010 reproduce (this is a W=1 build): # save the attached .config to linux build tree make W=1 ARCH=i386 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): ld: fs/romfs/super.o: in function `romfs_largest_blocksize': >> fs/romfs/super.c:505: undefined reference to `__moddi3' vim +505 fs/romfs/super.c 494 495 /* 496 * pick the largest blocksize which the underlying block device 497 * is a multiple of. Or fall back to legacy (ROMBSIZE). 
498 */ 499 static int romfs_largest_blocksize(struct super_block *sb) 500 { 501 loff_t device_sz = i_size_read(sb->s_bdev->bd_inode); 502 int blksz; 503 504 for (blksz = PAGE_SIZE; blksz > ROMBSIZE; blksz >>= 1) > 505 if ((device_sz % blksz) == 0) 506 break; 507 508 return blksz; 509 } 510 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org [-- Attachment #2: .config.gz --] [-- Type: application/gzip, Size: 42847 bytes --] ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v1 2/2] romfs: address performance regression since v3.10 2020-06-23 16:16 ` kernel test robot @ 2020-06-23 17:05 ` Sven Van Asbroeck 0 siblings, 0 replies; 7+ messages in thread From: Sven Van Asbroeck @ 2020-06-23 17:05 UTC (permalink / raw) To: Linux Kernel Mailing List Cc: kbuild-all, Al Viro, Deepa Dinamani, David Howells, Darrick J. Wong, Janos Farkas, Jeff Layton On Tue, Jun 23, 2020 at 12:17 PM kernel test robot <lkp@intel.com> wrote: > > ld: fs/romfs/super.o: in function `romfs_largest_blocksize': > >> fs/romfs/super.c:505: undefined reference to `__moddi3' > > > 505 if ((device_sz % blksz) == 0) I can change this to device_sz & (blksz-1) if this patch gets positive feedback. ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v1 2/2] romfs: address performance regression since v3.10 2020-06-23 0:45 ` [PATCH v1 2/2] romfs: address performance regression since v3.10 Sven Van Asbroeck 2020-06-23 16:16 ` kernel test robot @ 2020-07-09 12:16 ` Sven Van Asbroeck 1 sibling, 0 replies; 7+ messages in thread From: Sven Van Asbroeck @ 2020-07-09 12:16 UTC (permalink / raw) To: Al Viro Cc: Deepa Dinamani, David Howells, Darrick J. Wong, Janos Farkas, Jeff Layton, Linux Kernel Mailing List Hello Al, You are the closest I could find to a romfs maintainer. get_maintainer.pl doesn't appear to list any. This attempted performance regression fix didn't generate much feedback (to say the least). It's however a real issue for us when supporting a legacy product where we don't have the luxury of switching to a better-supported fs. Is there anything I can do to further this? Is lkml currently accepting bug / regression fixes to romfs, or is it obsolete? Many thanks, Sven On Mon, Jun 22, 2020 at 8:45 PM Sven Van Asbroeck <thesven73@gmail.com> wrote: > > Problem > ------- > romfs sequential read performance has regressed very badly since > v3.10. Currently, reading a large file inside a romfs image is > up to 12x slower compared to reading the romfs image directly. 
> > Benchmarks: > - use a romfs image which contains a single 250M file > - calculate the md5sum of the romfs image directly (test 1) > $ time md5sum image.romfs > - loop-mount the romfs image, and calc the md5sum of the file > inside it (test 2) > $ mount -o loop,ro image.romfs /mnt/romfs > $ time md5sum /mnt/romfs/file > - drop caches in between > $ echo 3 > /proc/sys/vm/drop_caches > > imx6 (arm cortex a9) on emmc, running v5.7.2: > (test 1) 5 seconds > (test 2) 60 seconds (12x slower) > > Intel i7-3630QM on Samsung SSD 850 EVO (EMT02B6Q), > running Ubuntu with v4.15.0-106-generic: > (test 1) 1.3 seconds > (test 2) 3.3 seconds (2.5x slower) > > To show that a regression has occurred since v3.10: > > imx6 on emmc, running v3.10.17: > (test 1) 16 seconds > (test 2) 18 seconds > > Proposed Solution > ----------------- > Increase the blocksize from 1K to PAGE_SIZE. This brings the > sequential read performance close to where it was on v3.10: > > imx6 on emmc, running v5.7.2: > (test 2 1K blocksize) 60 seconds > (test 2 4K blocksize) 22 seconds > > Intel on Ubuntu running v4.15: > (test 2 1K blocksize) 3.3 seconds > (test 2 4K blocksize) 1.9 seconds > > There is a risk that this may increase latency on random- > access workloads. But the test below suggests that this > is not a concern: > > Benchmark: > - use a 630M romfs image consisting of 9600 files > - loop-mount the romfs image > $ mount -o loop,ro image.romfs /mnt/romfs > - drop all caches > - list all files in the filesystem (test 3) > $ time find /mnt/romfs > /dev/null > > imx6 on emmc, running v5.7.2: > (test 3 1K blocksize) 9.5 seconds > (test 3 4K blocksize) 9 seconds > > Intel on Ubuntu, running v4.15: > (test 3 1K blocksize) 1.4 seconds > (test 3 4K blocksize) 1.2 seconds > > Practical Solution > ------------------ > Introduce a mount-option called 'largeblocks'. If present, > increase the blocksize for much better sequential performance. 
> > Note that the Linux block layer can only support n-K blocks if > the underlying block device length is also aligned to n-K. This > may not always be the case. Therefore, the driver will pick the > largest blocksize which the underlying block device can support. > > Cc: Al Viro <viro@zeniv.linux.org.uk> > Cc: Deepa Dinamani <deepa.kernel@gmail.com> > Cc: David Howells <dhowells@redhat.com> > Cc: "Darrick J. Wong" <darrick.wong@oracle.com> > Cc: Janos Farkas <chexum+dev@gmail.com> > Cc: Jeff Layton <jlayton@kernel.org> > To: linux-kernel@vger.kernel.org > Signed-off-by: Sven Van Asbroeck <TheSven73@gmail.com> > --- > fs/romfs/super.c | 62 ++++++++++++++++++++++++++++++++++++++++++++---- > 1 file changed, 57 insertions(+), 5 deletions(-) > > diff --git a/fs/romfs/super.c b/fs/romfs/super.c > index 6fecdea791f1..93565aeaa43c 100644 > --- a/fs/romfs/super.c > +++ b/fs/romfs/super.c > @@ -65,7 +65,7 @@ > #include <linux/slab.h> > #include <linux/init.h> > #include <linux/blkdev.h> > -#include <linux/fs_context.h> > +#include <linux/fs_parser.h> > #include <linux/mount.h> > #include <linux/namei.h> > #include <linux/statfs.h> > @@ -460,6 +460,54 @@ static __u32 romfs_checksum(const void *data, int size) > return sum; > } > > +enum romfs_param { > + Opt_largeblocks, > +}; > + > +static const struct fs_parameter_spec romfs_fs_parameters[] = { > + fsparam_flag("largeblocks", Opt_largeblocks), > + {} > +}; > + > +/* > + * Parse a single mount parameter. > + */ > +static int romfs_parse_param(struct fs_context *fc, struct fs_parameter *param) > +{ > + struct fs_parse_result result; > + int opt; > + > + opt = fs_parse(fc, romfs_fs_parameters, param, &result); > + if (opt < 0) > + return opt; > + > + switch (opt) { > + case Opt_largeblocks: > + fc->fs_private = (void *) 1; > + break; > + default: > + return -EINVAL; > + } > + > + return 0; > +} > + > +/* > + * pick the largest blocksize which the underlying block device > + * is a multiple of. 
Or fall back to legacy (ROMBSIZE). > + */ > +static int romfs_largest_blocksize(struct super_block *sb) > +{ > + loff_t device_sz = i_size_read(sb->s_bdev->bd_inode); > + int blksz; > + > + for (blksz = PAGE_SIZE; blksz > ROMBSIZE; blksz >>= 1) > + if ((device_sz % blksz) == 0) > + break; > + > + return blksz; > +} > + > /* > * fill in the superblock > */ > @@ -467,17 +515,19 @@ static int romfs_fill_super(struct super_block *sb, struct fs_context *fc) > { > struct romfs_super_block *rsb; > struct inode *root; > - unsigned long pos, img_size; > + unsigned long pos, img_size, dev_blocksize; > const char *storage; > size_t len; > int ret; > > #ifdef CONFIG_BLOCK > + dev_blocksize = fc->fs_private ? romfs_largest_blocksize(sb) : > + ROMBSIZE; > if (!sb->s_mtd) { > - sb_set_blocksize(sb, ROMBSIZE); > + sb_set_blocksize(sb, dev_blocksize); > } else { > - sb->s_blocksize = ROMBSIZE; > - sb->s_blocksize_bits = blksize_bits(ROMBSIZE); > + sb->s_blocksize = dev_blocksize; > + sb->s_blocksize_bits = blksize_bits(dev_blocksize); > } > #endif > > @@ -573,6 +623,7 @@ static int romfs_get_tree(struct fs_context *fc) > static const struct fs_context_operations romfs_context_ops = { > .get_tree = romfs_get_tree, > .reconfigure = romfs_reconfigure, > + .parse_param = romfs_parse_param, > }; > > /* > @@ -607,6 +658,7 @@ static struct file_system_type romfs_fs_type = { > .owner = THIS_MODULE, > .name = "romfs", > .init_fs_context = romfs_init_fs_context, > + .parameters = romfs_fs_parameters, > .kill_sb = romfs_kill_sb, > .fs_flags = FS_REQUIRES_DEV, > }; > -- > 2.17.1 > ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v1 0/2] address romfs performance regression @ 2020-06-23 0:42 Sven Van Asbroeck 2020-06-23 0:43 ` [PATCH v1 2/2] romfs: address performance regression since v3.10 Sven Van Asbroeck 0 siblings, 1 reply; 7+ messages in thread From: Sven Van Asbroeck @ 2020-06-23 0:42 UTC (permalink / raw) To: linux-kernel Cc: Al Viro, Deepa Dinamani, David Howells, Darrick J. Wong, Janos Farkas, Jeff Layton Tree: next-20200613 Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Deepa Dinamani <deepa.kernel@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: "Darrick J. Wong" <darrick.wong@oracle.com> Cc: Janos Farkas <chexum+dev@gmail.com> Cc: Jeff Layton <jlayton@kernel.org> To: linux-kernel@vger.kernel.org Sven Van Asbroeck (2): romfs: use s_blocksize(_bits) if CONFIG_BLOCK romfs: address performance regression since v3.10 fs/romfs/storage.c | 25 ++++++++-------- fs/romfs/super.c | 71 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 78 insertions(+), 18 deletions(-) -- 2.17.1 ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v1 2/2] romfs: address performance regression since v3.10 2020-06-23 0:42 [PATCH v1 0/2] address romfs performance regression Sven Van Asbroeck @ 2020-06-23 0:43 ` Sven Van Asbroeck 0 siblings, 0 replies; 7+ messages in thread From: Sven Van Asbroeck @ 2020-06-23 0:43 UTC (permalink / raw) To: linux-kernel Problem ------- romfs sequential read performance has regressed very badly since v3.10. Currently, reading a large file inside a romfs image is up to 12x slower compared to reading the romfs image directly. Benchmarks: - use a romfs image which contains a single 250M file - calculate the md5sum of the romfs image directly (test 1) $ time md5sum image.romfs - loop-mount the romfs image, and calc the md5sum of the file inside it (test 2) $ mount -o loop,ro image.romfs /mnt/romfs $ time md5sum /mnt/romfs/file - drop caches in between $ echo 3 > /proc/sys/vm/drop_caches imx6 (arm cortex a9) on emmc, running v5.7.2: (test 1) 5 seconds (test 2) 60 seconds (12x slower) Intel i7-3630QM on Samsung SSD 850 EVO (EMT02B6Q), running Ubuntu with v4.15.0-106-generic: (test 1) 1.3 seconds (test 2) 3.3 seconds (2.5x slower) To show that a regression has occurred since v3.10: imx6 on emmc, running v3.10.17: (test 1) 16 seconds (test 2) 18 seconds Proposed Solution ----------------- Increase the blocksize from 1K to PAGE_SIZE. This brings the sequential read performance close to where it was on v3.10: imx6 on emmc, running v5.7.2: (test 2 1K blocksize) 60 seconds (test 2 4K blocksize) 22 seconds Intel on Ubuntu running v4.15: (test 2 1K blocksize) 3.3 seconds (test 2 4K blocksize) 1.9 seconds There is a risk that this may increase latency on random- access workloads. 
But the test below suggests that this is not a concern: Benchmark: - use a 630M romfs image consisting of 9600 files - loop-mount the romfs image $ mount -o loop,ro image.romfs /mnt/romfs - drop all caches - list all files in the filesystem (test 3) $ time find /mnt/romfs > /dev/null imx6 on emmc, running v5.7.2: (test 3 1K blocksize) 9.5 seconds (test 3 4K blocksize) 9 seconds Intel on Ubuntu, running v4.15: (test 3 1K blocksize) 1.4 seconds (test 3 4K blocksize) 1.2 seconds Practical Solution ------------------ Introduce a mount-option called 'largeblocks'. If present, increase the blocksize for much better sequential performance. Note that the Linux block layer can only support n-K blocks if the underlying block device length is also aligned to n-K. This may not always be the case. Therefore, the driver will pick the largest blocksize which the underlying block device can support. Signed-off-by: Sven Van Asbroeck <TheSven73@gmail.com> --- fs/romfs/super.c | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 6fecdea791f1..93565aeaa43c 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -65,7 +65,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> -#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/statfs.h> @@ -460,6 +460,54 @@ static __u32 romfs_checksum(const void *data, int size) return sum; } +enum romfs_param { + Opt_largeblocks, +}; + +static const struct fs_parameter_spec romfs_fs_parameters[] = { + fsparam_flag("largeblocks", Opt_largeblocks), + {} +}; + +/* + * Parse a single mount parameter. 
+ */ +static int romfs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, romfs_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_largeblocks: + fc->fs_private = (void *) 1; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* + * pick the largest blocksize which the underlying block device + * is a multiple of. Or fall back to legacy (ROMBSIZE). + */ +static int romfs_largest_blocksize(struct super_block *sb) +{ + loff_t device_sz = i_size_read(sb->s_bdev->bd_inode); + int blksz; + + for (blksz = PAGE_SIZE; blksz > ROMBSIZE; blksz >>= 1) + if ((device_sz % blksz) == 0) + break; + + return blksz; +} + /* * fill in the superblock */ @@ -467,17 +515,19 @@ static int romfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct romfs_super_block *rsb; struct inode *root; - unsigned long pos, img_size; + unsigned long pos, img_size, dev_blocksize; const char *storage; size_t len; int ret; #ifdef CONFIG_BLOCK + dev_blocksize = fc->fs_private ? romfs_largest_blocksize(sb) : + ROMBSIZE; if (!sb->s_mtd) { - sb_set_blocksize(sb, ROMBSIZE); + sb_set_blocksize(sb, dev_blocksize); } else { - sb->s_blocksize = ROMBSIZE; - sb->s_blocksize_bits = blksize_bits(ROMBSIZE); + sb->s_blocksize = dev_blocksize; + sb->s_blocksize_bits = blksize_bits(dev_blocksize); } #endif @@ -573,6 +623,7 @@ static int romfs_get_tree(struct fs_context *fc) static const struct fs_context_operations romfs_context_ops = { .get_tree = romfs_get_tree, .reconfigure = romfs_reconfigure, + .parse_param = romfs_parse_param, }; /* @@ -607,6 +658,7 @@ static struct file_system_type romfs_fs_type = { .owner = THIS_MODULE, .name = "romfs", .init_fs_context = romfs_init_fs_context, + .parameters = romfs_fs_parameters, .kill_sb = romfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; -- 2.17.1 ^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2020-07-09 12:16 UTC | newest] Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-06-23 0:45 [PATCH v1 0/2] address romfs performance regression Sven Van Asbroeck 2020-06-23 0:45 ` [PATCH v1 1/2] romfs: use s_blocksize(_bits) if CONFIG_BLOCK Sven Van Asbroeck 2020-06-23 0:45 ` [PATCH v1 2/2] romfs: address performance regression since v3.10 Sven Van Asbroeck 2020-06-23 16:16 ` kernel test robot 2020-06-23 17:05 ` Sven Van Asbroeck 2020-07-09 12:16 ` Sven Van Asbroeck -- strict thread matches above, loose matches on Subject: below -- 2020-06-23 0:42 [PATCH v1 0/2] address romfs performance regression Sven Van Asbroeck 2020-06-23 0:43 ` [PATCH v1 2/2] romfs: address performance regression since v3.10 Sven Van Asbroeck
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).