Linux-XFS Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH, RFC] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
@ 2020-02-13 21:12 Eric Sandeen
  2020-02-13 23:48 ` Dave Chinner
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Eric Sandeen @ 2020-02-13 21:12 UTC (permalink / raw)
  To: linux-xfs

I had a request from someone who cared about mkfs speed(!)
over a slower network block device to look into using faster
zeroing methods, particularly for the log, during mkfs.xfs.

e2fsprogs already does this, thanks to some guy named Darrick:

/*
 * If we know about ZERO_RANGE, try that before we try PUNCH_HOLE because
 * ZERO_RANGE doesn't unmap preallocated blocks.  We prefer fallocate because
 * it always invalidates page cache, and libext2fs requires that reads after
 * ZERO_RANGE return zeroes.
 */
static int __unix_zeroout(int fd, off_t offset, off_t len)
{
        int ret = -1;

#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_ZERO_RANGE)
        ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, len);
        if (ret == 0)
                return 0;
#endif
#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
        ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                        offset,  len);
        if (ret == 0)
                return 0;
#endif
        errno = EOPNOTSUPP;
        return ret;
}

and nobody has exploded so far, AFAIK.  :)  So, floating this idea
for xfsprogs.  I'm a little scared of the second #ifdef block above, but
if that's really ok/consistent/safe we could add it too.

The patch moves some defines around too, I could split that up and resend
if this isn't laughed out of the room.

Thanks,
-Eric

=====

libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero

I had a request from someone who cared about mkfs speed(!)
over a slower network block device to look into using faster
zeroing methods, particularly for the log, during mkfs.

Using FALLOC_FL_ZERO_RANGE is faster in this case than writing
a bunch of zeros across a wire.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---

diff --git a/include/linux.h b/include/linux.h
index 8f3c32b0..425badb5 100644
--- a/include/linux.h
+++ b/include/linux.h
@@ -113,6 +113,26 @@ static __inline__ void platform_uuid_copy(uuid_t *dst, uuid_t *src)
 	uuid_copy(*dst, *src);
 }
 
+#ifndef FALLOC_FL_PUNCH_HOLE
+#define FALLOC_FL_PUNCH_HOLE	0x02
+#endif
+
+#ifndef FALLOC_FL_COLLAPSE_RANGE
+#define FALLOC_FL_COLLAPSE_RANGE 0x08
+#endif
+
+#ifndef FALLOC_FL_ZERO_RANGE
+#define FALLOC_FL_ZERO_RANGE 0x10
+#endif
+
+#ifndef FALLOC_FL_INSERT_RANGE
+#define FALLOC_FL_INSERT_RANGE 0x20
+#endif
+
+#ifndef FALLOC_FL_UNSHARE_RANGE
+#define FALLOC_FL_UNSHARE_RANGE 0x40
+#endif
+
 #ifndef BLKDISCARD
 #define BLKDISCARD	_IO(0x12,119)
 #endif
diff --git a/io/prealloc.c b/io/prealloc.c
index 6d452354..0b4efc45 100644
--- a/io/prealloc.c
+++ b/io/prealloc.c
@@ -12,26 +12,6 @@
 #include "init.h"
 #include "io.h"
 
-#ifndef FALLOC_FL_PUNCH_HOLE
-#define FALLOC_FL_PUNCH_HOLE	0x02
-#endif
-
-#ifndef FALLOC_FL_COLLAPSE_RANGE
-#define FALLOC_FL_COLLAPSE_RANGE 0x08
-#endif
-
-#ifndef FALLOC_FL_ZERO_RANGE
-#define FALLOC_FL_ZERO_RANGE 0x10
-#endif
-
-#ifndef FALLOC_FL_INSERT_RANGE
-#define FALLOC_FL_INSERT_RANGE 0x20
-#endif
-
-#ifndef FALLOC_FL_UNSHARE_RANGE
-#define FALLOC_FL_UNSHARE_RANGE 0x40
-#endif
-
 static cmdinfo_t allocsp_cmd;
 static cmdinfo_t freesp_cmd;
 static cmdinfo_t resvsp_cmd;
diff --git a/libxfs/Makefile b/libxfs/Makefile
index fbcc963a..b4e8864b 100644
--- a/libxfs/Makefile
+++ b/libxfs/Makefile
@@ -105,6 +105,10 @@ CFILES = cache.c \
 #
 #LCFLAGS +=
 
+ifeq ($(HAVE_FALLOCATE),yes)
+LCFLAGS += -DHAVE_FALLOCATE
+endif
+
 FCFLAGS = -I.
 
 LTLIBS = $(LIBPTHREAD) $(LIBRT)
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index 0d9d7202..94f63bbf 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -4,6 +4,9 @@
  * All Rights Reserved.
  */
 
+#if defined(HAVE_FALLOCATE)
+#include <linux/falloc.h>
+#endif
 
 #include "libxfs_priv.h"
 #include "init.h"
@@ -60,9 +63,21 @@ int
 libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 {
 	xfs_off_t	start_offset, end_offset, offset;
-	ssize_t		zsize, bytes;
+	ssize_t		zsize, bytes, len_bytes;
 	char		*z;
-	int		fd;
+	int		ret, fd;
+
+	fd = libxfs_device_to_fd(btp->dev);
+	start_offset = LIBXFS_BBTOOFF64(start);
+	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
+
+#if defined(HAVE_FALLOCATE)
+	/* try to use special zeroing methods, fall back to writes if needed */
+	len_bytes = LIBXFS_BBTOOFF64(len);
+	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start_offset, len_bytes);
+	if (ret == 0)
+		return 0;
+#endif
 
 	zsize = min(BDSTRAT_SIZE, BBTOB(len));
 	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
@@ -73,9 +88,6 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 	}
 	memset(z, 0, zsize);
 
-	fd = libxfs_device_to_fd(btp->dev);
-	start_offset = LIBXFS_BBTOOFF64(start);
-
 	if ((lseek(fd, start_offset, SEEK_SET)) < 0) {
 		fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
 			progname, __FUNCTION__,
@@ -83,7 +95,6 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 		exit(1);
 	}
 
-	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
 	for (offset = 0; offset < end_offset; ) {
 		bytes = min((ssize_t)(end_offset - offset), zsize);
 		if ((bytes = write(fd, z, bytes)) < 0) {


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH, RFC] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
  2020-02-13 21:12 [PATCH, RFC] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero Eric Sandeen
@ 2020-02-13 23:48 ` Dave Chinner
  2020-02-13 23:57   ` Eric Sandeen
  2020-02-14  1:05 ` [PATCH V2] " Eric Sandeen
  2020-02-22  3:22 ` [PATCH V3] " Eric Sandeen
  2 siblings, 1 reply; 10+ messages in thread
From: Dave Chinner @ 2020-02-13 23:48 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: linux-xfs

On Thu, Feb 13, 2020 at 03:12:24PM -0600, Eric Sandeen wrote:
> I had a request from someone who cared about mkfs speed(!)
> over a slower network block device to look into using faster
> zeroing methods, particularly for the log, during mkfs.xfs.
> 
> e2fsprogs already does this, thanks to some guy named Darrick:
> 
> /*
>  * If we know about ZERO_RANGE, try that before we try PUNCH_HOLE because
>  * ZERO_RANGE doesn't unmap preallocated blocks.  We prefer fallocate because
>  * it always invalidates page cache, and libext2fs requires that reads after
>  * ZERO_RANGE return zeroes.
>  */
> static int __unix_zeroout(int fd, off_t offset, off_t len)
> {
>         int ret = -1;
> 
> #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_ZERO_RANGE)
>         ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, len);
>         if (ret == 0)
>                 return 0;
> #endif
> #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
>         ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
>                         offset,  len);
>         if (ret == 0)
>                 return 0;
> #endif
>         errno = EOPNOTSUPP;
>         return ret;
> }
> 
> and nobody has exploded so far, AFAIK.  :)  So, floating this idea
> for xfsprogs.  I'm a little scared of the second #ifdef block above, but
> if that's really ok/consistent/safe we could add it too.

If FALLOC_FL_PUNCH_HOLE is defined, then FALLOC_FL_KEEP_SIZE is
guaranteed to be defined, so that condition check is somewhat
redundant. See commit 79124f18b335 ("fs: add hole punching to
fallocate")....

> The patch moves some defines around too, I could split that up and resend
> if this isn't laughed out of the room.
> 
> Thanks,
> -Eric
> 
> =====
> 
> libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
> 
> I had a request from someone who cared about mkfs speed(!)
> over a slower network block device to look into using faster
> zeroing methods, particularly for the log, during mkfs.
> 
> Using FALLOC_FL_ZERO_RANGE is faster in this case than writing
> a bunch of zeros across a wire.
> 
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> ---
> 
> diff --git a/include/linux.h b/include/linux.h
> index 8f3c32b0..425badb5 100644
> --- a/include/linux.h
> +++ b/include/linux.h
> @@ -113,6 +113,26 @@ static __inline__ void platform_uuid_copy(uuid_t *dst, uuid_t *src)
>  	uuid_copy(*dst, *src);
>  }
>  
> +#ifndef FALLOC_FL_PUNCH_HOLE
> +#define FALLOC_FL_PUNCH_HOLE	0x02
> +#endif
> +
> +#ifndef FALLOC_FL_COLLAPSE_RANGE
> +#define FALLOC_FL_COLLAPSE_RANGE 0x08
> +#endif
> +
> +#ifndef FALLOC_FL_ZERO_RANGE
> +#define FALLOC_FL_ZERO_RANGE 0x10
> +#endif
> +
> +#ifndef FALLOC_FL_INSERT_RANGE
> +#define FALLOC_FL_INSERT_RANGE 0x20
> +#endif
> +
> +#ifndef FALLOC_FL_UNSHARE_RANGE
> +#define FALLOC_FL_UNSHARE_RANGE 0x40
> +#endif

These were added to allow xfs_io to test these operations before
there was userspace support for them. I do not think we should
propagate them outside of xfs_io - if they are not supported by the
distro at build time, we shouldn't attempt to use them in mkfs.

i.e. xfs_io is test-enablement code for developers, mkfs.xfs is
production code for users, so different rules kinda exist for
them...

> diff --git a/libxfs/Makefile b/libxfs/Makefile
> index fbcc963a..b4e8864b 100644
> --- a/libxfs/Makefile
> +++ b/libxfs/Makefile
> @@ -105,6 +105,10 @@ CFILES = cache.c \
>  #
>  #LCFLAGS +=
>  
> +ifeq ($(HAVE_FALLOCATE),yes)
> +LCFLAGS += -DHAVE_FALLOCATE
> +endif

HAVE_FALLOCATE comes from an autoconf test. I suspect that this
needs to be made more finegrained, testing for fallocate() features,
not just just whether the syscall exists. And the above should
probably be in include/builddefs.in so that it's available to all
of xfsprogs, not just libxfs code...

> +
>  FCFLAGS = -I.
>  
>  LTLIBS = $(LIBPTHREAD) $(LIBRT)
> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> index 0d9d7202..94f63bbf 100644
> --- a/libxfs/rdwr.c
> +++ b/libxfs/rdwr.c
> @@ -4,6 +4,9 @@
>   * All Rights Reserved.
>   */
>  
> +#if defined(HAVE_FALLOCATE)
> +#include <linux/falloc.h>
> +#endif

Urk. That should be in include/linux.h, right?

>  
>  #include "libxfs_priv.h"
>  #include "init.h"
> @@ -60,9 +63,21 @@ int
>  libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
>  {
>  	xfs_off_t	start_offset, end_offset, offset;
> -	ssize_t		zsize, bytes;
> +	ssize_t		zsize, bytes, len_bytes;
>  	char		*z;
> -	int		fd;
> +	int		ret, fd;
> +
> +	fd = libxfs_device_to_fd(btp->dev);
> +	start_offset = LIBXFS_BBTOOFF64(start);
> +	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
> +
> +#if defined(HAVE_FALLOCATE)
> +	/* try to use special zeroing methods, fall back to writes if needed */
> +	len_bytes = LIBXFS_BBTOOFF64(len);
> +	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start_offset, len_bytes);
> +	if (ret == 0)
> +		return 0;
> +#endif

I kinda dislike the "return if success" hidden inside the ifdef -
it's not a code pattern I'd expect to see. This is what I'd tend
to expect in include/linux.h:

#if defined(HAVE_FALLOCATE_ZERO_RANGE)
static inline int
platform_zero_range(
	int		fd,
	xfs_off_t	start_offset,
	xfs_off_t	end_offset)
{
	int		ret;

	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start_offset, len_bytes);
	if (!ret)
		return 0;
	return -errno;
}
#else
#define platform_zero_range(fd, s, o)	(true)
#endif

and then the code in libxfs_device_zero() does:

	error = platform_zero_range(fd, start_offset, len_bytes);
	if (!error)
		return 0;

without adding nasty #defines...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH, RFC] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
  2020-02-13 23:48 ` Dave Chinner
@ 2020-02-13 23:57   ` Eric Sandeen
  2020-02-14  0:25     ` Dave Chinner
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Sandeen @ 2020-02-13 23:57 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

On 2/13/20 5:48 PM, Dave Chinner wrote:
> On Thu, Feb 13, 2020 at 03:12:24PM -0600, Eric Sandeen wrote:
>> I had a request from someone who cared about mkfs speed(!)
>> over a slower network block device to look into using faster
>> zeroing methods, particularly for the log, during mkfs.xfs.
>>
>> e2fsprogs already does this, thanks to some guy named Darrick:
>>
>> /*
>>  * If we know about ZERO_RANGE, try that before we try PUNCH_HOLE because
>>  * ZERO_RANGE doesn't unmap preallocated blocks.  We prefer fallocate because
>>  * it always invalidates page cache, and libext2fs requires that reads after
>>  * ZERO_RANGE return zeroes.
>>  */
>> static int __unix_zeroout(int fd, off_t offset, off_t len)
>> {
>>         int ret = -1;
>>
>> #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_ZERO_RANGE)
>>         ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, len);
>>         if (ret == 0)
>>                 return 0;
>> #endif
>> #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
>>         ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
>>                         offset,  len);
>>         if (ret == 0)
>>                 return 0;
>> #endif
>>         errno = EOPNOTSUPP;
>>         return ret;
>> }
>>
>> and nobody has exploded so far, AFAIK.  :)  So, floating this idea
>> for xfsprogs.  I'm a little scared of the second #ifdef block above, but
>> if that's really ok/consistent/safe we could add it too.
> 
> If FALLOC_FL_PUNCH_HOLE is defined, then FALLOC_FL_KEEP_SIZE is
> guaranteed to be defined, so that condition check is somewhat
> redundant. See commit 79124f18b335 ("fs: add hole punching to
> fallocate")....

Style aside, is that 2nd zeroing method something we want to try?

>> The patch moves some defines around too, I could split that up and resend
>> if this isn't laughed out of the room.
>>
>> Thanks,
>> -Eric
>>
>> =====
>>
>> libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
>>
>> I had a request from someone who cared about mkfs speed(!)
>> over a slower network block device to look into using faster
>> zeroing methods, particularly for the log, during mkfs.
>>
>> Using FALLOC_FL_ZERO_RANGE is faster in this case than writing
>> a bunch of zeros across a wire.
>>
>> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
>> ---
>>
>> diff --git a/include/linux.h b/include/linux.h
>> index 8f3c32b0..425badb5 100644
>> --- a/include/linux.h
>> +++ b/include/linux.h
>> @@ -113,6 +113,26 @@ static __inline__ void platform_uuid_copy(uuid_t *dst, uuid_t *src)
>>  	uuid_copy(*dst, *src);
>>  }
>>  
>> +#ifndef FALLOC_FL_PUNCH_HOLE
>> +#define FALLOC_FL_PUNCH_HOLE	0x02
>> +#endif
>> +
>> +#ifndef FALLOC_FL_COLLAPSE_RANGE
>> +#define FALLOC_FL_COLLAPSE_RANGE 0x08
>> +#endif
>> +
>> +#ifndef FALLOC_FL_ZERO_RANGE
>> +#define FALLOC_FL_ZERO_RANGE 0x10
>> +#endif
>> +
>> +#ifndef FALLOC_FL_INSERT_RANGE
>> +#define FALLOC_FL_INSERT_RANGE 0x20
>> +#endif
>> +
>> +#ifndef FALLOC_FL_UNSHARE_RANGE
>> +#define FALLOC_FL_UNSHARE_RANGE 0x40
>> +#endif
> 
> These were added to allow xfs_io to test these operations before
> there was userspace support for them. I do not think we should
> propagate them outside of xfs_io - if they are not supported by the
> distro at build time, we shouldn't attempt to use them in mkfs.
> 
> i.e. xfs_io is test-enablement code for developers, mkfs.xfs is
> production code for users, so different rules kinda exist for
> them...

Fair enough.  people /could/ use newer xfsprogs on older kernels, but...
those defines are getting pretty old by now in any case.

It's probably not terrible to just fail the build on a system that doesn't
have falloc.h, by now?

>> diff --git a/libxfs/Makefile b/libxfs/Makefile
>> index fbcc963a..b4e8864b 100644
>> --- a/libxfs/Makefile
>> +++ b/libxfs/Makefile
>> @@ -105,6 +105,10 @@ CFILES = cache.c \
>>  #
>>  #LCFLAGS +=
>>  
>> +ifeq ($(HAVE_FALLOCATE),yes)
>> +LCFLAGS += -DHAVE_FALLOCATE
>> +endif
> 
> HAVE_FALLOCATE comes from an autoconf test. I suspect that this
> needs to be made more finegrained, testing for fallocate() features,
> not just just whether the syscall exists. And the above should
> probably be in include/builddefs.in so that it's available to all
> of xfsprogs, not just libxfs code...

But that's testing the kernel on the build host, right?  What's the
point of that?  Or am I misunderstanding?

>> +
>>  FCFLAGS = -I.
>>  
>>  LTLIBS = $(LIBPTHREAD) $(LIBRT)
>> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
>> index 0d9d7202..94f63bbf 100644
>> --- a/libxfs/rdwr.c
>> +++ b/libxfs/rdwr.c
>> @@ -4,6 +4,9 @@
>>   * All Rights Reserved.
>>   */
>>  
>> +#if defined(HAVE_FALLOCATE)
>> +#include <linux/falloc.h>
>> +#endif
> 
> Urk. That should be in include/linux.h, right?
> 
>>  
>>  #include "libxfs_priv.h"
>>  #include "init.h"
>> @@ -60,9 +63,21 @@ int
>>  libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
>>  {
>>  	xfs_off_t	start_offset, end_offset, offset;
>> -	ssize_t		zsize, bytes;
>> +	ssize_t		zsize, bytes, len_bytes;
>>  	char		*z;
>> -	int		fd;
>> +	int		ret, fd;
>> +
>> +	fd = libxfs_device_to_fd(btp->dev);
>> +	start_offset = LIBXFS_BBTOOFF64(start);
>> +	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
>> +
>> +#if defined(HAVE_FALLOCATE)
>> +	/* try to use special zeroing methods, fall back to writes if needed */
>> +	len_bytes = LIBXFS_BBTOOFF64(len);
>> +	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start_offset, len_bytes);
>> +	if (ret == 0)
>> +		return 0;
>> +#endif
> 
> I kinda dislike the "return if success" hidden inside the ifdef -
> it's not a code pattern I'd expect to see. This is what I'd tend
> to expect in include/linux.h:
> 
> #if defined(HAVE_FALLOCATE_ZERO_RANGE)
> static inline int
> platform_zero_range(
> 	int		fd,
> 	xfs_off_t	start_offset,
> 	xfs_off_t	end_offset)
> {
> 	int		ret;
> 
> 	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start_offset, len_bytes);
> 	if (!ret)
> 		return 0;
> 	return -errno;
> }
> #else
> #define platform_zero_range(fd, s, o)	(true)
> #endif
> 
> and then the code in libxfs_device_zero() does:
> 
> 	error = platform_zero_range(fd, start_offset, len_bytes);
> 	if (!error)
> 		return 0;
> 
> without adding nasty #defines...

Yeah that's much better.  Tho I hate adding more platform_* stuff when really
we're down to only one platform but maybe that's a project for another day.

Thanks for the review,
-Eric

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH, RFC] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
  2020-02-13 23:57   ` Eric Sandeen
@ 2020-02-14  0:25     ` Dave Chinner
  0 siblings, 0 replies; 10+ messages in thread
From: Dave Chinner @ 2020-02-14  0:25 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: linux-xfs

On Thu, Feb 13, 2020 at 05:57:17PM -0600, Eric Sandeen wrote:
> On 2/13/20 5:48 PM, Dave Chinner wrote:
> > On Thu, Feb 13, 2020 at 03:12:24PM -0600, Eric Sandeen wrote:
> >> I had a request from someone who cared about mkfs speed(!)
> >> over a slower network block device to look into using faster
> >> zeroing methods, particularly for the log, during mkfs.xfs.
> >>
> >> e2fsprogs already does this, thanks to some guy named Darrick:
> >>
> >> /*
> >>  * If we know about ZERO_RANGE, try that before we try PUNCH_HOLE because
> >>  * ZERO_RANGE doesn't unmap preallocated blocks.  We prefer fallocate because
> >>  * it always invalidates page cache, and libext2fs requires that reads after
> >>  * ZERO_RANGE return zeroes.
> >>  */
> >> static int __unix_zeroout(int fd, off_t offset, off_t len)
> >> {
> >>         int ret = -1;
> >>
> >> #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_ZERO_RANGE)
> >>         ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, len);
> >>         if (ret == 0)
> >>                 return 0;
> >> #endif
> >> #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
> >>         ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
> >>                         offset,  len);
> >>         if (ret == 0)
> >>                 return 0;
> >> #endif
> >>         errno = EOPNOTSUPP;
> >>         return ret;
> >> }
> >>
> >> and nobody has exploded so far, AFAIK.  :)  So, floating this idea
> >> for xfsprogs.  I'm a little scared of the second #ifdef block above, but
> >> if that's really ok/consistent/safe we could add it too.
> > 
> > If FALLOC_FL_PUNCH_HOLE is defined, then FALLOC_FL_KEEP_SIZE is
> > guaranteed to be defined, so that condition check is somewhat
> > redundant. See commit 79124f18b335 ("fs: add hole punching to
> > fallocate")....
> 
> Style aside, is that 2nd zeroing method something we want to try?

Oh, I misunderstood what you were asking.  I don't think we want to
implement that.

e.g. If it's a pre-allocated image file or block device on
sparse-capable storage being mkfs'd, then the user really doesn't
want it punched out, do they?

And it's for the journal: any ENOSPC error from the block device in
the journal is immediately fatal to the filesystem. Hence I think
we should not punch out the journal blocks that already are allocated
in the storage.

And, really, the space may have already been punched out via
attempting to trim the device (i.e. "-K" mkfs option wasn't set), in
which case punching here is redundant.

Also, libxfs_device_zero() is called by xfs_repair: do we want
xfs_repair to be punching out blocks for the realtime bitmap and
summary storage? I don't think we want that to happen, either.

Fast zeroing, yes. Punching out allocated storage, no.

> >> +#define FALLOC_FL_COLLAPSE_RANGE 0x08
> >> +#endif
> >> +
> >> +#ifndef FALLOC_FL_ZERO_RANGE
> >> +#define FALLOC_FL_ZERO_RANGE 0x10
> >> +#endif
> >> +
> >> +#ifndef FALLOC_FL_INSERT_RANGE
> >> +#define FALLOC_FL_INSERT_RANGE 0x20
> >> +#endif
> >> +
> >> +#ifndef FALLOC_FL_UNSHARE_RANGE
> >> +#define FALLOC_FL_UNSHARE_RANGE 0x40
> >> +#endif
> > 
> > These were added to allow xfs_io to test these operations before
> > there was userspace support for them. I do not think we should
> > propagate them outside of xfs_io - if they are not supported by the
> > distro at build time, we shouldn't attempt to use them in mkfs.
> > 
> > i.e. xfs_io is test-enablement code for developers, mkfs.xfs is
> > production code for users, so different rules kinda exist for
> > them...
> 
> Fair enough.  people /could/ use newer xfsprogs on older kernels, but...
> those defines are getting pretty old by now in any case.

*nod*

> It's probably not terrible to just fail the build on a system that doesn't
> have falloc.h, by now?

That might be going a bit far. fallocate() in most cases is not
critical to the correct functioning of the production tools, so if
it's not present it shouldn't break anything.

> >> diff --git a/libxfs/Makefile b/libxfs/Makefile
> >> index fbcc963a..b4e8864b 100644
> >> --- a/libxfs/Makefile
> >> +++ b/libxfs/Makefile
> >> @@ -105,6 +105,10 @@ CFILES = cache.c \
> >>  #
> >>  #LCFLAGS +=
> >>  
> >> +ifeq ($(HAVE_FALLOCATE),yes)
> >> +LCFLAGS += -DHAVE_FALLOCATE
> >> +endif
> > 
> > HAVE_FALLOCATE comes from an autoconf test. I suspect that this
> > needs to be made more finegrained, testing for fallocate() features,
> > not just just whether the syscall exists. And the above should
> > probably be in include/builddefs.in so that it's available to all
> > of xfsprogs, not just libxfs code...
> 
> But that's testing the kernel on the build host, right?  What's the
> point of that?  Or am I misunderstanding?

Who builds their distro binaries on a userspace that doesn't contain
the headers and libaries the distro is going to ship with?

That's the whole point of "build-root" infrastructure, right? i.e.
build against what you are going to ship, not what is installed on
the build machine....

> >> +	fd = libxfs_device_to_fd(btp->dev);
> >> +	start_offset = LIBXFS_BBTOOFF64(start);
> >> +	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
> >> +
> >> +#if defined(HAVE_FALLOCATE)
> >> +	/* try to use special zeroing methods, fall back to writes if needed */
> >> +	len_bytes = LIBXFS_BBTOOFF64(len);
> >> +	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start_offset, len_bytes);
> >> +	if (ret == 0)
> >> +		return 0;
> >> +#endif
> > 
> > I kinda dislike the "return if success" hidden inside the ifdef -
> > it's not a code pattern I'd expect to see. This is what I'd tend
> > to expect in include/linux.h:
> > 
> > #if defined(HAVE_FALLOCATE_ZERO_RANGE)
> > static inline int
> > platform_zero_range(
> > 	int		fd,
> > 	xfs_off_t	start_offset,
> > 	xfs_off_t	end_offset)
> > {
> > 	int		ret;
> > 
> > 	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start_offset, len_bytes);
> > 	if (!ret)
> > 		return 0;
> > 	return -errno;
> > }
> > #else
> > #define platform_zero_range(fd, s, o)	(true)
> > #endif
> > 
> > and then the code in libxfs_device_zero() does:
> > 
> > 	error = platform_zero_range(fd, start_offset, len_bytes);
> > 	if (!error)
> > 		return 0;
> > 
> > without adding nasty #defines...
> 
> Yeah that's much better.  Tho I hate adding more platform_* stuff when really
> we're down to only one platform but maybe that's a project for another day.

There's more than one linux "platform" we have to support, and this
abstraction makes it easy to include new functionality in a clean
and autoconf detection friendly way... :)

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH V2] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
  2020-02-13 21:12 [PATCH, RFC] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero Eric Sandeen
  2020-02-13 23:48 ` Dave Chinner
@ 2020-02-14  1:05 ` " Eric Sandeen
  2020-02-14  1:34   ` Dave Chinner
  2020-02-22  3:22 ` [PATCH V3] " Eric Sandeen
  2 siblings, 1 reply; 10+ messages in thread
From: Eric Sandeen @ 2020-02-14  1:05 UTC (permalink / raw)
  To: linux-xfs

I had a request from someone who cared about mkfs speed(!)
over a slower network block device to look into using faster
zeroing methods, particularly for the log, during mkfs.

Using FALLOC_FL_ZERO_RANGE is faster in this case than writing
a bunch of zeros across a wire.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---

V2: Clean up all the nasty stuff I'd flung out there as a wild first
cut, thanks Dave.

diff --git a/include/builddefs.in b/include/builddefs.in
index 4700b527..1dd27f76 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -144,6 +144,9 @@ endif
 ifeq ($(HAVE_GETFSMAP),yes)
 PCFLAGS+= -DHAVE_GETFSMAP
 endif
+ifeq ($(HAVE_FALLOCATE),yes)
+PCFLAGS += -DHAVE_FALLOCATE
+endif
 
 LIBICU_LIBS = @libicu_LIBS@
 LIBICU_CFLAGS = @libicu_CFLAGS@
diff --git a/include/linux.h b/include/linux.h
index 8f3c32b0..8d5c4584 100644
--- a/include/linux.h
+++ b/include/linux.h
@@ -20,6 +20,10 @@
 #include <stdio.h>
 #include <asm/types.h>
 #include <mntent.h>
+#include <fcntl.h>
+#if defined(HAVE_FALLOCATE)
+#include <linux/falloc.h>
+#endif
 #ifdef OVERRIDE_SYSTEM_FSXATTR
 # define fsxattr sys_fsxattr
 #endif
@@ -164,6 +168,24 @@ static inline void platform_mntent_close(struct mntent_cursor * cursor)
 	endmntent(cursor->mtabp);
 }
 
+#if defined(FALLOC_FL_ZERO_RANGE)
+static inline int
+platform_zero_range(
+	int		fd,
+	xfs_off_t	start,
+	size_t		len)
+{
+	int ret;
+
+	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start, len);
+	if (!ret)
+		return 0;
+	return -errno;
+}
+#else
+#define platform_zero_range(fd, s, l)	(-EOPNOTSUPP)
+#endif
+
 /*
  * Check whether we have to define FS_IOC_FS[GS]ETXATTR ourselves. These
  * are a copy of the definitions moved to linux/uapi/fs.h in the 4.5 kernel,
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index 0d9d7202..2f6a3eb3 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -60,9 +60,19 @@ int
 libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 {
 	xfs_off_t	start_offset, end_offset, offset;
-	ssize_t		zsize, bytes;
+	ssize_t		zsize, bytes, len_bytes;
 	char		*z;
-	int		fd;
+	int		error, fd;
+
+	fd = libxfs_device_to_fd(btp->dev);
+	start_offset = LIBXFS_BBTOOFF64(start);
+	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
+
+	/* try to use special zeroing methods, fall back to writes if needed */
+	len_bytes = LIBXFS_BBTOOFF64(len);
+	error = platform_zero_range(fd, start_offset, len_bytes);
+	if (!error)
+		return 0;
 
 	zsize = min(BDSTRAT_SIZE, BBTOB(len));
 	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
@@ -73,9 +83,6 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 	}
 	memset(z, 0, zsize);
 
-	fd = libxfs_device_to_fd(btp->dev);
-	start_offset = LIBXFS_BBTOOFF64(start);
-
 	if ((lseek(fd, start_offset, SEEK_SET)) < 0) {
 		fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
 			progname, __FUNCTION__,
@@ -83,7 +90,6 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 		exit(1);
 	}
 
-	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
 	for (offset = 0; offset < end_offset; ) {
 		bytes = min((ssize_t)(end_offset - offset), zsize);
 		if ((bytes = write(fd, z, bytes)) < 0) {


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V2] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
  2020-02-14  1:05 ` [PATCH V2] " Eric Sandeen
@ 2020-02-14  1:34   ` Dave Chinner
  2020-02-14  1:43     ` Eric Sandeen
  0 siblings, 1 reply; 10+ messages in thread
From: Dave Chinner @ 2020-02-14  1:34 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: linux-xfs

On Thu, Feb 13, 2020 at 07:05:50PM -0600, Eric Sandeen wrote:
> I had a request from someone who cared about mkfs speed(!)
> over a slower network block device to look into using faster
> zeroing methods, particularly for the log, during mkfs.
> 
> Using FALLOC_FL_ZERO_RANGE is faster in this case than writing
> a bunch of zeros across a wire.
> 
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> ---
> 
> V2: Clean up all the nasty stuff I'd flung out there as a wild first
> cut, thanks Dave.
> 
> diff --git a/include/builddefs.in b/include/builddefs.in
> index 4700b527..1dd27f76 100644
> --- a/include/builddefs.in
> +++ b/include/builddefs.in
> @@ -144,6 +144,9 @@ endif
>  ifeq ($(HAVE_GETFSMAP),yes)
>  PCFLAGS+= -DHAVE_GETFSMAP
>  endif
> +ifeq ($(HAVE_FALLOCATE),yes)
> +PCFLAGS += -DHAVE_FALLOCATE
> +endif
>  
>  LIBICU_LIBS = @libicu_LIBS@
>  LIBICU_CFLAGS = @libicu_CFLAGS@
> diff --git a/include/linux.h b/include/linux.h
> index 8f3c32b0..8d5c4584 100644
> --- a/include/linux.h
> +++ b/include/linux.h
> @@ -20,6 +20,10 @@
>  #include <stdio.h>
>  #include <asm/types.h>
>  #include <mntent.h>
> +#include <fcntl.h>
> +#if defined(HAVE_FALLOCATE)
> +#include <linux/falloc.h>
> +#endif
>  #ifdef OVERRIDE_SYSTEM_FSXATTR
>  # define fsxattr sys_fsxattr
>  #endif
> @@ -164,6 +168,24 @@ static inline void platform_mntent_close(struct mntent_cursor * cursor)
>  	endmntent(cursor->mtabp);
>  }
>  
> +#if defined(FALLOC_FL_ZERO_RANGE)
> +static inline int
> +platform_zero_range(
> +	int		fd,
> +	xfs_off_t	start,
> +	size_t		len)
> +{
> +	int ret;
> +
> +	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start, len);
> +	if (!ret)
> +		return 0;
> +	return -errno;
> +}
> +#else
> +#define platform_zero_range(fd, s, l)	(-EOPNOTSUPP)
> +#endif

That all looks good. :)

>  /*
>   * Check whether we have to define FS_IOC_FS[GS]ETXATTR ourselves. These
>   * are a copy of the definitions moved to linux/uapi/fs.h in the 4.5 kernel,
> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> index 0d9d7202..2f6a3eb3 100644
> --- a/libxfs/rdwr.c
> +++ b/libxfs/rdwr.c
> @@ -60,9 +60,19 @@ int
>  libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
>  {
>  	xfs_off_t	start_offset, end_offset, offset;
> -	ssize_t		zsize, bytes;
> +	ssize_t		zsize, bytes, len_bytes;

len_bytes should be size_t, right?

>  	char		*z;
> -	int		fd;
> +	int		error, fd;
> +
> +	fd = libxfs_device_to_fd(btp->dev);
> +	start_offset = LIBXFS_BBTOOFF64(start);
> +	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
> +
> +	/* try to use special zeroing methods, fall back to writes if needed */
> +	len_bytes = LIBXFS_BBTOOFF64(len);
> +	error = platform_zero_range(fd, start_offset, len_bytes);

This is a bit ... convoluted, and doesn't end_offset = len_bytes?
i.e.

start_offset = start << BBSHIFT
len_bytes = len << BBSHIFT
end_offset = (start + len) << BBSHIFT - start_offset
	   = (start << BBSHIFT) + (len << BBSHIFT) - start_offset
	   = start_offset + len_bytes - start_offset
	   = len_bytes

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V2] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
  2020-02-14  1:34   ` Dave Chinner
@ 2020-02-14  1:43     ` Eric Sandeen
  0 siblings, 0 replies; 10+ messages in thread
From: Eric Sandeen @ 2020-02-14  1:43 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

On 2/13/20 7:34 PM, Dave Chinner wrote:
> On Thu, Feb 13, 2020 at 07:05:50PM -0600, Eric Sandeen wrote:

...

>>  /*
>>   * Check whether we have to define FS_IOC_FS[GS]ETXATTR ourselves. These
>>   * are a copy of the definitions moved to linux/uapi/fs.h in the 4.5 kernel,
>> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
>> index 0d9d7202..2f6a3eb3 100644
>> --- a/libxfs/rdwr.c
>> +++ b/libxfs/rdwr.c
>> @@ -60,9 +60,19 @@ int
>>  libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
>>  {
>>  	xfs_off_t	start_offset, end_offset, offset;
>> -	ssize_t		zsize, bytes;
>> +	ssize_t		zsize, bytes, len_bytes;
> 
> len_bytes should be size_t, right?

They probably all should be, TBH...
 
>>  	char		*z;
>> -	int		fd;
>> +	int		error, fd;
>> +
>> +	fd = libxfs_device_to_fd(btp->dev);
>> +	start_offset = LIBXFS_BBTOOFF64(start);
>> +	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
>> +
>> +	/* try to use special zeroing methods, fall back to writes if needed */
>> +	len_bytes = LIBXFS_BBTOOFF64(len);
>> +	error = platform_zero_range(fd, start_offset, len_bytes);
> 
> This is a bit ... convoluted, and doesn't end_offset = len_bytes?
> i.e.
> 
> start_offset = start << BBSHIFT
> len_bytes = len << BBSHIFT
> end_offset = (start + len) << BBSHIFT - start_offset
> 	   = (start << BBSHIFT) + (len << BBSHIFT) - start_offset
> 	   = start_offset + len_bytes - start_offset
> 	   = len_bytes

oh, yikes.  Good catch.  Before, it was used as

        end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
        for (offset = 0; offset < end_offset; ) {
                bytes = min((ssize_t)(end_offset - offset), zsize);
                if ((bytes = write(fd, z, bytes)) < 0) {

"end_offset" was not the ending offset of the whole range... that's what I get
for inferring too much from a variable name w/o looking at what it actually /does/

I'll sort that out, thanks.

-Eric

> Cheers,
> 
> Dave.
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH V3] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
  2020-02-13 21:12 [PATCH, RFC] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero Eric Sandeen
  2020-02-13 23:48 ` Dave Chinner
  2020-02-14  1:05 ` [PATCH V2] " Eric Sandeen
@ 2020-02-22  3:22 ` " Eric Sandeen
  2020-02-22  7:24   ` Darrick J. Wong
  2 siblings, 1 reply; 10+ messages in thread
From: Eric Sandeen @ 2020-02-22  3:22 UTC (permalink / raw)
  To: linux-xfs

I had a request from someone who cared about mkfs speed(!)
over a slower network block device to look into using faster
zeroing methods, particularly for the log, during mkfs.

Using FALLOC_FL_ZERO_RANGE is faster in this case than writing
a bunch of zeros across a wire.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---

V2: Clean up all the nasty stuff I'd flung out there as a wild first
cut, thanks Dave.

V3: make len_bytes a size_t; leave "end_offset" where it is for the loop
use.  It's a bit odd but ... just don't mess with it for now, one patch
one change.

diff --git a/include/builddefs.in b/include/builddefs.in
index 4700b527..1dd27f76 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -144,6 +144,9 @@ endif
 ifeq ($(HAVE_GETFSMAP),yes)
 PCFLAGS+= -DHAVE_GETFSMAP
 endif
+ifeq ($(HAVE_FALLOCATE),yes)
+PCFLAGS += -DHAVE_FALLOCATE
+endif
 
 LIBICU_LIBS = @libicu_LIBS@
 LIBICU_CFLAGS = @libicu_CFLAGS@
diff --git a/include/linux.h b/include/linux.h
index 8f3c32b0..8d5c4584 100644
--- a/include/linux.h
+++ b/include/linux.h
@@ -20,6 +20,10 @@
 #include <stdio.h>
 #include <asm/types.h>
 #include <mntent.h>
+#include <fcntl.h>
+#if defined(HAVE_FALLOCATE)
+#include <linux/falloc.h>
+#endif
 #ifdef OVERRIDE_SYSTEM_FSXATTR
 # define fsxattr sys_fsxattr
 #endif
@@ -164,6 +168,24 @@ static inline void platform_mntent_close(struct mntent_cursor * cursor)
 	endmntent(cursor->mtabp);
 }
 
+#if defined(FALLOC_FL_ZERO_RANGE)
+static inline int
+platform_zero_range(
+	int		fd,
+	xfs_off_t	start,
+	size_t		len)
+{
+	int ret;
+
+	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start, len);
+	if (!ret)
+		return 0;
+	return -errno;
+}
+#else
+#define platform_zero_range(fd, s, l)	(-EOPNOTSUP)
+#endif
+
 /*
  * Check whether we have to define FS_IOC_FS[GS]ETXATTR ourselves. These
  * are a copy of the definitions moved to linux/uapi/fs.h in the 4.5 kernel,
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index 0d9d7202..e2d9d790 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -61,8 +61,18 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 {
 	xfs_off_t	start_offset, end_offset, offset;
 	ssize_t		zsize, bytes;
+	size_t		len_bytes;
 	char		*z;
-	int		fd;
+	int		error, fd;
+
+	fd = libxfs_device_to_fd(btp->dev);
+	start_offset = LIBXFS_BBTOOFF64(start);
+
+	/* try to use special zeroing methods, fall back to writes if needed */
+	len_bytes = LIBXFS_BBTOOFF64(len);
+	error = platform_zero_range(fd, start_offset, len_bytes);
+	if (!error)
+		return 0;
 
 	zsize = min(BDSTRAT_SIZE, BBTOB(len));
 	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
@@ -73,9 +83,6 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 	}
 	memset(z, 0, zsize);
 
-	fd = libxfs_device_to_fd(btp->dev);
-	start_offset = LIBXFS_BBTOOFF64(start);
-
 	if ((lseek(fd, start_offset, SEEK_SET)) < 0) {
 		fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
 			progname, __FUNCTION__,



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V3] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
  2020-02-22  3:22 ` [PATCH V3] " Eric Sandeen
@ 2020-02-22  7:24   ` Darrick J. Wong
  2020-02-22 15:23     ` Eric Sandeen
  0 siblings, 1 reply; 10+ messages in thread
From: Darrick J. Wong @ 2020-02-22  7:24 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: linux-xfs

On Fri, Feb 21, 2020 at 09:22:12PM -0600, Eric Sandeen wrote:
> I had a request from someone who cared about mkfs speed(!)
> over a slower network block device to look into using faster
> zeroing methods, particularly for the log, during mkfs.
> 
> Using FALLOC_FL_ZERO_RANGE is faster in this case than writing
> a bunch of zeros across a wire.
> 
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> ---
> 
> V2: Clean up all the nasty stuff I'd flung out there as a wild first
> cut, thanks Dave.
> 
> V3: make len_bytes a size_t; leave "end_offset" where it is for the loop
> use.  It's a bit odd but ... just don't mess with it for now, one patch
> one change.
> 
> diff --git a/include/builddefs.in b/include/builddefs.in
> index 4700b527..1dd27f76 100644
> --- a/include/builddefs.in
> +++ b/include/builddefs.in
> @@ -144,6 +144,9 @@ endif
>  ifeq ($(HAVE_GETFSMAP),yes)
>  PCFLAGS+= -DHAVE_GETFSMAP
>  endif
> +ifeq ($(HAVE_FALLOCATE),yes)
> +PCFLAGS += -DHAVE_FALLOCATE
> +endif
>  
>  LIBICU_LIBS = @libicu_LIBS@
>  LIBICU_CFLAGS = @libicu_CFLAGS@
> diff --git a/include/linux.h b/include/linux.h
> index 8f3c32b0..8d5c4584 100644
> --- a/include/linux.h
> +++ b/include/linux.h
> @@ -20,6 +20,10 @@
>  #include <stdio.h>
>  #include <asm/types.h>
>  #include <mntent.h>
> +#include <fcntl.h>
> +#if defined(HAVE_FALLOCATE)
> +#include <linux/falloc.h>
> +#endif
>  #ifdef OVERRIDE_SYSTEM_FSXATTR
>  # define fsxattr sys_fsxattr
>  #endif
> @@ -164,6 +168,24 @@ static inline void platform_mntent_close(struct mntent_cursor * cursor)
>  	endmntent(cursor->mtabp);
>  }
>  
> +#if defined(FALLOC_FL_ZERO_RANGE)
> +static inline int
> +platform_zero_range(
> +	int		fd,
> +	xfs_off_t	start,
> +	size_t		len)

Seems fine to me, though it's unfortunate to cap this at u32.

> +{
> +	int ret;
> +
> +	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start, len);
> +	if (!ret)
> +		return 0;
> +	return -errno;
> +}
> +#else
> +#define platform_zero_range(fd, s, l)	(-EOPNOTSUP)

EOPNOTSUPP (two P's)

--D

> +#endif
> +
>  /*
>   * Check whether we have to define FS_IOC_FS[GS]ETXATTR ourselves. These
>   * are a copy of the definitions moved to linux/uapi/fs.h in the 4.5 kernel,
> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> index 0d9d7202..e2d9d790 100644
> --- a/libxfs/rdwr.c
> +++ b/libxfs/rdwr.c
> @@ -61,8 +61,18 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
>  {
>  	xfs_off_t	start_offset, end_offset, offset;
>  	ssize_t		zsize, bytes;
> +	size_t		len_bytes;
>  	char		*z;
> -	int		fd;
> +	int		error, fd;
> +
> +	fd = libxfs_device_to_fd(btp->dev);
> +	start_offset = LIBXFS_BBTOOFF64(start);
> +
> +	/* try to use special zeroing methods, fall back to writes if needed */
> +	len_bytes = LIBXFS_BBTOOFF64(len);
> +	error = platform_zero_range(fd, start_offset, len_bytes);
> +	if (!error)
> +		return 0;
>  
>  	zsize = min(BDSTRAT_SIZE, BBTOB(len));
>  	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
> @@ -73,9 +83,6 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
>  	}
>  	memset(z, 0, zsize);
>  
> -	fd = libxfs_device_to_fd(btp->dev);
> -	start_offset = LIBXFS_BBTOOFF64(start);
> -
>  	if ((lseek(fd, start_offset, SEEK_SET)) < 0) {
>  		fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
>  			progname, __FUNCTION__,
> 
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V3] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
  2020-02-22  7:24   ` Darrick J. Wong
@ 2020-02-22 15:23     ` Eric Sandeen
  0 siblings, 0 replies; 10+ messages in thread
From: Eric Sandeen @ 2020-02-22 15:23 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs



On 2/22/20 1:24 AM, Darrick J. Wong wrote:
> On Fri, Feb 21, 2020 at 09:22:12PM -0600, Eric Sandeen wrote:
>> I had a request from someone who cared about mkfs speed(!)
>> over a slower network block device to look into using faster
>> zeroing methods, particularly for the log, during mkfs.
>>
>> Using FALLOC_FL_ZERO_RANGE is faster in this case than writing
>> a bunch of zeros across a wire.
>>
>> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
>> ---
>>
>> V2: Clean up all the nasty stuff I'd flung out there as a wild first
>> cut, thanks Dave.
>>
>> V3: make len_bytes a size_t; leave "end_offset" where it is for the loop
>> use.  It's a bit odd but ... just don't mess with it for now, one patch
>> one change.
>>
>> diff --git a/include/builddefs.in b/include/builddefs.in
>> index 4700b527..1dd27f76 100644
>> --- a/include/builddefs.in
>> +++ b/include/builddefs.in
>> @@ -144,6 +144,9 @@ endif
>>  ifeq ($(HAVE_GETFSMAP),yes)
>>  PCFLAGS+= -DHAVE_GETFSMAP
>>  endif
>> +ifeq ($(HAVE_FALLOCATE),yes)
>> +PCFLAGS += -DHAVE_FALLOCATE
>> +endif
>>  
>>  LIBICU_LIBS = @libicu_LIBS@
>>  LIBICU_CFLAGS = @libicu_CFLAGS@
>> diff --git a/include/linux.h b/include/linux.h
>> index 8f3c32b0..8d5c4584 100644
>> --- a/include/linux.h
>> +++ b/include/linux.h
>> @@ -20,6 +20,10 @@
>>  #include <stdio.h>
>>  #include <asm/types.h>
>>  #include <mntent.h>
>> +#include <fcntl.h>
>> +#if defined(HAVE_FALLOCATE)
>> +#include <linux/falloc.h>
>> +#endif
>>  #ifdef OVERRIDE_SYSTEM_FSXATTR
>>  # define fsxattr sys_fsxattr
>>  #endif
>> @@ -164,6 +168,24 @@ static inline void platform_mntent_close(struct mntent_cursor * cursor)
>>  	endmntent(cursor->mtabp);
>>  }
>>  
>> +#if defined(FALLOC_FL_ZERO_RANGE)
>> +static inline int
>> +platform_zero_range(
>> +	int		fd,
>> +	xfs_off_t	start,
>> +	size_t		len)
> 
> Seems fine to me, though it's unfortunate to cap this at u32.

Hm well I suppose could/should expand it.  This starts out via libxfs_device_zero
as a uint "len" in sectors so it could/should be a bit bigger.

>> +{
>> +	int ret;
>> +
>> +	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start, len);
>> +	if (!ret)
>> +		return 0;
>> +	return -errno;
>> +}
>> +#else
>> +#define platform_zero_range(fd, s, l)	(-EOPNOTSUP)
> 
> EOPNOTSUPP (two P's)

Weird not sure how I picked that, though they are equivalent today.

*sigh* V4 I guess.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, back to index

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-13 21:12 [PATCH, RFC] libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero Eric Sandeen
2020-02-13 23:48 ` Dave Chinner
2020-02-13 23:57   ` Eric Sandeen
2020-02-14  0:25     ` Dave Chinner
2020-02-14  1:05 ` [PATCH V2] " Eric Sandeen
2020-02-14  1:34   ` Dave Chinner
2020-02-14  1:43     ` Eric Sandeen
2020-02-22  3:22 ` [PATCH V3] " Eric Sandeen
2020-02-22  7:24   ` Darrick J. Wong
2020-02-22 15:23     ` Eric Sandeen

Linux-XFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-xfs/0 linux-xfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-xfs linux-xfs/ https://lore.kernel.org/linux-xfs \
		linux-xfs@vger.kernel.org
	public-inbox-index linux-xfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-xfs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git