linux-bcachefs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v0] implement torture test when formatting device
@ 2023-03-03 12:21 Janpieter Sollie
  2023-03-04  2:57 ` Kent Overstreet
  0 siblings, 1 reply; 2+ messages in thread
From: Janpieter Sollie @ 2023-03-03 12:21 UTC (permalink / raw)
  To: linux-bcachefs

Hi everyone,

following my question yesterday:
hereby what I'd suggest as a torture test when formatting the device,
it would be implemented as a "torture" option, and not set by default.
Whereas this code is still not failproof (which will be for v1), and error handling should be 
returned instead of dying,
is it worth it to keep writing in libbcachefs.c?
Or should I go completely rust-way instead?

Janpieter Sollie

diff --git a/cmd_format.c b/cmd_format.c
index 26a1cd9..17eff3d 100644
--- a/cmd_format.c
+++ b/cmd_format.c
@@ -40,6 +40,7 @@ x(0,  superblock_size,        required_argument)      \
x(0,   bucket_size,            required_argument)      \
x('l', label,                  required_argument)      \
x(0,   discard,                no_argument)            \
+x(0,   torture,                no_argument)            \
x(0,   data_allowed,           required_argument)      \
x(0,   durability,             required_argument)      \
x(0,   version,                required_argument)      \
@@ -180,6 +181,9 @@ int cmd_format(int argc, char *argv[])
                case O_discard:
                        dev_opts.discard = true;
                        break;
+               case O_torture:
+                       opts.torture = true;
+                       break;
                case O_data_allowed:
                        dev_opts.data_allowed =
                                read_flag_list_or_die(optarg,
diff --git a/libbcachefs.c b/libbcachefs.c
index 092a54a..8b39654 100644
--- a/libbcachefs.c
+++ b/libbcachefs.c
@@ -8,6 +8,8 @@
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
+#include <sys/random.h>
+#include <sys/ioctl.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <time.h>
@@ -27,6 +29,11 @@
#include "libbcachefs/super-io.h"
#include "tools-util.h"

+
+#ifndef BLKDISCARD
+# define BLKDISCARD    _IO(0x12,119)
+#endif
+
#define NSEC_PER_SEC   1000000000L

static void init_layout(struct bch_sb_layout *l,
@@ -142,6 +149,72 @@ static unsigned parse_target(struct bch_sb_handle *sb,
        return 0;
}

+static int torture_target(struct dev_opts* i, const struct bch_opts fs_opts)
+{
+       const uint32_t nr_of_tests = 0x001ffff; //not sure whether this will be enough
+       uint32_t test_iterator;
+       uint64_t position_iterator[2];
+       uint64_t range[2];
+       void* random_buffer = malloc(fs_opts.block_size);
+       void* read_buffer = malloc(fs_opts.block_size);
+       void* zero_buffer = calloc(fs_opts.block_size, 1);
+
+       /* generate a block of random data */
+       if (getrandom(random_buffer, fs_opts.block_size, 0) != fs_opts.block_size)
+               die("internal torture test error");
+
+       /* open the device as synchronous as possible */
+       i->fd = open(i->path, O_RDWR, O_DIRECT | O_SYNC);
+       if(i->fd == -1)
+               die("torture test error: opening device %s failed", i->path);
+
+       /* start round zero: */
+       position_iterator[0] = (uint64_t) (random() % i->size) / fs_opts.block_size;
+       position_iterator[1] = (uint64_t) (random() % i->size) / fs_opts.block_size;
+       range[0] = position_iterator[1] * fs_opts.block_size;
+       range[1] = fs_opts.block_size;
+
+       lseek(i->fd, position_iterator[0] * fs_opts.block_size, SEEK_SET);
+       if (write(i->fd, random_buffer, fs_opts.block_size) == -1)
+               die("torture testing write operation failed %s", i->path);
+       if (i->discard && ioctl(i->fd, BLKDISCARD, &range))
+               die("torture testing discard operation failed %s", i->path);
+
+       for( test_iterator = 0; test_iterator < nr_of_tests; test_iterator++ )
+       {
+                       /* verify previous round */
+                       lseek(i->fd, position_iterator[0] * fs_opts.block_size, SEEK_SET);
+                       if (read(i->fd, read_buffer, fs_opts.block_size) == -1)
+                               die ("random torture test read error at device %s", i->path);
+                       if ( memcmp(read_buffer, random_buffer, fs_opts.block_size) != 0)
+                               die ("random torture test integrity check failed at device %s", i->path);
+                       if (i->discard)
+                       {
+                               lseek(i->fd, position_iterator[0] * fs_opts.block_size, SEEK_SET);
+                               if ( read(i->fd, read_buffer, fs_opts.block_size) == -1 ||
+                                       memcmp(read_buffer, zero_buffer, fs_opts.block_size) != 0)
+                                       die ("random torture test discarded read failed at device %s", i->path);
+                       }
+
+                       /* write new round: */
+                       position_iterator[0] = (uint64_t) (random() % i->size) / fs_opts.block_size;
+                       position_iterator[1] = (uint64_t) (random() % i->size) / fs_opts.block_size;
+                       range[0] = position_iterator[1] * fs_opts.block_size;
+
+                       lseek(i->fd, position_iterator[0] * fs_opts.block_size, SEEK_SET);
+                       if (write(i->fd, random_buffer, fs_opts.block_size) == -1)
+                               die("torture testing write operation failed %s", i->path);
+                       if (i->discard && ioctl(i->fd, BLKDISCARD, &range))
+                               die("torture testing discard operation failed %s", i->path);
+       }
+
+       close(i->fd);
+       free(random_buffer);
+       free(read_buffer);
+       free(zero_buffer);
+       return 0;
+}
+
struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
                           struct bch_opts      fs_opts,
                           struct format_opts   opts,
@@ -186,6 +259,10 @@ struct bch_sb *bch2_format(struct bch_opt_strs     fs_opt_strs,
        if (bch2_sb_realloc(&sb, 0))
                die("insufficient memory");

+       /* some devices may need checking: */
+       if(opts.torture)
+               for (i = devs; i < devs + nr_devs; i++) torture_target(i, fs_opts);
+
        sb.sb->version          = le16_to_cpu(opts.version);
        sb.sb->version_min      = le16_to_cpu(opts.version);
        sb.sb->magic            = BCHFS_MAGIC;
diff --git a/libbcachefs.h b/libbcachefs.h
index 4bb51bd..bce9894 100644
--- a/libbcachefs.h
+++ b/libbcachefs.h
@@ -37,6 +37,7 @@ struct format_opts {
        unsigned        superblock_size;
        bool            encrypted;
        char            *passphrase;
+       bool            torture;
};

static inline struct format_opts format_opts_default()
@@ -48,6 +49,7 @@ static inline struct format_opts format_opts_default()
        return (struct format_opts) {
                .version                = version,
                .superblock_size        = SUPERBLOCK_SIZE_DEFAULT,
+               .torture                = false,
        };
}

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v0] implement torture test when formatting device
  2023-03-03 12:21 [PATCH v0] implement torture test when formatting device Janpieter Sollie
@ 2023-03-04  2:57 ` Kent Overstreet
  0 siblings, 0 replies; 2+ messages in thread
From: Kent Overstreet @ 2023-03-04  2:57 UTC (permalink / raw)
  To: Janpieter Sollie; +Cc: linux-bcachefs

On Fri, Mar 03, 2023 at 01:21:47PM +0100, Janpieter Sollie wrote:
> Hi everyone,
> 
> following my question yesterday:
> hereby what I'd suggest as a torture test when formatting the device,
> it would be implemented as a "torture" option, and not set by default.
> Whereas this code is still not failproof (which will be for v1), and error
> handling should be returned instead of dying,
> is it worth it to keep writing in libbcachefs.c?
> Or should I go completely rust-way instead?

I'm not sure this is something that belongs in bcachefs; this seems like
it should be a generic tool, and then we could add an option to
'bcachefs format' to call it pre-format.

Alternately: if it was going to be integrated into the filesystem, the
reason to do that would be to make it a runtime thing, sort of an
ongoing 'health check'.

But we already have tools like smartctl, so we'd have to be clear on what the
goals are and what we're trying to provide.

There _is_ something related that I've been wanting, though. We need
latency numbers for every device - we need to track each device's
current latency, so we know which device(s) to send reads to, and we
need to know the latency each device is capable of, so we know if a
device is congested.

That is, we want baseline and current numbers for every device. Right
now we're collecting latency quantiles for every device and using (IIRC)
99th percentile latency for baseline, but computing quantiles is an
expense I'd like to get rid of - having a baseline would be better.

If we're computing baseline numbers, we could also re-compute them with
a short test on every mount and track them over time - I think that's
something we could reasonably add. That'd let us provide the user with
some basic performance numbers for each device and a history over time.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-03-04  2:57 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-03 12:21 [PATCH v0] implement torture test when formatting device Janpieter Sollie
2023-03-04  2:57 ` Kent Overstreet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).