* [PATCH v8 1/7] tests/virtio-9p: added split readdir tests
  2020-07-29  8:53 [PATCH v8 0/7] 9pfs: readdir optimization Christian Schoenebeck
@ 2020-07-29  8:10 ` Christian Schoenebeck
  2020-07-29 15:42   ` Greg Kurz
  2020-07-29  8:11 ` [PATCH v8 2/7] 9pfs: make v9fs_readdir_response_size() public Christian Schoenebeck
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 15+ messages in thread
From: Christian Schoenebeck @ 2020-07-29  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Greg Kurz

The previous, already existing 'basic' readdir test simply used a
'count' parameter big enough to retrieve all directory entries with a
single Treaddir request.

In the 3 new 'split' readdir tests added by this patch, directory
entries are retrieved, split over several Treaddir requests by picking
small 'count' parameters which force the server to truncate the
response. So the test client sends as many Treaddir requests as
necessary to get all directory entries.

The following 3 new tests are added (executed in this sequence):

1. Split readdir test with count=512
2. Split readdir test with count=256
3. Split readdir test with count=128

This test case sequence is chosen because the smaller the 'count' value,
the higher the chance of errors in case of implementation bugs on server
side.
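
(For orientation: an Rreaddir reply has an 11 byte header, i.e. size[4]
type[1] tag[2] count[4], and each returned entry occupies qid[13]
offset[8] type[1] name[s] on the wire, i.e. 24 bytes plus the name
length. So count=128 for instance only leaves room for a handful of
entries per response.)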

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
---
 tests/qtest/virtio-9p-test.c | 108 +++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)

diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
index 2167322985..de30b717b6 100644
--- a/tests/qtest/virtio-9p-test.c
+++ b/tests/qtest/virtio-9p-test.c
@@ -578,6 +578,7 @@ static bool fs_dirents_contain_name(struct V9fsDirent *e, const char* name)
     return false;
 }
 
+/* basic readdir test where reply fits into a single response message */
 static void fs_readdir(void *obj, void *data, QGuestAllocator *t_alloc)
 {
     QVirtio9P *v9p = obj;
@@ -631,6 +632,89 @@ static void fs_readdir(void *obj, void *data, QGuestAllocator *t_alloc)
     g_free(wnames[0]);
 }
 
+/* readdir test where overall request is split over several messages */
+static void fs_readdir_split(void *obj, void *data, QGuestAllocator *t_alloc,
+                             uint32_t count)
+{
+    QVirtio9P *v9p = obj;
+    alloc = t_alloc;
+    char *const wnames[] = { g_strdup(QTEST_V9FS_SYNTH_READDIR_DIR) };
+    uint16_t nqid;
+    v9fs_qid qid;
+    uint32_t nentries, npartialentries;
+    struct V9fsDirent *entries, *tail, *partialentries;
+    P9Req *req;
+    int fid;
+    uint64_t offset;
+
+    fs_attach(v9p, NULL, t_alloc);
+
+    fid = 1;
+    offset = 0;
+    entries = NULL;
+    nentries = 0;
+    tail = NULL;
+
+    req = v9fs_twalk(v9p, 0, fid, 1, wnames, 0);
+    v9fs_req_wait_for_reply(req, NULL);
+    v9fs_rwalk(req, &nqid, NULL);
+    g_assert_cmpint(nqid, ==, 1);
+
+    req = v9fs_tlopen(v9p, fid, O_DIRECTORY, 0);
+    v9fs_req_wait_for_reply(req, NULL);
+    v9fs_rlopen(req, &qid, NULL);
+
+    /*
+     * send as many Treaddir requests as required to get all directory
+     * entries
+     */
+    while (true) {
+        npartialentries = 0;
+        partialentries = NULL;
+
+        req = v9fs_treaddir(v9p, fid, offset, count, 0);
+        v9fs_req_wait_for_reply(req, NULL);
+        v9fs_rreaddir(req, &count, &npartialentries, &partialentries);
+        if (npartialentries > 0 && partialentries) {
+            if (!entries) {
+                entries = partialentries;
+                nentries = npartialentries;
+                tail = partialentries;
+            } else {
+                tail->next = partialentries;
+                nentries += npartialentries;
+            }
+            while (tail->next) {
+                tail = tail->next;
+            }
+            offset = tail->offset;
+        } else {
+            break;
+        }
+    }
+
+    g_assert_cmpint(
+        nentries, ==,
+        QTEST_V9FS_SYNTH_READDIR_NFILES + 2 /* "." and ".." */
+    );
+
+    /*
+     * Check all file names exist in returned entries, ignore their order
+     * though.
+     */
+    g_assert_cmpint(fs_dirents_contain_name(entries, "."), ==, true);
+    g_assert_cmpint(fs_dirents_contain_name(entries, ".."), ==, true);
+    for (int i = 0; i < QTEST_V9FS_SYNTH_READDIR_NFILES; ++i) {
+        char *name = g_strdup_printf(QTEST_V9FS_SYNTH_READDIR_FILE, i);
+        g_assert_cmpint(fs_dirents_contain_name(entries, name), ==, true);
+        g_free(name);
+    }
+
+    v9fs_free_dirents(entries);
+
+    g_free(wnames[0]);
+}
+
 static void fs_walk_no_slash(void *obj, void *data, QGuestAllocator *t_alloc)
 {
     QVirtio9P *v9p = obj;
@@ -793,6 +877,24 @@ static void fs_flush_ignored(void *obj, void *data, QGuestAllocator *t_alloc)
     g_free(wnames[0]);
 }
 
+static void fs_readdir_split_128(void *obj, void *data,
+                                 QGuestAllocator *t_alloc)
+{
+    fs_readdir_split(obj, data, t_alloc, 128);
+}
+
+static void fs_readdir_split_256(void *obj, void *data,
+                                 QGuestAllocator *t_alloc)
+{
+    fs_readdir_split(obj, data, t_alloc, 256);
+}
+
+static void fs_readdir_split_512(void *obj, void *data,
+                                 QGuestAllocator *t_alloc)
+{
+    fs_readdir_split(obj, data, t_alloc, 512);
+}
+
 static void register_virtio_9p_test(void)
 {
     qos_add_test("config", "virtio-9p", pci_config, NULL);
@@ -810,6 +912,12 @@ static void register_virtio_9p_test(void)
     qos_add_test("fs/flush/ignored", "virtio-9p", fs_flush_ignored,
                  NULL);
     qos_add_test("fs/readdir/basic", "virtio-9p", fs_readdir, NULL);
+    qos_add_test("fs/readdir/split_512", "virtio-9p",
+                 fs_readdir_split_512, NULL);
+    qos_add_test("fs/readdir/split_256", "virtio-9p",
+                 fs_readdir_split_256, NULL);
+    qos_add_test("fs/readdir/split_128", "virtio-9p",
+                 fs_readdir_split_128, NULL);
 }
 
 libqos_init(register_virtio_9p_test);
-- 
2.20.1




* [PATCH v8 2/7] 9pfs: make v9fs_readdir_response_size() public
  2020-07-29  8:53 [PATCH v8 0/7] 9pfs: readdir optimization Christian Schoenebeck
  2020-07-29  8:10 ` [PATCH v8 1/7] tests/virtio-9p: added split readdir tests Christian Schoenebeck
@ 2020-07-29  8:11 ` Christian Schoenebeck
  2020-07-29 15:44   ` Greg Kurz
  2020-07-29  8:11 ` [PATCH v8 3/7] 9pfs: split out fs driver core of v9fs_co_readdir() Christian Schoenebeck
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 15+ messages in thread
From: Christian Schoenebeck @ 2020-07-29  8:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: Greg Kurz

Rename function v9fs_readdir_data_size() -> v9fs_readdir_response_size()
and make it callable from other units. So far this function is only
used by 9p.c, however subsequent patches require the function to be
callable from another 9pfs unit. And while we are at it, also make it
clear what this function is used for.
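
For reference, the renamed function's body (cut off in the hunk below)
boils down to a fixed per-entry overhead plus the name length:

    /* qid[13] + offset[8] + type[1] + name.size[2] = 24 bytes fixed */
    return 24 + v9fs_string_size(name);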

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
---
 hw/9pfs/9p.c | 10 ++++++++--
 hw/9pfs/9p.h |  1 +
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 2ffd96ade9..7a228c4828 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -2313,7 +2313,13 @@ out_nofid:
     pdu_complete(pdu, err);
 }
 
-static size_t v9fs_readdir_data_size(V9fsString *name)
+/**
+ * Returns size required in Rreaddir response for the passed dirent @p name.
+ *
+ * @param name - directory entry's name (i.e. file name, directory name)
+ * @returns required size in bytes
+ */
+size_t v9fs_readdir_response_size(V9fsString *name)
 {
     /*
      * Size of each dirent on the wire: size of qid (13) + size of offset (8)
@@ -2348,7 +2354,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
         }
         v9fs_string_init(&name);
         v9fs_string_sprintf(&name, "%s", dent->d_name);
-        if ((count + v9fs_readdir_data_size(&name)) > max_count) {
+        if ((count + v9fs_readdir_response_size(&name)) > max_count) {
             v9fs_readdir_unlock(&fidp->fs.dir);
 
             /* Ran out of buffer. Set dir back to old position and return */
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index ee2271663c..561774e843 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -419,6 +419,7 @@ void v9fs_path_init(V9fsPath *path);
 void v9fs_path_free(V9fsPath *path);
 void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...);
 void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src);
+size_t v9fs_readdir_response_size(V9fsString *name);
 int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
                       const char *name, V9fsPath *path);
 int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
-- 
2.20.1




* [PATCH v8 3/7] 9pfs: split out fs driver core of v9fs_co_readdir()
  2020-07-29  8:53 [PATCH v8 0/7] 9pfs: readdir optimization Christian Schoenebeck
  2020-07-29  8:10 ` [PATCH v8 1/7] tests/virtio-9p: added split readdir tests Christian Schoenebeck
  2020-07-29  8:11 ` [PATCH v8 2/7] 9pfs: make v9fs_readdir_response_size() public Christian Schoenebeck
@ 2020-07-29  8:11 ` Christian Schoenebeck
  2020-07-29 16:02   ` Greg Kurz
  2020-07-29  8:12 ` [PATCH v8 4/7] 9pfs: add new function v9fs_co_readdir_many() Christian Schoenebeck
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 15+ messages in thread
From: Christian Schoenebeck @ 2020-07-29  8:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: Greg Kurz

The implementation of v9fs_co_readdir() has two parts: the outer
part is executed by the main I/O thread, whereas the inner part is
executed by the fs driver on a background I/O thread.

Move the inner part to its own new, private function do_readdir(),
so it can be shared by another upcoming new function.

This is just a preparatory patch for the subsequent patch, with the
purpose of preventing the next patch from cluttering the overall diff.

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
---
 hw/9pfs/codir.c | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
index 73f9a751e1..ff57fb8619 100644
--- a/hw/9pfs/codir.c
+++ b/hw/9pfs/codir.c
@@ -18,28 +18,37 @@
 #include "qemu/main-loop.h"
 #include "coth.h"
 
+/*
+ * This must solely be executed on a background IO thread.
+ */
+static int do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, struct dirent **dent)
+{
+    int err = 0;
+    V9fsState *s = pdu->s;
+    struct dirent *entry;
+
+    errno = 0;
+    entry = s->ops->readdir(&s->ctx, &fidp->fs);
+    if (!entry && errno) {
+        *dent = NULL;
+        err = -errno;
+    } else {
+        *dent = entry;
+    }
+    return err;
+}
+
 int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
                                  struct dirent **dent)
 {
     int err;
-    V9fsState *s = pdu->s;
 
     if (v9fs_request_cancelled(pdu)) {
         return -EINTR;
     }
-    v9fs_co_run_in_worker(
-        {
-            struct dirent *entry;
-
-            errno = 0;
-            entry = s->ops->readdir(&s->ctx, &fidp->fs);
-            if (!entry && errno) {
-                err = -errno;
-            } else {
-                *dent = entry;
-                err = 0;
-            }
-        });
+    v9fs_co_run_in_worker({
+        err = do_readdir(pdu, fidp, dent);
+    });
     return err;
 }
 
-- 
2.20.1




* [PATCH v8 4/7] 9pfs: add new function v9fs_co_readdir_many()
  2020-07-29  8:53 [PATCH v8 0/7] 9pfs: readdir optimization Christian Schoenebeck
                   ` (2 preceding siblings ...)
  2020-07-29  8:11 ` [PATCH v8 3/7] 9pfs: split out fs driver core of v9fs_co_readdir() Christian Schoenebeck
@ 2020-07-29  8:12 ` Christian Schoenebeck
  2020-07-30 10:08   ` Christian Schoenebeck
  2020-07-29  8:13 ` [PATCH v8 5/7] 9pfs: T_readdir latency optimization Christian Schoenebeck
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 15+ messages in thread
From: Christian Schoenebeck @ 2020-07-29  8:12 UTC (permalink / raw)
  To: qemu-devel; +Cc: Greg Kurz

The newly added function v9fs_co_readdir_many() retrieves multiple
directory entries with a single fs driver request. It is intended to
replace uses of v9fs_co_readdir(), which only retrieves a single
directory entry per fs driver request.

The reason for this planned replacement is that for every fs driver
request the coroutine is dispatched from the main I/O thread to a
background I/O thread and eventually dispatched back to the main I/O
thread. Hopping between threads adds latency. So if a 9pfs Treaddir
request reads a large number of directory entries, this currently
sums up to huge latencies of several hundred ms or even more. So
using v9fs_co_readdir_many() instead of v9fs_co_readdir() will
provide significant performance improvements.
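
As a rough sketch (variable names mine), the intended call pattern,
which a subsequent patch introduces in v9fs_do_readdir(), looks like
this:

    struct V9fsDirEnt *entries = NULL;
    int n = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count,
                                 dostat);
    if (n >= 0) {
        /* n is the marshalled size in bytes of all collected entries */
        for (struct V9fsDirEnt *e = entries; e; e = e->next) {
            /* consume e->dent, and e->st if dostat was requested */
        }
    }
    v9fs_free_dirents(entries); /* always required, also on error */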

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
---
 hw/9pfs/9p.h    |  22 +++++++
 hw/9pfs/codir.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++++
 hw/9pfs/coth.h  |   3 +
 3 files changed, 190 insertions(+)

diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 561774e843..93b7030edf 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -215,6 +215,28 @@ static inline void v9fs_readdir_init(V9fsDir *dir)
     qemu_co_mutex_init(&dir->readdir_mutex);
 }
 
+/**
+ * Type for 9p fs drivers' (a.k.a. 9p backends) result of readdir requests,
+ * which is a chained list of directory entries.
+ */
+typedef struct V9fsDirEnt {
+    /* mandatory (must not be NULL) information for all readdir requests */
+    struct dirent *dent;
+    /*
+     * optional (may be NULL): a full stat is only performed for each
+     * directory entry if explicitly requested from the fs driver.
+     */
+    struct stat *st;
+    /*
+     * directory entries are always returned as a chained list instead of
+     * an array, because the number of entries retrieved by fs drivers
+     * depends on the individual entries' names (response messages are
+     * size limited), so the final amount cannot be estimated
+     * beforehand
+     */
+    struct V9fsDirEnt *next;
+} V9fsDirEnt;
+
 /*
  * Filled by fs driver on open and other
  * calls.
diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
index ff57fb8619..8a8128ea13 100644
--- a/hw/9pfs/codir.c
+++ b/hw/9pfs/codir.c
@@ -38,6 +38,10 @@ static int do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, struct dirent **dent)
     return err;
 }
 
+/*
+ * TODO: This will be removed for performance reasons.
+ * Use v9fs_co_readdir_many() instead.
+ */
 int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
                                  struct dirent **dent)
 {
@@ -52,6 +56,167 @@ int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
     return err;
 }
 
+/*
+ * This is solely executed on a background IO thread.
+ *
+ * See v9fs_co_readdir_many() (as its only user) below for details.
+ */
+static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
+                           struct V9fsDirEnt **entries, off_t offset,
+                           int32_t maxsize, bool dostat)
+{
+    V9fsState *s = pdu->s;
+    V9fsString name;
+    int len, err = 0;
+    int32_t size = 0;
+    off_t saved_dir_pos;
+    struct dirent *dent;
+    struct V9fsDirEnt *e = NULL;
+    V9fsPath path;
+    struct stat stbuf;
+
+    *entries = NULL;
+    v9fs_path_init(&path);
+
+    /*
+     * TODO: There should be a warn_report_once() here if locking failed.
+     *
+     * With a good 9p client we should not get into concurrency here,
+     * because a good client would not use the same fid for concurrent
+     * requests. We do the lock here for safety reasons though. However
+     * the client would then suffer performance issues, so better log that
+     * issue here.
+     */
+    v9fs_readdir_lock(&fidp->fs.dir);
+
+    /* seek directory to requested initial position */
+    if (offset == 0) {
+        s->ops->rewinddir(&s->ctx, &fidp->fs);
+    } else {
+        s->ops->seekdir(&s->ctx, &fidp->fs, offset);
+    }
+
+    /* save the directory position */
+    saved_dir_pos = s->ops->telldir(&s->ctx, &fidp->fs);
+    if (saved_dir_pos < 0) {
+        err = saved_dir_pos;
+        goto out;
+    }
+
+    while (true) {
+        /* get directory entry from fs driver */
+        err = do_readdir(pdu, fidp, &dent);
+        if (err || !dent) {
+            break;
+        }
+
+        /*
+         * stop this loop as soon as it would exceed the allowed maximum
+         * response message size for the directory entries collected so far,
+         * because anything beyond that size would need to be discarded by
+         * 9p controller (main thread / top half) anyway
+         */
+        v9fs_string_init(&name);
+        v9fs_string_sprintf(&name, "%s", dent->d_name);
+        len = v9fs_readdir_response_size(&name);
+        v9fs_string_free(&name);
+        if (size + len > maxsize) {
+            /* this is not an error case actually */
+            break;
+        }
+
+        /* append next node to result chain */
+        if (!e) {
+            *entries = e = g_malloc0(sizeof(V9fsDirEnt));
+        } else {
+            e = e->next = g_malloc0(sizeof(V9fsDirEnt));
+        }
+        e->dent = g_malloc0(sizeof(struct dirent));
+        memcpy(e->dent, dent, sizeof(struct dirent));
+
+        /* perform a full stat() for directory entry if requested by caller */
+        if (dostat) {
+            err = s->ops->name_to_path(
+                &s->ctx, &fidp->path, dent->d_name, &path
+            );
+            if (err < 0) {
+                err = -errno;
+                break;
+            }
+
+            err = s->ops->lstat(&s->ctx, &path, &stbuf);
+            if (err < 0) {
+                err = -errno;
+                break;
+            }
+
+            e->st = g_malloc0(sizeof(struct stat));
+            memcpy(e->st, &stbuf, sizeof(struct stat));
+        }
+
+        size += len;
+        saved_dir_pos = dent->d_off;
+    }
+
+    /* restore (last) saved position */
+    s->ops->seekdir(&s->ctx, &fidp->fs, saved_dir_pos);
+
+out:
+    v9fs_readdir_unlock(&fidp->fs.dir);
+    v9fs_path_free(&path);
+    if (err < 0) {
+        return err;
+    }
+    return size;
+}
+
+/**
+ * @brief Reads multiple directory entries in one rush.
+ *
+ * Retrieves the requested (max. amount of) directory entries from the fs
+ * driver. This function must only be called by the main IO thread (top half).
+ * Internally this function call will be dispatched to a background IO thread
+ * (bottom half) where it is eventually executed by the fs driver.
+ *
+ * @discussion Acquiring multiple directory entries in one rush from the fs
+ * driver, instead of retrieving each directory entry individually, is very
+ * beneficial from a performance point of view, because every fs driver
+ * request adds latency; in practice this could lead to overall latencies
+ * of several hundred ms for reading all entries (of just a single
+ * directory) if every directory entry were individually requested from
+ * the fs driver.
+ *
+ * @note You must @b ALWAYS call @c v9fs_free_dirents(entries) after calling
+ * v9fs_co_readdir_many(), both on success and on error cases of this
+ * function, to avoid memory leaks once @p entries are no longer needed.
+ *
+ * @param pdu - the causing 9p (T_readdir) client request
+ * @param fidp - already opened directory where readdir shall be performed on
+ * @param entries - output for directory entries (must not be NULL)
+ * @param offset - initial position inside the directory the function shall
+ *                 seek to before retrieving the directory entries
+ * @param maxsize - maximum result message body size (in bytes)
+ * @param dostat - whether a stat() should be performed and returned for
+ *                 each directory entry
+ * @returns resulting response message body size (in bytes) on success,
+ *          negative error code otherwise
+ */
+int coroutine_fn v9fs_co_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
+                                      struct V9fsDirEnt **entries,
+                                      off_t offset, int32_t maxsize,
+                                      bool dostat)
+{
+    int err = 0;
+
+    if (v9fs_request_cancelled(pdu)) {
+        return -EINTR;
+    }
+    v9fs_co_run_in_worker({
+        err = do_readdir_many(pdu, fidp, entries, offset, maxsize, dostat);
+    });
+    return err;
+}
+
 off_t v9fs_co_telldir(V9fsPDU *pdu, V9fsFidState *fidp)
 {
     off_t err;
diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h
index c2cdc7a9ea..fd4a45bc7c 100644
--- a/hw/9pfs/coth.h
+++ b/hw/9pfs/coth.h
@@ -49,6 +49,9 @@
 void co_run_in_worker_bh(void *);
 int coroutine_fn v9fs_co_readlink(V9fsPDU *, V9fsPath *, V9fsString *);
 int coroutine_fn v9fs_co_readdir(V9fsPDU *, V9fsFidState *, struct dirent **);
+int coroutine_fn v9fs_co_readdir_many(V9fsPDU *, V9fsFidState *,
+                                      struct V9fsDirEnt **, off_t, int32_t,
+                                      bool);
 off_t coroutine_fn v9fs_co_telldir(V9fsPDU *, V9fsFidState *);
 void coroutine_fn v9fs_co_seekdir(V9fsPDU *, V9fsFidState *, off_t);
 void coroutine_fn v9fs_co_rewinddir(V9fsPDU *, V9fsFidState *);
-- 
2.20.1




* [PATCH v8 5/7] 9pfs: T_readdir latency optimization
  2020-07-29  8:53 [PATCH v8 0/7] 9pfs: readdir optimization Christian Schoenebeck
                   ` (3 preceding siblings ...)
  2020-07-29  8:12 ` [PATCH v8 4/7] 9pfs: add new function v9fs_co_readdir_many() Christian Schoenebeck
@ 2020-07-29  8:13 ` Christian Schoenebeck
  2020-07-29  8:39 ` [PATCH v8 6/7] 9pfs: differentiate readdir lock between 9P2000.u vs. 9P2000.L Christian Schoenebeck
  2020-07-29  8:42 ` [PATCH v8 7/7] 9pfs: clarify latency of v9fs_co_run_in_worker() Christian Schoenebeck
  6 siblings, 0 replies; 15+ messages in thread
From: Christian Schoenebeck @ 2020-07-29  8:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Greg Kurz

Make top half really top half and bottom half really bottom half:

Each T_readdir request's handling currently hops between threads (the
main I/O thread and background I/O driver threads) several times for
every individual directory entry, which sums up to huge latencies for
handling just a single T_readdir request.

Instead of doing that, now collect all required directory entries
(including all potentially required stat buffers for each entry) in
one rush on a background I/O thread from the fs driver by calling the
previously added function v9fs_co_readdir_many() instead of
v9fs_co_readdir(), then assemble the entire resulting network
response message for the readdir request on the main I/O thread. The
fs driver still aborts the directory entry retrieval loop
(on the background I/O thread inside of v9fs_co_readdir_many())
as soon as it would exceed the client's requested maximum R_readdir
response size. So this will not introduce a performance penalty on
the other end.

Also: No longer seek initial directory position in v9fs_readdir(),
as this is now handled (more consistently) by
v9fs_co_readdir_many() instead.
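
As a rough back-of-envelope figure (assuming something on the order of
0.1 ms per coroutine round trip to a worker thread): with inode
remapping enabled, the old code paid up to three such round trips per
directory entry (readdir, name_to_path, lstat), i.e. around 300 ms of
pure dispatch overhead for a directory with 1000 entries, which is in
line with the several hundred ms latencies mentioned in the cover
letter. The new code pays that round trip cost exactly once per
T_readdir request.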

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
---
 hw/9pfs/9p.c | 132 ++++++++++++++++++++++-----------------------------
 1 file changed, 58 insertions(+), 74 deletions(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 7a228c4828..cc4094b971 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -972,30 +972,6 @@ static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
     return 0;
 }
 
-static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
-                                      struct dirent *dent, V9fsQID *qidp)
-{
-    struct stat stbuf;
-    V9fsPath path;
-    int err;
-
-    v9fs_path_init(&path);
-
-    err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
-    if (err < 0) {
-        goto out;
-    }
-    err = v9fs_co_lstat(pdu, &path, &stbuf);
-    if (err < 0) {
-        goto out;
-    }
-    err = stat_to_qid(pdu, &stbuf, qidp);
-
-out:
-    v9fs_path_free(&path);
-    return err;
-}
-
 V9fsPDU *pdu_alloc(V9fsState *s)
 {
     V9fsPDU *pdu = NULL;
@@ -2328,62 +2304,74 @@ size_t v9fs_readdir_response_size(V9fsString *name)
     return 24 + v9fs_string_size(name);
 }
 
+static void v9fs_free_dirents(struct V9fsDirEnt *e)
+{
+    struct V9fsDirEnt *next = NULL;
+
+    for (; e; e = next) {
+        next = e->next;
+        g_free(e->dent);
+        g_free(e->st);
+        g_free(e);
+    }
+}
+
 static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
-                                        int32_t max_count)
+                                        off_t offset, int32_t max_count)
 {
     size_t size;
     V9fsQID qid;
     V9fsString name;
     int len, err = 0;
     int32_t count = 0;
-    off_t saved_dir_pos;
     struct dirent *dent;
+    struct stat *st;
+    struct V9fsDirEnt *entries = NULL;
 
-    /* save the directory position */
-    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
-    if (saved_dir_pos < 0) {
-        return saved_dir_pos;
-    }
-
-    while (1) {
-        v9fs_readdir_lock(&fidp->fs.dir);
+    /*
+     * inode remapping requires the device id, which in turn might be
+     * different for different directory entries, so if inode remapping is
+     * enabled we have to make a full stat for each directory entry
+     */
+    const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES;
 
-        err = v9fs_co_readdir(pdu, fidp, &dent);
-        if (err || !dent) {
-            break;
-        }
-        v9fs_string_init(&name);
-        v9fs_string_sprintf(&name, "%s", dent->d_name);
-        if ((count + v9fs_readdir_response_size(&name)) > max_count) {
-            v9fs_readdir_unlock(&fidp->fs.dir);
+    /*
+     * Fetch all required directory entries altogether on a background IO
+     * thread from fs driver. We don't want to do that for each entry
+     * individually, because hopping between threads (this main IO thread
+     * and background IO driver thread) would sum up to huge latencies.
+     */
+    count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count,
+                                 dostat);
+    if (count < 0) {
+        err = count;
+        count = 0;
+        goto out;
+    }
+    count = 0;
 
-            /* Ran out of buffer. Set dir back to old position and return */
-            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
-            v9fs_string_free(&name);
-            return count;
-        }
+    for (struct V9fsDirEnt *e = entries; e; e = e->next) {
+        dent = e->dent;
 
         if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
-            /*
-             * dirent_to_qid() implies expensive stat call for each entry,
-             * we must do that here though since inode remapping requires
-             * the device id, which in turn might be different for
-             * different entries; we cannot make any assumption to avoid
-             * that here.
-             */
-            err = dirent_to_qid(pdu, fidp, dent, &qid);
+            st = e->st;
+            /* e->st should never be NULL, but just to be sure */
+            if (!st) {
+                err = -1;
+                break;
+            }
+
+            /* remap inode */
+            err = stat_to_qid(pdu, st, &qid);
             if (err < 0) {
-                v9fs_readdir_unlock(&fidp->fs.dir);
-                v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
-                v9fs_string_free(&name);
-                return err;
+                break;
             }
         } else {
             /*
              * Fill up just the path field of qid because the client uses
              * only that. To fill the entire qid structure we will have
              * to stat each dirent found, which is expensive. For the
-             * latter reason we don't call dirent_to_qid() here. Only drawback
+             * latter reason we don't call stat_to_qid() here. Only drawback
              * is that no multi-device export detection of stat_to_qid()
              * would be done and provided as error to the user here. But
              * user would get that error anyway when accessing those
@@ -2396,25 +2384,26 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
             qid.version = 0;
         }
 
+        v9fs_string_init(&name);
+        v9fs_string_sprintf(&name, "%s", dent->d_name);
+
         /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
         len = pdu_marshal(pdu, 11 + count, "Qqbs",
                           &qid, dent->d_off,
                           dent->d_type, &name);
 
-        v9fs_readdir_unlock(&fidp->fs.dir);
+        v9fs_string_free(&name);
 
         if (len < 0) {
-            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
-            v9fs_string_free(&name);
-            return len;
+            err = len;
+            break;
         }
+
         count += len;
-        v9fs_string_free(&name);
-        saved_dir_pos = dent->d_off;
     }
 
-    v9fs_readdir_unlock(&fidp->fs.dir);
-
+out:
+    v9fs_free_dirents(entries);
     if (err < 0) {
         return err;
     }
@@ -2457,12 +2446,7 @@ static void coroutine_fn v9fs_readdir(void *opaque)
         retval = -EINVAL;
         goto out;
     }
-    if (initial_offset == 0) {
-        v9fs_co_rewinddir(pdu, fidp);
-    } else {
-        v9fs_co_seekdir(pdu, fidp, initial_offset);
-    }
-    count = v9fs_do_readdir(pdu, fidp, max_count);
+    count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count);
     if (count < 0) {
         retval = count;
         goto out;
-- 
2.20.1




* [PATCH v8 6/7] 9pfs: differentiate readdir lock between 9P2000.u vs. 9P2000.L
  2020-07-29  8:53 [PATCH v8 0/7] 9pfs: readdir optimization Christian Schoenebeck
                   ` (4 preceding siblings ...)
  2020-07-29  8:13 ` [PATCH v8 5/7] 9pfs: T_readdir latency optimization Christian Schoenebeck
@ 2020-07-29  8:39 ` Christian Schoenebeck
  2020-07-29  8:42 ` [PATCH v8 7/7] 9pfs: clarify latency of v9fs_co_run_in_worker() Christian Schoenebeck
  6 siblings, 0 replies; 15+ messages in thread
From: Christian Schoenebeck @ 2020-07-29  8:39 UTC (permalink / raw)
  To: qemu-devel; +Cc: Greg Kurz

The previous patch suggests that it might make sense to use a different
mutex type now while handling readdir requests, depending on the precise
protocol variant, as v9fs_do_readdir_with_stat() (used by 9P2000.u) uses
a CoMutex to avoid deadlocks that might happen with a QemuMutex otherwise,
whereas do_readdir_many() (used by 9P2000.L) should rather use a
QemuMutex, as the precise behaviour of a failed CoMutex lock on the fs
driver side would not be clear (a CoMutex suspends the calling coroutine,
whereas a QemuMutex blocks the underlying OS thread, which is acceptable
on a background worker thread but not on the main loop).

And to avoid the wrong lock type being used, now be strict and error out
if a 9P2000.L client sends a Tread on a directory, and likewise error out
if a 9P2000.u client sends a Treaddir request.

This patch is just intended as a transitional measure, as the 9P2000.u
and 9P2000.L implementations currently differ in where the main logic of
fetching directory entries is located (9P2000.u still being more top half
focused, while 9P2000.L is already bottom half focused with regard to
fetching directory entries).

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
---
 hw/9pfs/9p.c | 21 ++++++++++++++++++---
 hw/9pfs/9p.h | 27 ++++++++++++++++++++++-----
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index cc4094b971..7bb994bbf2 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -314,8 +314,8 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
     f->next = s->fid_list;
     s->fid_list = f;
 
-    v9fs_readdir_init(&f->fs.dir);
-    v9fs_readdir_init(&f->fs_reclaim.dir);
+    v9fs_readdir_init(s->proto_version, &f->fs.dir);
+    v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir);
 
     return f;
 }
@@ -2228,7 +2228,14 @@ static void coroutine_fn v9fs_read(void *opaque)
         goto out_nofid;
     }
     if (fidp->fid_type == P9_FID_DIR) {
-
+        if (s->proto_version != V9FS_PROTO_2000U) {
+            warn_report_once(
+                "9p: bad client: T_read request on directory only expected "
+                "with 9P2000.u protocol version"
+            );
+            err = -EOPNOTSUPP;
+            goto out;
+        }
         if (off == 0) {
             v9fs_co_rewinddir(pdu, fidp);
         }
@@ -2446,6 +2453,14 @@ static void coroutine_fn v9fs_readdir(void *opaque)
         retval = -EINVAL;
         goto out;
     }
+    if (s->proto_version != V9FS_PROTO_2000L) {
+        warn_report_once(
+            "9p: bad client: T_readdir request only expected with 9P2000.L "
+            "protocol version"
+        );
+        retval = -EOPNOTSUPP;
+        goto out;
+    }
     count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count);
     if (count < 0) {
         retval = count;
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 93b7030edf..3dd1b50b1a 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -197,22 +197,39 @@ typedef struct V9fsXattr
 
 typedef struct V9fsDir {
     DIR *stream;
-    CoMutex readdir_mutex;
+    P9ProtoVersion proto_version;
+    /* readdir mutex type used for 9P2000.u protocol variant */
+    CoMutex readdir_mutex_u;
+    /* readdir mutex type used for 9P2000.L protocol variant */
+    QemuMutex readdir_mutex_L;
 } V9fsDir;
 
 static inline void v9fs_readdir_lock(V9fsDir *dir)
 {
-    qemu_co_mutex_lock(&dir->readdir_mutex);
+    if (dir->proto_version == V9FS_PROTO_2000U) {
+        qemu_co_mutex_lock(&dir->readdir_mutex_u);
+    } else {
+        qemu_mutex_lock(&dir->readdir_mutex_L);
+    }
 }
 
 static inline void v9fs_readdir_unlock(V9fsDir *dir)
 {
-    qemu_co_mutex_unlock(&dir->readdir_mutex);
+    if (dir->proto_version == V9FS_PROTO_2000U) {
+        qemu_co_mutex_unlock(&dir->readdir_mutex_u);
+    } else {
+        qemu_mutex_unlock(&dir->readdir_mutex_L);
+    }
 }
 
-static inline void v9fs_readdir_init(V9fsDir *dir)
+static inline void v9fs_readdir_init(P9ProtoVersion proto_version, V9fsDir *dir)
 {
-    qemu_co_mutex_init(&dir->readdir_mutex);
+    dir->proto_version = proto_version;
+    if (proto_version == V9FS_PROTO_2000U) {
+        qemu_co_mutex_init(&dir->readdir_mutex_u);
+    } else {
+        qemu_mutex_init(&dir->readdir_mutex_L);
+    }
 }
 
 /**
-- 
2.20.1




* [PATCH v8 7/7] 9pfs: clarify latency of v9fs_co_run_in_worker()
  2020-07-29  8:53 [PATCH v8 0/7] 9pfs: readdir optimization Christian Schoenebeck
                   ` (5 preceding siblings ...)
  2020-07-29  8:39 ` [PATCH v8 6/7] 9pfs: differentiate readdir lock between 9P2000.u vs. 9P2000.L Christian Schoenebeck
@ 2020-07-29  8:42 ` Christian Schoenebeck
  6 siblings, 0 replies; 15+ messages in thread
From: Christian Schoenebeck @ 2020-07-29  8:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: Greg Kurz

As we just fixed a severe performance issue with Treaddir request
handling, clarify this overall issue as a comment on
v9fs_co_run_in_worker() with the intention to hopefully prevent
such performance mistakes in the future (and to help fix other yet
outstanding ones).
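
To illustrate the rule of thumb being documented (fetch_item() being a
hypothetical placeholder for some blocking fs driver call):

    int i, err = 0;

    /* anti-pattern: one worker thread round trip per item */
    for (i = 0; i < n; ++i) {
        v9fs_co_run_in_worker({
            err = fetch_item(i);
        });
    }

    /* preferred: a single round trip fetching all items in one rush */
    v9fs_co_run_in_worker({
        for (i = 0; i < n; ++i) {
            err = fetch_item(i);
        }
    });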

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
---
 hw/9pfs/coth.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h
index fd4a45bc7c..c51289903d 100644
--- a/hw/9pfs/coth.h
+++ b/hw/9pfs/coth.h
@@ -19,7 +19,7 @@
 #include "qemu/coroutine.h"
 #include "9p.h"
 
-/*
+/**
  * we want to use bottom half because we want to make sure the below
  * sequence of events.
  *
@@ -28,6 +28,16 @@
  *   3. Enter the coroutine in the worker thread.
  * we cannot swap step 1 and 2, because that would imply worker thread
  * can enter coroutine while step1 is still running
+ *
+ * @b PERFORMANCE @b CONSIDERATIONS: As a rule of thumb, keep in mind
+ * that hopping between threads adds @b latency! So when handling a
+ * 9pfs request, avoid calling v9fs_co_run_in_worker() too often, because
+ * this might otherwise sum up to a huge overall latency for providing
+ * the response for just a single request. For that reason it is highly
+ * recommended to first fetch all data from the fs driver with a single
+ * fs driver request on a background I/O thread (bottom half) in one
+ * rush, and to then assemble the final response from that data on the
+ * main I/O thread (top half).
  */
 #define v9fs_co_run_in_worker(code_block)                               \
     do {                                                                \
-- 
2.20.1




* [PATCH v8 0/7] 9pfs: readdir optimization
@ 2020-07-29  8:53 Christian Schoenebeck
  2020-07-29  8:10 ` [PATCH v8 1/7] tests/virtio-9p: added split readdir tests Christian Schoenebeck
                   ` (6 more replies)
  0 siblings, 7 replies; 15+ messages in thread
From: Christian Schoenebeck @ 2020-07-29  8:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: Greg Kurz

As previously mentioned, I was investigating performance issues with 9pfs.
Raw file read/write of 9pfs is actually quite good, provided that the
client picked a reasonably high msize (maximum message size). I would
recommend logging a warning on the 9p server side if a client attaches
with a small msize that would cause performance issues for that reason.
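
(As a rough example of the msize impact: reading a 100 MiB file with
msize = 8 KiB requires roughly 12800 9p round trips, whereas
msize = 256 KiB cuts that down to about 400.)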

However there are other aspects where 9pfs currently performs suboptimally.
Especially readdir handling of 9pfs is extremely slow: a simple readdir
request of a guest typically blocks for several hundred milliseconds or
even several seconds, no matter how powerful the underlying hardware is.
The reason for this performance issue: latency.
Currently 9pfs is heavily dispatching a T_readdir request numerous times
between the main I/O thread and a background I/O thread, back and forth; in
fact it is hopping between threads multiple times for every single
directory entry during T_readdir request handling, which in total leads to
huge latencies for a single T_readdir request.

This patch series aims to address this severe performance issue of 9pfs
T_readdir request handling. The actual performance optimization is patch 5.

v7->v8:

  * Split previous patch 3 into two patches [patch 3], [patch 4].

  * Error out if a 9p2000.u client sends a Treaddir request, likewise error
    out if a 9p2000.L client sends a Tread request on a directory [patch 6].

Unchanged patches: [patch 1], [patch 2], [patch 5], [patch 7].

Message-ID of previous version (v7):
  cover.1595166227.git.qemu_oss@crudebyte.com

Message-ID of version with performance benchmark (v4):
  cover.1579567019.git.qemu_oss@crudebyte.com

Christian Schoenebeck (7):
  tests/virtio-9p: added split readdir tests
  9pfs: make v9fs_readdir_response_size() public
  9pfs: split out fs driver core of v9fs_co_readdir()
  9pfs: add new function v9fs_co_readdir_many()
  9pfs: T_readdir latency optimization
  9pfs: differentiate readdir lock between 9P2000.u vs. 9P2000.L
  9pfs: clarify latency of v9fs_co_run_in_worker()

 hw/9pfs/9p.c                 | 159 ++++++++++++++--------------
 hw/9pfs/9p.h                 |  50 ++++++++-
 hw/9pfs/codir.c              | 196 +++++++++++++++++++++++++++++++++--
 hw/9pfs/coth.h               |  15 ++-
 tests/qtest/virtio-9p-test.c | 108 +++++++++++++++++++
 5 files changed, 434 insertions(+), 94 deletions(-)

-- 
2.20.1




* Re: [PATCH v8 1/7] tests/virtio-9p: added split readdir tests
  2020-07-29  8:10 ` [PATCH v8 1/7] tests/virtio-9p: added split readdir tests Christian Schoenebeck
@ 2020-07-29 15:42   ` Greg Kurz
  2020-07-29 17:24     ` Christian Schoenebeck
  0 siblings, 1 reply; 15+ messages in thread
From: Greg Kurz @ 2020-07-29 15:42 UTC (permalink / raw)
  To: Christian Schoenebeck; +Cc: qemu-devel

On Wed, 29 Jul 2020 10:10:23 +0200
Christian Schoenebeck <qemu_oss@crudebyte.com> wrote:

> The previous, already existing 'basic' readdir test simply used a
> 'count' parameter big enough to retrieve all directory entries with a
> single Treaddir request.
> 
> In the 3 new 'split' readdir tests added by this patch, directory
> entries are retrieved, split over several Treaddir requests by picking
> small 'count' parameters which force the server to truncate the
> response. So the test client sends as many Treaddir requests as
> necessary to get all directory entries.
> 
> The following 3 new tests are added (executed in this sequence):
> 
> 1. Split readdir test with count=512
> 2. Split readdir test with count=256
> 3. Split readdir test with count=128
> 
> This test case sequence is chosen because the smaller the 'count' value,
> the higher the chance of errors in case of implementation bugs on server
> side.
> 
> Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
> ---

The existing fs_readdir() function for the 'basic' test is a subset
of the new fs_readdir_split() introduced by this patch (quite visible
if you sdiff the code).

To avoid code duplication, I would have probably tried to do the changes
in fs_readdir() and implement the 'basic' test as:

static void fs_readdir_basic(void *obj, void *data,
                             QGuestAllocator *t_alloc)
{
    /*
     * submit count = msize - 11, because 11 is the header size of Rreaddir
     */
    fs_readdir(obj, data, t_alloc, P9_MAX_SIZE - 11);
}

but anyway this looks good to me so:

Reviewed-by: Greg Kurz <groug@kaod.org>

>  tests/qtest/virtio-9p-test.c | 108 +++++++++++++++++++++++++++++++++++
>  1 file changed, 108 insertions(+)
> 
> diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> index 2167322985..de30b717b6 100644
> --- a/tests/qtest/virtio-9p-test.c
> +++ b/tests/qtest/virtio-9p-test.c
> @@ -578,6 +578,7 @@ static bool fs_dirents_contain_name(struct V9fsDirent *e, const char* name)
>      return false;
>  }
>  
> +/* basic readdir test where reply fits into a single response message */
>  static void fs_readdir(void *obj, void *data, QGuestAllocator *t_alloc)
>  {
>      QVirtio9P *v9p = obj;
> @@ -631,6 +632,89 @@ static void fs_readdir(void *obj, void *data, QGuestAllocator *t_alloc)
>      g_free(wnames[0]);
>  }
>  
> +/* readdir test where overall request is split over several messages */
> +static void fs_readdir_split(void *obj, void *data, QGuestAllocator *t_alloc,
> +                             uint32_t count)
> +{
> +    QVirtio9P *v9p = obj;
> +    alloc = t_alloc;
> +    char *const wnames[] = { g_strdup(QTEST_V9FS_SYNTH_READDIR_DIR) };
> +    uint16_t nqid;
> +    v9fs_qid qid;
> +    uint32_t nentries, npartialentries;
> +    struct V9fsDirent *entries, *tail, *partialentries;
> +    P9Req *req;
> +    int fid;
> +    uint64_t offset;
> +
> +    fs_attach(v9p, NULL, t_alloc);
> +
> +    fid = 1;
> +    offset = 0;
> +    entries = NULL;
> +    nentries = 0;
> +    tail = NULL;
> +
> +    req = v9fs_twalk(v9p, 0, fid, 1, wnames, 0);
> +    v9fs_req_wait_for_reply(req, NULL);
> +    v9fs_rwalk(req, &nqid, NULL);
> +    g_assert_cmpint(nqid, ==, 1);
> +
> +    req = v9fs_tlopen(v9p, fid, O_DIRECTORY, 0);
> +    v9fs_req_wait_for_reply(req, NULL);
> +    v9fs_rlopen(req, &qid, NULL);
> +
> +    /*
> +     * send as many Treaddir requests as required to get all directory
> +     * entries
> +     */
> +    while (true) {
> +        npartialentries = 0;
> +        partialentries = NULL;
> +
> +        req = v9fs_treaddir(v9p, fid, offset, count, 0);
> +        v9fs_req_wait_for_reply(req, NULL);
> +        v9fs_rreaddir(req, &count, &npartialentries, &partialentries);
> +        if (npartialentries > 0 && partialentries) {
> +            if (!entries) {
> +                entries = partialentries;
> +                nentries = npartialentries;
> +                tail = partialentries;
> +            } else {
> +                tail->next = partialentries;
> +                nentries += npartialentries;
> +            }
> +            while (tail->next) {
> +                tail = tail->next;
> +            }
> +            offset = tail->offset;
> +        } else {
> +            break;
> +        }
> +    }
> +
> +    g_assert_cmpint(
> +        nentries, ==,
> +        QTEST_V9FS_SYNTH_READDIR_NFILES + 2 /* "." and ".." */
> +    );
> +
> +    /*
> +     * Check all file names exist in returned entries, ignore their order
> +     * though.
> +     */
> +    g_assert_cmpint(fs_dirents_contain_name(entries, "."), ==, true);
> +    g_assert_cmpint(fs_dirents_contain_name(entries, ".."), ==, true);
> +    for (int i = 0; i < QTEST_V9FS_SYNTH_READDIR_NFILES; ++i) {
> +        char *name = g_strdup_printf(QTEST_V9FS_SYNTH_READDIR_FILE, i);
> +        g_assert_cmpint(fs_dirents_contain_name(entries, name), ==, true);
> +        g_free(name);
> +    }
> +
> +    v9fs_free_dirents(entries);
> +
> +    g_free(wnames[0]);
> +}
> +
>  static void fs_walk_no_slash(void *obj, void *data, QGuestAllocator *t_alloc)
>  {
>      QVirtio9P *v9p = obj;
> @@ -793,6 +877,24 @@ static void fs_flush_ignored(void *obj, void *data, QGuestAllocator *t_alloc)
>      g_free(wnames[0]);
>  }
>  
> +static void fs_readdir_split_128(void *obj, void *data,
> +                                 QGuestAllocator *t_alloc)
> +{
> +    fs_readdir_split(obj, data, t_alloc, 128);
> +}
> +
> +static void fs_readdir_split_256(void *obj, void *data,
> +                                 QGuestAllocator *t_alloc)
> +{
> +    fs_readdir_split(obj, data, t_alloc, 256);
> +}
> +
> +static void fs_readdir_split_512(void *obj, void *data,
> +                                 QGuestAllocator *t_alloc)
> +{
> +    fs_readdir_split(obj, data, t_alloc, 512);
> +}
> +
>  static void register_virtio_9p_test(void)
>  {
>      qos_add_test("config", "virtio-9p", pci_config, NULL);
> @@ -810,6 +912,12 @@ static void register_virtio_9p_test(void)
>      qos_add_test("fs/flush/ignored", "virtio-9p", fs_flush_ignored,
>                   NULL);
>      qos_add_test("fs/readdir/basic", "virtio-9p", fs_readdir, NULL);
> +    qos_add_test("fs/readdir/split_512", "virtio-9p",
> +                 fs_readdir_split_512, NULL);
> +    qos_add_test("fs/readdir/split_256", "virtio-9p",
> +                 fs_readdir_split_256, NULL);
> +    qos_add_test("fs/readdir/split_128", "virtio-9p",
> +                 fs_readdir_split_128, NULL);
>  }
>  
>  libqos_init(register_virtio_9p_test);




* Re: [PATCH v8 2/7] 9pfs: make v9fs_readdir_response_size() public
  2020-07-29  8:11 ` [PATCH v8 2/7] 9pfs: make v9fs_readdir_response_size() public Christian Schoenebeck
@ 2020-07-29 15:44   ` Greg Kurz
  0 siblings, 0 replies; 15+ messages in thread
From: Greg Kurz @ 2020-07-29 15:44 UTC (permalink / raw)
  To: Christian Schoenebeck; +Cc: qemu-devel

On Wed, 29 Jul 2020 10:11:15 +0200
Christian Schoenebeck <qemu_oss@crudebyte.com> wrote:

> Rename function v9fs_readdir_data_size() -> v9fs_readdir_response_size()
> and make it callable from other units. So far this function is only
> used by 9p.c, however subsequent patches require the function to be
> callable from another 9pfs unit. And while we are at it, also make it
> clear what this function is used for.
> 
> Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
> ---

Reviewed-by: Greg Kurz <groug@kaod.org>

>  hw/9pfs/9p.c | 10 ++++++++--
>  hw/9pfs/9p.h |  1 +
>  2 files changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> index 2ffd96ade9..7a228c4828 100644
> --- a/hw/9pfs/9p.c
> +++ b/hw/9pfs/9p.c
> @@ -2313,7 +2313,13 @@ out_nofid:
>      pdu_complete(pdu, err);
>  }
>  
> -static size_t v9fs_readdir_data_size(V9fsString *name)
> +/**
> + * Returns size required in Rreaddir response for the passed dirent @p name.
> + *
> + * @param name - directory entry's name (i.e. file name, directory name)
> + * @returns required size in bytes
> + */
> +size_t v9fs_readdir_response_size(V9fsString *name)
>  {
>      /*
>       * Size of each dirent on the wire: size of qid (13) + size of offset (8)
> @@ -2348,7 +2354,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
>          }
>          v9fs_string_init(&name);
>          v9fs_string_sprintf(&name, "%s", dent->d_name);
> -        if ((count + v9fs_readdir_data_size(&name)) > max_count) {
> +        if ((count + v9fs_readdir_response_size(&name)) > max_count) {
>              v9fs_readdir_unlock(&fidp->fs.dir);
>  
>              /* Ran out of buffer. Set dir back to old position and return */
> diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
> index ee2271663c..561774e843 100644
> --- a/hw/9pfs/9p.h
> +++ b/hw/9pfs/9p.h
> @@ -419,6 +419,7 @@ void v9fs_path_init(V9fsPath *path);
>  void v9fs_path_free(V9fsPath *path);
>  void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...);
>  void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src);
> +size_t v9fs_readdir_response_size(V9fsString *name);
>  int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
>                        const char *name, V9fsPath *path);
>  int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,




* Re: [PATCH v8 3/7] 9pfs: split out fs driver core of v9fs_co_readdir()
  2020-07-29  8:11 ` [PATCH v8 3/7] 9pfs: split out fs driver core of v9fs_co_readdir() Christian Schoenebeck
@ 2020-07-29 16:02   ` Greg Kurz
  2020-07-29 17:38     ` Christian Schoenebeck
  0 siblings, 1 reply; 15+ messages in thread
From: Greg Kurz @ 2020-07-29 16:02 UTC (permalink / raw)
  To: Christian Schoenebeck; +Cc: qemu-devel

On Wed, 29 Jul 2020 10:11:54 +0200
Christian Schoenebeck <qemu_oss@crudebyte.com> wrote:

> The implementation of v9fs_co_readdir() has two parts: the outer
> part is executed by the main I/O thread, whereas the inner part is
> executed by the fs driver on a background I/O thread.
> 
> Move the inner part to its own new, private function do_readdir(),
> so it can be shared by another upcoming new function.
> 
> This is just a preparatory patch for the subsequent patch, with the
> purpose of preventing the next patch from cluttering the overall diff.
> 
> Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
> ---
>  hw/9pfs/codir.c | 37 +++++++++++++++++++++++--------------
>  1 file changed, 23 insertions(+), 14 deletions(-)
> 
> diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
> index 73f9a751e1..ff57fb8619 100644
> --- a/hw/9pfs/codir.c
> +++ b/hw/9pfs/codir.c
> @@ -18,28 +18,37 @@
>  #include "qemu/main-loop.h"
>  #include "coth.h"
>  
> +/*
> + * This must solely be executed on a background IO thread.
> + */

Well, technically this function could be called from any context
but of course calling it from the main I/O thread when handling
T_readdir would make the request synchronous, which is certainly
not what we want. So I'm not sure this comment brings much.

Anyway, the code change is okay so:

Reviewed-by: Greg Kurz <groug@kaod.org>

> +static int do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, struct dirent **dent)
> +{
> +    int err = 0;
> +    V9fsState *s = pdu->s;
> +    struct dirent *entry;
> +
> +    errno = 0;
> +    entry = s->ops->readdir(&s->ctx, &fidp->fs);
> +    if (!entry && errno) {
> +        *dent = NULL;
> +        err = -errno;
> +    } else {
> +        *dent = entry;
> +    }
> +    return err;
> +}
> +
>  int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
>                                   struct dirent **dent)
>  {
>      int err;
> -    V9fsState *s = pdu->s;
>  
>      if (v9fs_request_cancelled(pdu)) {
>          return -EINTR;
>      }
> -    v9fs_co_run_in_worker(
> -        {
> -            struct dirent *entry;
> -
> -            errno = 0;
> -            entry = s->ops->readdir(&s->ctx, &fidp->fs);
> -            if (!entry && errno) {
> -                err = -errno;
> -            } else {
> -                *dent = entry;
> -                err = 0;
> -            }
> -        });
> +    v9fs_co_run_in_worker({
> +        err = do_readdir(pdu, fidp, dent);
> +    });
>      return err;
>  }
>  




* Re: [PATCH v8 1/7] tests/virtio-9p: added split readdir tests
  2020-07-29 15:42   ` Greg Kurz
@ 2020-07-29 17:24     ` Christian Schoenebeck
  0 siblings, 0 replies; 15+ messages in thread
From: Christian Schoenebeck @ 2020-07-29 17:24 UTC (permalink / raw)
  To: qemu-devel; +Cc: Greg Kurz

On Mittwoch, 29. Juli 2020 17:42:54 CEST Greg Kurz wrote:
> On Wed, 29 Jul 2020 10:10:23 +0200
> 
> Christian Schoenebeck <qemu_oss@crudebyte.com> wrote:
> > The previous, already existing 'basic' readdir test simply used a
> > 'count' parameter big enough to retrieve all directory entries with a
> > single Treaddir request.
> > 
> > In the 3 new 'split' readdir tests added by this patch, directory
> > entries are retrieved, split over several Treaddir requests by picking
> > small 'count' parameters which force the server to truncate the
> > response. So the test client sends as many Treaddir requests as
> > necessary to get all directory entries.
> > 
> > The following 3 new tests are added (executed in this sequence):
> > 
> > 1. Split readdir test with count=512
> > 2. Split readdir test with count=256
> > 3. Split readdir test with count=128
> > 
> > This test case sequence is chosen because the smaller the 'count' value,
> > the higher the chance of errors in case of implementation bugs on server
> > side.
> > 
> > Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
> > ---
> 
> The existing fs_readdir() function for the 'basic' test is a subset
> of the new fs_readdir_split() introduced by this patch (quite visible
> if you sdiff the code).
> 
> To avoid code duplication, I would have probably tried to do the changes
> in fs_readdir() and implement the 'basic' test as:
> 
> static void fs_readdir_basic(void *obj, void *data,
>                              QGuestAllocator *t_alloc)
> {
>     /*
>      * submit count = msize - 11, because 11 is the header size of Rreaddir
>      */
>     fs_readdir(obj, data, t_alloc, P9_MAX_SIZE - 11);
> }

You are right of course; there is code duplication. My thought was to preserve 
the simple readdir test code (at least at this initial stage) as it is really 
very simple and easy to understand.

The split readdir test code is probably already a tad more tedious to read.

I'll keep it in mind though and will probably deduplicate this test code a
bit later on. But I think it makes sense to start off with this version for
now.

> but anyway this looks good to me so:
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>

Thanks!

> >  tests/qtest/virtio-9p-test.c | 108 +++++++++++++++++++++++++++++++++++
> >  1 file changed, 108 insertions(+)
> > 
> > diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> > index 2167322985..de30b717b6 100644
> > --- a/tests/qtest/virtio-9p-test.c
> > +++ b/tests/qtest/virtio-9p-test.c
> > @@ -578,6 +578,7 @@ static bool fs_dirents_contain_name(struct V9fsDirent *e, const char* name)
> >      return false;
> >  }
> >  
> > +/* basic readdir test where reply fits into a single response message */
> >  static void fs_readdir(void *obj, void *data, QGuestAllocator *t_alloc)
> >  {
> >      QVirtio9P *v9p = obj;
> > @@ -631,6 +632,89 @@ static void fs_readdir(void *obj, void *data, QGuestAllocator *t_alloc)
> >      g_free(wnames[0]);
> >  }
> >  
> > +/* readdir test where overall request is split over several messages */
> > +static void fs_readdir_split(void *obj, void *data, QGuestAllocator *t_alloc,
> > +                             uint32_t count)
> > +{
> > +    QVirtio9P *v9p = obj;
> > +    alloc = t_alloc;
> > +    char *const wnames[] = { g_strdup(QTEST_V9FS_SYNTH_READDIR_DIR) };
> > +    uint16_t nqid;
> > +    v9fs_qid qid;
> > +    uint32_t nentries, npartialentries;
> > +    struct V9fsDirent *entries, *tail, *partialentries;
> > +    P9Req *req;
> > +    int fid;
> > +    uint64_t offset;
> > +
> > +    fs_attach(v9p, NULL, t_alloc);
> > +
> > +    fid = 1;
> > +    offset = 0;
> > +    entries = NULL;
> > +    nentries = 0;
> > +    tail = NULL;
> > +
> > +    req = v9fs_twalk(v9p, 0, fid, 1, wnames, 0);
> > +    v9fs_req_wait_for_reply(req, NULL);
> > +    v9fs_rwalk(req, &nqid, NULL);
> > +    g_assert_cmpint(nqid, ==, 1);
> > +
> > +    req = v9fs_tlopen(v9p, fid, O_DIRECTORY, 0);
> > +    v9fs_req_wait_for_reply(req, NULL);
> > +    v9fs_rlopen(req, &qid, NULL);
> > +
> > +    /*
> > +     * send as many Treaddir requests as required to get all directory
> > +     * entries
> > +     */
> > +    while (true) {
> > +        npartialentries = 0;
> > +        partialentries = NULL;
> > +
> > +        req = v9fs_treaddir(v9p, fid, offset, count, 0);
> > +        v9fs_req_wait_for_reply(req, NULL);
> > +        v9fs_rreaddir(req, &count, &npartialentries, &partialentries);
> > +        if (npartialentries > 0 && partialentries) {
> > +            if (!entries) {
> > +                entries = partialentries;
> > +                nentries = npartialentries;
> > +                tail = partialentries;
> > +            } else {
> > +                tail->next = partialentries;
> > +                nentries += npartialentries;
> > +            }
> > +            while (tail->next) {
> > +                tail = tail->next;
> > +            }
> > +            offset = tail->offset;
> > +        } else {
> > +            break;
> > +        }
> > +    }
> > +
> > +    g_assert_cmpint(
> > +        nentries, ==,
> > +        QTEST_V9FS_SYNTH_READDIR_NFILES + 2 /* "." and ".." */
> > +    );
> > +
> > +    /*
> > +     * Check all file names exist in returned entries, ignore their order
> > +     * though.
> > +     */
> > +    g_assert_cmpint(fs_dirents_contain_name(entries, "."), ==, true);
> > +    g_assert_cmpint(fs_dirents_contain_name(entries, ".."), ==, true);
> > +    for (int i = 0; i < QTEST_V9FS_SYNTH_READDIR_NFILES; ++i) {
> > +        char *name = g_strdup_printf(QTEST_V9FS_SYNTH_READDIR_FILE, i);
> > +        g_assert_cmpint(fs_dirents_contain_name(entries, name), ==, true);
> > +        g_free(name);
> > +    }
> > +
> > +    v9fs_free_dirents(entries);
> > +
> > +    g_free(wnames[0]);
> > +}
> > +
> >  static void fs_walk_no_slash(void *obj, void *data, QGuestAllocator *t_alloc)
> >  {
> >      QVirtio9P *v9p = obj;
> > @@ -793,6 +877,24 @@ static void fs_flush_ignored(void *obj, void *data, QGuestAllocator *t_alloc)
> >      g_free(wnames[0]);
> >  }
> >  
> > +static void fs_readdir_split_128(void *obj, void *data,
> > +                                 QGuestAllocator *t_alloc)
> > +{
> > +    fs_readdir_split(obj, data, t_alloc, 128);
> > +}
> > +
> > +static void fs_readdir_split_256(void *obj, void *data,
> > +                                 QGuestAllocator *t_alloc)
> > +{
> > +    fs_readdir_split(obj, data, t_alloc, 256);
> > +}
> > +
> > +static void fs_readdir_split_512(void *obj, void *data,
> > +                                 QGuestAllocator *t_alloc)
> > +{
> > +    fs_readdir_split(obj, data, t_alloc, 512);
> > +}
> > +
> >  static void register_virtio_9p_test(void)
> >  {
> >      qos_add_test("config", "virtio-9p", pci_config, NULL);
> > @@ -810,6 +912,12 @@ static void register_virtio_9p_test(void)
> >      qos_add_test("fs/flush/ignored", "virtio-9p", fs_flush_ignored,
> >                   NULL);
> >      qos_add_test("fs/readdir/basic", "virtio-9p", fs_readdir, NULL);
> > +    qos_add_test("fs/readdir/split_512", "virtio-9p",
> > +                 fs_readdir_split_512, NULL);
> > +    qos_add_test("fs/readdir/split_256", "virtio-9p",
> > +                 fs_readdir_split_256, NULL);
> > +    qos_add_test("fs/readdir/split_128", "virtio-9p",
> > +                 fs_readdir_split_128, NULL);
> >  }
> >  
> >  libqos_init(register_virtio_9p_test);





^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v8 3/7] 9pfs: split out fs driver core of v9fs_co_readdir()
  2020-07-29 16:02   ` Greg Kurz
@ 2020-07-29 17:38     ` Christian Schoenebeck
  0 siblings, 0 replies; 15+ messages in thread
From: Christian Schoenebeck @ 2020-07-29 17:38 UTC (permalink / raw)
  To: qemu-devel; +Cc: Greg Kurz

On Wednesday, 29 July 2020 18:02:56 CEST Greg Kurz wrote:
> On Wed, 29 Jul 2020 10:11:54 +0200
> 
> Christian Schoenebeck <qemu_oss@crudebyte.com> wrote:
> > The implementation of v9fs_co_readdir() has two parts: the outer
> > part is executed by the main I/O thread, whereas the inner part is
> > executed by the fs driver on a background I/O thread.
> > 
> > Move the inner part to its own new, private function do_readdir(),
> > so it can be shared by another upcoming new function.
> > 
> > This is just a preparatory patch for the subsequent patch, intended
> > to keep the next patch from cluttering the overall diff.
> > 
> > Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
> > ---
> > 
> >  hw/9pfs/codir.c | 37 +++++++++++++++++++++++--------------
> >  1 file changed, 23 insertions(+), 14 deletions(-)
> > 
> > diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
> > index 73f9a751e1..ff57fb8619 100644
> > --- a/hw/9pfs/codir.c
> > +++ b/hw/9pfs/codir.c
> > @@ -18,28 +18,37 @@
> > 
> >  #include "qemu/main-loop.h"
> >  #include "coth.h"
> > 
> > +/*
> > + * This must solely be executed on a background IO thread.
> > + */
> 
> Well, technically this function could be called from any context
> but of course calling it from the main I/O thread when handling
> T_readdir would make the request synchronous, which is certainly
> not what we want. So I'm not sure this comment brings much.

Yeah, the intention was to separate more clearly each function's intended 
usage context, i.e. whether it is top half (TH) or rather bottom half (BH) 
focused, by attaching appropriate human-readable API comments to them.

But you are right, the TH functions are more limited in this regard, as they 
usually contain a coroutine dispatch call, whereas BH functions usually 
retain a more flexible/universal nature.
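
Schematically, the TH/BH split used throughout this series looks like this 
(a simplified sketch of the pattern only; do_foo is just a placeholder name):

/* BH: the actual work; runs on a background I/O thread and may block */
static int do_foo(V9fsPDU *pdu, V9fsFidState *fidp)
{
    /* ... call into the fs driver ... */
    return 0;
}

/* TH: coroutine entry point; runs on the main I/O thread and merely
 * dispatches the work to a worker thread */
int coroutine_fn v9fs_co_foo(V9fsPDU *pdu, V9fsFidState *fidp)
{
    int err;

    if (v9fs_request_cancelled(pdu)) {
        return -EINTR;
    }
    v9fs_co_run_in_worker({
        err = do_foo(pdu, fidp);
    });
    return err;
}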

So maybe rather:

/*
 * Intended to be called from bottom-half (e.g. background I/O thread) 
 * context.
 */

If in doubt, I can also just drop the comment, as the function is really 
quite simple.

> Anyway, the code change is okay so:
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>
> 
> > +static int do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, struct dirent **dent)
> > +{
> > +    int err = 0;
> > +    V9fsState *s = pdu->s;
> > +    struct dirent *entry;
> > +
> > +    errno = 0;
> > +    entry = s->ops->readdir(&s->ctx, &fidp->fs);
> > +    if (!entry && errno) {
> > +        *dent = NULL;
> > +        err = -errno;
> > +    } else {
> > +        *dent = entry;
> > +    }
> > +    return err;
> > +}
> > +
> >  int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
> >                                   struct dirent **dent)
> >  {
> >      int err;
> > -    V9fsState *s = pdu->s;
> > 
> >      if (v9fs_request_cancelled(pdu)) {
> >          return -EINTR;
> >      }
> > -    v9fs_co_run_in_worker(
> > -        {
> > -            struct dirent *entry;
> > -
> > -            errno = 0;
> > -            entry = s->ops->readdir(&s->ctx, &fidp->fs);
> > -            if (!entry && errno) {
> > -                err = -errno;
> > -            } else {
> > -                *dent = entry;
> > -                err = 0;
> > -            }
> > -        });
> > +    v9fs_co_run_in_worker({
> > +        err = do_readdir(pdu, fidp, dent);
> > +    });
> >      return err;
> >  }





^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v8 4/7] 9pfs: add new function v9fs_co_readdir_many()
  2020-07-29  8:12 ` [PATCH v8 4/7] 9pfs: add new function v9fs_co_readdir_many() Christian Schoenebeck
@ 2020-07-30 10:08   ` Christian Schoenebeck
  2020-08-06  9:38     ` Christian Schoenebeck
  0 siblings, 1 reply; 15+ messages in thread
From: Christian Schoenebeck @ 2020-07-30 10:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: Greg Kurz

On Wednesday, 29 July 2020 10:12:33 CEST Christian Schoenebeck wrote:
> The newly added function v9fs_co_readdir_many() retrieves multiple
> directory entries with a single fs driver request. It is intended to
> replace uses of v9fs_co_readdir(), as the latter only retrieves a single
> directory entry per fs driver request.
> 
> The reason for this planned replacement is that for every fs driver
> request the coroutine is dispatched from main I/O thread to a
> background I/O thread and eventually dispatched back to main I/O
> thread. Hopping between threads adds latency. So if a 9pfs Treaddir
> request reads a large number of directory entries, this currently
> sums up to huge latencies of several hundred ms or even more. So
> using v9fs_co_readdir_many() instead of v9fs_co_readdir() will
> provide significant performance improvements.
> 
> Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
> ---
>  hw/9pfs/9p.h    |  22 +++++++
>  hw/9pfs/codir.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/9pfs/coth.h  |   3 +
>  3 files changed, 190 insertions(+)
> 
> diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
> index 561774e843..93b7030edf 100644
> --- a/hw/9pfs/9p.h
> +++ b/hw/9pfs/9p.h
> @@ -215,6 +215,28 @@ static inline void v9fs_readdir_init(V9fsDir *dir)
>      qemu_co_mutex_init(&dir->readdir_mutex);
>  }
> 
> +/**
> + * Type for 9p fs drivers' (a.k.a. 9p backends) result of readdir requests,
> + * which is a chained list of directory entries.
> + */
> +typedef struct V9fsDirEnt {
> +    /* mandatory (must not be NULL) information for all readdir requests */
> +    struct dirent *dent;
> +    /*
> +     * optional (may be NULL): A full stat of each directory entry is just
> +     * done if explicitly told to fs driver.
> +     */
> +    struct stat *st;
> +    /*
> +     * instead of an array, directory entries are always returned as a
> +     * chained list; that's because the amount of entries retrieved by fs
> +     * drivers is dependent on the individual entries' name (since response
> +     * messages are size limited), so the final amount cannot be estimated
> +     * beforehand
> +     */
> +    struct V9fsDirEnt *next;
> +} V9fsDirEnt;
> +
>  /*
>   * Filled by fs driver on open and other
>   * calls.
> diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
> index ff57fb8619..8a8128ea13 100644
> --- a/hw/9pfs/codir.c
> +++ b/hw/9pfs/codir.c
> @@ -38,6 +38,10 @@ static int do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, struct dirent **dent)
>      return err;
>  }
> 
> +/*
> + * TODO: This will be removed for performance reasons.
> + * Use v9fs_co_readdir_many() instead.
> + */
>  int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
>                                   struct dirent **dent)
>  {
> @@ -52,6 +56,167 @@ int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
>      return err;
>  }
> 
> +/*
> + * This is solely executed on a background IO thread.
> + *
> + * See v9fs_co_readdir_many() (as its only user) below for details.
> + */
> +static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
> +                           struct V9fsDirEnt **entries, off_t offset,
> +                           int32_t maxsize, bool dostat)
> +{
> +    V9fsState *s = pdu->s;
> +    V9fsString name;
> +    int len, err = 0;
> +    int32_t size = 0;
> +    off_t saved_dir_pos;
> +    struct dirent *dent;
> +    struct V9fsDirEnt *e = NULL;
> +    V9fsPath path;
> +    struct stat stbuf;
> +
> +    *entries = NULL;
> +    v9fs_path_init(&path);
> +
> +    /*
> +     * TODO: Here should be a warn_report_once() if lock failed.
> +     *
> +     * With a good 9p client we should not get into concurrency here,
> +     * because a good client would not use the same fid for concurrent
> +     * requests. We do the lock here for safety reasons though. However
> +     * the client would then suffer performance issues, so better log that
> +     * issue here.
> +     */
> +    v9fs_readdir_lock(&fidp->fs.dir);
> +
> +    /* seek directory to requested initial position */
> +    if (offset == 0) {
> +        s->ops->rewinddir(&s->ctx, &fidp->fs);
> +    } else {
> +        s->ops->seekdir(&s->ctx, &fidp->fs, offset);
> +    }
> +
> +    /* save the directory position */
> +    saved_dir_pos = s->ops->telldir(&s->ctx, &fidp->fs);
> +    if (saved_dir_pos < 0) {
> +        err = saved_dir_pos;
> +        goto out;
> +    }
> +
> +    while (true) {

To address your concern about aborting a readdir request, one could add here:

        if (v9fs_request_cancelled(pdu)) {
            err = -EINTR;
            break;
        }

That's a minimally invasive change, so it would be OK to add it to this patch 
already, or later as a separate patch, as you like.
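
In context, the top of the retrieval loop in do_readdir_many() would then 
look roughly like this (just a sketch of the placement, not the final hunk):

    while (true) {
        /* proposed: bail out if the client cancelled the request
         * (e.g. via Tflush) instead of reading the entire directory */
        if (v9fs_request_cancelled(pdu)) {
            err = -EINTR;
            break;
        }

        /* get directory entry from fs driver */
        err = do_readdir(pdu, fidp, &dent);
        ...
    }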

> +        /* get directory entry from fs driver */
> +        err = do_readdir(pdu, fidp, &dent);
> +        if (err || !dent) {
> +            break;
> +        }
> +
> +        /*
> +         * stop this loop as soon as it would exceed the allowed maximum
> +         * response message size for the directory entries collected so far,
> +         * because anything beyond that size would need to be discarded by
> +         * 9p controller (main thread / top half) anyway
> +         */
> +        v9fs_string_init(&name);
> +        v9fs_string_sprintf(&name, "%s", dent->d_name);
> +        len = v9fs_readdir_response_size(&name);
> +        v9fs_string_free(&name);
> +        if (size + len > maxsize) {
> +            /* this is not an error case actually */
> +            break;
> +        }
> +
> +        /* append next node to result chain */
> +        if (!e) {
> +            *entries = e = g_malloc0(sizeof(V9fsDirEnt));
> +        } else {
> +            e = e->next = g_malloc0(sizeof(V9fsDirEnt));
> +        }
> +        e->dent = g_malloc0(sizeof(struct dirent));
> +        memcpy(e->dent, dent, sizeof(struct dirent));
> +
> +        /* perform a full stat() for directory entry if requested by caller */
> +        if (dostat) {
> +            err = s->ops->name_to_path(
> +                &s->ctx, &fidp->path, dent->d_name, &path
> +            );
> +            if (err < 0) {
> +                err = -errno;
> +                break;
> +            }
> +
> +            err = s->ops->lstat(&s->ctx, &path, &stbuf);
> +            if (err < 0) {
> +                err = -errno;
> +                break;
> +            }
> +
> +            e->st = g_malloc0(sizeof(struct stat));
> +            memcpy(e->st, &stbuf, sizeof(struct stat));
> +        }
> +
> +        size += len;
> +        saved_dir_pos = dent->d_off;
> +    }
> +
> +    /* restore (last) saved position */
> +    s->ops->seekdir(&s->ctx, &fidp->fs, saved_dir_pos);
> +
> +out:
> +    v9fs_readdir_unlock(&fidp->fs.dir);
> +    v9fs_path_free(&path);
> +    if (err < 0) {
> +        return err;
> +    }
> +    return size;
> +}
> +
> +/**
> + * @brief Reads multiple directory entries in one rush.
> + *
> + * Retrieves the requested (max. amount of) directory entries from the fs
> + * driver. This function must only be called by the main IO thread (top half).
> + * Internally this function call will be dispatched to a background IO thread
> + * (bottom half) where it is eventually executed by the fs driver.
> + *
> + * @discussion Acquiring multiple directory entries in one rush from the fs
> + * driver, instead of retrieving each directory entry individually, is very
> + * beneficial from performance point of view. Because for every fs driver
> + * request latency is added, which in practice could lead to overall
> + * latencies of several hundred ms for reading all entries (of just a single
> + * directory) if every directory entry was individually requested from fs
> + * driver.
> + *
> + * @note You must @b ALWAYS call @c v9fs_free_dirents(entries) after calling
> + * v9fs_co_readdir_many(), both on success and on error cases of this
> + * function, to avoid memory leaks once @p entries are no longer needed.
> + *
> + * @param pdu - the causing 9p (T_readdir) client request
> + * @param fidp - already opened directory where readdir shall be performed on
> + * @param entries - output for directory entries (must not be NULL)
> + * @param offset - initial position inside the directory the function shall
> + *                 seek to before retrieving the directory entries
> + * @param maxsize - maximum result message body size (in bytes)
> + * @param dostat - whether a stat() should be performed and returned for
> + *                 each directory entry
> + * @returns resulting response message body size (in bytes) on success,
> + *          negative error code otherwise
> + */
> +int coroutine_fn v9fs_co_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
> +                                      struct V9fsDirEnt **entries,
> +                                      off_t offset, int32_t maxsize,
> +                                      bool dostat)
> +{
> +    int err = 0;
> +
> +    if (v9fs_request_cancelled(pdu)) {
> +        return -EINTR;
> +    }
> +    v9fs_co_run_in_worker({
> +        err = do_readdir_many(pdu, fidp, entries, offset, maxsize, dostat);
> +    });
> +    return err;
> +}
> +
>  off_t v9fs_co_telldir(V9fsPDU *pdu, V9fsFidState *fidp)
>  {
>      off_t err;
> diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h
> index c2cdc7a9ea..fd4a45bc7c 100644
> --- a/hw/9pfs/coth.h
> +++ b/hw/9pfs/coth.h
> @@ -49,6 +49,9 @@
>  void co_run_in_worker_bh(void *);
>  int coroutine_fn v9fs_co_readlink(V9fsPDU *, V9fsPath *, V9fsString *);
>  int coroutine_fn v9fs_co_readdir(V9fsPDU *, V9fsFidState *, struct dirent **);
> +int coroutine_fn v9fs_co_readdir_many(V9fsPDU *, V9fsFidState *,
> +                                      struct V9fsDirEnt **, off_t, int32_t,
> +                                      bool);
>  off_t coroutine_fn v9fs_co_telldir(V9fsPDU *, V9fsFidState *);
>  void coroutine_fn v9fs_co_seekdir(V9fsPDU *, V9fsFidState *, off_t);
>  void coroutine_fn v9fs_co_rewinddir(V9fsPDU *, V9fsFidState *);




^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v8 4/7] 9pfs: add new function v9fs_co_readdir_many()
  2020-07-30 10:08   ` Christian Schoenebeck
@ 2020-08-06  9:38     ` Christian Schoenebeck
  0 siblings, 0 replies; 15+ messages in thread
From: Christian Schoenebeck @ 2020-08-06  9:38 UTC (permalink / raw)
  To: qemu-devel; +Cc: Greg Kurz

On Thursday, 30 July 2020 12:08:33 CEST Christian Schoenebeck wrote:
> > @@ -52,6 +56,167 @@ int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
> >      return err;
> >  }
> > 
> > +/*
> > + * This is solely executed on a background IO thread.
> > + *
> > + * See v9fs_co_readdir_many() (as its only user) below for details.
> > + */
> > +static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
> > +                           struct V9fsDirEnt **entries, off_t offset,
> > +                           int32_t maxsize, bool dostat)
> > +{
> > +    V9fsState *s = pdu->s;
> > +    V9fsString name;
> > +    int len, err = 0;
> > +    int32_t size = 0;
> > +    off_t saved_dir_pos;
> > +    struct dirent *dent;
> > +    struct V9fsDirEnt *e = NULL;
> > +    V9fsPath path;
> > +    struct stat stbuf;
> > +
> > +    *entries = NULL;
> > +    v9fs_path_init(&path);
> > +
> > +    /*
> > +     * TODO: Here should be a warn_report_once() if lock failed.
> > +     *
> > +     * With a good 9p client we should not get into concurrency here,
> > +     * because a good client would not use the same fid for concurrent
> > +     * requests. We do the lock here for safety reasons though. However
> > +     * the client would then suffer performance issues, so better log that
> > +     * issue here.
> > +     */
> > +    v9fs_readdir_lock(&fidp->fs.dir);
> > +
> > +    /* seek directory to requested initial position */
> > +    if (offset == 0) {
> > +        s->ops->rewinddir(&s->ctx, &fidp->fs);
> > +    } else {
> > +        s->ops->seekdir(&s->ctx, &fidp->fs, offset);
> > +    }
> > +
> > +    /* save the directory position */
> > +    saved_dir_pos = s->ops->telldir(&s->ctx, &fidp->fs);
> > +    if (saved_dir_pos < 0) {
> > +        err = saved_dir_pos;
> > +        goto out;
> > +    }
> > +
> > +    while (true) {
> 
> To address your concern about aborting a readdir request, one could add
> here:
> 
>         if (v9fs_request_cancelled(pdu)) {
>             err = -EINTR;
>             break;
>         }
> 
> That's a minimally invasive change, so it would be OK to add it to this patch
> already, or later as a separate patch, as you like.

I've applied this change now, as well as the minor comment change in patch 3, 
and pushed the overall patch set onto my queue:

https://github.com/cschoenebeck/qemu/commits/master

My plan is to send a PR once 5.1 is released (presumably in ~1-2 weeks), 
unless I hear back from you about anything else on this patch set.

Best regards
Christian Schoenebeck




^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2020-08-06 11:52 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-29  8:53 [PATCH v8 0/7] 9pfs: readdir optimization Christian Schoenebeck
2020-07-29  8:10 ` [PATCH v8 1/7] tests/virtio-9p: added split readdir tests Christian Schoenebeck
2020-07-29 15:42   ` Greg Kurz
2020-07-29 17:24     ` Christian Schoenebeck
2020-07-29  8:11 ` [PATCH v8 2/7] 9pfs: make v9fs_readdir_response_size() public Christian Schoenebeck
2020-07-29 15:44   ` Greg Kurz
2020-07-29  8:11 ` [PATCH v8 3/7] 9pfs: split out fs driver core of v9fs_co_readdir() Christian Schoenebeck
2020-07-29 16:02   ` Greg Kurz
2020-07-29 17:38     ` Christian Schoenebeck
2020-07-29  8:12 ` [PATCH v8 4/7] 9pfs: add new function v9fs_co_readdir_many() Christian Schoenebeck
2020-07-30 10:08   ` Christian Schoenebeck
2020-08-06  9:38     ` Christian Schoenebeck
2020-07-29  8:13 ` [PATCH v8 5/7] 9pfs: T_readdir latency optimization Christian Schoenebeck
2020-07-29  8:39 ` [PATCH v8 6/7] 9pfs: differentiate readdir lock between 9P2000.u vs. 9P2000.L Christian Schoenebeck
2020-07-29  8:42 ` [PATCH v8 7/7] 9pfs: clarify latency of v9fs_co_run_in_worker() Christian Schoenebeck
