All of lore.kernel.org
 help / color / mirror / Atom feed
From: Fam Zheng <famz@redhat.com>
To: qemu-devel@nongnu.org
Cc: eblake@redhat.com, Kevin Wolf <kwolf@redhat.com>,
	Max Reitz <mreitz@redhat.com>,
	qemu-block@nongnu.org
Subject: [Qemu-devel] [PATCH v15 20/21] file-posix: Add image locking to perm operations
Date: Wed, 26 Apr 2017 11:34:12 +0800	[thread overview]
Message-ID: <20170426033413.17192-21-famz@redhat.com> (raw)
In-Reply-To: <20170426033413.17192-1-famz@redhat.com>

This extends the permission bits of the op blocker API to external
processes using Linux OFD locks.

Each permission in @perm and @shared_perm is represented by a locked
byte in the image file.  Requesting a permission in @perm is translated
to a shared lock on the corresponding byte; refusing to share the same
permission is translated to a shared lock on a separate byte.  With that,
we use twice as many bytes as there are distinct permission types.

virtlockd in libvirt locks the first byte, so we do locking from a
higher offset.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/file-posix.c | 267 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 264 insertions(+), 3 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 2114720..b92fdc3 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -129,12 +129,28 @@ do { \
 
 #define MAX_BLOCKSIZE	4096
 
+/* POSIX file locking bytes. Libvirt takes byte 0, we start from higher bytes,
+ * leaving a few more bytes for its future use. */
+#define RAW_LOCK_PERM_BASE             100 /* + i: byte for perm bit i */
+#define RAW_LOCK_SHARED_BASE           200 /* + i: byte for unshared bit i */
+#ifdef F_OFD_SETLK
+#define RAW_LOCK_SUPPORTED 1 /* F_OFD_SETLK is defined on this host */
+#else
+#define RAW_LOCK_SUPPORTED 0 /* raw_handle_perm_lock() then returns early */
+#endif
+
 typedef struct BDRVRawState {
     int fd;
+    int lock_fd;
+    bool use_lock;
     int type;
     int open_flags;
     size_t buf_align;
 
+    /* The current permissions. */
+    uint64_t perm;
+    uint64_t shared_perm;
+
 #ifdef CONFIG_XFS
     bool is_xfs:1;
 #endif
@@ -440,6 +456,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
     s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);
 
+    s->use_lock = qemu_opt_get_bool(opts, "locking", true);
+
     s->open_flags = open_flags;
     raw_parse_flags(bdrv_flags, &s->open_flags);
 
@@ -455,6 +473,21 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
     s->fd = fd;
 
+    s->lock_fd = -1;
+    fd = qemu_open(filename, O_RDONLY);
+    if (fd < 0) {
+        if (RAW_LOCK_SUPPORTED) {
+            ret = -errno;
+            error_setg_errno(errp, errno, "Could not open '%s' for locking",
+                             filename);
+            qemu_close(s->fd);
+            goto fail;
+        }
+    }
+    s->lock_fd = fd;
+    s->perm = 0;
+    s->shared_perm = BLK_PERM_ALL;
+
 #ifdef CONFIG_LINUX_AIO
      /* Currently Linux does AIO only for files opened with O_DIRECT */
     if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
@@ -542,6 +575,156 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
     return raw_open_common(bs, options, flags, 0, errp);
 }
 
+typedef enum {
+    RAW_PL_PREPARE, /* lock bytes of old and new perms, then check conflicts */
+    RAW_PL_COMMIT,  /* switch to the new perms, unlocking bytes not needed */
+    RAW_PL_ABORT,   /* return to the current perms, unlocking the extras */
+} RawPermLockOp;
+
+/* Apply two bit masks to the lock bytes of s->lock_fd, one byte per bit.
+ * Bit i of @perm_lock_bits maps to byte RAW_LOCK_PERM_BASE + i and bit i of
+ * @shared_perm_lock_bits to byte RAW_LOCK_SHARED_BASE + i, for every i below
+ * BLK_PERM_MAX; all permission bytes are handled before any shared byte.
+ * A set bit locks its byte.  A clear bit unlocks its byte if @unlock is true
+ * and leaves it alone otherwise.
+ *
+ * Returns 0 on success.  On the first failing byte, sets @errp and returns
+ * the non-zero result of the lock helper without touching further bytes. */
+static int raw_apply_lock_bytes(BDRVRawState *s,
+                                uint64_t perm_lock_bits,
+                                uint64_t shared_perm_lock_bits,
+                                bool unlock, Error **errp)
+{
+    const uint64_t wanted[2] = { perm_lock_bits, shared_perm_lock_bits };
+    const int base[2] = { RAW_LOCK_PERM_BASE, RAW_LOCK_SHARED_BASE };
+    int pass;
+    int bit;
+
+    /* Pass 0 walks the permission bytes, pass 1 the shared bytes. */
+    for (pass = 0; pass < 2; pass++) {
+        for (bit = 0; bit < BLK_PERM_MAX; bit++) {
+            int byte = base[pass] + bit;
+            int rc;
+
+            if (wanted[pass] & (1ULL << bit)) {
+                /* NOTE(review): the trailing "false" presumably selects a
+                 * shared rather than exclusive lock - confirm in osdep. */
+                rc = qemu_lock_fd(s->lock_fd, byte, 1, false);
+                if (rc) {
+                    error_setg(errp, "Failed to lock byte %d", byte);
+                    return rc;
+                }
+            } else if (unlock) {
+                rc = qemu_unlock_fd(s->lock_fd, byte, 1);
+                if (rc) {
+                    error_setg(errp, "Failed to unlock byte %d", byte);
+                    return rc;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+/* Check "unshared" bytes implied by @perm and ~@shared_perm in the file:
+ * each bit set in @perm tests byte RAW_LOCK_SHARED_BASE + i, then each bit
+ * clear in @shared_perm tests byte RAW_LOCK_PERM_BASE + i.  Returns 0 if no
+ * test fails, else sets @errp and returns the failing test's result. */
+static int raw_check_lock_bytes(BDRVRawState *s, uint64_t perm,
+                                uint64_t shared_perm, Error **errp)
+{
+    const char *kind = "";  /* becomes "shared " for the second loop */
+    char *perm_name;        /* allocated by bdrv_perm_names(); freed below */
+    int ret, off, i;
+
+    for (i = 0; i < BLK_PERM_MAX; ++i) {
+        off = RAW_LOCK_SHARED_BASE + i;
+        if (perm & (1ULL << i)) {
+            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
+            if (ret) {
+                goto conflict;
+            }
+        }
+    }
+    kind = "shared ";
+    for (i = 0; i < BLK_PERM_MAX; ++i) {
+        off = RAW_LOCK_PERM_BASE + i;
+        if (!(shared_perm & (1ULL << i))) {
+            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
+            if (ret) {
+                goto conflict;
+            }
+        }
+    }
+    return 0;
+
+conflict:
+    perm_name = bdrv_perm_names(1ULL << i);
+    error_setg(errp, "Failed to check byte %d for %s\"%s\" permission",
+               off, kind, perm_name);
+    g_free(perm_name);
+    error_append_hint(errp, "Is another process using the image?\n");
+    return ret;
+}
+
+/* Move the byte locks on s->lock_fd through one phase (@op) of a permission
+ * change towards @new_perm / @new_shared.  Does nothing and returns 0 when
+ * locking is compiled out, disabled by s->use_lock, or the image is inactive.
+ * Only RAW_PL_PREPARE can return non-zero (with @errp set); failures while
+ * committing or aborting are reported but do not change the return value. */
+static int raw_handle_perm_lock(BlockDriverState *bs,
+                                RawPermLockOp op,
+                                uint64_t new_perm, uint64_t new_shared,
+                                Error **errp)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret = 0;
+    Error *local_err = NULL;
+
+    if (!RAW_LOCK_SUPPORTED || !s->use_lock ||
+        (bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
+        return 0;
+    }
+
+    /* 0 is a valid descriptor; only negative values mean "not open". */
+    assert(s->lock_fd >= 0);
+
+    switch (op) {
+    case RAW_PL_PREPARE:
+        /* Lock the union of old and new bytes; nothing is dropped yet. */
+        ret = raw_apply_lock_bytes(s, s->perm | new_perm,
+                                   ~s->shared_perm | ~new_shared,
+                                   false, errp);
+        if (!ret) {
+            ret = raw_check_lock_bytes(s, new_perm, new_shared, errp);
+            if (!ret) {
+                return 0;
+            }
+        }
+        op = RAW_PL_ABORT;
+        /* fall through to unlock bytes. */
+    case RAW_PL_ABORT:
+        raw_apply_lock_bytes(s, s->perm, ~s->shared_perm, true, &local_err);
+        if (local_err) {
+            /* Theoretically the above call only unlocks bytes and it cannot
+             * fail. Something weird happened, report it.
+             */
+            error_report_err(local_err);
+        }
+        break;
+    case RAW_PL_COMMIT:
+        raw_apply_lock_bytes(s, new_perm, ~new_shared, true, &local_err);
+        if (local_err) {
+            /* Theoretically the above call only unlocks bytes and it cannot
+             * fail. Something weird happened, report it.
+             */
+            error_report_err(local_err);
+        }
+        break;
+    }
+    return ret;
+}
+
 static int raw_reopen_prepare(BDRVReopenState *state,
                               BlockReopenQueue *queue, Error **errp)
 {
@@ -549,6 +732,8 @@ static int raw_reopen_prepare(BDRVReopenState *state,
     BDRVRawReopenState *rs;
     int ret = 0;
     Error *local_err = NULL;
+    uint64_t clear_perms = state->flags & BDRV_O_RDWR ? 0 :
+        BLK_PERM_WRITE | BLK_PERM_RESIZE | BLK_PERM_WRITE_UNCHANGED;
 
     assert(state != NULL);
     assert(state->bs != NULL);
@@ -613,13 +798,22 @@ static int raw_reopen_prepare(BDRVReopenState *state,
     if (rs->fd != -1) {
         raw_probe_alignment(state->bs, rs->fd, &local_err);
         if (local_err) {
-            qemu_close(rs->fd);
-            rs->fd = -1;
             error_propagate(errp, local_err);
             ret = -EINVAL;
+            goto fail;
         }
     }
 
+    ret = raw_handle_perm_lock(state->bs, RAW_PL_PREPARE,
+                               s->perm & ~clear_perms,
+                               s->shared_perm, errp);
+    if (ret) {
+        goto fail;
+    }
+    return 0;
+fail:
+    qemu_close(rs->fd);
+    rs->fd = -1;
     return ret;
 }
 
@@ -627,6 +821,8 @@ static void raw_reopen_commit(BDRVReopenState *state)
 {
     BDRVRawReopenState *rs = state->opaque;
     BDRVRawState *s = state->bs->opaque;
+    uint64_t clear_perms = state->flags & BDRV_O_RDWR ? 0 :
+        BLK_PERM_WRITE | BLK_PERM_RESIZE | BLK_PERM_WRITE_UNCHANGED;
 
     s->open_flags = rs->open_flags;
 
@@ -635,12 +831,17 @@ static void raw_reopen_commit(BDRVReopenState *state)
 
     g_free(state->opaque);
     state->opaque = NULL;
+    raw_handle_perm_lock(state->bs, RAW_PL_COMMIT, s->perm & ~clear_perms,
+                         s->shared_perm, NULL);
 }
 
 
 static void raw_reopen_abort(BDRVReopenState *state)
 {
+    BDRVRawState *s = state->bs->opaque;
     BDRVRawReopenState *rs = state->opaque;
+    uint64_t clear_perms = state->flags & BDRV_O_RDWR ? 0 :
+        BLK_PERM_WRITE | BLK_PERM_RESIZE | BLK_PERM_WRITE_UNCHANGED;
 
      /* nothing to do if NULL, we didn't get far enough */
     if (rs == NULL) {
@@ -653,6 +854,8 @@ static void raw_reopen_abort(BDRVReopenState *state)
     }
     g_free(state->opaque);
     state->opaque = NULL;
+    raw_handle_perm_lock(state->bs, RAW_PL_ABORT, s->perm & ~clear_perms,
+                         s->shared_perm, NULL);
 }
 
 static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
@@ -1410,6 +1613,10 @@ static void raw_close(BlockDriverState *bs)
         qemu_close(s->fd);
         s->fd = -1;
     }
+    if (s->lock_fd >= 0) {
+        qemu_close(s->lock_fd);
+        s->lock_fd = -1;
+    }
 }
 
 static int raw_truncate(BlockDriverState *bs, int64_t offset)
@@ -1947,6 +2154,56 @@ static QemuOptsList raw_create_opts = {
     }
 };
 
+static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
+                          Error **errp)
+{   /* .bdrv_check_perm callback: run the PREPARE phase for @perm/@shared. */
+    return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
+}
+
+static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
+{   /* .bdrv_set_perm callback: COMMIT the locks, then record the new state. */
+    BDRVRawState *s = bs->opaque;
+    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
+    s->perm = perm;             /* read back by later PREPARE/ABORT phases */
+    s->shared_perm = shared;
+}
+
+static void raw_abort_perm_update(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    /* .bdrv_abort_perm_update callback: go back to the recorded permissions. */
+    raw_handle_perm_lock(bs, RAW_PL_ABORT, s->perm, s->shared_perm, NULL);
+}
+
+/* .bdrv_inactivate callback: move the lock state to "no permission used, all
+ * permissions shared" so the COMMIT phase can unlock the bytes.  s->perm and
+ * s->shared_perm are left as they are.  Returns the PREPARE result. */
+static int raw_inactivate(BlockDriverState *bs)
+{
+    int ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, 0, BLK_PERM_ALL, NULL);
+
+    if (ret == 0) {
+        /* Only a successful PREPARE is followed by COMMIT. */
+        raw_handle_perm_lock(bs, RAW_PL_COMMIT, 0, BLK_PERM_ALL, NULL);
+    }
+    return ret;
+}
+
+
+/* .bdrv_invalidate_cache callback: run PREPARE and then COMMIT for the
+ * recorded s->perm / s->shared_perm.  If PREPARE fails, @errp is set by it
+ * and the COMMIT phase is skipped. */
+static void raw_invalidate_cache(BlockDriverState *bs, Error **errp)
+{
+    BDRVRawState *s = bs->opaque;
+
+    assert(!(bdrv_get_flags(bs) & BDRV_O_INACTIVE));
+    if (raw_handle_perm_lock(bs, RAW_PL_PREPARE, s->perm, s->shared_perm,
+                             errp) == 0) {
+        raw_handle_perm_lock(bs, RAW_PL_COMMIT, s->perm, s->shared_perm, NULL);
+    }
+}
+
 BlockDriver bdrv_file = {
     .format_name = "file",
     .protocol_name = "file",
@@ -1977,7 +2234,11 @@ BlockDriver bdrv_file = {
     .bdrv_get_info = raw_get_info,
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
-
+    .bdrv_inactivate = raw_inactivate,
+    .bdrv_invalidate_cache = raw_invalidate_cache,
+    .bdrv_check_perm = raw_check_perm,
+    .bdrv_set_perm   = raw_set_perm,
+    .bdrv_abort_perm_update = raw_abort_perm_update,
     .create_opts = &raw_create_opts,
 };
 
-- 
2.9.3

  parent reply	other threads:[~2017-04-26  3:35 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-26  3:33 [Qemu-devel] [PATCH v15 00/21] block: Image locking series Fam Zheng
2017-04-26  3:33 ` [Qemu-devel] [PATCH v15 01/21] block: Make bdrv_perm_names public Fam Zheng
2017-04-26  3:33 ` [Qemu-devel] [PATCH v15 02/21] block: Define BLK_PERM_MAX Fam Zheng
2017-04-26  9:36   ` Kevin Wolf
2017-04-27  2:03     ` Fam Zheng
2017-04-26  3:33 ` [Qemu-devel] [PATCH v15 03/21] block: Add, parse and store "force-share" option Fam Zheng
2017-04-26  3:33 ` [Qemu-devel] [PATCH v15 04/21] block: Respect "force-share" in perm propagating Fam Zheng
2017-04-26  3:33 ` [Qemu-devel] [PATCH v15 05/21] qemu-img: Add --force-share option to subcommands Fam Zheng
2017-04-26  3:33 ` [Qemu-devel] [PATCH v15 06/21] qemu-img: Update documentation for -U Fam Zheng
2017-04-26  3:33 ` [Qemu-devel] [PATCH v15 07/21] qemu-io: Add --force-share option Fam Zheng
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 08/21] iotests: 030: Prepare for image locking Fam Zheng
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 09/21] iotests: 046: " Fam Zheng
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 10/21] iotests: 055: Don't attach the target image already for drive-backup Fam Zheng
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 11/21] iotests: 085: Avoid image locking conflict Fam Zheng
2017-04-26 12:30   ` Kevin Wolf
2017-04-27  7:16     ` Fam Zheng
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 12/21] iotests: 087: Don't attach test image twice Fam Zheng
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 13/21] iotests: 091: Quit QEMU before checking image Fam Zheng
2017-04-26 12:34   ` Kevin Wolf
2017-04-27  7:04     ` Fam Zheng
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 14/21] iotests: 172: Use separate images for multiple devices Fam Zheng
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 15/21] tests: Use null-co:// instead of /dev/null as the dummy image Fam Zheng
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 16/21] file-posix: Add 'locking' option Fam Zheng
2017-04-26 12:41   ` Kevin Wolf
2017-04-27  2:29     ` Fam Zheng
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 17/21] tests: Disable image lock in test-replication Fam Zheng
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 18/21] block: Reuse bs as backing hd for drive-backup sync=none Fam Zheng
2017-04-26 12:52   ` Kevin Wolf
2017-04-26 13:15     ` Fam Zheng
2017-04-26 14:34       ` Kevin Wolf
2017-04-27  1:50         ` Fam Zheng
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 19/21] osdep: Add qemu_lock_fd and qemu_unlock_fd Fam Zheng
2017-04-26 12:57   ` Kevin Wolf
2017-04-26 13:20     ` Fam Zheng
2017-04-26 14:24       ` Kevin Wolf
2017-04-26 14:29       ` Daniel P. Berrange
2017-04-27  1:40         ` Fam Zheng
2017-04-26  3:34 ` Fam Zheng [this message]
2017-04-26 14:22   ` [Qemu-devel] [PATCH v15 20/21] file-posix: Add image locking to perm operations Kevin Wolf
2017-04-27  6:43     ` Fam Zheng
2017-04-28 13:45   ` Kevin Wolf
2017-04-28 15:30     ` Fam Zheng
2017-04-28 18:27       ` Kevin Wolf
2017-04-26  3:34 ` [Qemu-devel] [PATCH v15 21/21] qemu-iotests: Add test case 153 for image locking Fam Zheng
2017-04-26 12:53   ` Fam Zheng
2017-04-26 14:49   ` Kevin Wolf
2017-04-27  1:32     ` Fam Zheng
2017-04-27  9:05       ` Kevin Wolf
2017-04-27 10:34         ` Fam Zheng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170426033413.17192-21-famz@redhat.com \
    --to=famz@redhat.com \
    --cc=eblake@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.