All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PULL] qemu-ga patches for guest-fstrim command
@ 2012-06-21 23:04 Michael Roth
  2012-06-21 23:04 ` [Qemu-devel] [PATCH 1/2] qemu-ga: make names more generic for mount list functions Michael Roth
  2012-06-21 23:04 ` [Qemu-devel] [PATCH 2/2] qemu-ga: add guest-fstrim command Michael Roth
  0 siblings, 2 replies; 8+ messages in thread
From: Michael Roth @ 2012-06-21 23:04 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, aliguori

The following changes since commit 47ecbdf07ed2c37bdfd2d77137d01bb319ce13da:

  libcacard: build fixes (2012-06-21 20:04:24 +0000)

are available in the git repository at:
  git://github.com/mdroth/qemu.git qga-pull-6-21-12

Paolo Bonzini (2):
      qemu-ga: make names more generic for mount list functions
      qemu-ga: add guest-fstrim command

 qapi-schema-guest.json |   20 ++++++++
 qga/commands-posix.c   |  114 +++++++++++++++++++++++++++++++++++++++---------
 qga/commands-win32.c   |   11 +++++
 3 files changed, 124 insertions(+), 21 deletions(-)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Qemu-devel] [PATCH 1/2] qemu-ga: make names more generic for mount list functions
  2012-06-21 23:04 [Qemu-devel] [PULL] qemu-ga patches for guest-fstrim command Michael Roth
@ 2012-06-21 23:04 ` Michael Roth
  2012-06-21 23:04 ` [Qemu-devel] [PATCH 2/2] qemu-ga: add guest-fstrim command Michael Roth
  1 sibling, 0 replies; 8+ messages in thread
From: Michael Roth @ 2012-06-21 23:04 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, aliguori

From: Paolo Bonzini <pbonzini@redhat.com>

We will use these functions and types for more than FSFREEZE, so rename them.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
 qga/commands-posix.c |   36 ++++++++++++++++++------------------
 1 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 00d035d..b1a7ce6 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -314,17 +314,17 @@ static void guest_file_init(void)
 
 #if defined(CONFIG_FSFREEZE)
 
-typedef struct GuestFsfreezeMount {
+typedef struct FsMount {
     char *dirname;
     char *devtype;
-    QTAILQ_ENTRY(GuestFsfreezeMount) next;
-} GuestFsfreezeMount;
+    QTAILQ_ENTRY(FsMount) next;
+} FsMount;
 
-typedef QTAILQ_HEAD(, GuestFsfreezeMount) GuestFsfreezeMountList;
+typedef QTAILQ_HEAD(, FsMount) FsMountList;
 
-static void guest_fsfreeze_free_mount_list(GuestFsfreezeMountList *mounts)
+static void free_fs_mount_list(FsMountList *mounts)
 {
-     GuestFsfreezeMount *mount, *temp;
+     FsMount *mount, *temp;
 
      if (!mounts) {
          return;
@@ -341,10 +341,10 @@ static void guest_fsfreeze_free_mount_list(GuestFsfreezeMountList *mounts)
 /*
  * Walk the mount table and build a list of local file systems
  */
-static int guest_fsfreeze_build_mount_list(GuestFsfreezeMountList *mounts)
+static int build_fs_mount_list(FsMountList *mounts)
 {
     struct mntent *ment;
-    GuestFsfreezeMount *mount;
+    FsMount *mount;
     char const *mtab = "/proc/self/mounts";
     FILE *fp;
 
@@ -367,7 +367,7 @@ static int guest_fsfreeze_build_mount_list(GuestFsfreezeMountList *mounts)
             continue;
         }
 
-        mount = g_malloc0(sizeof(GuestFsfreezeMount));
+        mount = g_malloc0(sizeof(FsMount));
         mount->dirname = g_strdup(ment->mnt_dir);
         mount->devtype = g_strdup(ment->mnt_type);
 
@@ -398,15 +398,15 @@ GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **err)
 int64_t qmp_guest_fsfreeze_freeze(Error **err)
 {
     int ret = 0, i = 0;
-    GuestFsfreezeMountList mounts;
-    struct GuestFsfreezeMount *mount;
+    FsMountList mounts;
+    struct FsMount *mount;
     int fd;
     char err_msg[512];
 
     slog("guest-fsfreeze called");
 
     QTAILQ_INIT(&mounts);
-    ret = guest_fsfreeze_build_mount_list(&mounts);
+    ret = build_fs_mount_list(&mounts);
     if (ret < 0) {
         return ret;
     }
@@ -447,11 +447,11 @@ int64_t qmp_guest_fsfreeze_freeze(Error **err)
         close(fd);
     }
 
-    guest_fsfreeze_free_mount_list(&mounts);
+    free_fs_mount_list(&mounts);
     return i;
 
 error:
-    guest_fsfreeze_free_mount_list(&mounts);
+    free_fs_mount_list(&mounts);
     qmp_guest_fsfreeze_thaw(NULL);
     return 0;
 }
@@ -462,12 +462,12 @@ error:
 int64_t qmp_guest_fsfreeze_thaw(Error **err)
 {
     int ret;
-    GuestFsfreezeMountList mounts;
-    GuestFsfreezeMount *mount;
+    FsMountList mounts;
+    FsMount *mount;
     int fd, i = 0, logged;
 
     QTAILQ_INIT(&mounts);
-    ret = guest_fsfreeze_build_mount_list(&mounts);
+    ret = build_fs_mount_list(&mounts);
     if (ret) {
         error_set(err, QERR_QGA_COMMAND_FAILED,
                   "failed to enumerate filesystems");
@@ -507,7 +507,7 @@ int64_t qmp_guest_fsfreeze_thaw(Error **err)
     }
 
     ga_unset_frozen(ga_state);
-    guest_fsfreeze_free_mount_list(&mounts);
+    free_fs_mount_list(&mounts);
     return i;
 }
 
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [Qemu-devel] [PATCH 2/2] qemu-ga: add guest-fstrim command
  2012-06-21 23:04 [Qemu-devel] [PULL] qemu-ga patches for guest-fstrim command Michael Roth
  2012-06-21 23:04 ` [Qemu-devel] [PATCH 1/2] qemu-ga: make names more generic for mount list functions Michael Roth
@ 2012-06-21 23:04 ` Michael Roth
  2012-06-22 17:48   ` Chris Wedgwood
  1 sibling, 1 reply; 8+ messages in thread
From: Michael Roth @ 2012-06-21 23:04 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, aliguori

From: Paolo Bonzini <pbonzini@redhat.com>

FITRIM is a mounted filesystem feature to discard (or "trim") blocks which
are not in use by the filesystem. This is useful for solid-state drives
(SSDs) and thinly-provisioned storage.  Provide access to the feature
from the host so that filesystems can be trimmed periodically or before
migration.

Here is an example using scsi_debug:

    # modprobe scsi_debug lbpu=1 lbpws=1
    # sg_vpd -p0xb2 /dev/sdb
    Logical block provisioning VPD page (SBC):
      Unmap command supported (LBPU): 1
      Write same (16) with unmap bit supported (LBWS): 1
      Write same (10) with unmap bit supported (LBWS10): 0
    # mke2fs /dev/sdb
    # cat /sys/bus/pseudo/drivers/scsi_debug/map
    1-616,16257-16383
    # mount /dev/sdb /run/media/pbonzini/test
    # dd if=/dev/zero of=/run/media/pbonzini/test/file
    # cat map
    1-616,645-1588,1599-4026,4029-16383
    # rm /run/media/pbonzini/test/file
    # ./qemu-ga /dev/fd/0
    {"execute":"guest-fstrim"}
    {"return": {}}
    # cat map
    1-612

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
 qapi-schema-guest.json |   20 ++++++++++++
 qga/commands-posix.c   |   78 ++++++++++++++++++++++++++++++++++++++++++++++--
 qga/commands-win32.c   |   11 +++++++
 3 files changed, 106 insertions(+), 3 deletions(-)

diff --git a/qapi-schema-guest.json b/qapi-schema-guest.json
index d4055d2..d955cf1 100644
--- a/qapi-schema-guest.json
+++ b/qapi-schema-guest.json
@@ -351,6 +351,26 @@
   'returns': 'int' }
 
 ##
+# @guest-fstrim:
+#
+# Discard (or "trim") blocks which are not in use by the filesystem.
+#
+# @minimum:
+#       Minimum contiguous free range to discard, in bytes. Free ranges
+#       smaller than this may be ignored (this is a hint and the guest
+#       may not respect it).  By increasing this value, the fstrim
+#       operation will complete more quickly for filesystems with badly
+#       fragmented free space, although not all blocks will be discarded.
+#       The default value is zero, meaning "discard every free block".
+#
+# Returns: Nothing.
+#
+# Since: 1.2
+##
+{ 'command': 'guest-fstrim',
+  'data': { '*minimum': 'int' } }
+
+##
 # @guest-suspend-disk
 #
 # Suspend guest to disk.
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index b1a7ce6..ce90421 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -38,9 +38,12 @@ extern char **environ;
 #include <sys/socket.h>
 #include <net/if.h>
 
-#if defined(__linux__) && defined(FIFREEZE)
+#ifdef FIFREEZE
 #define CONFIG_FSFREEZE
 #endif
+#ifdef FITRIM
+#define CONFIG_FSTRIM
+#endif
 #endif
 
 void qmp_guest_shutdown(bool has_mode, const char *mode, Error **err)
@@ -312,8 +315,7 @@ static void guest_file_init(void)
 /* linux-specific implementations. avoid this if at all possible. */
 #if defined(__linux__)
 
-#if defined(CONFIG_FSFREEZE)
-
+#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
 typedef struct FsMount {
     char *dirname;
     char *devtype;
@@ -378,6 +380,9 @@ static int build_fs_mount_list(FsMountList *mounts)
 
     return 0;
 }
+#endif
+
+#if defined(CONFIG_FSFREEZE)
 
 /*
  * Return status of freeze/thaw
@@ -525,6 +530,65 @@ static void guest_fsfreeze_cleanup(void)
 }
 #endif /* CONFIG_FSFREEZE */
 
+#if defined(CONFIG_FSTRIM)
+/*
+ * Walk list of mounted file systems in the guest, and trim them.
+ */
+void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **err)
+{
+    int ret = 0;
+    FsMountList mounts;
+    struct FsMount *mount;
+    int fd;
+    char err_msg[512];
+    struct fstrim_range r = {
+        .start = 0,
+        .len = -1,
+        .minlen = has_minimum ? minimum : 0,
+    };
+
+    slog("guest-fstrim called");
+
+    QTAILQ_INIT(&mounts);
+    ret = build_fs_mount_list(&mounts);
+    if (ret < 0) {
+        return;
+    }
+
+    QTAILQ_FOREACH(mount, &mounts, next) {
+        fd = qemu_open(mount->dirname, O_RDONLY);
+        if (fd == -1) {
+            sprintf(err_msg, "failed to open %s, %s", mount->dirname,
+                    strerror(errno));
+            error_set(err, QERR_QGA_COMMAND_FAILED, err_msg);
+            goto error;
+        }
+
+        /* We try to cull filesytems we know won't work in advance, but other
+         * filesytems may not implement fstrim for less obvious reasons.  These
+         * will report EOPNOTSUPP; we simply ignore these errors.  Any other
+         * error means an unexpected error, so return it in those cases.  In
+         * some other cases ENOTTY will be reported (e.g. CD-ROMs).
+         */
+        ret = ioctl(fd, FITRIM, &r);
+        if (ret == -1) {
+            if (errno != ENOTTY && errno != EOPNOTSUPP) {
+                sprintf(err_msg, "failed to trim %s, %s",
+                        mount->dirname, strerror(errno));
+                error_set(err, QERR_QGA_COMMAND_FAILED, err_msg);
+                close(fd);
+                goto error;
+            }
+        }
+        close(fd);
+    }
+
+error:
+    free_fs_mount_list(&mounts);
+}
+#endif /* CONFIG_FSTRIM */
+
+
 #define LINUX_SYS_STATE_FILE "/sys/power/state"
 #define SUSPEND_SUPPORTED 0
 #define SUSPEND_NOT_SUPPORTED 1
@@ -918,7 +982,15 @@ int64_t qmp_guest_fsfreeze_thaw(Error **err)
 
     return 0;
 }
+#endif /* CONFIG_FSFREEZE */
+
+#if !defined(CONFIG_FSTRIM)
+void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **err)
+{
+    error_set(err, QERR_UNSUPPORTED);
 
+    return;
+}
 #endif
 
 /* register init/cleanup routines for stateful command groups */
diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index eb8d140..54bc546 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -173,6 +173,17 @@ int64_t qmp_guest_fsfreeze_thaw(Error **err)
     return 0;
 }
 
+/*
+ * Walk list of mounted file systems in the guest, and discard unused
+ * areas.
+ */
+void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **err)
+{
+    error_set(err, QERR_UNSUPPORTED);
+
+    return;
+}
+
 typedef enum {
     GUEST_SUSPEND_MODE_DISK,
     GUEST_SUSPEND_MODE_RAM
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] qemu-ga: add guest-fstrim command
  2012-06-21 23:04 ` [Qemu-devel] [PATCH 2/2] qemu-ga: add guest-fstrim command Michael Roth
@ 2012-06-22 17:48   ` Chris Wedgwood
  2012-06-22 21:12     ` Michael Roth
  2012-06-24 16:33     ` Christoph Hellwig
  0 siblings, 2 replies; 8+ messages in thread
From: Chris Wedgwood @ 2012-06-22 17:48 UTC (permalink / raw)
  To: Michael Roth; +Cc: pbonzini, aliguori, qemu-devel

> FITRIM is a mounted filesystem feature to discard (or "trim") blocks which
> are not in use by the filesystem. This is useful for solid-state drives
> (SSDs) and thinly-provisioned storage.  Provide access to the feature
> from the host so that filesystems can be trimmed periodically or before
> migration.

Why can't we use the block layer for this?   AHCI (I think) already
has support, others could be added some some coordination.

That was existing operating systems with current filesystems will DTRT
when needed.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] qemu-ga: add guest-fstrim command
  2012-06-22 17:48   ` Chris Wedgwood
@ 2012-06-22 21:12     ` Michael Roth
  2012-06-23  3:38       ` Chris Wedgwood
  2012-06-24 16:33     ` Christoph Hellwig
  1 sibling, 1 reply; 8+ messages in thread
From: Michael Roth @ 2012-06-22 21:12 UTC (permalink / raw)
  To: Chris Wedgwood; +Cc: pbonzini, aliguori, qemu-devel

On Fri, Jun 22, 2012 at 10:48:56AM -0700, Chris Wedgwood wrote:
> > FITRIM is a mounted filesystem feature to discard (or "trim") blocks which
> > are not in use by the filesystem. This is useful for solid-state drives
> > (SSDs) and thinly-provisioned storage.  Provide access to the feature
> > from the host so that filesystems can be trimmed periodically or before
> > migration.
> 
> Why can't we use the block layer for this?   AHCI (I think) already
> has support, others could be added some some coordination.

I'm not sure I understand, wouldn't the filesystem need to be involved
at some level? How can the block layer differentiate lazilly discarded data
blocks from ones that are still in use without the aid of the
filesystem?

> 
> That was existing operating systems with current filesystems will DTRT
> when needed.
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] qemu-ga: add guest-fstrim command
  2012-06-22 21:12     ` Michael Roth
@ 2012-06-23  3:38       ` Chris Wedgwood
  2012-06-23 20:54         ` Michael Roth
  0 siblings, 1 reply; 8+ messages in thread
From: Chris Wedgwood @ 2012-06-23  3:38 UTC (permalink / raw)
  To: Michael Roth; +Cc: pbonzini, aliguori, qemu-devel

> I'm not sure I understand, wouldn't the filesystem need to be involved
> at some level? How can the block layer differentiate lazilly discarded data
> blocks from ones that are still in use without the aid of the
> filesystem?

It might be me that doesn't understand.

Yes, the filesystem is involved.  Current linux filesystems can trim
on demand or using fstrim.  That mechanism seems to be like it should
suffice if exposed in the most common cases.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] qemu-ga: add guest-fstrim command
  2012-06-23  3:38       ` Chris Wedgwood
@ 2012-06-23 20:54         ` Michael Roth
  0 siblings, 0 replies; 8+ messages in thread
From: Michael Roth @ 2012-06-23 20:54 UTC (permalink / raw)
  To: Chris Wedgwood; +Cc: pbonzini, aliguori, qemu-devel

On Fri, Jun 22, 2012 at 08:38:51PM -0700, Chris Wedgwood wrote:
> > I'm not sure I understand, wouldn't the filesystem need to be involved
> > at some level? How can the block layer differentiate lazilly discarded data
> > blocks from ones that are still in use without the aid of the
> > filesystem?
> 
> It might be me that doesn't understand.
> 
> Yes, the filesystem is involved.  Current linux filesystems can trim
> on demand or using fstrim.  That mechanism seems to be like it should
> suffice if exposed in the most common cases.
> 

You mean the "discard" mount option? I don't think that's generally enabled by
default due to the performance impact on bare metal. A periodic,
filesystem-wide call (like the FITRIM ioctl) is probably the better approach in
those cases as well.

I'm not sure what the penalty of auto-discard would be in the case of VMs, but
there would be at least some additional overhead there due the hole punching
operations run by the host, and the only use case I can think of where it's
useful in that context is for reducing the size of the images before we do some
work with them on the host, in which case an explicit FITRIM ioctl beforehand
makes the most sense IMO.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] qemu-ga: add guest-fstrim command
  2012-06-22 17:48   ` Chris Wedgwood
  2012-06-22 21:12     ` Michael Roth
@ 2012-06-24 16:33     ` Christoph Hellwig
  1 sibling, 0 replies; 8+ messages in thread
From: Christoph Hellwig @ 2012-06-24 16:33 UTC (permalink / raw)
  To: Chris Wedgwood; +Cc: pbonzini, aliguori, Michael Roth, qemu-devel

On Fri, Jun 22, 2012 at 10:48:56AM -0700, Chris Wedgwood wrote:
> > FITRIM is a mounted filesystem feature to discard (or "trim") blocks which
> > are not in use by the filesystem. This is useful for solid-state drives
> > (SSDs) and thinly-provisioned storage.  Provide access to the feature
> > from the host so that filesystems can be trimmed periodically or before
> > migration.
> 
> Why can't we use the block layer for this?   AHCI (I think) already
> has support, others could be added some some coordination.
> 
> That was existing operating systems with current filesystems will DTRT
> when needed.

This commands calls the FITRIM command in the guest, which tells the
filesystems to issue an discard for all currently unused space.  At that
point it enters the block layer.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2012-06-24 16:33 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-21 23:04 [Qemu-devel] [PULL] qemu-ga patches for guest-fstrim command Michael Roth
2012-06-21 23:04 ` [Qemu-devel] [PATCH 1/2] qemu-ga: make names more generic for mount list functions Michael Roth
2012-06-21 23:04 ` [Qemu-devel] [PATCH 2/2] qemu-ga: add guest-fstrim command Michael Roth
2012-06-22 17:48   ` Chris Wedgwood
2012-06-22 21:12     ` Michael Roth
2012-06-23  3:38       ` Chris Wedgwood
2012-06-23 20:54         ` Michael Roth
2012-06-24 16:33     ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.