All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 00/11] dump: Add arch section and s390x PV dump
@ 2022-07-13 13:03 Janosch Frank
  2022-07-13 13:03 ` [PATCH v2 01/11] dump: Cleanup memblock usage Janosch Frank
                   ` (10 more replies)
  0 siblings, 11 replies; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 13:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: marcandre.lureau, pbonzini, mhartmay, borntraeger, imbrenda,
	pasic, cohuck, thuth, qemu-s390x, richard.henderson

Previously this series was two separate series:
 * Arch section support
   Adds the possibility for arch code to add custom section data.

 * s390 PV dump support
   Adds PV dump data to the custom arch sections.

I've chosen to merge them so it's easier to understand why the arch
section support has been implemented the way it is.

Additionally I've added a cleanup patch beforehand which cleans up the
GuestPhysBlock usage.

v2:
	* Added "dump: Cleanup memblock usage"
	* Fixed whitespace problems and review comments
	* Added missing *errp check in dump_end


Janosch Frank (11):
  dump: Cleanup memblock usage
  dump: Allocate header
  dump: Split write of section headers and data and add a prepare step
  dump: Reorder struct DumpState
  dump/dump: Add section string table support
  dump/dump: Add arch section support
  linux header sync
  s390x: Add protected dump cap
  s390x: Introduce PV query interface
  s390x: Add KVM PV dump interface
  s390x: pv: Add dump support

 dump/dump.c                  | 443 ++++++++++++++++++++++-------------
 hw/s390x/pv.c                | 112 +++++++++
 hw/s390x/s390-virtio-ccw.c   |   5 +
 include/elf.h                |   1 +
 include/hw/s390x/pv.h        |  18 ++
 include/sysemu/dump-arch.h   |  27 +++
 include/sysemu/dump.h        |  70 +++++-
 linux-headers/linux/kvm.h    |  55 +++++
 target/s390x/arch_dump.c     | 248 +++++++++++++++++---
 target/s390x/kvm/kvm.c       |   7 +
 target/s390x/kvm/kvm_s390x.h |   1 +
 11 files changed, 780 insertions(+), 207 deletions(-)

-- 
2.34.1



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2 01/11] dump: Cleanup memblock usage
  2022-07-13 13:03 [PATCH v2 00/11] dump: Add arch section and s390x PV dump Janosch Frank
@ 2022-07-13 13:03 ` Janosch Frank
  2022-07-13 15:09   ` Marc-André Lureau
  2022-07-13 13:03 ` [PATCH v2 02/11] dump: Allocate header Janosch Frank
                   ` (9 subsequent siblings)
  10 siblings, 1 reply; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 13:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: marcandre.lureau, pbonzini, mhartmay, borntraeger, imbrenda,
	pasic, cohuck, thuth, qemu-s390x, richard.henderson

The iteration over the memblocks is hard to understand so it's about
time to clean it up.

struct DumpState's next_block and start members can and should be
local variables within the iterator.

Instead of manually grabbing the next memblock we can use
QTAILQ_FOREACH to iterate over all memblocks.

The begin and length fields in the DumpState have been left untouched
since the qmp arguments share their names.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 dump/dump.c           | 91 +++++++++++--------------------------------
 include/sysemu/dump.h | 47 +++++++++++++++++++---
 2 files changed, 65 insertions(+), 73 deletions(-)

diff --git a/dump/dump.c b/dump/dump.c
index 4d9658ffa2..6feba3cbfa 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -591,56 +591,27 @@ static void dump_begin(DumpState *s, Error **errp)
     write_elf_notes(s, errp);
 }
 
-static int get_next_block(DumpState *s, GuestPhysBlock *block)
-{
-    while (1) {
-        block = QTAILQ_NEXT(block, next);
-        if (!block) {
-            /* no more block */
-            return 1;
-        }
-
-        s->start = 0;
-        s->next_block = block;
-        if (s->has_filter) {
-            if (block->target_start >= s->begin + s->length ||
-                block->target_end <= s->begin) {
-                /* This block is out of the range */
-                continue;
-            }
-
-            if (s->begin > block->target_start) {
-                s->start = s->begin - block->target_start;
-            }
-        }
-
-        return 0;
-    }
-}
-
 /* write all memory to vmcore */
 static void dump_iterate(DumpState *s, Error **errp)
 {
     ERRP_GUARD();
     GuestPhysBlock *block;
-    int64_t size;
+    int64_t memblock_size, memblock_start;
 
-    do {
-        block = s->next_block;
-
-        size = block->target_end - block->target_start;
-        if (s->has_filter) {
-            size -= s->start;
-            if (s->begin + s->length < block->target_end) {
-                size -= block->target_end - (s->begin + s->length);
-            }
+    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
+        memblock_start = dump_get_memblock_start(block, s->begin, s->length);
+        if (memblock_start == -1) {
+            continue;
         }
-        write_memory(s, block, s->start, size, errp);
+
+        memblock_size = dump_get_memblock_size(block, s->begin, s->length);
+
+        /* Write the memory to file */
+        write_memory(s, block, memblock_start, memblock_size, errp);
         if (*errp) {
             return;
         }
-
-    } while (!get_next_block(s, block));
+    }
 }
 
 static void create_vmcore(DumpState *s, Error **errp)
@@ -1490,30 +1461,22 @@ static void create_kdump_vmcore(DumpState *s, Error **errp)
     }
 }
 
-static ram_addr_t get_start_block(DumpState *s)
+static int validate_start_block(DumpState *s)
 {
     GuestPhysBlock *block;
 
     if (!s->has_filter) {
-        s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
         return 0;
     }
 
     QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
+        /* This block is out of the range */
         if (block->target_start >= s->begin + s->length ||
             block->target_end <= s->begin) {
-            /* This block is out of the range */
             continue;
         }
-
-        s->next_block = block;
-        if (s->begin > block->target_start) {
-            s->start = s->begin - block->target_start;
-        } else {
-            s->start = 0;
-        }
-        return s->start;
-    }
+        return 0;
+   }
 
     return -1;
 }
@@ -1540,25 +1503,17 @@ bool qemu_system_dump_in_progress(void)
     return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE);
 }
 
-/* calculate total size of memory to be dumped (taking filter into
- * acoount.) */
+/*
+ * calculate total size of memory to be dumped (taking filter into
+ * account.)
+ */
 static int64_t dump_calculate_size(DumpState *s)
 {
     GuestPhysBlock *block;
-    int64_t size = 0, total = 0, left = 0, right = 0;
+    int64_t total = 0;
 
     QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
-        if (s->has_filter) {
-            /* calculate the overlapped region. */
-            left = MAX(s->begin, block->target_start);
-            right = MIN(s->begin + s->length, block->target_end);
-            size = right - left;
-            size = size > 0 ? size : 0;
-        } else {
-            /* count the whole region in */
-            size = (block->target_end - block->target_start);
-        }
-        total += size;
+        total += dump_get_memblock_size(block, s->begin, s->length);
     }
 
     return total;
@@ -1660,8 +1615,8 @@ static void dump_init(DumpState *s, int fd, bool has_format,
         goto cleanup;
     }
 
-    s->start = get_start_block(s);
-    if (s->start == -1) {
+    /* Is the filter filtering everything? */
+    if (validate_start_block(s) == -1) {
         error_setg(errp, QERR_INVALID_PARAMETER, "begin");
         goto cleanup;
     }
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index ffc2ea1072..f3bf98c220 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -166,11 +166,10 @@ typedef struct DumpState {
     hwaddr memory_offset;
     int fd;
 
-    GuestPhysBlock *next_block;
-    ram_addr_t start;
-    bool has_filter;
-    int64_t begin;
-    int64_t length;
+    /* Guest memory related data */
+    bool has_filter;           /* Are we dumping parts of the memory? */
+    int64_t begin;             /* Start address of the chunk we want to dump */
+    int64_t length;            /* Length of the dump we want to dump */
 
     uint8_t *note_buf;          /* buffer for notes */
     size_t note_buf_offset;     /* the writing place in note_buf */
@@ -203,4 +202,42 @@ typedef struct DumpState {
 uint16_t cpu_to_dump16(DumpState *s, uint16_t val);
 uint32_t cpu_to_dump32(DumpState *s, uint32_t val);
 uint64_t cpu_to_dump64(DumpState *s, uint64_t val);
+
+static inline int64_t dump_get_memblock_size(GuestPhysBlock *block, int64_t filter_area_start,
+                                             int64_t filter_area_length)
+{
+    int64_t size, left, right;
+
+    /* No filter, return full size */
+    if (!filter_area_length) {
+        return block->target_end - block->target_start;
+    }
+
+    /* calculate the overlapped region. */
+    left = MAX(filter_area_start, block->target_start);
+    right = MIN(filter_area_start + filter_area_length, block->target_end);
+    size = right - left;
+    size = size > 0 ? size : 0;
+
+    return size;
+}
+
+static inline int64_t dump_get_memblock_start(GuestPhysBlock *block, int64_t filter_area_start,
+                                  int64_t filter_area_length)
+{
+    if (filter_area_length) {
+        /*
+         * Check if block is within guest memory dump area. If not
+         * go to next one.
+         */
+        if (block->target_start >= filter_area_start + filter_area_length ||
+            block->target_end <= filter_area_start) {
+            return -1;
+        }
+        if (filter_area_start > block->target_start) {
+            return filter_area_start - block->target_start;
+        }
+    }
+    return block->target_start;
+}
 #endif
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 02/11] dump: Allocate header
  2022-07-13 13:03 [PATCH v2 00/11] dump: Add arch section and s390x PV dump Janosch Frank
  2022-07-13 13:03 ` [PATCH v2 01/11] dump: Cleanup memblock usage Janosch Frank
@ 2022-07-13 13:03 ` Janosch Frank
  2022-07-13 15:20   ` Marc-André Lureau
  2022-07-13 13:03 ` [PATCH v2 03/11] dump: Split write of section headers and data and add a prepare step Janosch Frank
                   ` (8 subsequent siblings)
  10 siblings, 1 reply; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 13:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: marcandre.lureau, pbonzini, mhartmay, borntraeger, imbrenda,
	pasic, cohuck, thuth, qemu-s390x, richard.henderson

Allocating the header lets us write it at a later time and hence also
allows us to change section and segment table offsets until we
finally write it.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 dump/dump.c           | 127 +++++++++++++++++++++---------------------
 include/sysemu/dump.h |   1 +
 2 files changed, 64 insertions(+), 64 deletions(-)

diff --git a/dump/dump.c b/dump/dump.c
index 6feba3cbfa..16d7474258 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -98,6 +98,7 @@ static int dump_cleanup(DumpState *s)
     memory_mapping_list_free(&s->list);
     close(s->fd);
     g_free(s->guest_note);
+    g_free(s->elf_header);
     s->guest_note = NULL;
     if (s->resume) {
         if (s->detached) {
@@ -126,73 +127,49 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
     return 0;
 }
 
-static void write_elf64_header(DumpState *s, Error **errp)
+static void prepare_elf64_header(DumpState *s)
 {
-    /*
-     * phnum in the elf header is 16 bit, if we have more segments we
-     * set phnum to PN_XNUM and write the real number of segments to a
-     * special section.
-     */
-    uint16_t phnum = MIN(s->phdr_num, PN_XNUM);
-    Elf64_Ehdr elf_header;
-    int ret;
+    uint16_t phnum = s->phdr_num >= PN_XNUM ? PN_XNUM : s->phdr_num;
+    Elf64_Ehdr *elf_header = s->elf_header;
 
-    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
-    memcpy(&elf_header, ELFMAG, SELFMAG);
-    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
-    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
-    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
-    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
-    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
-    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
-    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
-    elf_header.e_phoff = cpu_to_dump64(s, s->phdr_offset);
-    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
-    elf_header.e_phnum = cpu_to_dump16(s, phnum);
+    memcpy(elf_header, ELFMAG, SELFMAG);
+    elf_header->e_ident[EI_CLASS] = ELFCLASS64;
+    elf_header->e_ident[EI_DATA] = s->dump_info.d_endian;
+    elf_header->e_ident[EI_VERSION] = EV_CURRENT;
+    elf_header->e_type = cpu_to_dump16(s, ET_CORE);
+    elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
+    elf_header->e_version = cpu_to_dump32(s, EV_CURRENT);
+    elf_header->e_ehsize = cpu_to_dump16(s, sizeof(*elf_header));
+    elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset);
+    elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
+    elf_header->e_phnum = cpu_to_dump16(s, phnum);
     if (s->shdr_num) {
-        elf_header.e_shoff = cpu_to_dump64(s, s->shdr_offset);
-        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
-        elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num);
-    }
-
-    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "dump: failed to write elf header");
+        elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset);
+        elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
+        elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
     }
 }
 
-static void write_elf32_header(DumpState *s, Error **errp)
+static void prepare_elf32_header(DumpState *s)
 {
-    /*
-     * phnum in the elf header is 16 bit, if we have more segments we
-     * set phnum to PN_XNUM and write the real number of segments to a
-     * special section.
-     */
-    uint16_t phnum = MIN(s->phdr_num, PN_XNUM);
-    Elf32_Ehdr elf_header;
-    int ret;
+    uint16_t phnum = s->phdr_num >= PN_XNUM ? PN_XNUM : s->phdr_num;
+    Elf32_Ehdr *elf_header = s->elf_header;
 
-    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
-    memcpy(&elf_header, ELFMAG, SELFMAG);
-    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
-    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
-    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
-    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
-    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
-    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
-    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
-    elf_header.e_phoff = cpu_to_dump32(s, s->phdr_offset);
-    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
-    elf_header.e_phnum = cpu_to_dump16(s, phnum);
+    memcpy(elf_header, ELFMAG, SELFMAG);
+    elf_header->e_ident[EI_CLASS] = ELFCLASS32;
+    elf_header->e_ident[EI_DATA] = s->dump_info.d_endian;
+    elf_header->e_ident[EI_VERSION] = EV_CURRENT;
+    elf_header->e_type = cpu_to_dump16(s, ET_CORE);
+    elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
+    elf_header->e_version = cpu_to_dump32(s, EV_CURRENT);
+    elf_header->e_ehsize = cpu_to_dump16(s, sizeof(*elf_header));
+    elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset);
+    elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
+    elf_header->e_phnum = cpu_to_dump16(s, phnum);
     if (s->shdr_num) {
-        elf_header.e_shoff = cpu_to_dump32(s, s->shdr_offset);
-        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
-        elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num);
-    }
-
-    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "dump: failed to write elf header");
+        elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset);
+        elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
+        elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
     }
 }
 
@@ -528,6 +505,26 @@ static void write_elf_notes(DumpState *s, Error **errp)
     }
 }
 
+static void prepare_elf_header(DumpState *s)
+{
+    if (dump_is_64bit(s)) {
+        prepare_elf64_header(s);
+    } else {
+        prepare_elf32_header(s);
+    }
+}
+
+static void write_elf_header(DumpState *s, Error **errp)
+{
+    size_t size = dump_is_64bit(s) ? sizeof(Elf64_Ehdr) : sizeof(Elf32_Ehdr);
+    int ret;
+
+    ret = fd_write_vmcore(s->elf_header, size, s);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "dump: failed to write elf header");
+    }
+}
+
 /* write elf header, PT_NOTE and elf note to vmcore. */
 static void dump_begin(DumpState *s, Error **errp)
 {
@@ -557,12 +554,11 @@ static void dump_begin(DumpState *s, Error **errp)
      * vmcore.
      */
 
-    /* write elf header to vmcore */
-    if (dump_is_64bit(s)) {
-        write_elf64_header(s, errp);
-    } else {
-        write_elf32_header(s, errp);
-    }
+    /* Write elf header to buffer */
+    prepare_elf_header(s);
+
+    /* Start to write stuff into files*/
+    write_elf_header(s, errp);
     if (*errp) {
         return;
     }
@@ -1642,6 +1638,9 @@ static void dump_init(DumpState *s, int fd, bool has_format,
         goto cleanup;
     }
 
+    s->elf_header = g_malloc0(dump_is_64bit(s) ?
+                              sizeof(Elf64_Ehdr) : sizeof(Elf32_Ehdr));
+
     /*
      * The goal of this block is to (a) update the previously guessed
      * phys_base, (b) copy the guest note out of the guest.
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index f3bf98c220..736f681d01 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -171,6 +171,7 @@ typedef struct DumpState {
     int64_t begin;             /* Start address of the chunk we want to dump */
     int64_t length;            /* Length of the dump we want to dump */
 
+    void *elf_header;
     uint8_t *note_buf;          /* buffer for notes */
     size_t note_buf_offset;     /* the writing place in note_buf */
     uint32_t nr_cpus;           /* number of guest's cpu */
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 03/11] dump: Split write of section headers and data and add a prepare step
  2022-07-13 13:03 [PATCH v2 00/11] dump: Add arch section and s390x PV dump Janosch Frank
  2022-07-13 13:03 ` [PATCH v2 01/11] dump: Cleanup memblock usage Janosch Frank
  2022-07-13 13:03 ` [PATCH v2 02/11] dump: Allocate header Janosch Frank
@ 2022-07-13 13:03 ` Janosch Frank
  2022-07-13 15:31   ` Marc-André Lureau
  2022-07-13 13:03 ` [PATCH v2 04/11] dump: Reorder struct DumpState Janosch Frank
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 13:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: marcandre.lureau, pbonzini, mhartmay, borntraeger, imbrenda,
	pasic, cohuck, thuth, qemu-s390x, richard.henderson

By splitting the writing of the section headers and (future) section
data we prepare for the addition of a string table section and
architecture sections.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 dump/dump.c           | 116 ++++++++++++++++++++++++++++++++----------
 include/sysemu/dump.h |   4 ++
 2 files changed, 94 insertions(+), 26 deletions(-)

diff --git a/dump/dump.c b/dump/dump.c
index 16d7474258..467d934bc1 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -342,30 +342,73 @@ static void write_elf_phdr_note(DumpState *s, Error **errp)
     }
 }
 
-static void write_elf_section(DumpState *s, int type, Error **errp)
+static size_t write_elf_section_hdr_zero(DumpState *s, void *buff)
 {
-    Elf32_Shdr shdr32;
-    Elf64_Shdr shdr64;
-    int shdr_size;
-    void *shdr;
-    int ret;
+    if (dump_is_64bit(s)) {
+        Elf64_Shdr *shdr64 = buff;
 
-    if (type == 0) {
-        shdr_size = sizeof(Elf32_Shdr);
-        memset(&shdr32, 0, shdr_size);
-        shdr32.sh_info = cpu_to_dump32(s, s->phdr_num);
-        shdr = &shdr32;
+        memset(buff, 0, sizeof(Elf64_Shdr));
+        shdr64->sh_info = cpu_to_dump32(s, s->phdr_num);
     } else {
-        shdr_size = sizeof(Elf64_Shdr);
-        memset(&shdr64, 0, shdr_size);
-        shdr64.sh_info = cpu_to_dump32(s, s->phdr_num);
-        shdr = &shdr64;
+        Elf32_Shdr *shdr32 = buff;
+
+        memset(buff, 0, sizeof(Elf32_Shdr));
+        shdr32->sh_info = cpu_to_dump32(s, s->phdr_num);
     }
 
-    ret = fd_write_vmcore(shdr, shdr_size, s);
+    return dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
+}
+
+static void prepare_elf_section_hdrs(DumpState *s)
+{
+    uint8_t *buff_hdr;
+    size_t len, sizeof_shdr;
+
+    /*
+     * Section ordering:
+     * - HDR zero (if needed)
+     */
+    sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
+    len = sizeof_shdr * s->shdr_num;
+    s->elf_section_hdrs = g_malloc0(len);
+    buff_hdr = s->elf_section_hdrs;
+
+    /* Write special section first */
+    if (s->phdr_num == PN_XNUM) {
+            write_elf_section_hdr_zero(s, buff_hdr);
+    }
+}
+
+static void prepare_elf_sections(DumpState *s, Error **errp)
+{
+    if (!s->shdr_num) {
+        return;
+    }
+
+    prepare_elf_section_hdrs(s);
+}
+
+static void write_elf_section_headers(DumpState *s, Error **errp)
+{
+    size_t sizeof_shdr;
+    int ret;
+
+    sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
+
+    ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s);
     if (ret < 0) {
-        error_setg_errno(errp, -ret,
-                         "dump: failed to write section header table");
+        error_setg_errno(errp, -ret, "dump: failed to write section data");
+    }
+}
+
+static void write_elf_sections(DumpState *s, Error **errp)
+{
+    int ret;
+
+    /* Write section zero */
+    ret = fd_write_vmcore(s->elf_section_data, s->elf_section_data_size, s);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "dump: failed to write section data");
     }
 }
 
@@ -557,12 +600,22 @@ static void dump_begin(DumpState *s, Error **errp)
     /* Write elf header to buffer */
     prepare_elf_header(s);
 
+    prepare_elf_sections(s, errp);
+    if (*errp) {
+        return;
+    }
+
     /* Start to write stuff into files*/
     write_elf_header(s, errp);
     if (*errp) {
         return;
     }
 
+    write_elf_section_headers(s, errp);
+    if (*errp) {
+        return;
+    }
+
     /* write PT_NOTE to vmcore */
     write_elf_phdr_note(s, errp);
     if (*errp) {
@@ -575,14 +628,6 @@ static void dump_begin(DumpState *s, Error **errp)
         return;
     }
 
-    /* write section to vmcore */
-    if (s->shdr_num) {
-        write_elf_section(s, 1, errp);
-        if (*errp) {
-            return;
-        }
-    }
-
     /* write notes to vmcore */
     write_elf_notes(s, errp);
 }
@@ -610,6 +655,19 @@ static void dump_iterate(DumpState *s, Error **errp)
     }
 }
 
+static void dump_end(DumpState *s, Error **errp)
+{
+    ERRP_GUARD();
+
+    if (!s->elf_section_data_size) {
+        return;
+    }
+    s->elf_section_data = g_malloc0(s->elf_section_data_size);
+
+    /* write sections to vmcore */
+    write_elf_sections(s, errp);
+}
+
 static void create_vmcore(DumpState *s, Error **errp)
 {
     ERRP_GUARD();
@@ -620,6 +678,12 @@ static void create_vmcore(DumpState *s, Error **errp)
     }
 
     dump_iterate(s, errp);
+    if (*errp) {
+        return;
+    }
+
+    /* Write section data after memory has been dumped */
+    dump_end(s, errp);
 }
 
 static int write_start_flat_header(int fd)
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index 736f681d01..bd49532232 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -172,6 +172,10 @@ typedef struct DumpState {
     int64_t length;            /* Length of the dump we want to dump */
 
     void *elf_header;
+    void *elf_section_hdrs;
+    uint64_t elf_section_data_size;
+    void *elf_section_data;
+
     uint8_t *note_buf;          /* buffer for notes */
     size_t note_buf_offset;     /* the writing place in note_buf */
     uint32_t nr_cpus;           /* number of guest's cpu */
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 04/11] dump: Reorder struct DumpState
  2022-07-13 13:03 [PATCH v2 00/11] dump: Add arch section and s390x PV dump Janosch Frank
                   ` (2 preceding siblings ...)
  2022-07-13 13:03 ` [PATCH v2 03/11] dump: Split write of section headers and data and add a prepare step Janosch Frank
@ 2022-07-13 13:03 ` Janosch Frank
  2022-07-13 15:46   ` Marc-André Lureau
  2022-07-13 13:03 ` [PATCH v2 05/11] dump/dump: Add section string table support Janosch Frank
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 13:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: marcandre.lureau, pbonzini, mhartmay, borntraeger, imbrenda,
	pasic, cohuck, thuth, qemu-s390x, richard.henderson

Let's move ELF related members into one block and guest memory related
ones into another to improve readability.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/sysemu/dump.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index bd49532232..8379e29ef6 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -154,15 +154,8 @@ typedef struct DumpState {
     GuestPhysBlockList guest_phys_blocks;
     ArchDumpInfo dump_info;
     MemoryMappingList list;
-    uint32_t phdr_num;
-    uint32_t shdr_num;
     bool resume;
     bool detached;
-    ssize_t note_size;
-    hwaddr shdr_offset;
-    hwaddr phdr_offset;
-    hwaddr section_offset;
-    hwaddr note_offset;
     hwaddr memory_offset;
     int fd;
 
@@ -171,6 +164,16 @@ typedef struct DumpState {
     int64_t begin;             /* Start address of the chunk we want to dump */
     int64_t length;            /* Length of the dump we want to dump */
 
+    /* Elf dump related data */
+    uint32_t phdr_num;
+    uint32_t shdr_num;
+    uint32_t sh_info;
+    ssize_t note_size;
+    hwaddr shdr_offset;
+    hwaddr phdr_offset;
+    hwaddr note_offset;
+    hwaddr section_offset;
+
     void *elf_header;
     void *elf_section_hdrs;
     uint64_t elf_section_data_size;
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 05/11] dump/dump: Add section string table support
  2022-07-13 13:03 [PATCH v2 00/11] dump: Add arch section and s390x PV dump Janosch Frank
                   ` (3 preceding siblings ...)
  2022-07-13 13:03 ` [PATCH v2 04/11] dump: Reorder struct DumpState Janosch Frank
@ 2022-07-13 13:03 ` Janosch Frank
  2022-07-13 15:58   ` Marc-André Lureau
  2022-07-13 13:03 ` [PATCH v2 06/11] dump/dump: Add arch section support Janosch Frank
                   ` (5 subsequent siblings)
  10 siblings, 1 reply; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 13:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: marcandre.lureau, pbonzini, mhartmay, borntraeger, imbrenda,
	pasic, cohuck, thuth, qemu-s390x, richard.henderson

Time to add a bit more descriptiveness to the dumps.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 dump/dump.c           | 106 ++++++++++++++++++++++++++++++++++++------
 include/sysemu/dump.h |   1 +
 2 files changed, 94 insertions(+), 13 deletions(-)

diff --git a/dump/dump.c b/dump/dump.c
index 467d934bc1..31e2a85372 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -99,6 +99,7 @@ static int dump_cleanup(DumpState *s)
     close(s->fd);
     g_free(s->guest_note);
     g_free(s->elf_header);
+    g_array_unref(s->string_table_buf);
     s->guest_note = NULL;
     if (s->resume) {
         if (s->detached) {
@@ -359,14 +360,47 @@ static size_t write_elf_section_hdr_zero(DumpState *s, void *buff)
     return dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
 }
 
+static void write_elf_section_hdr_string(DumpState *s, void *buff)
+{
+    Elf32_Shdr shdr32;
+    Elf64_Shdr shdr64;
+    int shdr_size;
+    void *shdr = buff;
+
+    if (dump_is_64bit(s)) {
+        shdr_size = sizeof(Elf64_Shdr);
+        memset(&shdr64, 0, shdr_size);
+        shdr64.sh_type = SHT_STRTAB;
+        shdr64.sh_offset = s->section_offset + s->elf_section_data_size;
+        shdr64.sh_name = s->string_table_buf->len;
+        g_array_append_vals(s->string_table_buf, ".strtab", sizeof(".strtab"));
+        shdr64.sh_size = s->string_table_buf->len;
+        shdr = &shdr64;
+    } else {
+        shdr_size = sizeof(Elf32_Shdr);
+        memset(&shdr32, 0, shdr_size);
+        shdr32.sh_type = SHT_STRTAB;
+        shdr32.sh_offset = s->section_offset + s->elf_section_data_size;
+        shdr32.sh_name = s->string_table_buf->len;
+        g_array_append_vals(s->string_table_buf, ".strtab", sizeof(".strtab"));
+        shdr32.sh_size = s->string_table_buf->len;
+        shdr = &shdr32;
+    }
+
+    memcpy(buff, shdr, shdr_size);
+}
+
 static void prepare_elf_section_hdrs(DumpState *s)
 {
     uint8_t *buff_hdr;
-    size_t len, sizeof_shdr;
+    size_t len, size = 0, sizeof_shdr;
+    Elf64_Ehdr *hdr64 = s->elf_header;
+    Elf32_Ehdr *hdr32 = s->elf_header;
 
     /*
      * Section ordering:
      * - HDR zero (if needed)
+     * - String table hdr
      */
     sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
     len = sizeof_shdr * s->shdr_num;
@@ -377,6 +411,22 @@ static void prepare_elf_section_hdrs(DumpState *s)
     if (s->phdr_num == PN_XNUM) {
             write_elf_section_hdr_zero(s, buff_hdr);
     }
+    buff_hdr += size;
+
+    if (s->shdr_num < 2) {
+        return;
+    }
+
+    /*
+     * String table needs to be last section since strings are added
+     * via arch_sections_write_hdr().
+     */
+    write_elf_section_hdr_string(s, buff_hdr);
+    if (dump_is_64bit(s)) {
+        hdr64->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
+    } else {
+        hdr32->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
+    }
 }
 
 static void prepare_elf_sections(DumpState *s, Error **errp)
@@ -405,11 +455,18 @@ static void write_elf_sections(DumpState *s, Error **errp)
 {
     int ret;
 
-    /* Write section zero */
+    /* Write section zero and arch sections */
     ret = fd_write_vmcore(s->elf_section_data, s->elf_section_data_size, s);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "dump: failed to write section data");
     }
+
+    /* Write string table data */
+    ret = fd_write_vmcore(s->string_table_buf->data,
+                          s->string_table_buf->len, s);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "dump: failed to write string table data");
+    }
 }
 
 static void write_data(DumpState *s, void *buf, int length, Error **errp)
@@ -592,6 +649,9 @@ static void dump_begin(DumpState *s, Error **errp)
      *   --------------
      *   |  memory     |
      *   --------------
+     *   |  sectn data |
+     *   --------------
+
      *
      * we only know where the memory is saved after we write elf note into
      * vmcore.
@@ -677,6 +737,7 @@ static void create_vmcore(DumpState *s, Error **errp)
         return;
     }
 
+    /* Iterate over memory and dump it to file */
     dump_iterate(s, errp);
     if (*errp) {
         return;
@@ -1659,6 +1720,13 @@ static void dump_init(DumpState *s, int fd, bool has_format,
     s->has_filter = has_filter;
     s->begin = begin;
     s->length = length;
+    /* First index is 0, it's the special null name */
+    s->string_table_buf = g_array_new(FALSE, TRUE, 1);
+    /*
+     * Allocate the null name, due to the clearing option set to true
+     * it will be 0.
+     */
+    g_array_set_size(s->string_table_buf, 1);
 
     memory_mapping_list_init(&s->list);
 
@@ -1819,19 +1887,31 @@ static void dump_init(DumpState *s, int fd, bool has_format,
         }
     }
 
-    if (dump_is_64bit(s)) {
-        s->phdr_offset = sizeof(Elf64_Ehdr);
-        s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
-        s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
-        s->memory_offset = s->note_offset + s->note_size;
-    } else {
-
-        s->phdr_offset = sizeof(Elf32_Ehdr);
-        s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
-        s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
-        s->memory_offset = s->note_offset + s->note_size;
+    /*
+     * calculate shdr_num and elf_section_data_size so we know the offsets and
+     * sizes of all parts.
+     *
+     * If phdr_num overflowed we have at least one section header
+     * More sections/hdrs can be added by the architectures
+     */
+    if (s->shdr_num > 1) {
+        /* Reserve the string table */
+        s->shdr_num += 1;
     }
 
+    tmp = (s->phdr_num == PN_XNUM) ? s->sh_info : s->phdr_num;
+    if (dump_is_64bit(s)) {
+        s->shdr_offset = sizeof(Elf64_Ehdr);
+        s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
+        s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * tmp;
+    } else {
+        s->shdr_offset = sizeof(Elf32_Ehdr);
+        s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
+        s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * tmp;
+    }
+    s->memory_offset = s->note_offset + s->note_size;
+    s->section_offset = s->memory_offset + s->total_size;
+
     return;
 
 cleanup:
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index 8379e29ef6..2c25c7d309 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -178,6 +178,7 @@ typedef struct DumpState {
     void *elf_section_hdrs;
     uint64_t elf_section_data_size;
     void *elf_section_data;
+    GArray *string_table_buf;  /* String table section */
 
     uint8_t *note_buf;          /* buffer for notes */
     size_t note_buf_offset;     /* the writing place in note_buf */
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 06/11] dump/dump: Add arch section support
  2022-07-13 13:03 [PATCH v2 00/11] dump: Add arch section and s390x PV dump Janosch Frank
                   ` (4 preceding siblings ...)
  2022-07-13 13:03 ` [PATCH v2 05/11] dump/dump: Add section string table support Janosch Frank
@ 2022-07-13 13:03 ` Janosch Frank
  2022-07-13 16:02   ` Marc-André Lureau
  2022-07-13 13:03 ` [PATCH v2 07/11] linux header sync Janosch Frank
                   ` (4 subsequent siblings)
  10 siblings, 1 reply; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 13:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: marcandre.lureau, pbonzini, mhartmay, borntraeger, imbrenda,
	pasic, cohuck, thuth, qemu-s390x, richard.henderson

Add hooks which architectures can use to add arbitrary data to custom
sections.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 dump/dump.c                | 21 ++++++++++++++++++---
 include/sysemu/dump-arch.h | 27 +++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/dump/dump.c b/dump/dump.c
index 31e2a85372..02de00b6de 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -400,6 +400,7 @@ static void prepare_elf_section_hdrs(DumpState *s)
     /*
      * Section ordering:
      * - HDR zero (if needed)
+     * - Arch section hdrs
      * - String table hdr
      */
     sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
@@ -417,6 +418,9 @@ static void prepare_elf_section_hdrs(DumpState *s)
         return;
     }
 
+    size = dump_arch_sections_write_hdr(&s->dump_info, s, buff_hdr);
+    buff_hdr += size;
+
     /*
      * String table needs to be last section since strings are added
      * via arch_sections_write_hdr().
@@ -567,14 +571,23 @@ static void get_offset_range(hwaddr phys_addr,
     }
 }
 
-static void write_elf_loads(DumpState *s, Error **errp)
+static void write_elf_phdr_loads(DumpState *s, Error **errp)
 {
     ERRP_GUARD();
     hwaddr offset, filesz;
     MemoryMapping *memory_mapping;
     uint32_t phdr_index = 1;
+    hwaddr min = 0, max = 0;
 
     QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
+        if (memory_mapping->phys_addr < min) {
+            min = memory_mapping->phys_addr;
+        }
+        if (memory_mapping->phys_addr + memory_mapping->length > max) {
+            max = memory_mapping->phys_addr + memory_mapping->length;
+        }
+
+
         get_offset_range(memory_mapping->phys_addr,
                          memory_mapping->length,
                          s, &offset, &filesz);
@@ -682,8 +695,8 @@ static void dump_begin(DumpState *s, Error **errp)
         return;
     }
 
-    /* write all PT_LOAD to vmcore */
-    write_elf_loads(s, errp);
+    /* write all PT_LOADs to vmcore */
+    write_elf_phdr_loads(s, errp);
     if (*errp) {
         return;
     }
@@ -723,6 +736,7 @@ static void dump_end(DumpState *s, Error **errp)
         return;
     }
     s->elf_section_data = g_malloc0(s->elf_section_data_size);
+    dump_arch_sections_write(&s->dump_info, s, s->elf_section_data);
 
     /* write sections to vmcore */
     write_elf_sections(s, errp);
@@ -1894,6 +1908,7 @@ static void dump_init(DumpState *s, int fd, bool has_format,
      * If phdr_num overflowed we have at least one section header
      * More sections/hdrs can be added by the architectures
      */
+    dump_arch_sections_add(&s->dump_info, (void *)s);
     if (s->shdr_num > 1) {
         /* Reserve the string table */
         s->shdr_num += 1;
diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h
index e25b02e990..de77908424 100644
--- a/include/sysemu/dump-arch.h
+++ b/include/sysemu/dump-arch.h
@@ -21,6 +21,9 @@ typedef struct ArchDumpInfo {
     uint32_t page_size;      /* The target's page size. If it's variable and
                               * unknown, then this should be the maximum. */
     uint64_t phys_base;      /* The target's physmem base. */
+    void (*arch_sections_add_fn)(void *opaque);
+    uint64_t (*arch_sections_write_hdr_fn)(void *opaque, uint8_t *buff);
+    void (*arch_sections_write_fn)(void *opaque, uint8_t *buff);
 } ArchDumpInfo;
 
 struct GuestPhysBlockList; /* memory_mapping.h */
@@ -28,4 +31,28 @@ int cpu_get_dump_info(ArchDumpInfo *info,
                       const struct GuestPhysBlockList *guest_phys_blocks);
 ssize_t cpu_get_note_size(int class, int machine, int nr_cpus);
 
+static inline void dump_arch_sections_add(ArchDumpInfo *info, void *opaque)
+{
+    if (info->arch_sections_add_fn) {
+        info->arch_sections_add_fn(opaque);
+    }
+}
+
+static inline uint64_t dump_arch_sections_write_hdr(ArchDumpInfo *info,
+                                                void *opaque, uint8_t *buff)
+{
+    if (info->arch_sections_write_hdr_fn) {
+        return info->arch_sections_write_hdr_fn(opaque, buff);
+    }
+    return 0;
+}
+
+static inline void dump_arch_sections_write(ArchDumpInfo *info, void *opaque,
+                                            uint8_t *buff)
+{
+    if (info->arch_sections_write_fn) {
+        info->arch_sections_write_fn(opaque, buff);
+    }
+}
+
 #endif
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 07/11] linux header sync
  2022-07-13 13:03 [PATCH v2 00/11] dump: Add arch section and s390x PV dump Janosch Frank
                   ` (5 preceding siblings ...)
  2022-07-13 13:03 ` [PATCH v2 06/11] dump/dump: Add arch section support Janosch Frank
@ 2022-07-13 13:03 ` Janosch Frank
  2022-07-13 16:03   ` Marc-André Lureau
  2022-07-13 13:03 ` [PATCH v2 08/11] s390x: Add protected dump cap Janosch Frank
                   ` (3 subsequent siblings)
  10 siblings, 1 reply; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 13:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: marcandre.lureau, pbonzini, mhartmay, borntraeger, imbrenda,
	pasic, cohuck, thuth, qemu-s390x, richard.henderson

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 linux-headers/linux/kvm.h | 55 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 0d05d02ee4..ae5db2e44c 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1150,6 +1150,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_DISABLE_QUIRKS2 213
 /* #define KVM_CAP_VM_TSC_CONTROL 214 */
 #define KVM_CAP_SYSTEM_EVENT_DATA 215
+#define KVM_CAP_S390_PROTECTED_DUMP 217
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1651,6 +1652,55 @@ struct kvm_s390_pv_unp {
 	__u64 tweak;
 };
 
+enum pv_cmd_info_id {
+	KVM_PV_INFO_VM,
+	KVM_PV_INFO_DUMP,
+};
+
+struct kvm_s390_pv_info_dump {
+	__u64 dump_cpu_buffer_len;
+	__u64 dump_config_mem_buffer_per_1m;
+	__u64 dump_config_finalize_len;
+};
+
+struct kvm_s390_pv_info_vm {
+	__u64 inst_calls_list[4];
+	__u64 max_cpus;
+	__u64 max_guests;
+	__u64 max_guest_addr;
+	__u64 feature_indication;
+};
+
+struct kvm_s390_pv_info_header {
+	__u32 id;
+	__u32 len_max;
+	__u32 len_written;
+	__u32 reserved;
+};
+
+struct kvm_s390_pv_info {
+	struct kvm_s390_pv_info_header header;
+	union {
+		struct kvm_s390_pv_info_dump dump;
+		struct kvm_s390_pv_info_vm vm;
+	};
+};
+
+enum pv_cmd_dmp_id {
+        KVM_PV_DUMP_INIT,
+        KVM_PV_DUMP_CONFIG_STATE,
+        KVM_PV_DUMP_COMPLETE,
+        KVM_PV_DUMP_CPU,
+};
+
+struct kvm_s390_pv_dmp {
+        __u64 subcmd;
+        __u64 buff_addr;
+        __u64 buff_len;
+        __u64 gaddr;
+        __u64 reserved[4];
+};
+
 enum pv_cmd_id {
 	KVM_PV_ENABLE,
 	KVM_PV_DISABLE,
@@ -1659,6 +1709,8 @@ enum pv_cmd_id {
 	KVM_PV_VERIFY,
 	KVM_PV_PREP_RESET,
 	KVM_PV_UNSHARE_ALL,
+        KVM_PV_INFO,
+        KVM_PV_DUMP,
 };
 
 struct kvm_pv_cmd {
@@ -1733,6 +1785,7 @@ struct kvm_xen_vcpu_attr {
 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA	0x4
 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST	0x5
 
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
 	/* Guest initialization commands */
@@ -2066,4 +2119,6 @@ struct kvm_stats_desc {
 /* Available with KVM_CAP_XSAVE2 */
 #define KVM_GET_XSAVE2		  _IOR(KVMIO,  0xcf, struct kvm_xsave)
 
+#define KVM_S390_PV_CPU_COMMAND _IOWR(KVMIO, 0xd0, struct kvm_pv_cmd)
+
 #endif /* __LINUX_KVM_H */
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 08/11] s390x: Add protected dump cap
  2022-07-13 13:03 [PATCH v2 00/11] dump: Add arch section and s390x PV dump Janosch Frank
                   ` (6 preceding siblings ...)
  2022-07-13 13:03 ` [PATCH v2 07/11] linux header sync Janosch Frank
@ 2022-07-13 13:03 ` Janosch Frank
  2022-07-13 13:03 ` [PATCH v2 09/11] s390x: Introduce PV query interface Janosch Frank
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 13:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: marcandre.lureau, pbonzini, mhartmay, borntraeger, imbrenda,
	pasic, cohuck, thuth, qemu-s390x, richard.henderson

Add a protected dump capability for later feature checking.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 target/s390x/kvm/kvm.c       | 7 +++++++
 target/s390x/kvm/kvm_s390x.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 7bd8db0e7b..cbd8c91424 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -157,6 +157,7 @@ static int cap_ri;
 static int cap_hpage_1m;
 static int cap_vcpu_resets;
 static int cap_protected;
+static int cap_protected_dump;
 
 static bool mem_op_storage_key_support;
 
@@ -362,6 +363,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ);
     cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS);
     cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED);
+    cap_protected_dump = kvm_check_extension(s, KVM_CAP_S390_PROTECTED_DUMP);
 
     kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0);
     kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0);
@@ -2043,6 +2045,11 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch,
     return kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
 }
 
+int kvm_s390_get_protected_dump(void)
+{
+    return cap_protected_dump;
+}
+
 int kvm_s390_get_ri(void)
 {
     return cap_ri;
diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h
index 05a5e1e6f4..31a69f9ce2 100644
--- a/target/s390x/kvm/kvm_s390x.h
+++ b/target/s390x/kvm/kvm_s390x.h
@@ -26,6 +26,7 @@ int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state);
 void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu);
 int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu);
 int kvm_s390_get_hpage_1m(void);
+int kvm_s390_get_protected_dump(void);
 int kvm_s390_get_ri(void);
 int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock);
 int kvm_s390_get_clock_ext(uint8_t *tod_high, uint64_t *tod_clock);
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 09/11] s390x: Introduce PV query interface
  2022-07-13 13:03 [PATCH v2 00/11] dump: Add arch section and s390x PV dump Janosch Frank
                   ` (7 preceding siblings ...)
  2022-07-13 13:03 ` [PATCH v2 08/11] s390x: Add protected dump cap Janosch Frank
@ 2022-07-13 13:03 ` Janosch Frank
  2022-07-15  8:10   ` Marc-André Lureau
  2022-07-13 13:03 ` [PATCH v2 10/11] s390x: Add KVM PV dump interface Janosch Frank
  2022-07-13 13:03 ` [PATCH v2 11/11] s390x: pv: Add dump support Janosch Frank
  10 siblings, 1 reply; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 13:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: marcandre.lureau, pbonzini, mhartmay, borntraeger, imbrenda,
	pasic, cohuck, thuth, qemu-s390x, richard.henderson

Introduce an interface for querying information about Ultravisor (UV) data.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 hw/s390x/pv.c              | 61 ++++++++++++++++++++++++++++++++++++++
 hw/s390x/s390-virtio-ccw.c |  5 ++++
 include/hw/s390x/pv.h      | 10 +++++++
 3 files changed, 76 insertions(+)

diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
index 401b63d6cb..a5af4ddf46 100644
--- a/hw/s390x/pv.c
+++ b/hw/s390x/pv.c
@@ -20,6 +20,11 @@
 #include "exec/confidential-guest-support.h"
 #include "hw/s390x/ipl.h"
 #include "hw/s390x/pv.h"
+#include "target/s390x/kvm/kvm_s390x.h"
+
+static bool info_valid;
+static struct kvm_s390_pv_info_vm info_vm;
+static struct kvm_s390_pv_info_dump info_dump;
 
 static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
 {
@@ -56,6 +61,42 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
     }                                  \
 }
 
+int s390_pv_query_info(void)
+{
+    struct kvm_s390_pv_info info = {
+        .header.id = KVM_PV_INFO_VM,
+        .header.len_max = sizeof(info.header) + sizeof(info.vm),
+    };
+    int rc;
+
+    /* Info API's first user is dump so they are bundled */
+    if (!kvm_s390_get_protected_dump()) {
+        return 0;
+    }
+
+    rc = s390_pv_cmd(KVM_PV_INFO, &info);
+    if (rc) {
+        error_report("KVM PV INFO cmd %x failed: %s",
+                     info.header.id, strerror(rc));
+        return rc;
+    }
+    memcpy(&info_vm, &info.vm, sizeof(info.vm));
+
+    info.header.id = KVM_PV_INFO_DUMP;
+    info.header.len_max = sizeof(info.header) + sizeof(info.dump);
+    rc = s390_pv_cmd(KVM_PV_INFO, &info);
+    if (rc) {
+        error_report("KVM PV INFO cmd %x failed: %s",
+                     info.header.id, strerror(rc));
+        return rc;
+    }
+
+    memcpy(&info_dump, &info.dump, sizeof(info.dump));
+    info_valid = true;
+
+    return rc;
+}
+
 int s390_pv_vm_enable(void)
 {
     return s390_pv_cmd(KVM_PV_ENABLE, NULL);
@@ -114,6 +155,26 @@ void s390_pv_inject_reset_error(CPUState *cs)
     env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV;
 }
 
+uint64_t kvm_s390_pv_dmp_get_size_cpu(void)
+{
+    return info_dump.dump_cpu_buffer_len;
+}
+
+uint64_t kvm_s390_pv_dmp_get_size_complete(void)
+{
+    return info_dump.dump_config_finalize_len;
+}
+
+uint64_t kvm_s390_pv_dmp_get_size_mem(void)
+{
+    return info_dump.dump_config_mem_buffer_per_1m;
+}
+
+bool kvm_s390_pv_info_basic_valid(void)
+{
+    return info_valid;
+}
+
 #define TYPE_S390_PV_GUEST "s390-pv-guest"
 OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST)
 
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index cc3097bfee..f9401e392b 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -366,6 +366,11 @@ static int s390_machine_protect(S390CcwMachineState *ms)
 
     ms->pv = true;
 
+    rc = s390_pv_query_info();
+    if (rc) {
+        goto out_err;
+    }
+
     /* Set SE header and unpack */
     rc = s390_ipl_prepare_pv_header();
     if (rc) {
diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h
index 1f1f545bfc..6fa55bf70e 100644
--- a/include/hw/s390x/pv.h
+++ b/include/hw/s390x/pv.h
@@ -38,6 +38,7 @@ static inline bool s390_is_pv(void)
     return ccw->pv;
 }
 
+int s390_pv_query_info(void);
 int s390_pv_vm_enable(void);
 void s390_pv_vm_disable(void);
 int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
@@ -46,8 +47,13 @@ void s390_pv_prep_reset(void);
 int s390_pv_verify(void);
 void s390_pv_unshare(void);
 void s390_pv_inject_reset_error(CPUState *cs);
+uint64_t kvm_s390_pv_dmp_get_size_cpu(void);
+uint64_t kvm_s390_pv_dmp_get_size_mem(void);
+uint64_t kvm_s390_pv_dmp_get_size_complete(void);
+bool kvm_s390_pv_info_basic_valid(void);
 #else /* CONFIG_KVM */
 static inline bool s390_is_pv(void) { return false; }
+static inline int s390_pv_query_info(void) { return 0; }
 static inline int s390_pv_vm_enable(void) { return 0; }
 static inline void s390_pv_vm_disable(void) {}
 static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; }
@@ -56,6 +62,10 @@ static inline void s390_pv_prep_reset(void) {}
 static inline int s390_pv_verify(void) { return 0; }
 static inline void s390_pv_unshare(void) {}
 static inline void s390_pv_inject_reset_error(CPUState *cs) {};
+static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; }
+static inline uint64_t kvm_s390_pv_dmp_get_size_mem(void) { return 0; }
+static inline uint64_t kvm_s390_pv_dmp_get_size_complete(void) { return 0; }
+static inline bool kvm_s390_pv_info_basic_valid(void) { return false; }
 #endif /* CONFIG_KVM */
 
 int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 10/11] s390x: Add KVM PV dump interface
  2022-07-13 13:03 [PATCH v2 00/11] dump: Add arch section and s390x PV dump Janosch Frank
                   ` (8 preceding siblings ...)
  2022-07-13 13:03 ` [PATCH v2 09/11] s390x: Introduce PV query interface Janosch Frank
@ 2022-07-13 13:03 ` Janosch Frank
  2022-07-13 13:03 ` [PATCH v2 11/11] s390x: pv: Add dump support Janosch Frank
  10 siblings, 0 replies; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 13:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: marcandre.lureau, pbonzini, mhartmay, borntraeger, imbrenda,
	pasic, cohuck, thuth, qemu-s390x, richard.henderson

Let's add a few wrapper functions that hide the new KVM PV dump API
behind a dedicated interface.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 hw/s390x/pv.c         | 51 +++++++++++++++++++++++++++++++++++++++++++
 include/hw/s390x/pv.h |  8 +++++++
 2 files changed, 59 insertions(+)

diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
index a5af4ddf46..48591c387d 100644
--- a/hw/s390x/pv.c
+++ b/hw/s390x/pv.c
@@ -175,6 +175,57 @@ bool kvm_s390_pv_info_basic_valid(void)
     return info_valid;
 }
 
+static int s390_pv_dump_cmd(uint64_t subcmd, uint64_t uaddr, uint64_t gaddr,
+                            uint64_t len)
+{
+    struct kvm_s390_pv_dmp dmp = {
+        .subcmd = subcmd,
+        .buff_addr = uaddr,
+        .buff_len = len,
+        .gaddr = gaddr,
+    };
+    int ret;
+
+    ret = s390_pv_cmd(KVM_PV_DUMP, (void *)&dmp);
+    if (ret) {
+        error_report("KVM DUMP command %ld failed", subcmd);
+    }
+    return ret;
+}
+
+int kvm_s390_dump_cpu(S390CPU *cpu, void *buff)
+{
+    struct kvm_s390_pv_dmp dmp = {
+        .subcmd = KVM_PV_DUMP_CPU,
+        .buff_addr = (uint64_t)buff,
+        .gaddr = 0,
+        .buff_len = info_dump.dump_cpu_buffer_len,
+    };
+    struct kvm_pv_cmd pv = {
+        .cmd = KVM_PV_DUMP,
+        .data = (uint64_t)&dmp,
+    };
+
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_S390_PV_CPU_COMMAND, &pv);
+}
+
+int kvm_s390_dump_init(void)
+{
+    return s390_pv_dump_cmd(KVM_PV_DUMP_INIT, 0, 0, 0);
+}
+
+int kvm_s390_dump_mem(uint64_t gaddr, size_t len, void *dest)
+{
+    return s390_pv_dump_cmd(KVM_PV_DUMP_CONFIG_STATE, (uint64_t)dest,
+                            gaddr, len);
+}
+
+int kvm_s390_dump_finish(void *buff)
+{
+    return s390_pv_dump_cmd(KVM_PV_DUMP_COMPLETE, (uint64_t)buff, 0,
+                            info_dump.dump_config_finalize_len);
+}
+
 #define TYPE_S390_PV_GUEST "s390-pv-guest"
 OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST)
 
diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h
index 6fa55bf70e..f37021e189 100644
--- a/include/hw/s390x/pv.h
+++ b/include/hw/s390x/pv.h
@@ -51,6 +51,10 @@ uint64_t kvm_s390_pv_dmp_get_size_cpu(void);
 uint64_t kvm_s390_pv_dmp_get_size_mem(void);
 uint64_t kvm_s390_pv_dmp_get_size_complete(void);
 bool kvm_s390_pv_info_basic_valid(void);
+int kvm_s390_dump_init(void);
+int kvm_s390_dump_cpu(S390CPU *cpu, void *buff);
+int kvm_s390_dump_mem(uint64_t addr, size_t len, void *dest);
+int kvm_s390_dump_finish(void *buff);
 #else /* CONFIG_KVM */
 static inline bool s390_is_pv(void) { return false; }
 static inline int s390_pv_query_info(void) { return 0; }
@@ -66,6 +70,10 @@ static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; }
 static inline uint64_t kvm_s390_pv_dmp_get_size_mem(void) { return 0; }
 static inline uint64_t kvm_s390_pv_dmp_get_size_complete(void) { return 0; }
 static inline bool kvm_s390_pv_info_basic_valid(void) { return false; }
+static inline int kvm_s390_dump_init(void) { return 0; }
+static inline int kvm_s390_dump_cpu(S390CPU *cpu, void *buff, size_t len) { return 0; }
+static inline int kvm_s390_dump_mem(uint64_t addr, size_t len, void *dest) { return 0; }
+static inline int kvm_s390_dump_finish(void *buff) { return 0; }
 #endif /* CONFIG_KVM */
 
 int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 11/11] s390x: pv: Add dump support
  2022-07-13 13:03 [PATCH v2 00/11] dump: Add arch section and s390x PV dump Janosch Frank
                   ` (9 preceding siblings ...)
  2022-07-13 13:03 ` [PATCH v2 10/11] s390x: Add KVM PV dump interface Janosch Frank
@ 2022-07-13 13:03 ` Janosch Frank
  10 siblings, 0 replies; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 13:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: marcandre.lureau, pbonzini, mhartmay, borntraeger, imbrenda,
	pasic, cohuck, thuth, qemu-s390x, richard.henderson

Sometimes dumping a guest from the outside is the only way to get the
data that is needed. This can be the case if a dumping mechanism like
KDUMP hasn't been configured or data needs to be fetched at a specific
point. Dumping a protected guest from the outside without help from
point. Dumping a protected guest from the outside without help from
firmware/hardware doesn't yield sufficient data to be useful. Hence we now

The PV dump support works by integrating the firmware into the dump
process. New Ultravisor calls are used to initiate the dump process,
dump cpu data, dump memory state and lastly complete the dump process.
The UV calls are exposed by KVM via the new KVM_PV_DUMP command and
its subcommands. The guest's data is fully encrypted and can only be
decrypted by the entity that owns the customer communication key for
the dumped guest. Also dumping needs to be allowed via a flag in the
SE header.

On the QEMU side of things we store the PV dump data in the newly
introduced architecture ELF sections (storage state and completion
data) and the cpu notes (for cpu dump data).

Users can use the zgetdump tool to convert the encrypted QEMU dump to an
unencrypted one.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 include/elf.h            |   1 +
 target/s390x/arch_dump.c | 248 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 219 insertions(+), 30 deletions(-)

diff --git a/include/elf.h b/include/elf.h
index 3a4bcb646a..58f76fd5b4 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -1649,6 +1649,7 @@ typedef struct elf64_shdr {
 #define NT_TASKSTRUCT	4
 #define NT_AUXV		6
 #define NT_PRXFPREG     0x46e62b7f      /* copied from gdb5.1/include/elf/common.h */
+#define NT_S390_PV_DATA 0x30e           /* s390 protvirt cpu dump data */
 #define NT_S390_GS_CB   0x30b           /* s390 guarded storage registers */
 #define NT_S390_VXRS_HIGH 0x30a         /* s390 vector registers 16-31 */
 #define NT_S390_VXRS_LOW  0x309         /* s390 vector registers 0-15 (lower half) */
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
index 08daf93ae1..e081aa9483 100644
--- a/target/s390x/arch_dump.c
+++ b/target/s390x/arch_dump.c
@@ -16,7 +16,8 @@
 #include "s390x-internal.h"
 #include "elf.h"
 #include "sysemu/dump.h"
-
+#include "hw/s390x/pv.h"
+#include "kvm/kvm_s390x.h"
 
 struct S390xUserRegsStruct {
     uint64_t psw[2];
@@ -76,9 +77,16 @@ typedef struct noteStruct {
         uint64_t todcmp;
         uint32_t todpreg;
         uint64_t ctrs[16];
+        uint8_t dynamic[1];  /*
+                              * Would be a flexible array member, if
+                              * that was legal inside a union. Real
+                              * size comes from PV info interface.
+                              */
     } contents;
 } QEMU_PACKED Note;
 
+static bool pv_dump_initialized;
+
 static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu, int id)
 {
     int i;
@@ -177,52 +185,82 @@ static void s390x_write_elf64_prefix(Note *note, S390CPU *cpu, int id)
     note->contents.prefix = cpu_to_be32((uint32_t)(cpu->env.psa));
 }
 
+static void s390x_write_elf64_pv(Note *note, S390CPU *cpu, int id)
+{
+    note->hdr.n_type = cpu_to_be32(NT_S390_PV_DATA);
+    if (!pv_dump_initialized) {
+        return;
+    }
+    kvm_s390_dump_cpu(cpu, &note->contents.dynamic);
+}
 
 typedef struct NoteFuncDescStruct {
     int contents_size;
+    uint64_t (*note_size_func)(void); /* NULL for non-dynamic sized contents */
     void (*note_contents_func)(Note *note, S390CPU *cpu, int id);
+    bool pvonly;
 } NoteFuncDesc;
 
 static const NoteFuncDesc note_core[] = {
-    {sizeof_field(Note, contents.prstatus), s390x_write_elf64_prstatus},
-    {sizeof_field(Note, contents.fpregset), s390x_write_elf64_fpregset},
-    { 0, NULL}
+    {sizeof_field(Note, contents.prstatus), NULL, s390x_write_elf64_prstatus, false},
+    {sizeof_field(Note, contents.fpregset), NULL, s390x_write_elf64_fpregset, false},
+    { 0, NULL, NULL}
 };
 
 static const NoteFuncDesc note_linux[] = {
-    {sizeof_field(Note, contents.prefix),   s390x_write_elf64_prefix},
-    {sizeof_field(Note, contents.ctrs),     s390x_write_elf64_ctrs},
-    {sizeof_field(Note, contents.timer),    s390x_write_elf64_timer},
-    {sizeof_field(Note, contents.todcmp),   s390x_write_elf64_todcmp},
-    {sizeof_field(Note, contents.todpreg),  s390x_write_elf64_todpreg},
-    {sizeof_field(Note, contents.vregslo),  s390x_write_elf64_vregslo},
-    {sizeof_field(Note, contents.vregshi),  s390x_write_elf64_vregshi},
-    {sizeof_field(Note, contents.gscb),     s390x_write_elf64_gscb},
-    { 0, NULL}
+    {sizeof_field(Note, contents.prefix),   NULL, s390x_write_elf64_prefix,  false},
+    {sizeof_field(Note, contents.ctrs),     NULL, s390x_write_elf64_ctrs,    false},
+    {sizeof_field(Note, contents.timer),    NULL, s390x_write_elf64_timer,   false},
+    {sizeof_field(Note, contents.todcmp),   NULL, s390x_write_elf64_todcmp,  false},
+    {sizeof_field(Note, contents.todpreg),  NULL, s390x_write_elf64_todpreg, false},
+    {sizeof_field(Note, contents.vregslo),  NULL, s390x_write_elf64_vregslo, false},
+    {sizeof_field(Note, contents.vregshi),  NULL, s390x_write_elf64_vregshi, false},
+    {sizeof_field(Note, contents.gscb),     NULL, s390x_write_elf64_gscb,    false},
+    {0, kvm_s390_pv_dmp_get_size_cpu,       s390x_write_elf64_pv, true},
+    { 0, NULL, NULL}
 };
 
 static int s390x_write_elf64_notes(const char *note_name,
-                                       WriteCoreDumpFunction f,
-                                       S390CPU *cpu, int id,
-                                       void *opaque,
-                                       const NoteFuncDesc *funcs)
+                                   WriteCoreDumpFunction f,
+                                   S390CPU *cpu, int id,
+                                   void *opaque,
+                                   const NoteFuncDesc *funcs)
 {
-    Note note;
+    Note note, *notep;
     const NoteFuncDesc *nf;
-    int note_size;
+    int note_size, content_size;
     int ret = -1;
 
     assert(strlen(note_name) < sizeof(note.name));
 
     for (nf = funcs; nf->note_contents_func; nf++) {
-        memset(&note, 0, sizeof(note));
-        note.hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
-        note.hdr.n_descsz = cpu_to_be32(nf->contents_size);
-        g_strlcpy(note.name, note_name, sizeof(note.name));
-        (*nf->note_contents_func)(&note, cpu, id);
+        notep = &note;
+        if (nf->pvonly && !s390_is_pv()) {
+            continue;
+        }
 
-        note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size;
-        ret = f(&note, note_size, opaque);
+        content_size = nf->contents_size ? nf->contents_size : nf->note_size_func();
+        note_size = sizeof(note) - sizeof(notep->contents) + content_size;
+
+        /* Notes with dynamic sizes need to allocate a note */
+        if (nf->note_size_func) {
+            notep = g_malloc0(note_size);
+        }
+
+        memset(notep, 0, sizeof(note));
+
+        /* Setup note header data */
+        notep->hdr.n_descsz = cpu_to_be32(content_size);
+        notep->hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
+        g_strlcpy(notep->name, note_name, sizeof(notep->name));
+
+        /* Get contents and write them out */
+        (*nf->note_contents_func)(notep, cpu, id);
+        ret = f(notep, note_size, opaque);
+
+        if (nf->note_size_func) {
+            g_free(notep);
+        }
 
         if (ret < 0) {
             return -1;
@@ -247,12 +285,159 @@ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
     return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, opaque, note_linux);
 }
 
+/* PV dump section size functions */
+static uint64_t get_dump_mem_size_from_len(uint64_t len)
+{
+    return (len / (1 << 20)) * kvm_s390_pv_dmp_get_size_mem();
+}
+
+static uint64_t get_size_mem(DumpState *s)
+{
+    return get_dump_mem_size_from_len(s->total_size);
+}
+
+static uint64_t get_size_complete(DumpState *s)
+{
+    return kvm_s390_pv_dmp_get_size_complete();
+}
+
+/* PV dump section data functions*/
+static int get_data_complete(DumpState *s, uint8_t *buff)
+{
+    int rc;
+
+    if (!pv_dump_initialized) {
+        return 0;
+    }
+    rc = kvm_s390_dump_finish(buff);
+    if (!rc) {
+            pv_dump_initialized = false;
+    }
+    return rc;
+}
+
+static int dump_mem(DumpState *s, uint64_t gaddr, uint8_t *buff, uint64_t buff_len)
+{
+    /* We need the gaddr + len and something to write to */
+    if (!pv_dump_initialized) {
+        return 0;
+    }
+    return kvm_s390_dump_mem(gaddr, buff_len, buff);
+}
+
+static int get_data_mem(DumpState *s, uint8_t *buff)
+{
+    int64_t memblock_size, memblock_start;
+    GuestPhysBlock *block;
+    uint64_t off;
+
+    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
+        memblock_start = dump_get_memblock_start(block, s->begin, s->length);
+        if (memblock_start == -1) {
+            continue;
+        }
+
+        memblock_size = dump_get_memblock_size(block, s->begin, s->length);
+
+        off = get_dump_mem_size_from_len(block->target_start);
+        dump_mem(s, block->target_start, buff + off,
+                 get_dump_mem_size_from_len(memblock_size));
+    }
+
+    return 0;
+}
+
+struct sections {
+    uint64_t (*sections_size_func)(DumpState *s);
+    int (*sections_contents_func)(DumpState *s, uint8_t *buff);
+    char sctn_str[12];
+} sections[] = {
+    { get_size_mem, get_data_mem, "pv_mem_meta"},
+    { get_size_complete, get_data_complete, "pv_compl"},
+    {NULL , NULL, ""}
+};
+
+static uint64_t arch_sections_write_hdr(void *opaque, uint8_t *buff)
+{
+    DumpState *s = opaque;
+    Elf64_Shdr *shdr = (void *)buff;
+    struct sections *sctn = sections;
+    uint64_t off = s->section_offset;
+
+    if (!s390_is_pv()) {
+        return 0;
+    }
+
+    for (; sctn->sections_size_func; off += shdr->sh_size, sctn++, shdr++) {
+        memset(shdr, 0, sizeof(*shdr));
+        shdr->sh_type = SHT_PROGBITS;
+        shdr->sh_offset = off;
+        shdr->sh_size = sctn->sections_size_func(s);
+        shdr->sh_name = s->string_table_buf->len;
+        g_array_append_vals(s->string_table_buf, sctn->sctn_str, sizeof(sctn->sctn_str));
+    }
+
+    return (uintptr_t)shdr - (uintptr_t)buff;
+}
+
+
+/* Add arch specific number of sections and their respective sizes */
+static void arch_sections_add(void *opaque)
+{
+    DumpState *s = opaque;
+    struct sections *sctn = sections;
+
+    /*
+     * We only do a PV dump if we are running a PV guest, KVM supports
+     * the dump API and we got valid dump length information.
+     */
+    if (!s390_is_pv() || !kvm_s390_get_protected_dump() ||
+        !kvm_s390_pv_info_basic_valid()) {
+        return;
+    }
+
+    /*
+     * Start the UV dump process by doing the initialize dump call via
+     * KVM as the proxy.
+     */
+    if (!kvm_s390_dump_init()) {
+            pv_dump_initialized = true;
+    }
+
+    for (; sctn->sections_size_func; sctn++) {
+        s->shdr_num += 1;
+        s->elf_section_data_size += sctn->sections_size_func(s);
+    }
+}
+
+/*
+ * After the PV dump has been initialized, the CPU data has been
+ * fetched and memory has been dumped, we need to grab the tweak data
+ * and the completion data.
+ */
+static void arch_sections_write(void *opaque, uint8_t *buff)
+{
+    DumpState *s = opaque;
+    struct sections *sctn = sections;
+
+    /* shdr_num should only have been set > 1 if we are protected */
+    assert(s390_is_pv());
+
+    for (; sctn->sections_size_func; sctn++) {
+        sctn->sections_contents_func(s, buff);
+        buff += sctn->sections_size_func(s);
+    }
+}
+
 int cpu_get_dump_info(ArchDumpInfo *info,
                       const struct GuestPhysBlockList *guest_phys_blocks)
 {
     info->d_machine = EM_S390;
     info->d_endian = ELFDATA2MSB;
     info->d_class = ELFCLASS64;
+    info->arch_sections_add_fn = *arch_sections_add;
+    info->arch_sections_write_hdr_fn = *arch_sections_write_hdr;
+    info->arch_sections_write_fn = *arch_sections_write;
 
     return 0;
 }
@@ -261,7 +446,7 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
 {
     int name_size = 8; /* "LINUX" or "CORE" + pad */
     size_t elf_note_size = 0;
-    int note_head_size;
+    int note_head_size, content_size;
     const NoteFuncDesc *nf;
 
     assert(class == ELFCLASS64);
@@ -270,12 +455,15 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
     note_head_size = sizeof(Elf64_Nhdr);
 
     for (nf = note_core; nf->note_contents_func; nf++) {
-        elf_note_size = elf_note_size + note_head_size + name_size +
-                        nf->contents_size;
+        elf_note_size = elf_note_size + note_head_size + name_size + nf->contents_size;
     }
     for (nf = note_linux; nf->note_contents_func; nf++) {
+        if (nf->pvonly && !s390_is_pv()) {
+            continue;
+        }
+        content_size = nf->contents_size ? nf->contents_size : nf->note_size_func();
         elf_note_size = elf_note_size + note_head_size + name_size +
-                        nf->contents_size;
+                        content_size;
     }
 
     return (elf_note_size) * nr_cpus;
-- 
2.34.1
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 01/11] dump: Cleanup memblock usage
  2022-07-13 13:03 ` [PATCH v2 01/11] dump: Cleanup memblock usage Janosch Frank
@ 2022-07-13 15:09   ` Marc-André Lureau
  2022-07-13 15:30     ` Janosch Frank
  0 siblings, 1 reply; 29+ messages in thread
From: Marc-André Lureau @ 2022-07-13 15:09 UTC (permalink / raw)
  To: Janosch Frank
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

Hi

On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>
> The iteration over the memblocks is hard to understand so it's about
> time to clean it up.
>
> struct DumpState's next_block and start members can and should be
> local variables within the iterator.
>
> Instead of manually grabbing the next memblock we can use
> QTAILQ_FOREACH to iterate over all memblocks.
>
> The begin and length fields in the DumpState have been left untouched
> since the qmp arguments share their names.
>
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>

After this patch:
./qemu-system-x86_64 -monitor stdio -S
(qemu) dump-guest-memory foo
Error: dump: failed to save memory: Bad address


> ---
>  dump/dump.c           | 91 +++++++++++--------------------------------
>  include/sysemu/dump.h | 47 +++++++++++++++++++---
>  2 files changed, 65 insertions(+), 73 deletions(-)
>
> diff --git a/dump/dump.c b/dump/dump.c
> index 4d9658ffa2..6feba3cbfa 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -591,56 +591,27 @@ static void dump_begin(DumpState *s, Error **errp)
>      write_elf_notes(s, errp);
>  }
>
> -static int get_next_block(DumpState *s, GuestPhysBlock *block)
> -{
> -    while (1) {
> -        block = QTAILQ_NEXT(block, next);
> -        if (!block) {
> -            /* no more block */
> -            return 1;
> -        }
> -
> -        s->start = 0;
> -        s->next_block = block;
> -        if (s->has_filter) {
> -            if (block->target_start >= s->begin + s->length ||
> -                block->target_end <= s->begin) {
> -                /* This block is out of the range */
> -                continue;
> -            }
> -
> -            if (s->begin > block->target_start) {
> -                s->start = s->begin - block->target_start;
> -            }
> -        }
> -
> -        return 0;
> -    }
> -}
> -
>  /* write all memory to vmcore */
>  static void dump_iterate(DumpState *s, Error **errp)
>  {
>      ERRP_GUARD();
>      GuestPhysBlock *block;
> -    int64_t size;
> +    int64_t memblock_size, memblock_start;
>
> -    do {
> -        block = s->next_block;
> -
> -        size = block->target_end - block->target_start;
> -        if (s->has_filter) {
> -            size -= s->start;
> -            if (s->begin + s->length < block->target_end) {
> -                size -= block->target_end - (s->begin + s->length);
> -            }
> +    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
> +        memblock_start = dump_get_memblock_start(block, s->begin, s->length);
> +        if (memblock_start == -1) {
> +            continue;
>          }
> -        write_memory(s, block, s->start, size, errp);
> +
> +        memblock_size = dump_get_memblock_size(block, s->begin, s->length);
> +
> +        /* Write the memory to file */
> +        write_memory(s, block, memblock_start, memblock_size, errp);
>          if (*errp) {
>              return;
>          }
> -
> -    } while (!get_next_block(s, block));
> +    }
>  }
>
>  static void create_vmcore(DumpState *s, Error **errp)
> @@ -1490,30 +1461,22 @@ static void create_kdump_vmcore(DumpState *s, Error **errp)
>      }
>  }
>
> -static ram_addr_t get_start_block(DumpState *s)
> +static int validate_start_block(DumpState *s)
>  {
>      GuestPhysBlock *block;
>
>      if (!s->has_filter) {
> -        s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
>          return 0;
>      }
>
>      QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
> +        /* This block is out of the range */
>          if (block->target_start >= s->begin + s->length ||
>              block->target_end <= s->begin) {
> -            /* This block is out of the range */
>              continue;
>          }
> -
> -        s->next_block = block;
> -        if (s->begin > block->target_start) {
> -            s->start = s->begin - block->target_start;
> -        } else {
> -            s->start = 0;
> -        }
> -        return s->start;
> -    }
> +        return 0;
> +   }
>
>      return -1;
>  }
> @@ -1540,25 +1503,17 @@ bool qemu_system_dump_in_progress(void)
>      return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE);
>  }
>
> -/* calculate total size of memory to be dumped (taking filter into
> - * acoount.) */
> +/*
> + * calculate total size of memory to be dumped (taking filter into
> + * account.)

thanks for fixing the typo

> + */
>  static int64_t dump_calculate_size(DumpState *s)
>  {
>      GuestPhysBlock *block;
> -    int64_t size = 0, total = 0, left = 0, right = 0;
> +    int64_t total = 0;
>
>      QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
> -        if (s->has_filter) {
> -            /* calculate the overlapped region. */
> -            left = MAX(s->begin, block->target_start);
> -            right = MIN(s->begin + s->length, block->target_end);
> -            size = right - left;
> -            size = size > 0 ? size : 0;
> -        } else {
> -            /* count the whole region in */
> -            size = (block->target_end - block->target_start);
> -        }
> -        total += size;
> +        total += dump_get_memblock_size(block, s->begin, s->length);
>      }
>
>      return total;
> @@ -1660,8 +1615,8 @@ static void dump_init(DumpState *s, int fd, bool has_format,
>          goto cleanup;
>      }
>
> -    s->start = get_start_block(s);
> -    if (s->start == -1) {
> +    /* Is the filter filtering everything? */
> +    if (validate_start_block(s) == -1) {
>          error_setg(errp, QERR_INVALID_PARAMETER, "begin");
>          goto cleanup;
>      }
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index ffc2ea1072..f3bf98c220 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -166,11 +166,10 @@ typedef struct DumpState {
>      hwaddr memory_offset;
>      int fd;
>
> -    GuestPhysBlock *next_block;
> -    ram_addr_t start;
> -    bool has_filter;
> -    int64_t begin;
> -    int64_t length;
> +    /* Guest memory related data */
> +    bool has_filter;           /* Are we dumping parts of the memory? */
> +    int64_t begin;             /* Start address of the chunk we want to dump */
> +    int64_t length;            /* Length of the dump we want to dump */
>
>      uint8_t *note_buf;          /* buffer for notes */
>      size_t note_buf_offset;     /* the writing place in note_buf */
> @@ -203,4 +202,42 @@ typedef struct DumpState {
>  uint16_t cpu_to_dump16(DumpState *s, uint16_t val);
>  uint32_t cpu_to_dump32(DumpState *s, uint32_t val);
>  uint64_t cpu_to_dump64(DumpState *s, uint64_t val);
> +
> +static inline int64_t dump_get_memblock_size(GuestPhysBlock *block, int64_t filter_area_start,
> +                                             int64_t filter_area_length)
> +{
> +    int64_t size, left, right;
> +
> +    /* No filter, return full size */
> +    if (!filter_area_length) {
> +        return block->target_end - block->target_start;
> +    }
> +
> +    /* calculate the overlapped region. */
> +    left = MAX(filter_area_start, block->target_start);
> +    right = MIN(filter_area_start + filter_area_length, block->target_end);
> +    size = right - left;
> +    size = size > 0 ? size : 0;
> +
> +    return size;
> +}
> +
> +static inline int64_t dump_get_memblock_start(GuestPhysBlock *block, int64_t filter_area_start,
> +                                  int64_t filter_area_length)
> +{
> +    if (filter_area_length) {
> +        /*
> +         * Check if block is within guest memory dump area. If not
> +         * go to next one.
> +         */

Or rather "return -1 if the block is not within filter area"

> +        if (block->target_start >= filter_area_start + filter_area_length ||
> +            block->target_end <= filter_area_start) {
> +            return -1;
> +        }
> +        if (filter_area_start > block->target_start) {
> +            return filter_area_start - block->target_start;
> +        }
> +    }
> +    return block->target_start;

This used to be 0. Changing that, I think the patch looks good.
Although it could perhaps be split into two patches to introduce the two functions.

> +}
>  #endif
> --
> 2.34.1
>



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 02/11] dump: Allocate header
  2022-07-13 13:03 ` [PATCH v2 02/11] dump: Allocate header Janosch Frank
@ 2022-07-13 15:20   ` Marc-André Lureau
  0 siblings, 0 replies; 29+ messages in thread
From: Marc-André Lureau @ 2022-07-13 15:20 UTC (permalink / raw)
  To: Janosch Frank
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>
> Allocating the header lets us write it at a later time and hence also
> allows us to change section and segment table offsets until we
> finally write it.
>
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>

Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>


> ---
>  dump/dump.c           | 127 +++++++++++++++++++++---------------------
>  include/sysemu/dump.h |   1 +
>  2 files changed, 64 insertions(+), 64 deletions(-)
>
> diff --git a/dump/dump.c b/dump/dump.c
> index 6feba3cbfa..16d7474258 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -98,6 +98,7 @@ static int dump_cleanup(DumpState *s)
>      memory_mapping_list_free(&s->list);
>      close(s->fd);
>      g_free(s->guest_note);
> +    g_free(s->elf_header);
>      s->guest_note = NULL;
>      if (s->resume) {
>          if (s->detached) {
> @@ -126,73 +127,49 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
>      return 0;
>  }
>
> -static void write_elf64_header(DumpState *s, Error **errp)
> +static void prepare_elf64_header(DumpState *s)
>  {
> -    /*
> -     * phnum in the elf header is 16 bit, if we have more segments we
> -     * set phnum to PN_XNUM and write the real number of segments to a
> -     * special section.
> -     */
> -    uint16_t phnum = MIN(s->phdr_num, PN_XNUM);
> -    Elf64_Ehdr elf_header;
> -    int ret;
> +    uint16_t phnum = s->phdr_num >= PN_XNUM ? PN_XNUM : s->phdr_num;
> +    Elf64_Ehdr *elf_header = s->elf_header;
>
> -    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
> -    memcpy(&elf_header, ELFMAG, SELFMAG);
> -    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
> -    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
> -    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> -    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
> -    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
> -    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
> -    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
> -    elf_header.e_phoff = cpu_to_dump64(s, s->phdr_offset);
> -    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
> -    elf_header.e_phnum = cpu_to_dump16(s, phnum);
> +    memcpy(elf_header, ELFMAG, SELFMAG);
> +    elf_header->e_ident[EI_CLASS] = ELFCLASS64;
> +    elf_header->e_ident[EI_DATA] = s->dump_info.d_endian;
> +    elf_header->e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header->e_type = cpu_to_dump16(s, ET_CORE);
> +    elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
> +    elf_header->e_version = cpu_to_dump32(s, EV_CURRENT);
> +    elf_header->e_ehsize = cpu_to_dump16(s, sizeof(*elf_header));
> +    elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset);
> +    elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
> +    elf_header->e_phnum = cpu_to_dump16(s, phnum);
>      if (s->shdr_num) {
> -        elf_header.e_shoff = cpu_to_dump64(s, s->shdr_offset);
> -        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
> -        elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num);
> -    }
> -
> -    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
> -    if (ret < 0) {
> -        error_setg_errno(errp, -ret, "dump: failed to write elf header");
> +        elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset);
> +        elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
> +        elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
>      }
>  }
>
> -static void write_elf32_header(DumpState *s, Error **errp)
> +static void prepare_elf32_header(DumpState *s)
>  {
> -    /*
> -     * phnum in the elf header is 16 bit, if we have more segments we
> -     * set phnum to PN_XNUM and write the real number of segments to a
> -     * special section.
> -     */
> -    uint16_t phnum = MIN(s->phdr_num, PN_XNUM);
> -    Elf32_Ehdr elf_header;
> -    int ret;
> +    uint16_t phnum = s->phdr_num >= PN_XNUM ? PN_XNUM : s->phdr_num;
> +    Elf32_Ehdr *elf_header = s->elf_header;
>
> -    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
> -    memcpy(&elf_header, ELFMAG, SELFMAG);
> -    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
> -    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
> -    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> -    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
> -    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
> -    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
> -    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
> -    elf_header.e_phoff = cpu_to_dump32(s, s->phdr_offset);
> -    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
> -    elf_header.e_phnum = cpu_to_dump16(s, phnum);
> +    memcpy(elf_header, ELFMAG, SELFMAG);
> +    elf_header->e_ident[EI_CLASS] = ELFCLASS32;
> +    elf_header->e_ident[EI_DATA] = s->dump_info.d_endian;
> +    elf_header->e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header->e_type = cpu_to_dump16(s, ET_CORE);
> +    elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
> +    elf_header->e_version = cpu_to_dump32(s, EV_CURRENT);
> +    elf_header->e_ehsize = cpu_to_dump16(s, sizeof(*elf_header));
> +    elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset);
> +    elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
> +    elf_header->e_phnum = cpu_to_dump16(s, phnum);
>      if (s->shdr_num) {
> -        elf_header.e_shoff = cpu_to_dump32(s, s->shdr_offset);
> -        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
> -        elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num);
> -    }
> -
> -    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
> -    if (ret < 0) {
> -        error_setg_errno(errp, -ret, "dump: failed to write elf header");
> +        elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset);
> +        elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
> +        elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
>      }
>  }
>
> @@ -528,6 +505,26 @@ static void write_elf_notes(DumpState *s, Error **errp)
>      }
>  }
>
> +static void prepare_elf_header(DumpState *s)
> +{
> +    if (dump_is_64bit(s)) {
> +        prepare_elf64_header(s);
> +    } else {
> +        prepare_elf32_header(s);
> +    }
> +}
> +
> +static void write_elf_header(DumpState *s, Error **errp)
> +{
> +    size_t size = dump_is_64bit(s) ? sizeof(Elf64_Ehdr) : sizeof(Elf32_Ehdr);
> +    int ret;
> +
> +    ret = fd_write_vmcore(s->elf_header, size, s);
> +    if (ret < 0) {
> +        error_setg_errno(errp, -ret, "dump: failed to write elf header");
> +    }
> +}
> +
>  /* write elf header, PT_NOTE and elf note to vmcore. */
>  static void dump_begin(DumpState *s, Error **errp)
>  {
> @@ -557,12 +554,11 @@ static void dump_begin(DumpState *s, Error **errp)
>       * vmcore.
>       */
>
> -    /* write elf header to vmcore */
> -    if (dump_is_64bit(s)) {
> -        write_elf64_header(s, errp);
> -    } else {
> -        write_elf32_header(s, errp);
> -    }
> +    /* Write elf header to buffer */
> +    prepare_elf_header(s);
> +
> +    /* Start to write stuff into files*/

nits: missing space, files->file descriptor/stream ?


> +    write_elf_header(s, errp);
>      if (*errp) {
>          return;
>      }
> @@ -1642,6 +1638,9 @@ static void dump_init(DumpState *s, int fd, bool has_format,
>          goto cleanup;
>      }
>
> +    s->elf_header = g_malloc0(dump_is_64bit(s) ?
> +                              sizeof(Elf64_Ehdr) : sizeof(Elf32_Ehdr));
> +
>      /*
>       * The goal of this block is to (a) update the previously guessed
>       * phys_base, (b) copy the guest note out of the guest.
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index f3bf98c220..736f681d01 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -171,6 +171,7 @@ typedef struct DumpState {
>      int64_t begin;             /* Start address of the chunk we want to dump */
>      int64_t length;            /* Length of the dump we want to dump */
>
> +    void *elf_header;
>      uint8_t *note_buf;          /* buffer for notes */
>      size_t note_buf_offset;     /* the writing place in note_buf */
>      uint32_t nr_cpus;           /* number of guest's cpu */
> --
> 2.34.1
>



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 01/11] dump: Cleanup memblock usage
  2022-07-13 15:09   ` Marc-André Lureau
@ 2022-07-13 15:30     ` Janosch Frank
  2022-07-13 15:35       ` Marc-André Lureau
  0 siblings, 1 reply; 29+ messages in thread
From: Janosch Frank @ 2022-07-13 15:30 UTC (permalink / raw)
  To: Marc-André Lureau
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

On 7/13/22 17:09, Marc-André Lureau wrote:
> Hi
> 
> On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>>
>> The iteration over the memblocks is hard to understand so it's about
>> time to clean it up.
>>
>> struct DumpState's next_block and start members can and should be
>> local variables within the iterator.
>>
>> Instead of manually grabbing the next memblock we can use
>> QTAILQ_FOREACH to iterate over all memblocks.
>>
>> The begin and length fields in the DumpState have been left untouched
>> since the qmp arguments share their names.
>>
>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> 
> After this patch:
> ./qemu-system-x86_64 -monitor stdio -S
> (qemu) dump-guest-memory foo
> Error: dump: failed to save memory: Bad address

If you have more ways to check for dump errors then please send them to 
me. I'm aware that this might not have been a 100% conversion and I'm a 
bit terrified about the fact that this will affect all architectures.


Anyway, I'll have a look.

[...]

>> +static inline int64_t dump_get_memblock_start(GuestPhysBlock *block, int64_t filter_area_start,
>> +                                  int64_t filter_area_length)
>> +{
>> +    if (filter_area_length) {
>> +        /*
>> +         * Check if block is within guest memory dump area. If not
>> +         * go to next one.
>> +         */
> 
> Or rather "return -1 if the block is not within filter area"

Sure

> 
>> +        if (block->target_start >= filter_area_start + filter_area_length ||
>> +            block->target_end <= filter_area_start) {
>> +            return -1;
>> +        }
>> +        if (filter_area_start > block->target_start) {
>> +            return filter_area_start - block->target_start;
>> +        }
>> +    }
>> +    return block->target_start;
> 
> This used to be 0. Changing that, I think the patch looks good.
> Although it could perhaps be splitted to introduce the two functions.

Yes, but the 0 was used to indicate that we would have needed to 
continue iterating, and the iteration is done via other means in this patch.

Or am I missing something?

> 
>> +}
>>   #endif
>> --
>> 2.34.1
>>
> 
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 03/11] dump: Split write of section headers and data and add a prepare step
  2022-07-13 13:03 ` [PATCH v2 03/11] dump: Split write of section headers and data and add a prepare step Janosch Frank
@ 2022-07-13 15:31   ` Marc-André Lureau
  2022-07-14 11:45     ` Janosch Frank
  0 siblings, 1 reply; 29+ messages in thread
From: Marc-André Lureau @ 2022-07-13 15:31 UTC (permalink / raw)
  To: Janosch Frank
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

Hi

On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>
> By splitting the writing of the section headers and (future) section
> data we prepare for the addition of a string table section and
> architecture sections.
>
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> ---
>  dump/dump.c           | 116 ++++++++++++++++++++++++++++++++----------
>  include/sysemu/dump.h |   4 ++
>  2 files changed, 94 insertions(+), 26 deletions(-)
>
> diff --git a/dump/dump.c b/dump/dump.c
> index 16d7474258..467d934bc1 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -342,30 +342,73 @@ static void write_elf_phdr_note(DumpState *s, Error **errp)
>      }
>  }
>
> -static void write_elf_section(DumpState *s, int type, Error **errp)
> +static size_t write_elf_section_hdr_zero(DumpState *s, void *buff)

Since the function no longer write, I'd suggest to rename it with
prepare_ prefix

>  {
> -    Elf32_Shdr shdr32;
> -    Elf64_Shdr shdr64;
> -    int shdr_size;
> -    void *shdr;
> -    int ret;
> +    if (dump_is_64bit(s)) {
> +        Elf64_Shdr *shdr64 = buff;
>
> -    if (type == 0) {
> -        shdr_size = sizeof(Elf32_Shdr);
> -        memset(&shdr32, 0, shdr_size);
> -        shdr32.sh_info = cpu_to_dump32(s, s->phdr_num);
> -        shdr = &shdr32;
> +        memset(buff, 0, sizeof(Elf64_Shdr));

You can drop this

> +        shdr64->sh_info = cpu_to_dump32(s, s->phdr_num);
>      } else {
> -        shdr_size = sizeof(Elf64_Shdr);
> -        memset(&shdr64, 0, shdr_size);
> -        shdr64.sh_info = cpu_to_dump32(s, s->phdr_num);
> -        shdr = &shdr64;
> +        Elf32_Shdr *shdr32 = buff;
> +
> +        memset(buff, 0, sizeof(Elf32_Shdr));

and this

> +        shdr32->sh_info = cpu_to_dump32(s, s->phdr_num);
>      }
>
> -    ret = fd_write_vmcore(shdr, shdr_size, s);
> +    return dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
> +}
> +
> +static void prepare_elf_section_hdrs(DumpState *s)
> +{
> +    uint8_t *buff_hdr;
> +    size_t len, sizeof_shdr;
> +
> +    /*
> +     * Section ordering:
> +     * - HDR zero (if needed)
> +     */
> +    sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
> +    len = sizeof_shdr * s->shdr_num;
> +    s->elf_section_hdrs = g_malloc0(len);

since you alloc0 here

> +    buff_hdr = s->elf_section_hdrs;
> +
> +    /* Write special section first */
> +    if (s->phdr_num == PN_XNUM) {
> +            write_elf_section_hdr_zero(s, buff_hdr);

Eventually, drop buff_hdr, and pass only "s" as argument

+ Indentation is off

> +    }
> +}
> +
> +static void prepare_elf_sections(DumpState *s, Error **errp)
> +{
> +    if (!s->shdr_num) {
> +        return;
> +    }
> +
> +    prepare_elf_section_hdrs(s);
> +}
> +
> +static void write_elf_section_headers(DumpState *s, Error **errp)
> +{
> +    size_t sizeof_shdr;
> +    int ret;
> +
> +    sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
> +
> +    ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s);
>      if (ret < 0) {
> -        error_setg_errno(errp, -ret,
> -                         "dump: failed to write section header table");
> +        error_setg_errno(errp, -ret, "dump: failed to write section data");

nit: data->header


> +    }
> +}
> +
> +static void write_elf_sections(DumpState *s, Error **errp)
> +{
> +    int ret;
> +
> +    /* Write section zero */
> +    ret = fd_write_vmcore(s->elf_section_data, s->elf_section_data_size, s);
> +    if (ret < 0) {
> +        error_setg_errno(errp, -ret, "dump: failed to write section data");
>      }
>  }
>
> @@ -557,12 +600,22 @@ static void dump_begin(DumpState *s, Error **errp)
>      /* Write elf header to buffer */
>      prepare_elf_header(s);
>
> +    prepare_elf_sections(s, errp);
> +    if (*errp) {
> +        return;
> +    }
> +
>      /* Start to write stuff into files*/
>      write_elf_header(s, errp);
>      if (*errp) {
>          return;
>      }
>
> +    write_elf_section_headers(s, errp);

Why do you reorder the sections? Could you explain in the commit
message why? Is this is format compliant? and update the comment
above? thanks

> +    if (*errp) {
> +        return;
> +    }
> +
>      /* write PT_NOTE to vmcore */
>      write_elf_phdr_note(s, errp);
>      if (*errp) {
> @@ -575,14 +628,6 @@ static void dump_begin(DumpState *s, Error **errp)
>          return;
>      }
>
> -    /* write section to vmcore */
> -    if (s->shdr_num) {
> -        write_elf_section(s, 1, errp);
> -        if (*errp) {
> -            return;
> -        }
> -    }
> -
>      /* write notes to vmcore */
>      write_elf_notes(s, errp);
>  }
> @@ -610,6 +655,19 @@ static void dump_iterate(DumpState *s, Error **errp)
>      }
>  }
>
> +static void dump_end(DumpState *s, Error **errp)
> +{
> +    ERRP_GUARD();
> +
> +    if (!s->elf_section_data_size) {
> +        return;
> +    }
> +    s->elf_section_data = g_malloc0(s->elf_section_data_size);
> +
> +    /* write sections to vmcore */
> +    write_elf_sections(s, errp);
> +}
> +
>  static void create_vmcore(DumpState *s, Error **errp)
>  {
>      ERRP_GUARD();
> @@ -620,6 +678,12 @@ static void create_vmcore(DumpState *s, Error **errp)
>      }
>
>      dump_iterate(s, errp);
> +    if (*errp) {
> +        return;
> +    }
> +
> +    /* Write section data after memory has been dumped */
> +    dump_end(s, errp);
>  }
>
>  static int write_start_flat_header(int fd)
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index 736f681d01..bd49532232 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -172,6 +172,10 @@ typedef struct DumpState {
>      int64_t length;            /* Length of the dump we want to dump */
>
>      void *elf_header;
> +    void *elf_section_hdrs;
> +    uint64_t elf_section_data_size;
> +    void *elf_section_data;
> +
>      uint8_t *note_buf;          /* buffer for notes */
>      size_t note_buf_offset;     /* the writing place in note_buf */
>      uint32_t nr_cpus;           /* number of guest's cpu */
> --
> 2.34.1
>



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 01/11] dump: Cleanup memblock usage
  2022-07-13 15:30     ` Janosch Frank
@ 2022-07-13 15:35       ` Marc-André Lureau
  2022-07-14  9:40         ` Janosch Frank
  0 siblings, 1 reply; 29+ messages in thread
From: Marc-André Lureau @ 2022-07-13 15:35 UTC (permalink / raw)
  To: Janosch Frank
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

Hi

On Wed, Jul 13, 2022 at 7:30 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>
> On 7/13/22 17:09, Marc-André Lureau wrote:
> > Hi
> >
> > On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com> wrote:
> >>
> >> The iteration over the memblocks is hard to understand so it's about
> >> time to clean it up.
> >>
> >> struct DumpState's next_block and start members can and should be
> >> local variables within the iterator.
> >>
> >> Instead of manually grabbing the next memblock we can use
> >> QTAILQ_FOREACH to iterate over all memblocks.
> >>
> >> The begin and length fields in the DumpState have been left untouched
> >> since the qmp arguments share their names.
> >>
> >> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> >
> > After this patch:
> > ./qemu-system-x86_64 -monitor stdio -S
> > (qemu) dump-guest-memory foo
> > Error: dump: failed to save memory: Bad address
>
> If you have more ways to check for dump errors then please send them to
> me. I'm aware that this might not have been a 100% conversion and I'm a
> bit terrified about the fact that this will affect all architectures.

Same feeling here. Maybe it's about time to write real dump tests!

>
>
> Anyway, I'll have a look.
>
> [...]
>
> >> +static inline int64_t dump_get_memblock_start(GuestPhysBlock *block, int64_t filter_area_start,
> >> +                                  int64_t filter_area_length)
> >> +{
> >> +    if (filter_area_length) {
> >> +        /*
> >> +         * Check if block is within guest memory dump area. If not
> >> +         * go to next one.
> >> +         */
> >
> > Or rather "return -1 if the block is not within filter area"
>
> Sure
>
> >
> >> +        if (block->target_start >= filter_area_start + filter_area_length ||
> >> +            block->target_end <= filter_area_start) {
> >> +            return -1;
> >> +        }
> >> +        if (filter_area_start > block->target_start) {
> >> +            return filter_area_start - block->target_start;
> >> +        }
> >> +    }
> >> +    return block->target_start;
> >
> > This used to be 0. Changing that, I think the patch looks good.
> > Although it could perhaps be splitted to introduce the two functions.
>
> Yes but the 0 was used to indicate that we would have needed continue
> iterating and the iteration is done via other means in this patch.
>
> Or am I missing something?

Well, you changed the way the loop used to work. it used to return 1/0
to indicate stop/continue and rely on s->start / s->next_block. Now
you return memblock_start.

>
> >
> >> +}
> >>   #endif
> >> --
> >> 2.34.1
> >>
> >
> >
>



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 04/11] dump: Reorder struct DumpState
  2022-07-13 13:03 ` [PATCH v2 04/11] dump: Reorder struct DumpState Janosch Frank
@ 2022-07-13 15:46   ` Marc-André Lureau
  0 siblings, 0 replies; 29+ messages in thread
From: Marc-André Lureau @ 2022-07-13 15:46 UTC (permalink / raw)
  To: Janosch Frank
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>
> Let's move ELF related members into one block and guest memory related
> ones into another to improve readability.
>
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>

> ---
>  include/sysemu/dump.h | 17 ++++++++++-------
>  1 file changed, 10 insertions(+), 7 deletions(-)
>
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index bd49532232..8379e29ef6 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -154,15 +154,8 @@ typedef struct DumpState {
>      GuestPhysBlockList guest_phys_blocks;
>      ArchDumpInfo dump_info;
>      MemoryMappingList list;
> -    uint32_t phdr_num;
> -    uint32_t shdr_num;
>      bool resume;
>      bool detached;
> -    ssize_t note_size;
> -    hwaddr shdr_offset;
> -    hwaddr phdr_offset;
> -    hwaddr section_offset;
> -    hwaddr note_offset;
>      hwaddr memory_offset;
>      int fd;
>
> @@ -171,6 +164,16 @@ typedef struct DumpState {
>      int64_t begin;             /* Start address of the chunk we want to dump */
>      int64_t length;            /* Length of the dump we want to dump */
>
> +    /* Elf dump related data */
> +    uint32_t phdr_num;
> +    uint32_t shdr_num;
> +    uint32_t sh_info;
> +    ssize_t note_size;
> +    hwaddr shdr_offset;
> +    hwaddr phdr_offset;
> +    hwaddr note_offset;
> +    hwaddr section_offset;
> +
>      void *elf_header;
>      void *elf_section_hdrs;
>      uint64_t elf_section_data_size;
> --
> 2.34.1
>



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 05/11] dump/dump: Add section string table support
  2022-07-13 13:03 ` [PATCH v2 05/11] dump/dump: Add section string table support Janosch Frank
@ 2022-07-13 15:58   ` Marc-André Lureau
  2022-07-14 11:53     ` Janosch Frank
  0 siblings, 1 reply; 29+ messages in thread
From: Marc-André Lureau @ 2022-07-13 15:58 UTC (permalink / raw)
  To: Janosch Frank
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

Hi

On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>
> Time to add a bit more descriptiveness to the dumps.

Please add some more description & motivation to the patch (supposedly
necessary for next patches), and explain that it currently doesn't
change the dump (afaict).

>
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  dump/dump.c           | 106 ++++++++++++++++++++++++++++++++++++------
>  include/sysemu/dump.h |   1 +
>  2 files changed, 94 insertions(+), 13 deletions(-)
>
> diff --git a/dump/dump.c b/dump/dump.c
> index 467d934bc1..31e2a85372 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -99,6 +99,7 @@ static int dump_cleanup(DumpState *s)
>      close(s->fd);
>      g_free(s->guest_note);
>      g_free(s->elf_header);
> +    g_array_unref(s->string_table_buf);
>      s->guest_note = NULL;
>      if (s->resume) {
>          if (s->detached) {
> @@ -359,14 +360,47 @@ static size_t write_elf_section_hdr_zero(DumpState *s, void *buff)
>      return dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
>  }
>
> +static void write_elf_section_hdr_string(DumpState *s, void *buff)
> +{
> +    Elf32_Shdr shdr32;
> +    Elf64_Shdr shdr64;
> +    int shdr_size;
> +    void *shdr = buff;
> +
> +    if (dump_is_64bit(s)) {
> +        shdr_size = sizeof(Elf64_Shdr);
> +        memset(&shdr64, 0, shdr_size);
> +        shdr64.sh_type = SHT_STRTAB;
> +        shdr64.sh_offset = s->section_offset + s->elf_section_data_size;
> +        shdr64.sh_name = s->string_table_buf->len;
> +        g_array_append_vals(s->string_table_buf, ".strtab", sizeof(".strtab"));
> +        shdr64.sh_size = s->string_table_buf->len;
> +        shdr = &shdr64;
> +    } else {
> +        shdr_size = sizeof(Elf32_Shdr);
> +        memset(&shdr32, 0, shdr_size);
> +        shdr32.sh_type = SHT_STRTAB;
> +        shdr32.sh_offset = s->section_offset + s->elf_section_data_size;
> +        shdr32.sh_name = s->string_table_buf->len;
> +        g_array_append_vals(s->string_table_buf, ".strtab", sizeof(".strtab"));
> +        shdr32.sh_size = s->string_table_buf->len;
> +        shdr = &shdr32;
> +    }
> +
> +    memcpy(buff, shdr, shdr_size);
> +}
> +
>  static void prepare_elf_section_hdrs(DumpState *s)
>  {
>      uint8_t *buff_hdr;
> -    size_t len, sizeof_shdr;
> +    size_t len, size = 0, sizeof_shdr;
> +    Elf64_Ehdr *hdr64 = s->elf_header;
> +    Elf32_Ehdr *hdr32 = s->elf_header;
>
>      /*
>       * Section ordering:
>       * - HDR zero (if needed)
> +     * - String table hdr
>       */
>      sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
>      len = sizeof_shdr * s->shdr_num;
> @@ -377,6 +411,22 @@ static void prepare_elf_section_hdrs(DumpState *s)
>      if (s->phdr_num == PN_XNUM) {
>              write_elf_section_hdr_zero(s, buff_hdr);
>      }
> +    buff_hdr += size;
> +
> +    if (s->shdr_num < 2) {
> +        return;
> +    }
> +
> +    /*
> +     * String table needs to be last section since strings are added
> +     * via arch_sections_write_hdr().
> +     */
> +    write_elf_section_hdr_string(s, buff_hdr);
> +    if (dump_is_64bit(s)) {
> +        hdr64->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
> +    } else {
> +        hdr32->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
> +    }
>  }
>
>  static void prepare_elf_sections(DumpState *s, Error **errp)
> @@ -405,11 +455,18 @@ static void write_elf_sections(DumpState *s, Error **errp)
>  {
>      int ret;
>
> -    /* Write section zero */
> +    /* Write section zero and arch sections */
>      ret = fd_write_vmcore(s->elf_section_data, s->elf_section_data_size, s);
>      if (ret < 0) {
>          error_setg_errno(errp, -ret, "dump: failed to write section data");
>      }
> +
> +    /* Write string table data */
> +    ret = fd_write_vmcore(s->string_table_buf->data,
> +                          s->string_table_buf->len, s);
> +    if (ret < 0) {
> +        error_setg_errno(errp, -ret, "dump: failed to write string table data");
> +    }
>  }
>
>  static void write_data(DumpState *s, void *buf, int length, Error **errp)
> @@ -592,6 +649,9 @@ static void dump_begin(DumpState *s, Error **errp)
>       *   --------------
>       *   |  memory     |
>       *   --------------
> +     *   |  sectn data |
> +     *   --------------
> +
>       *
>       * we only know where the memory is saved after we write elf note into
>       * vmcore.
> @@ -677,6 +737,7 @@ static void create_vmcore(DumpState *s, Error **errp)
>          return;
>      }
>
> +    /* Iterate over memory and dump it to file */
>      dump_iterate(s, errp);
>      if (*errp) {
>          return;
> @@ -1659,6 +1720,13 @@ static void dump_init(DumpState *s, int fd, bool has_format,
>      s->has_filter = has_filter;
>      s->begin = begin;
>      s->length = length;
> +    /* First index is 0, it's the special null name */
> +    s->string_table_buf = g_array_new(FALSE, TRUE, 1);
> +    /*
> +     * Allocate the null name, due to the clearing option set to true
> +     * it will be 0.
> +     */
> +    g_array_set_size(s->string_table_buf, 1);
>
>      memory_mapping_list_init(&s->list);
>
> @@ -1819,19 +1887,31 @@ static void dump_init(DumpState *s, int fd, bool has_format,
>          }
>      }
>
> -    if (dump_is_64bit(s)) {
> -        s->phdr_offset = sizeof(Elf64_Ehdr);
> -        s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
> -        s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
> -        s->memory_offset = s->note_offset + s->note_size;
> -    } else {
> -
> -        s->phdr_offset = sizeof(Elf32_Ehdr);
> -        s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
> -        s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
> -        s->memory_offset = s->note_offset + s->note_size;
> +    /*
> +     * calculate shdr_num and elf_section_data_size so we know the offsets and
> +     * sizes of all parts.
> +     *
> +     * If phdr_num overflowed we have at least one section header
> +     * More sections/hdrs can be added by the architectures
> +     */
> +    if (s->shdr_num > 1) {
> +        /* Reserve the string table */
> +        s->shdr_num += 1;
>      }
>
> +    tmp = (s->phdr_num == PN_XNUM) ? s->sh_info : s->phdr_num;
> +    if (dump_is_64bit(s)) {
> +        s->shdr_offset = sizeof(Elf64_Ehdr);
> +        s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
> +        s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * tmp;
> +    } else {
> +        s->shdr_offset = sizeof(Elf32_Ehdr);
> +        s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
> +        s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * tmp;
> +    }
> +    s->memory_offset = s->note_offset + s->note_size;

I suggest to split this in a different patch. It's not obvious that
you can change phdr_offset / shdr_offset, it deserves a comment.

> +    s->section_offset = s->memory_offset + s->total_size;
> +
>      return;
>
>  cleanup:
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index 8379e29ef6..2c25c7d309 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -178,6 +178,7 @@ typedef struct DumpState {
>      void *elf_section_hdrs;
>      uint64_t elf_section_data_size;
>      void *elf_section_data;
> +    GArray *string_table_buf;  /* String table section */
>
>      uint8_t *note_buf;          /* buffer for notes */
>      size_t note_buf_offset;     /* the writing place in note_buf */
> --
> 2.34.1
>



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 06/11] dump/dump: Add arch section support
  2022-07-13 13:03 ` [PATCH v2 06/11] dump/dump: Add arch section support Janosch Frank
@ 2022-07-13 16:02   ` Marc-André Lureau
  0 siblings, 0 replies; 29+ messages in thread
From: Marc-André Lureau @ 2022-07-13 16:02 UTC (permalink / raw)
  To: Janosch Frank
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

Hi

On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>
> Add hooks which architectures can use to add arbitrary data to custom
> sections.
>
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> ---
>  dump/dump.c                | 21 ++++++++++++++++++---
>  include/sysemu/dump-arch.h | 27 +++++++++++++++++++++++++++
>  2 files changed, 45 insertions(+), 3 deletions(-)
>
> diff --git a/dump/dump.c b/dump/dump.c
> index 31e2a85372..02de00b6de 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -400,6 +400,7 @@ static void prepare_elf_section_hdrs(DumpState *s)
>      /*
>       * Section ordering:
>       * - HDR zero (if needed)
> +     * - Arch section hdrs
>       * - String table hdr
>       */
>      sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
> @@ -417,6 +418,9 @@ static void prepare_elf_section_hdrs(DumpState *s)
>          return;
>      }
>
> +    size = dump_arch_sections_write_hdr(&s->dump_info, s, buff_hdr);
> +    buff_hdr += size;
> +
>      /*
>       * String table needs to be last section since strings are added
>       * via arch_sections_write_hdr().
> @@ -567,14 +571,23 @@ static void get_offset_range(hwaddr phys_addr,
>      }
>  }
>
> -static void write_elf_loads(DumpState *s, Error **errp)
> +static void write_elf_phdr_loads(DumpState *s, Error **errp)
>  {
>      ERRP_GUARD();
>      hwaddr offset, filesz;
>      MemoryMapping *memory_mapping;
>      uint32_t phdr_index = 1;
> +    hwaddr min = 0, max = 0;
>
>      QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
> +        if (memory_mapping->phys_addr < min) {
> +            min = memory_mapping->phys_addr;
> +        }
> +        if (memory_mapping->phys_addr + memory_mapping->length > max) {
> +            max = memory_mapping->phys_addr + memory_mapping->length;
> +        }
> +
> +

Extra line & this belongs to a different patch.

>          get_offset_range(memory_mapping->phys_addr,
>                           memory_mapping->length,
>                           s, &offset, &filesz);
> @@ -682,8 +695,8 @@ static void dump_begin(DumpState *s, Error **errp)
>          return;
>      }
>
> -    /* write all PT_LOAD to vmcore */
> -    write_elf_loads(s, errp);
> +    /* write all PT_LOADs to vmcore */
> +    write_elf_phdr_loads(s, errp);
>      if (*errp) {
>          return;
>      }
> @@ -723,6 +736,7 @@ static void dump_end(DumpState *s, Error **errp)
>          return;
>      }
>      s->elf_section_data = g_malloc0(s->elf_section_data_size);
> +    dump_arch_sections_write(&s->dump_info, s, s->elf_section_data);
>
>      /* write sections to vmcore */
>      write_elf_sections(s, errp);
> @@ -1894,6 +1908,7 @@ static void dump_init(DumpState *s, int fd, bool has_format,
>       * If phdr_num overflowed we have at least one section header
>       * More sections/hdrs can be added by the architectures
>       */
> +    dump_arch_sections_add(&s->dump_info, (void *)s);
>      if (s->shdr_num > 1) {
>          /* Reserve the string table */
>          s->shdr_num += 1;
> diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h
> index e25b02e990..de77908424 100644
> --- a/include/sysemu/dump-arch.h
> +++ b/include/sysemu/dump-arch.h
> @@ -21,6 +21,9 @@ typedef struct ArchDumpInfo {
>      uint32_t page_size;      /* The target's page size. If it's variable and
>                                * unknown, then this should be the maximum. */
>      uint64_t phys_base;      /* The target's physmem base. */
> +    void (*arch_sections_add_fn)(void *opaque);
> +    uint64_t (*arch_sections_write_hdr_fn)(void *opaque, uint8_t *buff);
> +    void (*arch_sections_write_fn)(void *opaque, uint8_t *buff);
>  } ArchDumpInfo;
>
>  struct GuestPhysBlockList; /* memory_mapping.h */
> @@ -28,4 +31,28 @@ int cpu_get_dump_info(ArchDumpInfo *info,
>                        const struct GuestPhysBlockList *guest_phys_blocks);
>  ssize_t cpu_get_note_size(int class, int machine, int nr_cpus);
>
> +static inline void dump_arch_sections_add(ArchDumpInfo *info, void *opaque)
> +{
> +    if (info->arch_sections_add_fn) {
> +        info->arch_sections_add_fn(opaque);
> +    }
> +}
> +
> +static inline uint64_t dump_arch_sections_write_hdr(ArchDumpInfo *info,
> +                                                void *opaque, uint8_t *buff)
> +{
> +    if (info->arch_sections_write_hdr_fn) {
> +        return info->arch_sections_write_hdr_fn(opaque, buff);
> +    }
> +    return 0;
> +}
> +
> +static inline void dump_arch_sections_write(ArchDumpInfo *info, void *opaque,
> +                                            uint8_t *buff)
> +{
> +    if (info->arch_sections_write_fn) {
> +        info->arch_sections_write_fn(opaque, buff);
> +    }
> +}
> +
>  #endif
> --
> 2.34.1
>

otherwise, seems ok to me



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 07/11] linux header sync
  2022-07-13 13:03 ` [PATCH v2 07/11] linux header sync Janosch Frank
@ 2022-07-13 16:03   ` Marc-André Lureau
  0 siblings, 0 replies; 29+ messages in thread
From: Marc-André Lureau @ 2022-07-13 16:03 UTC (permalink / raw)
  To: Janosch Frank
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>

Please tell which version this update come from. Otherwise, it should be fine
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>

> ---
>  linux-headers/linux/kvm.h | 55 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 55 insertions(+)
>
> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
> index 0d05d02ee4..ae5db2e44c 100644
> --- a/linux-headers/linux/kvm.h
> +++ b/linux-headers/linux/kvm.h
> @@ -1150,6 +1150,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_DISABLE_QUIRKS2 213
>  /* #define KVM_CAP_VM_TSC_CONTROL 214 */
>  #define KVM_CAP_SYSTEM_EVENT_DATA 215
> +#define KVM_CAP_S390_PROTECTED_DUMP 217
>
>  #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -1651,6 +1652,55 @@ struct kvm_s390_pv_unp {
>         __u64 tweak;
>  };
>
> +enum pv_cmd_info_id {
> +       KVM_PV_INFO_VM,
> +       KVM_PV_INFO_DUMP,
> +};
> +
> +struct kvm_s390_pv_info_dump {
> +       __u64 dump_cpu_buffer_len;
> +       __u64 dump_config_mem_buffer_per_1m;
> +       __u64 dump_config_finalize_len;
> +};
> +
> +struct kvm_s390_pv_info_vm {
> +       __u64 inst_calls_list[4];
> +       __u64 max_cpus;
> +       __u64 max_guests;
> +       __u64 max_guest_addr;
> +       __u64 feature_indication;
> +};
> +
> +struct kvm_s390_pv_info_header {
> +       __u32 id;
> +       __u32 len_max;
> +       __u32 len_written;
> +       __u32 reserved;
> +};
> +
> +struct kvm_s390_pv_info {
> +       struct kvm_s390_pv_info_header header;
> +       union {
> +               struct kvm_s390_pv_info_dump dump;
> +               struct kvm_s390_pv_info_vm vm;
> +       };
> +};
> +
> +enum pv_cmd_dmp_id {
> +        KVM_PV_DUMP_INIT,
> +        KVM_PV_DUMP_CONFIG_STATE,
> +        KVM_PV_DUMP_COMPLETE,
> +        KVM_PV_DUMP_CPU,
> +};
> +
> +struct kvm_s390_pv_dmp {
> +        __u64 subcmd;
> +        __u64 buff_addr;
> +        __u64 buff_len;
> +        __u64 gaddr;
> +        __u64 reserved[4];
> +};
> +
>  enum pv_cmd_id {
>         KVM_PV_ENABLE,
>         KVM_PV_DISABLE,
> @@ -1659,6 +1709,8 @@ enum pv_cmd_id {
>         KVM_PV_VERIFY,
>         KVM_PV_PREP_RESET,
>         KVM_PV_UNSHARE_ALL,
> +        KVM_PV_INFO,
> +        KVM_PV_DUMP,
>  };
>
>  struct kvm_pv_cmd {
> @@ -1733,6 +1785,7 @@ struct kvm_xen_vcpu_attr {
>  #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA   0x4
>  #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
>
> +
>  /* Secure Encrypted Virtualization command */
>  enum sev_cmd_id {
>         /* Guest initialization commands */
> @@ -2066,4 +2119,6 @@ struct kvm_stats_desc {
>  /* Available with KVM_CAP_XSAVE2 */
>  #define KVM_GET_XSAVE2           _IOR(KVMIO,  0xcf, struct kvm_xsave)
>
> +#define KVM_S390_PV_CPU_COMMAND _IOWR(KVMIO, 0xd0, struct kvm_pv_cmd)
> +
>  #endif /* __LINUX_KVM_H */
> --
> 2.34.1
>



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 01/11] dump: Cleanup memblock usage
  2022-07-13 15:35       ` Marc-André Lureau
@ 2022-07-14  9:40         ` Janosch Frank
  2022-07-15  8:34           ` Marc-André Lureau
  0 siblings, 1 reply; 29+ messages in thread
From: Janosch Frank @ 2022-07-14  9:40 UTC (permalink / raw)
  To: Marc-André Lureau
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

On 7/13/22 17:35, Marc-André Lureau wrote:
> Hi
> 
> On Wed, Jul 13, 2022 at 7:30 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>>
>> On 7/13/22 17:09, Marc-André Lureau wrote:
>>> Hi
>>>
>>> On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>>>>
>>>> The iteration over the memblocks is hard to understand so it's about
>>>> time to clean it up.
>>>>
>>>> struct DumpState's next_block and start members can and should be
>>>> local variables within the iterator.
>>>>
>>>> Instead of manually grabbing the next memblock we can use
>>>> QTAILQ_FOREACH to iterate over all memblocks.
>>>>
>>>> The begin and length fields in the DumpState have been left untouched
>>>> since the qmp arguments share their names.
>>>>
>>>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
>>>
>>> After this patch:
>>> ./qemu-system-x86_64 -monitor stdio -S
>>> (qemu) dump-guest-memory foo
>>> Error: dump: failed to save memory: Bad address
>>
>> If you have more ways to check for dump errors then please send them to
>> me. I'm aware that this might not have been a 100% conversion and I'm a
>> bit terrified about the fact that this will affect all architectures.
> 
> Same feeling here. Maybe it's about time to write real dump tests!

We have tests for s390 and I've prompted for tests with filtering so we 
can also cover that. Unfortunately s390 differs in the use of memory 
because we only have one large block which hid this error from me.


>>>
>>>> +        if (block->target_start >= filter_area_start + filter_area_length ||
>>>> +            block->target_end <= filter_area_start) {
>>>> +            return -1;
>>>> +        }
>>>> +        if (filter_area_start > block->target_start) {
>>>> +            return filter_area_start - block->target_start;
>>>> +        }
>>>> +    }
>>>> +    return block->target_start;
>>>
>>> This used to be 0. Changing that, I think the patch looks good.
>>> Although it could perhaps be splitted to introduce the two functions.
>>
>> Yes but the 0 was used to indicate that we would have needed continue
>> iterating and the iteration is done via other means in this patch.
>>
>> Or am I missing something?

Had a look, turns out I missed something.

> 
> Well, you changed the way the loop used to work. it used to return 1/0
> to indicate stop/continue and rely on s->start / s->next_block. Now
> you return memblock_start.

Maybe we should call this "dump_get_memblock_start_offset()" to make it 
clearer that we don't return block->target_start, i.e. a start address, 
but rather an offset that we tack onto the host address to read the memory?

> 
>>
>>>
>>>> +}
>>>>    #endif
>>>> --
>>>> 2.34.1
>>>>
>>>
>>>
>>
> 
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 03/11] dump: Split write of section headers and data and add a prepare step
  2022-07-13 15:31   ` Marc-André Lureau
@ 2022-07-14 11:45     ` Janosch Frank
  0 siblings, 0 replies; 29+ messages in thread
From: Janosch Frank @ 2022-07-14 11:45 UTC (permalink / raw)
  To: Marc-André Lureau
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

On 7/13/22 17:31, Marc-André Lureau wrote:
> Hi
> 
> On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>>
>> By splitting the writing of the section headers and (future) section
>> data we prepare for the addition of a string table section and
>> architecture sections.
>>
>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
>> ---

[...]

>> @@ -557,12 +600,22 @@ static void dump_begin(DumpState *s, Error **errp)
>>       /* Write elf header to buffer */
>>       prepare_elf_header(s);
>>
>> +    prepare_elf_sections(s, errp);
>> +    if (*errp) {
>> +        return;
>> +    }
>> +
>>       /* Start to write stuff into files*/
>>       write_elf_header(s, errp);
>>       if (*errp) {
>>           return;
>>       }
>>
>> +    write_elf_section_headers(s, errp);
> 
> Why do you reorder the sections? Could you explain in the commit
> message why? Is this is format compliant? and update the comment
> above? thanks


Having the section data at the end of the file is unfortunately an s390 
PV requirement since we can only grab the encrypted page tweaks and 
counts *after* all of the memory has been encrypted.

The sections are the most obvious way to add such data to the file since 
they are basically unused right now and we're able to write a string 
table at the very end after everyone registered their strings.

All of this is ELF compliant AFAIK; that's why ELF specifies offsets of 
the headers and the data. From what I see, only the main ELF header needs 
to start at offset 0.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 05/11] dump/dump: Add section string table support
  2022-07-13 15:58   ` Marc-André Lureau
@ 2022-07-14 11:53     ` Janosch Frank
  2022-07-14 11:55       ` Marc-André Lureau
  0 siblings, 1 reply; 29+ messages in thread
From: Janosch Frank @ 2022-07-14 11:53 UTC (permalink / raw)
  To: Marc-André Lureau
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

On 7/13/22 17:58, Marc-André Lureau wrote:
> Hi
> 
> On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com> wrote:
>>
>> Time to add a bit more descriptiveness to the dumps.
> 
> Please add some more description & motivation to the patch (supposedly
> necessary for next patches), and explain that it currently doesn't
> change the dump (afaict).

How about:

As sections don't have a type like the notes do, we need another way to 
determine their contents. The string table allows us to assign each 
section an identification string which architectures can then use to tag 
their sections with.

There will be no string table if the architecture doesn't add custom 
sections which are introduced in a following patch.


>>
>> -    if (dump_is_64bit(s)) {
>> -        s->phdr_offset = sizeof(Elf64_Ehdr);
>> -        s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
>> -        s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
>> -        s->memory_offset = s->note_offset + s->note_size;
>> -    } else {
>> -
>> -        s->phdr_offset = sizeof(Elf32_Ehdr);
>> -        s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
>> -        s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
>> -        s->memory_offset = s->note_offset + s->note_size;
>> +    /*
>> +     * calculate shdr_num and elf_section_data_size so we know the offsets and
>> +     * sizes of all parts.
>> +     *
>> +     * If phdr_num overflowed we have at least one section header
>> +     * More sections/hdrs can be added by the architectures
>> +     */
>> +    if (s->shdr_num > 1) {
>> +        /* Reserve the string table */
>> +        s->shdr_num += 1;
>>       }
>>
>> +    tmp = (s->phdr_num == PN_XNUM) ? s->sh_info : s->phdr_num;
>> +    if (dump_is_64bit(s)) {
>> +        s->shdr_offset = sizeof(Elf64_Ehdr);
>> +        s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
>> +        s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * tmp;
>> +    } else {
>> +        s->shdr_offset = sizeof(Elf32_Ehdr);
>> +        s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
>> +        s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * tmp;
>> +    }
>> +    s->memory_offset = s->note_offset + s->note_size;
> 
> I suggest to split this in a different patch. It's not obvious that
> you can change phdr_offset / shdr_offset, it deserves a comment.

Right, will do

> 
>> +    s->section_offset = s->memory_offset + s->total_size;
>> +
>>       return;
>>
>>   cleanup:
>> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
>> index 8379e29ef6..2c25c7d309 100644
>> --- a/include/sysemu/dump.h
>> +++ b/include/sysemu/dump.h
>> @@ -178,6 +178,7 @@ typedef struct DumpState {
>>       void *elf_section_hdrs;
>>       uint64_t elf_section_data_size;
>>       void *elf_section_data;
>> +    GArray *string_table_buf;  /* String table section */
>>
>>       uint8_t *note_buf;          /* buffer for notes */
>>       size_t note_buf_offset;     /* the writing place in note_buf */
>> --
>> 2.34.1
>>
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 05/11] dump/dump: Add section string table support
  2022-07-14 11:53     ` Janosch Frank
@ 2022-07-14 11:55       ` Marc-André Lureau
  0 siblings, 0 replies; 29+ messages in thread
From: Marc-André Lureau @ 2022-07-14 11:55 UTC (permalink / raw)
  To: Janosch Frank
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

[-- Attachment #1: Type: text/plain, Size: 3528 bytes --]

Hi

On Thu, Jul 14, 2022 at 3:54 PM Janosch Frank <frankja@linux.ibm.com> wrote:

> On 7/13/22 17:58, Marc-André Lureau wrote:
> > Hi
> >
> > On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com>
> wrote:
> >>
> >> Time to add a bit more descriptiveness to the dumps.
> >
> > Please add some more description & motivation to the patch (supposedly
> > necessary for next patches), and explain that it currently doesn't
> > change the dump (afaict).
>
> How about:
>
> As sections don't have a type like the notes do, we need another way to
> determine their contents. The string table allows us to assign each
> section an identification string which architectures can then use to tag
> their sections with.
>
> There will be no string table if the architecture doesn't add custom
> sections which are introduced in a following patch.
>

lgtm, thanks


>
>
> >>
> >> -    if (dump_is_64bit(s)) {
> >> -        s->phdr_offset = sizeof(Elf64_Ehdr);
> >> -        s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) *
> s->phdr_num;
> >> -        s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) *
> s->shdr_num;
> >> -        s->memory_offset = s->note_offset + s->note_size;
> >> -    } else {
> >> -
> >> -        s->phdr_offset = sizeof(Elf32_Ehdr);
> >> -        s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) *
> s->phdr_num;
> >> -        s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) *
> s->shdr_num;
> >> -        s->memory_offset = s->note_offset + s->note_size;
> >> +    /*
> >> +     * calculate shdr_num and elf_section_data_size so we know the
> offsets and
> >> +     * sizes of all parts.
> >> +     *
> >> +     * If phdr_num overflowed we have at least one section header
> >> +     * More sections/hdrs can be added by the architectures
> >> +     */
> >> +    if (s->shdr_num > 1) {
> >> +        /* Reserve the string table */
> >> +        s->shdr_num += 1;
> >>       }
> >>
> >> +    tmp = (s->phdr_num == PN_XNUM) ? s->sh_info : s->phdr_num;
> >> +    if (dump_is_64bit(s)) {
> >> +        s->shdr_offset = sizeof(Elf64_Ehdr);
> >> +        s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) *
> s->shdr_num;
> >> +        s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * tmp;
> >> +    } else {
> >> +        s->shdr_offset = sizeof(Elf32_Ehdr);
> >> +        s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) *
> s->shdr_num;
> >> +        s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * tmp;
> >> +    }
> >> +    s->memory_offset = s->note_offset + s->note_size;
> >
> > I suggest to split this in a different patch. It's not obvious that
> > you can change phdr_offset / shdr_offset, it deserves a comment.
>
> Right, will do
>
> >
> >> +    s->section_offset = s->memory_offset + s->total_size;
> >> +
> >>       return;
> >>
> >>   cleanup:
> >> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> >> index 8379e29ef6..2c25c7d309 100644
> >> --- a/include/sysemu/dump.h
> >> +++ b/include/sysemu/dump.h
> >> @@ -178,6 +178,7 @@ typedef struct DumpState {
> >>       void *elf_section_hdrs;
> >>       uint64_t elf_section_data_size;
> >>       void *elf_section_data;
> >> +    GArray *string_table_buf;  /* String table section */
> >>
> >>       uint8_t *note_buf;          /* buffer for notes */
> >>       size_t note_buf_offset;     /* the writing place in note_buf */
> >> --
> >> 2.34.1
> >>
> >
>
>

-- 
Marc-André Lureau

[-- Attachment #2: Type: text/html, Size: 4967 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 09/11] s390x: Introduce PV query interface
  2022-07-13 13:03 ` [PATCH v2 09/11] s390x: Introduce PV query interface Janosch Frank
@ 2022-07-15  8:10   ` Marc-André Lureau
  2022-07-15  8:18     ` Janosch Frank
  0 siblings, 1 reply; 29+ messages in thread
From: Marc-André Lureau @ 2022-07-15  8:10 UTC (permalink / raw)
  To: Janosch Frank
  Cc: QEMU, Paolo Bonzini, mhartmay, Christian Borntraeger, imbrenda,
	Halil Pasic, Cornelia Huck, Thomas Huth, Qemu-s390x list,
	Richard Henderson

[-- Attachment #1: Type: text/plain, Size: 5372 bytes --]

Hi

On Wed, Jul 13, 2022 at 5:18 PM Janosch Frank <frankja@linux.ibm.com> wrote:

> Introduce an interface over which we can get information about UV data.
>
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> ---
>  hw/s390x/pv.c              | 61 ++++++++++++++++++++++++++++++++++++++
>  hw/s390x/s390-virtio-ccw.c |  5 ++++
>  include/hw/s390x/pv.h      | 10 +++++++
>  3 files changed, 76 insertions(+)
>
> diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
> index 401b63d6cb..a5af4ddf46 100644
> --- a/hw/s390x/pv.c
> +++ b/hw/s390x/pv.c
> @@ -20,6 +20,11 @@
>  #include "exec/confidential-guest-support.h"
>  #include "hw/s390x/ipl.h"
>  #include "hw/s390x/pv.h"
> +#include "target/s390x/kvm/kvm_s390x.h"
> +
> +static bool info_valid;
> +static struct kvm_s390_pv_info_vm info_vm;
> +static struct kvm_s390_pv_info_dump info_dump;
>
>  static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
>  {
> @@ -56,6 +61,42 @@ static int __s390_pv_cmd(uint32_t cmd, const char
> *cmdname, void *data)
>      }                                  \
>  }
>
> +int s390_pv_query_info(void)
> +{
> +    struct kvm_s390_pv_info info = {
> +        .header.id = KVM_PV_INFO_VM,
> +        .header.len_max = sizeof(info.header) + sizeof(info.vm),
> +    };
> +    int rc;
> +
> +    /* Info API's first user is dump so they are bundled */
> +    if (!kvm_s390_get_protected_dump()) {
> +        return 0;
> +    }
> +
> +    rc = s390_pv_cmd(KVM_PV_INFO, &info);
> +    if (rc) {
> +        error_report("KVM PV INFO cmd %x failed: %s",
> +                     info.header.id, strerror(rc));
> +        return rc;
> +    }
> +    memcpy(&info_vm, &info.vm, sizeof(info.vm));
> +
> +    info.header.id = KVM_PV_INFO_DUMP;
> +    info.header.len_max = sizeof(info.header) + sizeof(info.dump);
> +    rc = s390_pv_cmd(KVM_PV_INFO, &info);
> +    if (rc) {
> +        error_report("KVM PV INFO cmd %x failed: %s",
> +                     info.header.id, strerror(rc));
> +        return rc;
> +    }
> +
> +    memcpy(&info_dump, &info.dump, sizeof(info.dump));
> +    info_valid = true;
> +
> +    return rc;
> +}
> +
>  int s390_pv_vm_enable(void)
>  {
>      return s390_pv_cmd(KVM_PV_ENABLE, NULL);
> @@ -114,6 +155,26 @@ void s390_pv_inject_reset_error(CPUState *cs)
>      env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV;
>  }
>
> +uint64_t kvm_s390_pv_dmp_get_size_cpu(void)
> +{
> +    return info_dump.dump_cpu_buffer_len;
> +}
> +
> +uint64_t kvm_s390_pv_dmp_get_size_complete(void)
> +{
> +    return info_dump.dump_config_finalize_len;
> +}
> +
> +uint64_t kvm_s390_pv_dmp_get_size_mem(void)
> +{
> +    return info_dump.dump_config_mem_buffer_per_1m;
> +}
> +
> +bool kvm_s390_pv_info_basic_valid(void)
> +{
> +    return info_valid;
> +}
> +
>  #define TYPE_S390_PV_GUEST "s390-pv-guest"
>  OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST)
>
> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> index cc3097bfee..f9401e392b 100644
> --- a/hw/s390x/s390-virtio-ccw.c
> +++ b/hw/s390x/s390-virtio-ccw.c
> @@ -366,6 +366,11 @@ static int s390_machine_protect(S390CcwMachineState
> *ms)
>
>      ms->pv = true;
>
> +    rc = s390_pv_query_info();
> +    if (rc) {
> +        goto out_err;
>

Maybe it's not necessary to make it fatal on error?

lgtm otherwise


> +    }
> +
>      /* Set SE header and unpack */
>      rc = s390_ipl_prepare_pv_header();
>      if (rc) {
> diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h
> index 1f1f545bfc..6fa55bf70e 100644
> --- a/include/hw/s390x/pv.h
> +++ b/include/hw/s390x/pv.h
> @@ -38,6 +38,7 @@ static inline bool s390_is_pv(void)
>      return ccw->pv;
>  }
>
> +int s390_pv_query_info(void);
>  int s390_pv_vm_enable(void);
>  void s390_pv_vm_disable(void);
>  int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
> @@ -46,8 +47,13 @@ void s390_pv_prep_reset(void);
>  int s390_pv_verify(void);
>  void s390_pv_unshare(void);
>  void s390_pv_inject_reset_error(CPUState *cs);
> +uint64_t kvm_s390_pv_dmp_get_size_cpu(void);
> +uint64_t kvm_s390_pv_dmp_get_size_mem(void);
> +uint64_t kvm_s390_pv_dmp_get_size_complete(void);
> +bool kvm_s390_pv_info_basic_valid(void);
>  #else /* CONFIG_KVM */
>  static inline bool s390_is_pv(void) { return false; }
> +static inline int s390_pv_query_info(void) { return 0; }
>  static inline int s390_pv_vm_enable(void) { return 0; }
>  static inline void s390_pv_vm_disable(void) {}
>  static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length)
> { return 0; }
> @@ -56,6 +62,10 @@ static inline void s390_pv_prep_reset(void) {}
>  static inline int s390_pv_verify(void) { return 0; }
>  static inline void s390_pv_unshare(void) {}
>  static inline void s390_pv_inject_reset_error(CPUState *cs) {};
> +static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; }
> +static inline uint64_t kvm_s390_pv_dmp_get_size_mem(void) { return 0; }
> +static inline uint64_t kvm_s390_pv_dmp_get_size_complete(void) { return
> 0; }
> +static inline bool kvm_s390_pv_info_basic_valid(void) { return false; }
>  #endif /* CONFIG_KVM */
>
>  int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
> --
> 2.34.1
>
>
>

-- 
Marc-André Lureau

[-- Attachment #2: Type: text/html, Size: 6870 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 09/11] s390x: Introduce PV query interface
  2022-07-15  8:10   ` Marc-André Lureau
@ 2022-07-15  8:18     ` Janosch Frank
  2022-07-15  8:23       ` Marc-André Lureau
  0 siblings, 1 reply; 29+ messages in thread
From: Janosch Frank @ 2022-07-15  8:18 UTC (permalink / raw)
  To: Marc-André Lureau
  Cc: QEMU, Paolo Bonzini, mhartmay, Christian Borntraeger, imbrenda,
	Halil Pasic, Cornelia Huck, Thomas Huth, Qemu-s390x list,
	Richard Henderson

On 7/15/22 10:10, Marc-André Lureau wrote:
[...]
>>       ms->pv = true;
>>
>> +    rc = s390_pv_query_info();
>> +    if (rc) {
>> +        goto out_err;
>>
> 
> Maybe it's not necessary to make it fatal on error?
> 
> lgtm otherwise

Hmm, yes and no.
The info API is fenced by the dump CAP so I don't ever expect an error 
here, but on the other hand a failure of an optional info API might not 
warrant an error.

> 
> 
>> +    }
>> +
>>       /* Set SE header and unpack */
>>       rc = s390_ipl_prepare_pv_header();
>>       if (rc) {
>> diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h
>> index 1f1f545bfc..6fa55bf70e 100644
>> --- a/include/hw/s390x/pv.h
>> +++ b/include/hw/s390x/pv.h
>> @@ -38,6 +38,7 @@ static inline bool s390_is_pv(void)
>>       return ccw->pv;
>>   }
>>
>> +int s390_pv_query_info(void);
>>   int s390_pv_vm_enable(void);
>>   void s390_pv_vm_disable(void);
>>   int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
>> @@ -46,8 +47,13 @@ void s390_pv_prep_reset(void);
>>   int s390_pv_verify(void);
>>   void s390_pv_unshare(void);
>>   void s390_pv_inject_reset_error(CPUState *cs);
>> +uint64_t kvm_s390_pv_dmp_get_size_cpu(void);
>> +uint64_t kvm_s390_pv_dmp_get_size_mem(void);
>> +uint64_t kvm_s390_pv_dmp_get_size_complete(void);
>> +bool kvm_s390_pv_info_basic_valid(void);
>>   #else /* CONFIG_KVM */
>>   static inline bool s390_is_pv(void) { return false; }
>> +static inline int s390_pv_query_info(void) { return 0; }
>>   static inline int s390_pv_vm_enable(void) { return 0; }
>>   static inline void s390_pv_vm_disable(void) {}
>>   static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length)
>> { return 0; }
>> @@ -56,6 +62,10 @@ static inline void s390_pv_prep_reset(void) {}
>>   static inline int s390_pv_verify(void) { return 0; }
>>   static inline void s390_pv_unshare(void) {}
>>   static inline void s390_pv_inject_reset_error(CPUState *cs) {};
>> +static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; }
>> +static inline uint64_t kvm_s390_pv_dmp_get_size_mem(void) { return 0; }
>> +static inline uint64_t kvm_s390_pv_dmp_get_size_complete(void) { return
>> 0; }
>> +static inline bool kvm_s390_pv_info_basic_valid(void) { return false; }
>>   #endif /* CONFIG_KVM */
>>
>>   int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
>> --
>> 2.34.1
>>
>>
>>
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 09/11] s390x: Introduce PV query interface
  2022-07-15  8:18     ` Janosch Frank
@ 2022-07-15  8:23       ` Marc-André Lureau
  0 siblings, 0 replies; 29+ messages in thread
From: Marc-André Lureau @ 2022-07-15  8:23 UTC (permalink / raw)
  To: Janosch Frank
  Cc: QEMU, Paolo Bonzini, mhartmay, Christian Borntraeger, imbrenda,
	Halil Pasic, Cornelia Huck, Thomas Huth, Qemu-s390x list,
	Richard Henderson

[-- Attachment #1: Type: text/plain, Size: 2813 bytes --]

On Fri, Jul 15, 2022 at 12:18 PM Janosch Frank <frankja@linux.ibm.com>
wrote:

> On 7/15/22 10:10, Marc-André Lureau wrote:
> [...]
> >>       ms->pv = true;
> >>
> >> +    rc = s390_pv_query_info();
> >> +    if (rc) {
> >> +        goto out_err;
> >>
> >
> > Maybe it's not necessary to make it fatal on error?
> >
> > lgtm otherwise
>
> Hmm, yes and no.
> The info API is fenced by the dump CAP so I don't ever expect an error
> here but on the other hand an optional info API fail might not warrant
> an error.
>
>
I see. You could explain the kernel version/requirements more explicitly
in the commit messages and/or comments.



> >
> >
> >> +    }
> >> +
> >>       /* Set SE header and unpack */
> >>       rc = s390_ipl_prepare_pv_header();
> >>       if (rc) {
> >> diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h
> >> index 1f1f545bfc..6fa55bf70e 100644
> >> --- a/include/hw/s390x/pv.h
> >> +++ b/include/hw/s390x/pv.h
> >> @@ -38,6 +38,7 @@ static inline bool s390_is_pv(void)
> >>       return ccw->pv;
> >>   }
> >>
> >> +int s390_pv_query_info(void);
> >>   int s390_pv_vm_enable(void);
> >>   void s390_pv_vm_disable(void);
> >>   int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
> >> @@ -46,8 +47,13 @@ void s390_pv_prep_reset(void);
> >>   int s390_pv_verify(void);
> >>   void s390_pv_unshare(void);
> >>   void s390_pv_inject_reset_error(CPUState *cs);
> >> +uint64_t kvm_s390_pv_dmp_get_size_cpu(void);
> >> +uint64_t kvm_s390_pv_dmp_get_size_mem(void);
> >> +uint64_t kvm_s390_pv_dmp_get_size_complete(void);
> >> +bool kvm_s390_pv_info_basic_valid(void);
> >>   #else /* CONFIG_KVM */
> >>   static inline bool s390_is_pv(void) { return false; }
> >> +static inline int s390_pv_query_info(void) { return 0; }
> >>   static inline int s390_pv_vm_enable(void) { return 0; }
> >>   static inline void s390_pv_vm_disable(void) {}
> >>   static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t
> length)
> >> { return 0; }
> >> @@ -56,6 +62,10 @@ static inline void s390_pv_prep_reset(void) {}
> >>   static inline int s390_pv_verify(void) { return 0; }
> >>   static inline void s390_pv_unshare(void) {}
> >>   static inline void s390_pv_inject_reset_error(CPUState *cs) {};
> >> +static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; }
> >> +static inline uint64_t kvm_s390_pv_dmp_get_size_mem(void) { return 0; }
> >> +static inline uint64_t kvm_s390_pv_dmp_get_size_complete(void) { return
> >> 0; }
> >> +static inline bool kvm_s390_pv_info_basic_valid(void) { return false; }
> >>   #endif /* CONFIG_KVM */
> >>
> >>   int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
> >> --
> >> 2.34.1
> >>
> >>
> >>
> >
>
>

-- 
Marc-André Lureau

[-- Attachment #2: Type: text/html, Size: 3940 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 01/11] dump: Cleanup memblock usage
  2022-07-14  9:40         ` Janosch Frank
@ 2022-07-15  8:34           ` Marc-André Lureau
  0 siblings, 0 replies; 29+ messages in thread
From: Marc-André Lureau @ 2022-07-15  8:34 UTC (permalink / raw)
  To: Janosch Frank
  Cc: qemu-devel, Bonzini, Paolo, mhartmay, Christian Borntraeger,
	imbrenda, Halil Pasic, Cornelia Huck, Thomas Huth,
	open list:S390 SCLP-backed...,
	Henderson, Richard

[-- Attachment #1: Type: text/plain, Size: 3168 bytes --]

Hi

On Thu, Jul 14, 2022 at 1:46 PM Janosch Frank <frankja@linux.ibm.com> wrote:

> On 7/13/22 17:35, Marc-André Lureau wrote:
> > Hi
> >
> > On Wed, Jul 13, 2022 at 7:30 PM Janosch Frank <frankja@linux.ibm.com>
> wrote:
> >>
> >> On 7/13/22 17:09, Marc-André Lureau wrote:
> >>> Hi
> >>>
> >>> On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank <frankja@linux.ibm.com>
> wrote:
> >>>>
> >>>> The iteration over the memblocks is hard to understand so it's about
> >>>> time to clean it up.
> >>>>
> >>>> struct DumpState's next_block and start members can and should be
> >>>> local variables within the iterator.
> >>>>
> >>>> Instead of manually grabbing the next memblock we can use
> >>>> QTAILQ_FOREACH to iterate over all memblocks.
> >>>>
> >>>> The begin and length fields in the DumpState have been left untouched
> >>>> since the qmp arguments share their names.
> >>>>
> >>>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> >>>
> >>> After this patch:
> >>> ./qemu-system-x86_64 -monitor stdio -S
> >>> (qemu) dump-guest-memory foo
> >>> Error: dump: failed to save memory: Bad address
> >>
> >> If you have more ways to check for dump errors then please send them to
> >> me. I'm aware that this might not have been a 100% conversion and I'm a
> >> bit terrified about the fact that this will affect all architectures.
> >
> > Same feeling here. Maybe it's about time to write real dump tests!
>
We have tests for s390 and I've prompted for tests with filtering so we 
can also cover that. Unfortunately s390 differs in its use of memory 
because we only have one large block, which hid this error from me.
>
>
> >>>
> >>>> +        if (block->target_start >= filter_area_start +
> filter_area_length ||
> >>>> +            block->target_end <= filter_area_start) {
> >>>> +            return -1;
> >>>> +        }
> >>>> +        if (filter_area_start > block->target_start) {
> >>>> +            return filter_area_start - block->target_start;
> >>>> +        }
> >>>> +    }
> >>>> +    return block->target_start;
> >>>
> >>> This used to be 0. Changing that, I think the patch looks good.
> >>> Although it could perhaps be splitted to introduce the two functions.
> >>
> >> Yes but the 0 was used to indicate that we would have needed continue
> >> iterating and the iteration is done via other means in this patch.
> >>
> >> Or am I missing something?
>
> Had a look, turns out I missed something.
>
> >
> > Well, you changed the way the loop used to work. it used to return 1/0
> > to indicate stop/continue and rely on s->start / s->next_block. Now
> > you return memblock_start.
>
Maybe we should call this "dump_get_memblock_start_offset()" to make it 
clearer that we don't return block->target_start, i.e. a start address, 
but rather an offset that we tack onto the host address to read the memory?
>
>
Not a big difference to me. You would need to adjust write_memory() "start"
argument name as well then.


> >
> >>
> >>>
> >>>> +}
> >>>>    #endif
> >>>> --
> >>>> 2.34.1
> >>>>
> >>>
> >>>
> >>
> >
> >
>
>

-- 
Marc-André Lureau

[-- Attachment #2: Type: text/html, Size: 4779 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2022-07-15  8:37 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-13 13:03 [PATCH v2 00/11] dump: Add arch section and s390x PV dump Janosch Frank
2022-07-13 13:03 ` [PATCH v2 01/11] dump: Cleanup memblock usage Janosch Frank
2022-07-13 15:09   ` Marc-André Lureau
2022-07-13 15:30     ` Janosch Frank
2022-07-13 15:35       ` Marc-André Lureau
2022-07-14  9:40         ` Janosch Frank
2022-07-15  8:34           ` Marc-André Lureau
2022-07-13 13:03 ` [PATCH v2 02/11] dump: Allocate header Janosch Frank
2022-07-13 15:20   ` Marc-André Lureau
2022-07-13 13:03 ` [PATCH v2 03/11] dump: Split write of section headers and data and add a prepare step Janosch Frank
2022-07-13 15:31   ` Marc-André Lureau
2022-07-14 11:45     ` Janosch Frank
2022-07-13 13:03 ` [PATCH v2 04/11] dump: Reorder struct DumpState Janosch Frank
2022-07-13 15:46   ` Marc-André Lureau
2022-07-13 13:03 ` [PATCH v2 05/11] dump/dump: Add section string table support Janosch Frank
2022-07-13 15:58   ` Marc-André Lureau
2022-07-14 11:53     ` Janosch Frank
2022-07-14 11:55       ` Marc-André Lureau
2022-07-13 13:03 ` [PATCH v2 06/11] dump/dump: Add arch section support Janosch Frank
2022-07-13 16:02   ` Marc-André Lureau
2022-07-13 13:03 ` [PATCH v2 07/11] linux header sync Janosch Frank
2022-07-13 16:03   ` Marc-André Lureau
2022-07-13 13:03 ` [PATCH v2 08/11] s390x: Add protected dump cap Janosch Frank
2022-07-13 13:03 ` [PATCH v2 09/11] s390x: Introduce PV query interface Janosch Frank
2022-07-15  8:10   ` Marc-André Lureau
2022-07-15  8:18     ` Janosch Frank
2022-07-15  8:23       ` Marc-André Lureau
2022-07-13 13:03 ` [PATCH v2 10/11] s390x: Add KVM PV dump interface Janosch Frank
2022-07-13 13:03 ` [PATCH v2 11/11] s390x: pv: Add dump support Janosch Frank

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.