* [PATCH v2 00/10] makedumpfile: parallel processing
@ 2015-06-19 8:56 Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 01/10] Add readpage_kdump_compressed_parallel Zhou Wenjian
` (10 more replies)
0 siblings, 11 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19 8:56 UTC (permalink / raw)
To: kexec
This patch set implements parallel processing by means of multiple threads.
With this patch set, it is possible to use multiple threads to read
and compress pages. This parallel processing will save time.
This feature only supports creating dumpfile in kdump-compressed format from
vmcore in kdump-compressed format or elf format. Currently, sadump and
xen kdump are not supported.
Qiao Nuohan (10):
Add readpage_kdump_compressed_parallel
Add mappage_elf_parallel
Add readpage_elf_parallel
Add read_pfn_parallel
Add function to initial bitmap for parallel use
Add filter_data_buffer_parallel
Add write_kdump_pages_parallel to allow parallel process
Initial and free data used for parallel process
Make makedumpfile available to read and compress pages parallelly
Add usage and manual about multiple threads process
Makefile | 2 +
erase_info.c | 29 ++-
erase_info.h | 2 +
makedumpfile.8 | 24 ++
makedumpfile.c | 1096 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
makedumpfile.h | 80 ++++
print_info.c | 16 +
7 files changed, 1246 insertions(+), 3 deletions(-)
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 01/10] Add readpage_kdump_compressed_parallel
2015-06-19 8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
@ 2015-06-19 8:56 ` Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 02/10] Add mappage_elf_parallel Zhou Wenjian
` (9 subsequent siblings)
10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19 8:56 UTC (permalink / raw)
To: kexec; +Cc: Qiao Nuohan
From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
readpage_kdump_compressed_parallel is used to enable reading pages from
vmcore in kdump-compressed format in parallel. fd_memory and bitmap_memory
should be initialized and offered to each thread individually to avoid
conflicts.
Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
Signed-off-by: Zhou wenjian <zhouwj-fnst@cn.fujitsu.com>
---
makedumpfile.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 137 insertions(+), 0 deletions(-)
diff --git a/makedumpfile.c b/makedumpfile.c
index cc71f20..3657d4f 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -251,6 +251,20 @@ pfn_to_pos(mdf_pfn_t pfn)
return desc_pos;
}
+unsigned long
+pfn_to_pos_parallel(mdf_pfn_t pfn, struct dump_bitmap* bitmap_memory_parallel)
+{
+ unsigned long desc_pos;
+ mdf_pfn_t i;
+
+ desc_pos = info->valid_pages[pfn / BITMAP_SECT_LEN];
+ for (i = round(pfn, BITMAP_SECT_LEN); i < pfn; i++)
+ if (is_dumpable(bitmap_memory_parallel, i, NULL))
+ desc_pos++;
+
+ return desc_pos;
+}
+
int
read_page_desc(unsigned long long paddr, page_desc_t *pd)
{
@@ -293,6 +307,50 @@ read_page_desc(unsigned long long paddr, page_desc_t *pd)
return TRUE;
}
+int
+read_page_desc_parallel(int fd_memory, unsigned long long paddr,
+ page_desc_t *pd,
+ struct dump_bitmap* bitmap_memory_parallel)
+{
+ struct disk_dump_header *dh;
+ unsigned long desc_pos;
+ mdf_pfn_t pfn;
+ off_t offset;
+
+ /*
+ * Find page descriptor
+ */
+ dh = info->dh_memory;
+ offset
+ = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size + dh->bitmap_blocks)
+ * dh->block_size;
+ pfn = paddr_to_pfn(paddr);
+ desc_pos = pfn_to_pos_parallel(pfn, bitmap_memory_parallel);
+ offset += (off_t)desc_pos * sizeof(page_desc_t);
+ if (lseek(fd_memory, offset, SEEK_SET) < 0) {
+ ERRMSG("Can't seek %s. %s\n",
+ info->name_memory, strerror(errno));
+ return FALSE;
+ }
+
+ /*
+ * Read page descriptor
+ */
+ if (read(fd_memory, pd, sizeof(*pd)) != sizeof(*pd)) {
+ ERRMSG("Can't read %s. %s\n",
+ info->name_memory, strerror(errno));
+ return FALSE;
+ }
+
+ /*
+ * Sanity check
+ */
+ if (pd->size > dh->block_size)
+ return FALSE;
+
+ return TRUE;
+}
+
static void
unmap_cache(struct cache_entry *entry)
{
@@ -589,6 +647,85 @@ readpage_kdump_compressed(unsigned long long paddr, void *bufptr)
return TRUE;
}
+static int
+readpage_kdump_compressed_parallel(int fd_memory, unsigned long long paddr,
+ void *bufptr,
+ struct dump_bitmap* bitmap_memory_parallel)
+{
+ page_desc_t pd;
+ char buf[info->page_size], *rdbuf;
+ int ret;
+ unsigned long retlen;
+
+ if (!is_dumpable(bitmap_memory_parallel, paddr_to_pfn(paddr), NULL)) {
+ ERRMSG("pfn(%llx) is excluded from %s.\n",
+ paddr_to_pfn(paddr), info->name_memory);
+ return FALSE;
+ }
+
+ if (!read_page_desc_parallel(fd_memory, paddr, &pd,
+ bitmap_memory_parallel)) {
+ ERRMSG("Can't read page_desc: %llx\n", paddr);
+ return FALSE;
+ }
+
+ if (lseek(fd_memory, pd.offset, SEEK_SET) < 0) {
+ ERRMSG("Can't seek %s. %s\n",
+ info->name_memory, strerror(errno));
+ return FALSE;
+ }
+
+ /*
+ * Read page data
+ */
+ rdbuf = pd.flags & (DUMP_DH_COMPRESSED_ZLIB | DUMP_DH_COMPRESSED_LZO |
+ DUMP_DH_COMPRESSED_SNAPPY) ? buf : bufptr;
+ if (read(fd_memory, rdbuf, pd.size) != pd.size) {
+ ERRMSG("Can't read %s. %s\n",
+ info->name_memory, strerror(errno));
+ return FALSE;
+ }
+
+ if (pd.flags & DUMP_DH_COMPRESSED_ZLIB) {
+ retlen = info->page_size;
+ ret = uncompress((unsigned char *)bufptr, &retlen,
+ (unsigned char *)buf, pd.size);
+ if ((ret != Z_OK) || (retlen != info->page_size)) {
+ ERRMSG("Uncompress failed: %d\n", ret);
+ return FALSE;
+ }
+#ifdef USELZO
+ } else if (info->flag_lzo_support
+ && (pd.flags & DUMP_DH_COMPRESSED_LZO)) {
+ retlen = info->page_size;
+ ret = lzo1x_decompress_safe((unsigned char *)buf, pd.size,
+ (unsigned char *)bufptr, &retlen,
+ LZO1X_MEM_DECOMPRESS);
+ if ((ret != LZO_E_OK) || (retlen != info->page_size)) {
+ ERRMSG("Uncompress failed: %d\n", ret);
+ return FALSE;
+ }
+#endif
+#ifdef USESNAPPY
+ } else if ((pd.flags & DUMP_DH_COMPRESSED_SNAPPY)) {
+
+ ret = snappy_uncompressed_length(buf, pd.size, (size_t *)&retlen);
+ if (ret != SNAPPY_OK) {
+ ERRMSG("Uncompress failed: %d\n", ret);
+ return FALSE;
+ }
+
+ ret = snappy_uncompress(buf, pd.size, bufptr, (size_t *)&retlen);
+ if ((ret != SNAPPY_OK) || (retlen != info->page_size)) {
+ ERRMSG("Uncompress failed: %d\n", ret);
+ return FALSE;
+ }
+#endif
+ }
+
+ return TRUE;
+}
+
int
readmem(int type_addr, unsigned long long addr, void *bufptr, size_t size)
{
--
1.7.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 02/10] Add mappage_elf_parallel
2015-06-19 8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 01/10] Add readpage_kdump_compressed_parallel Zhou Wenjian
@ 2015-06-19 8:56 ` Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 03/10] Add readpage_elf_parallel Zhou Wenjian
` (8 subsequent siblings)
10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19 8:56 UTC (permalink / raw)
To: kexec; +Cc: Qiao Nuohan
From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
mappage_elf_parallel is used to enable mmapping vmcore in elf format to
memory in parallel. A later patch will use the mmapped memory to get the
data of each page. fd_memory and mmap_cache should be initialized and
offered to each thread individually to avoid conflicts.
Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
makedumpfile.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
makedumpfile.h | 14 ++++++++
2 files changed, 111 insertions(+), 0 deletions(-)
diff --git a/makedumpfile.c b/makedumpfile.c
index 3657d4f..d1b4bc2 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -394,6 +394,46 @@ update_mmap_range(off_t offset, int initial) {
}
static int
+update_mmap_range_parallel(int fd_memory, off_t offset,
+ struct mmap_cache *mmap_cache)
+{
+ off_t start_offset, end_offset;
+ off_t map_size;
+ off_t max_offset = get_max_file_offset();
+ off_t pt_load_end = offset_to_pt_load_end(offset);
+
+ /*
+ * mmap_buf must be cleaned
+ */
+ if (mmap_cache->mmap_buf != MAP_FAILED)
+ munmap(mmap_cache->mmap_buf, mmap_cache->mmap_end_offset
+ - mmap_cache->mmap_start_offset);
+
+ /*
+ * offset for mmap() must be page aligned.
+ */
+ start_offset = roundup(offset, info->page_size);
+ end_offset = MIN(max_offset, round(pt_load_end, info->page_size));
+
+ if (!pt_load_end || (end_offset - start_offset) <= 0)
+ return FALSE;
+
+ map_size = MIN(end_offset - start_offset, info->mmap_region_size);
+
+ mmap_cache->mmap_buf = mmap(NULL, map_size, PROT_READ, MAP_PRIVATE,
+ fd_memory, start_offset);
+
+ if (mmap_cache->mmap_buf == MAP_FAILED) {
+ return FALSE;
+ }
+
+ mmap_cache->mmap_start_offset = start_offset;
+ mmap_cache->mmap_end_offset = start_offset + map_size;
+
+ return TRUE;
+}
+
+static int
is_mapped_with_mmap(off_t offset) {
if (info->flag_usemmap == MMAP_ENABLE
@@ -404,6 +444,15 @@ is_mapped_with_mmap(off_t offset) {
return FALSE;
}
+static int
+is_mapped_with_mmap_parallel(off_t offset, struct mmap_cache *mmap_cache) {
+ if (offset >= mmap_cache->mmap_start_offset
+ && offset < mmap_cache->mmap_end_offset)
+ return TRUE;
+ else
+ return FALSE;
+}
+
int
initialize_mmap(void) {
unsigned long long phys_start;
@@ -458,6 +507,54 @@ mappage_elf(unsigned long long paddr)
return info->mmap_buf + (offset - info->mmap_start_offset);
}
+static char *
+mappage_elf_parallel(int fd_memory, unsigned long long paddr,
+ struct mmap_cache *mmap_cache)
+{
+ off_t offset, offset2;
+ int flag_usemmap;
+
+ pthread_rwlock_rdlock(&info->usemmap_rwlock);
+ flag_usemmap = info->flag_usemmap;
+ pthread_rwlock_unlock(&info->usemmap_rwlock);
+ if (flag_usemmap != MMAP_ENABLE)
+ return NULL;
+
+ offset = paddr_to_offset(paddr);
+ if (!offset || page_is_fractional(offset))
+ return NULL;
+
+ offset2 = paddr_to_offset(paddr + info->page_size - 1);
+ if (!offset2)
+ return NULL;
+
+ if (offset2 - offset != info->page_size - 1)
+ return NULL;
+
+ if (!is_mapped_with_mmap_parallel(offset, mmap_cache) &&
+ !update_mmap_range_parallel(fd_memory, offset, mmap_cache)) {
+ ERRMSG("Can't read the dump memory(%s) with mmap().\n",
+ info->name_memory);
+
+ ERRMSG("This kernel might have some problems about mmap().\n");
+ ERRMSG("read() will be used instead of mmap() from now.\n");
+
+ /*
+ * Fall back to read().
+ */
+ pthread_rwlock_wrlock(&info->usemmap_rwlock);
+ info->flag_usemmap = MMAP_DISABLE;
+ pthread_rwlock_unlock(&info->usemmap_rwlock);
+ return NULL;
+ }
+
+ if (offset < mmap_cache->mmap_start_offset ||
+ offset + info->page_size > mmap_cache->mmap_end_offset)
+ return NULL;
+
+ return mmap_cache->mmap_buf + (offset - mmap_cache->mmap_start_offset);
+}
+
static int
read_from_vmcore(off_t offset, void *bufptr, unsigned long size)
{
diff --git a/makedumpfile.h b/makedumpfile.h
index 3d6661f..bff134e 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -42,6 +42,7 @@
#include "dwarf_info.h"
#include "diskdump_mod.h"
#include "sadump_mod.h"
+#include <pthread.h>
/*
* Result of command
@@ -956,6 +957,15 @@ typedef unsigned long int ulong;
typedef unsigned long long int ulonglong;
/*
+ * for parallel process
+ */
+struct mmap_cache {
+ char *mmap_buf;
+ off_t mmap_start_offset;
+ off_t mmap_end_offset;
+};
+
+/*
* makedumpfile header
* For re-arranging the dump data on different architecture, all the
* variables are defined by 64bits. The size of signature is aligned
@@ -1219,6 +1229,10 @@ struct DumpInfo {
* for cyclic_splitting mode, setup splitblock_size
*/
long long splitblock_size;
+ /*
+ * for parallel process
+ */
+ pthread_rwlock_t usemmap_rwlock;
};
extern struct DumpInfo *info;
--
1.7.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 03/10] Add readpage_elf_parallel
2015-06-19 8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 01/10] Add readpage_kdump_compressed_parallel Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 02/10] Add mappage_elf_parallel Zhou Wenjian
@ 2015-06-19 8:56 ` Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 04/10] Add read_pfn_parallel Zhou Wenjian
` (7 subsequent siblings)
10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19 8:56 UTC (permalink / raw)
To: kexec; +Cc: Qiao Nuohan
From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
readpage_elf_parallel is used to enable reading pages from elf format
in parallel. fd_memory should be initialized and offered to each thread
individually to avoid conflicts.
Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
makedumpfile.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 98 insertions(+), 0 deletions(-)
diff --git a/makedumpfile.c b/makedumpfile.c
index d1b4bc2..44c78b4 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -575,6 +575,27 @@ read_from_vmcore(off_t offset, void *bufptr, unsigned long size)
return TRUE;
}
+static int
+read_from_vmcore_parallel(int fd_memory, off_t offset, void *bufptr,
+ unsigned long size)
+{
+ const off_t failed = (off_t)-1;
+
+ if (lseek(fd_memory, offset, SEEK_SET) == failed) {
+ ERRMSG("Can't seek the dump memory(%s). (offset: %llx) %s\n",
+ info->name_memory, (unsigned long long)offset, strerror(errno));
+ return FALSE;
+ }
+
+ if (read(fd_memory, bufptr, size) != size) {
+ ERRMSG("Can't read the dump memory(%s). %s\n",
+ info->name_memory, strerror(errno));
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
/*
* This function is specific for reading page from ELF.
*
@@ -669,6 +690,83 @@ readpage_elf(unsigned long long paddr, void *bufptr)
}
static int
+readpage_elf_parallel(int fd_memory, unsigned long long paddr, void *bufptr)
+{
+ off_t offset1, offset2;
+ size_t size1, size2;
+ unsigned long long phys_start, phys_end, frac_head = 0;
+
+ offset1 = paddr_to_offset(paddr);
+ offset2 = paddr_to_offset(paddr + info->page_size);
+ phys_start = paddr;
+ phys_end = paddr + info->page_size;
+
+ /*
+ * Check the case phys_start isn't aligned by page size like below:
+ *
+ * phys_start
+ * = 0x40ffda7000
+ * |<-- frac_head -->|------------- PT_LOAD -------------
+ * ----+-----------------------+---------------------+----
+ * | pfn:N | pfn:N+1 | ...
+ * ----+-----------------------+---------------------+----
+ * |
+ * pfn_to_paddr(pfn:N) # page size = 16k
+ * = 0x40ffda4000
+ */
+ if (!offset1) {
+ phys_start = page_head_to_phys_start(paddr);
+ offset1 = paddr_to_offset(phys_start);
+ frac_head = phys_start - paddr;
+ memset(bufptr, 0, frac_head);
+ }
+
+ /*
+ * Check the case phys_end isn't aligned by page size like the
+ * phys_start's case.
+ */
+ if (!offset2) {
+ phys_end = page_head_to_phys_end(paddr);
+ offset2 = paddr_to_offset(phys_end);
+ memset(bufptr + (phys_end - paddr), 0, info->page_size
+ - (phys_end - paddr));
+ }
+
+ /*
+ * Check the separated page on different PT_LOAD segments.
+ */
+ if (offset1 + (phys_end - phys_start) == offset2) {
+ size1 = phys_end - phys_start;
+ } else {
+ for (size1 = 1; size1 < info->page_size - frac_head; size1++) {
+ offset2 = paddr_to_offset(phys_start + size1);
+ if (offset1 + size1 != offset2)
+ break;
+ }
+ }
+
+ if(!read_from_vmcore_parallel(fd_memory, offset1, bufptr + frac_head,
+ size1)) {
+ ERRMSG("Can't read the dump memory(%s).\n",
+ info->name_memory);
+ return FALSE;
+ }
+
+ if (size1 + frac_head != info->page_size) {
+ size2 = phys_end - (phys_start + size1);
+
+ if(!read_from_vmcore_parallel(fd_memory, offset2,
+ bufptr + frac_head + size1, size2)) {
+ ERRMSG("Can't read the dump memory(%s).\n",
+ info->name_memory);
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+static int
readpage_kdump_compressed(unsigned long long paddr, void *bufptr)
{
page_desc_t pd;
--
1.7.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 04/10] Add read_pfn_parallel
2015-06-19 8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
` (2 preceding siblings ...)
2015-06-19 8:56 ` [PATCH v2 03/10] Add readpage_elf_parallel Zhou Wenjian
@ 2015-06-19 8:56 ` Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 05/10] Add function to initial bitmap for parallel use Zhou Wenjian
` (6 subsequent siblings)
10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19 8:56 UTC (permalink / raw)
To: kexec; +Cc: Qiao Nuohan
From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
read_pfn_parallel is used to enable reading pages from vmcore in parallel.
The currently supported formats are kdump-compressed and elf; mmap of the
elf format is also supported.
Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
Makefile | 2 ++
makedumpfile.c | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 36 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
index fc21a3f..b1daf5b 100644
--- a/Makefile
+++ b/Makefile
@@ -67,6 +67,8 @@ LIBS := -lsnappy $(LIBS)
CFLAGS += -DUSESNAPPY
endif
+LIBS := -lpthread $(LIBS)
+
all: makedumpfile
$(OBJ_PART): $(SRC_PART)
diff --git a/makedumpfile.c b/makedumpfile.c
index 44c78b4..e15855b 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -6349,6 +6349,40 @@ read_pfn(mdf_pfn_t pfn, unsigned char *buf)
}
int
+read_pfn_parallel(int fd_memory, mdf_pfn_t pfn, unsigned char *buf,
+ struct dump_bitmap* bitmap_memory_parallel,
+ struct mmap_cache *mmap_cache)
+{
+ unsigned long long paddr;
+ unsigned long long pgaddr;
+
+ paddr = pfn_to_paddr(pfn);
+
+ pgaddr = PAGEBASE(paddr);
+
+ if (info->flag_refiltering) {
+ if (!readpage_kdump_compressed_parallel(fd_memory, pgaddr, buf,
+ bitmap_memory_parallel)) {
+ ERRMSG("Can't get the page data.\n");
+ return FALSE;
+ }
+ } else {
+ char *mapbuf = mappage_elf_parallel(fd_memory, pgaddr,
+ mmap_cache);
+ if (mapbuf) {
+ memcpy(buf, mapbuf, info->page_size);
+ } else {
+ if (!readpage_elf_parallel(fd_memory, pgaddr, buf)) {
+ ERRMSG("Can't get the page data.\n");
+ return FALSE;
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+int
get_loads_dumpfile_cyclic(void)
{
int i, phnum, num_new_load = 0;
--
1.7.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 05/10] Add function to initial bitmap for parallel use
2015-06-19 8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
` (3 preceding siblings ...)
2015-06-19 8:56 ` [PATCH v2 04/10] Add read_pfn_parallel Zhou Wenjian
@ 2015-06-19 8:56 ` Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 06/10] Add filter_data_buffer_parallel Zhou Wenjian
` (5 subsequent siblings)
10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19 8:56 UTC (permalink / raw)
To: kexec; +Cc: Qiao Nuohan
From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
initialize_bitmap_memory_parallel and initialize_2nd_bitmap_parallel
are used by the parallel processes to avoid conflicts on the bitmap.
Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
makedumpfile.c | 20 ++++++++++++++++++++
makedumpfile.h | 18 ++++++++++++++++++
2 files changed, 38 insertions(+), 0 deletions(-)
diff --git a/makedumpfile.c b/makedumpfile.c
index e15855b..9c5da35 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -3411,6 +3411,16 @@ initialize_bitmap_memory(void)
return TRUE;
}
+void
+initialize_bitmap_memory_parallel(struct dump_bitmap *bitmap, int thread_num)
+{
+ bitmap->fd = FD_BITMAP_MEMORY_PARALLEL(thread_num);
+ bitmap->file_name = info->name_memory;
+ bitmap->no_block = -1;
+ memset(bitmap->buf, 0, BUFSIZE_BITMAP);
+ bitmap->offset = info->bitmap_memory->offset;
+}
+
int
calibrate_machdep_info(void)
{
@@ -3725,6 +3735,16 @@ initialize_2nd_bitmap(struct dump_bitmap *bitmap)
bitmap->offset = info->len_bitmap / 2;
}
+void
+initialize_2nd_bitmap_parallel(struct dump_bitmap *bitmap, int thread_num)
+{
+ bitmap->fd = FD_BITMAP_PARALLEL(thread_num);
+ bitmap->file_name = info->name_bitmap;
+ bitmap->no_block = -1;
+ memset(bitmap->buf, 0, BUFSIZE_BITMAP);
+ bitmap->offset = info->len_bitmap / 2;
+}
+
int
set_bitmap_file(struct dump_bitmap *bitmap, mdf_pfn_t pfn, int val)
{
diff --git a/makedumpfile.h b/makedumpfile.h
index bff134e..4b0709c 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -429,6 +429,11 @@ do { \
#define SPLITTING_SIZE_EI(i) info->splitting_info[i].size_eraseinfo
/*
+ * Macro for getting parallel info.
+ */
+#define FD_BITMAP_MEMORY_PARALLEL(i) info->parallel_info[i].fd_bitmap_memory
+#define FD_BITMAP_PARALLEL(i) info->parallel_info[i].fd_bitmap
+/*
* kernel version
*
* NOTE: the format of kernel_version is as follows
@@ -1000,6 +1005,18 @@ struct splitting_info {
unsigned long size_eraseinfo;
} splitting_info_t;
+struct parallel_info {
+ int fd_memory;
+ int fd_bitmap_memory;
+ int fd_bitmap;
+ unsigned char *buf;
+ unsigned char *buf_out;
+ struct mmap_cache *mmap_cache;
+#ifdef USELZO
+ lzo_bytep wrkmem;
+#endif
+} parallel_info_t;
+
struct ppc64_vmemmap {
unsigned long phys;
unsigned long virt;
@@ -1136,6 +1153,7 @@ struct DumpInfo {
char *name_dumpfile;
int num_dumpfile;
struct splitting_info *splitting_info;
+ struct parallel_info *parallel_info;
/*
* bitmap info:
--
1.7.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 06/10] Add filter_data_buffer_parallel
2015-06-19 8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
` (4 preceding siblings ...)
2015-06-19 8:56 ` [PATCH v2 05/10] Add function to initial bitmap for parallel use Zhou Wenjian
@ 2015-06-19 8:56 ` Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 07/10] Add write_kdump_pages_parallel to allow parallel process Zhou Wenjian
` (4 subsequent siblings)
10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19 8:56 UTC (permalink / raw)
To: kexec; +Cc: Qiao Nuohan
From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
filter_data_buffer_parallel is used to enable filtering buffers
in parallel.
Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
erase_info.c | 29 ++++++++++++++++++++++++++++-
erase_info.h | 2 ++
2 files changed, 30 insertions(+), 1 deletions(-)
diff --git a/erase_info.c b/erase_info.c
index e0e0f71..0b253d7 100644
--- a/erase_info.c
+++ b/erase_info.c
@@ -2328,7 +2328,6 @@ extract_filter_info(unsigned long long start_paddr,
return TRUE;
}
-
/*
* External functions.
*/
@@ -2413,6 +2412,34 @@ filter_data_buffer(unsigned char *buf, unsigned long long paddr,
}
}
+/*
+ * Filter buffer if the physical address is in filter_info.
+ */
+void
+filter_data_buffer_parallel(unsigned char *buf, unsigned long long paddr,
+ size_t size, pthread_mutex_t *mutex)
+{
+ struct filter_info fl_info;
+ unsigned char *buf_ptr;
+ int found = FALSE;
+
+ while (TRUE) {
+ pthread_mutex_lock(mutex);
+ found = extract_filter_info(paddr, paddr + size, &fl_info);
+ pthread_mutex_unlock(mutex);
+
+ if (found) {
+ buf_ptr = buf + (fl_info.paddr - paddr);
+ if (fl_info.nullify)
+ memset(buf_ptr, 0, fl_info.size);
+ else
+ memset(buf_ptr, fl_info.erase_ch, fl_info.size);
+ } else {
+ break;
+ }
+ }
+}
+
unsigned long
get_size_eraseinfo(void)
{
diff --git a/erase_info.h b/erase_info.h
index 4d4957e..b363a40 100644
--- a/erase_info.h
+++ b/erase_info.h
@@ -60,6 +60,8 @@ extern unsigned long num_erase_info;
int gather_filter_info(void);
void clear_filter_info(void);
void filter_data_buffer(unsigned char *buf, unsigned long long paddr, size_t size);
+void filter_data_buffer_parallel(unsigned char *buf, unsigned long long paddr,
+ size_t size, pthread_mutex_t *mutex);
unsigned long get_size_eraseinfo(void);
int update_filter_info_raw(unsigned long long, int, int);
--
1.7.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 07/10] Add write_kdump_pages_parallel to allow parallel process
2015-06-19 8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
` (5 preceding siblings ...)
2015-06-19 8:56 ` [PATCH v2 06/10] Add filter_data_buffer_parallel Zhou Wenjian
@ 2015-06-19 8:56 ` Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 08/10] Initial and free data used for " Zhou Wenjian
` (3 subsequent siblings)
10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19 8:56 UTC (permalink / raw)
To: kexec; +Cc: Qiao Nuohan
From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
Use several threads to read and compress pages and one thread to write
the produced pages into the dumpfile. The produced pages are stored in
a buffer, from which the consumer thread then gets them.
Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
Signed-off-by: Zhou wenjian <zhouwj-fnst@cn.fujitsu.com>
---
makedumpfile.c | 440 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
makedumpfile.h | 45 ++++++
2 files changed, 485 insertions(+), 0 deletions(-)
diff --git a/makedumpfile.c b/makedumpfile.c
index 9c5da35..ad38bf4 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -235,6 +235,31 @@ is_in_same_page(unsigned long vaddr1, unsigned long vaddr2)
return FALSE;
}
+static inline unsigned long
+calculate_len_buf_out(long page_size)
+{
+ unsigned long len_buf_out_zlib, len_buf_out_lzo, len_buf_out_snappy;
+ unsigned long len_buf_out;
+
+ len_buf_out_zlib = len_buf_out_lzo = len_buf_out_snappy = 0;
+
+#ifdef USELZO
+ len_buf_out_lzo = page_size + page_size / 16 + 64 + 3;
+#endif
+
+#ifdef USESNAPPY
+ len_buf_out_snappy = snappy_max_compressed_length(page_size);
+#endif
+
+ len_buf_out_zlib = compressBound(page_size);
+
+ len_buf_out = MAX(len_buf_out_zlib,
+ MAX(len_buf_out_lzo,
+ len_buf_out_snappy));
+
+ return len_buf_out;
+}
+
#define BITMAP_SECT_LEN 4096
static inline int is_dumpable(struct dump_bitmap *, mdf_pfn_t, struct cycle *cycle);
unsigned long
@@ -6671,6 +6696,421 @@ write_elf_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_page)
return TRUE;
}
+void *
+kdump_thread_function_cyclic(void *arg) {
+ void *retval = PTHREAD_FAIL;
+ struct thread_args *kdump_thread_args = (struct thread_args *)arg;
+ struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
+ struct cycle *cycle = kdump_thread_args->cycle;
+ int page_data_num = kdump_thread_args->page_data_num;
+ mdf_pfn_t pfn;
+ mdf_pfn_t consumed_pfn;
+ int index;
+ int found;
+ int dumpable;
+ int fd_memory = 0;
+ struct dump_bitmap bitmap_parallel = {0};
+ struct dump_bitmap bitmap_memory_parallel = {0};
+ unsigned char *buf = NULL, *buf_out = NULL;
+ struct mmap_cache *mmap_cache =
+ MMAP_CACHE_PARALLEL(kdump_thread_args->thread_num);
+ unsigned long size_out;
+#ifdef USELZO
+ lzo_bytep wrkmem = WRKMEM_PARALLEL(kdump_thread_args->thread_num);
+#endif
+#ifdef USESNAPPY
+ unsigned long len_buf_out_snappy =
+ snappy_max_compressed_length(info->page_size);
+#endif
+
+ buf = BUF_PARALLEL(kdump_thread_args->thread_num);
+ buf_out = BUF_OUT_PARALLEL(kdump_thread_args->thread_num);
+
+ fd_memory = FD_MEMORY_PARALLEL(kdump_thread_args->thread_num);
+
+ if (info->fd_bitmap) {
+ bitmap_parallel.buf = malloc(BUFSIZE_BITMAP);
+ initialize_2nd_bitmap_parallel(&bitmap_parallel,
+ kdump_thread_args->thread_num);
+ }
+
+ if (info->flag_refiltering) {
+ bitmap_memory_parallel.buf = malloc(BUFSIZE_BITMAP);
+ initialize_bitmap_memory_parallel(&bitmap_memory_parallel,
+ kdump_thread_args->thread_num);
+ }
+
+ while (1) {
+ /* get next pfn */
+ pthread_mutex_lock(&info->current_pfn_mutex);
+ pfn = info->current_pfn;
+ info->current_pfn++;
+ pthread_mutex_unlock(&info->current_pfn_mutex);
+
+ if (pfn >= kdump_thread_args->end_pfn)
+ break;
+
+ index = -1;
+ found = FALSE;
+
+ while (found == FALSE) {
+ /*
+ * need a cancellation point here
+ */
+ sleep(0);
+
+ index = pfn % page_data_num;
+
+ if (page_data_buf[index].ready != 0)
+ continue;
+
+ if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
+ continue;
+
+ if (page_data_buf[index].ready != 0)
+ goto unlock;
+
+ pthread_mutex_lock(&info->consumed_pfn_mutex);
+ if ((long)page_data_buf[index].pfn >
+ (long)info->consumed_pfn)
+ info->consumed_pfn = page_data_buf[index].pfn;
+ consumed_pfn = info->consumed_pfn;
+ pthread_mutex_unlock(&info->consumed_pfn_mutex);
+
+ /*
+ * leave space for slow producer
+ */
+ if ((long)pfn - (long)consumed_pfn > page_data_num)
+ goto unlock;
+
+ found = TRUE;
+
+ page_data_buf[index].pfn = pfn;
+ page_data_buf[index].ready = 1;
+
+ if (!info->fd_bitmap)
+ dumpable = is_dumpable(info->bitmap2,
+ pfn - kdump_thread_args->start_pfn,
+ cycle);
+ else
+ dumpable = is_dumpable(&bitmap_parallel,
+ pfn - kdump_thread_args->start_pfn,
+ cycle);
+ if (!dumpable) {
+ page_data_buf[index].dumpable = FALSE;
+ goto unlock;
+ }
+
+ page_data_buf[index].dumpable = TRUE;
+
+ if (!read_pfn_parallel(fd_memory, pfn, buf,
+ &bitmap_memory_parallel,
+ mmap_cache))
+ goto fail;
+
+ filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
+ info->page_size,
+ &info->filter_mutex);
+
+ if ((info->dump_level & DL_EXCLUDE_ZERO)
+ && is_zero_page(buf, info->page_size)) {
+ page_data_buf[index].zero = TRUE;
+ goto unlock;
+ }
+
+ page_data_buf[index].zero = FALSE;
+
+ /*
+ * Compress the page data.
+ */
+ size_out = kdump_thread_args->len_buf_out;
+ if ((info->flag_compress & DUMP_DH_COMPRESSED_ZLIB)
+ && ((size_out = kdump_thread_args->len_buf_out),
+ compress2(buf_out, &size_out, buf,
+ info->page_size,
+ Z_BEST_SPEED) == Z_OK)
+ && (size_out < info->page_size)) {
+ page_data_buf[index].flags =
+ DUMP_DH_COMPRESSED_ZLIB;
+ page_data_buf[index].size = size_out;
+ memcpy(page_data_buf[index].buf, buf_out, size_out);
+#ifdef USELZO
+ } else if (info->flag_lzo_support
+ && (info->flag_compress
+ & DUMP_DH_COMPRESSED_LZO)
+ && ((size_out = info->page_size),
+ lzo1x_1_compress(buf, info->page_size,
+ buf_out, &size_out,
+ wrkmem) == LZO_E_OK)
+ && (size_out < info->page_size)) {
+ page_data_buf[index].flags =
+ DUMP_DH_COMPRESSED_LZO;
+ page_data_buf[index].size = size_out;
+ memcpy(page_data_buf[index].buf, buf_out, size_out);
+#endif
+#ifdef USESNAPPY
+ } else if ((info->flag_compress
+ & DUMP_DH_COMPRESSED_SNAPPY)
+ && ((size_out = len_buf_out_snappy),
+ snappy_compress((char *)buf,
+ info->page_size,
+ (char *)buf_out,
+ (size_t *)&size_out)
+ == SNAPPY_OK)
+ && (size_out < info->page_size)) {
+ page_data_buf[index].flags =
+ DUMP_DH_COMPRESSED_SNAPPY;
+ page_data_buf[index].size = size_out;
+ memcpy(page_data_buf[index].buf, buf_out, size_out);
+#endif
+ } else {
+ page_data_buf[index].flags = 0;
+ page_data_buf[index].size = info->page_size;
+ memcpy(page_data_buf[index].buf, buf, info->page_size);
+ }
+unlock:
+ pthread_mutex_unlock(&page_data_buf[index].mutex);
+
+ }
+ }
+
+ retval = NULL;
+
+fail:
+ if (bitmap_memory_parallel.fd > 0)
+ close(bitmap_memory_parallel.fd);
+ if (bitmap_parallel.buf != NULL)
+ free(bitmap_parallel.buf);
+ if (bitmap_memory_parallel.buf != NULL)
+ free(bitmap_memory_parallel.buf);
+
+ pthread_exit(retval);
+}
+
+int
+write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
+ struct cache_data *cd_page,
+ struct page_desc *pd_zero,
+ off_t *offset_data, struct cycle *cycle)
+{
+ int ret = FALSE;
+ int res;
+ unsigned long len_buf_out;
+ mdf_pfn_t per;
+ mdf_pfn_t start_pfn, end_pfn;
+ struct page_desc pd;
+ struct timeval tv_start;
+ struct timeval last, new;
+ unsigned long long consuming_pfn;
+ pthread_t **threads = NULL;
+ struct thread_args *kdump_thread_args = NULL;
+ void *thread_result;
+ int page_data_num;
+ struct page_data *page_data_buf = NULL;
+ int i;
+ int index;
+
+ if (info->flag_elf_dumpfile)
+ return FALSE;
+
+ res = pthread_mutex_init(&info->current_pfn_mutex, NULL);
+ if (res != 0) {
+ ERRMSG("Can't initialize current_pfn_mutex. %s\n",
+ strerror(res));
+ goto out;
+ }
+
+ res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL);
+ if (res != 0) {
+ ERRMSG("Can't initialize consumed_pfn_mutex. %s\n",
+ strerror(res));
+ goto out;
+ }
+
+ res = pthread_mutex_init(&info->filter_mutex, NULL);
+ if (res != 0) {
+ ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res));
+ goto out;
+ }
+
+ res = pthread_rwlock_init(&info->usemmap_rwlock, NULL);
+ if (res != 0) {
+ ERRMSG("Can't initialize usemmap_rwlock. %s\n", strerror(res));
+ goto out;
+ }
+
+ len_buf_out = calculate_len_buf_out(info->page_size);
+
+ per = info->num_dumpable / 10000;
+ per = per ? per : 1;
+
+ gettimeofday(&tv_start, NULL);
+
+ start_pfn = cycle->start_pfn;
+ end_pfn = cycle->end_pfn;
+
+ info->current_pfn = start_pfn;
+ info->consumed_pfn = start_pfn - 1;
+
+ threads = info->threads;
+ kdump_thread_args = info->kdump_thread_args;
+
+ page_data_num = info->num_buffers;
+ page_data_buf = info->page_data_buf;
+
+ for (i = 0; i < page_data_num; i++) {
+ /*
+ * producer will use pfn in page_data_buf to decide the
+ * consumed pfn
+ */
+ page_data_buf[i].pfn = start_pfn - 1;
+ page_data_buf[i].ready = 0;
+ res = pthread_mutex_init(&page_data_buf[i].mutex, NULL);
+ if (res != 0) {
+ ERRMSG("Can't initialize mutex of page_data_buf. %s\n",
+ strerror(res));
+ goto out;
+ }
+ }
+
+ for (i = 0; i < info->num_threads; i++) {
+ kdump_thread_args[i].thread_num = i;
+ kdump_thread_args[i].len_buf_out = len_buf_out;
+ kdump_thread_args[i].start_pfn = start_pfn;
+ kdump_thread_args[i].end_pfn = end_pfn;
+ kdump_thread_args[i].page_data_num = page_data_num;
+ kdump_thread_args[i].page_data_buf = page_data_buf;
+ kdump_thread_args[i].cycle = cycle;
+
+ res = pthread_create(threads[i], NULL,
+ kdump_thread_function_cyclic,
+ (void *)&kdump_thread_args[i]);
+ if (res != 0) {
+ ERRMSG("Can't create thread %d. %s\n",
+ i, strerror(res));
+ goto out;
+ }
+ }
+
+ consuming_pfn = start_pfn;
+ index = -1;
+
+ gettimeofday(&last, NULL);
+
+ while (consuming_pfn < end_pfn) {
+ index = consuming_pfn % page_data_num;
+
+ gettimeofday(&new, NULL);
+ if (new.tv_sec - last.tv_sec > WAIT_TIME) {
+ ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn);
+ goto out;
+ }
+
+ /*
+ * check pfn first without mutex locked to reduce the time
+ * trying to lock the mutex
+ */
+ if (page_data_buf[index].pfn != consuming_pfn)
+ continue;
+
+ pthread_mutex_lock(&page_data_buf[index].mutex);
+
+ /* check whether the found one is ready to be consumed */
+ if (page_data_buf[index].pfn != consuming_pfn ||
+ page_data_buf[index].ready != 1) {
+ goto unlock;
+ }
+
+ if ((num_dumped % per) == 0)
+ print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
+
+ /* next pfn is found, refresh last here */
+ last = new;
+ consuming_pfn++;
+ page_data_buf[index].ready = 0;
+
+ if (page_data_buf[index].dumpable == FALSE)
+ goto unlock;
+
+ num_dumped++;
+
+ if (page_data_buf[index].zero == TRUE) {
+ if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
+ goto out;
+ pfn_zero++;
+ } else {
+ pd.flags = page_data_buf[index].flags;
+ pd.size = page_data_buf[index].size;
+ pd.page_flags = 0;
+ pd.offset = *offset_data;
+ *offset_data += pd.size;
+ /*
+ * Write the page header.
+ */
+ if (!write_cache(cd_header, &pd, sizeof(page_desc_t)))
+ goto out;
+ /*
+ * Write the page data.
+ */
+ if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
+ goto out;
+
+ }
+unlock:
+ pthread_mutex_unlock(&page_data_buf[index].mutex);
+ }
+
+ ret = TRUE;
+ /*
+ * print [100 %]
+ */
+ print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
+ print_execution_time(PROGRESS_COPY, &tv_start);
+ PROGRESS_MSG("\n");
+
+out:
+ if (threads != NULL) {
+ for (i = 0; i < info->num_threads; i++) {
+ if (threads[i] != NULL) {
+ res = pthread_cancel(*threads[i]);
+ if (res != 0 && res != ESRCH)
+ ERRMSG("Can't cancel thread %d. %s\n",
+ i, strerror(res));
+ }
+ }
+
+ for (i = 0; i < info->num_threads; i++) {
+ if (threads[i] != NULL) {
+ res = pthread_join(*threads[i], &thread_result);
+ if (res != 0)
+ ERRMSG("Can't join with thread %d. %s\n",
+ i, strerror(res));
+
+ if (thread_result == PTHREAD_CANCELED)
+ DEBUG_MSG("Thread %d is cancelled.\n", i);
+ else if (thread_result == PTHREAD_FAIL)
+ DEBUG_MSG("Thread %d fails.\n", i);
+ else
+ DEBUG_MSG("Thread %d finishes.\n", i);
+
+ }
+ }
+ }
+
+ if (page_data_buf != NULL) {
+ for (i = 0; i < page_data_num; i++) {
+ pthread_mutex_destroy(&page_data_buf[i].mutex);
+ }
+ }
+
+ pthread_rwlock_destroy(&info->usemmap_rwlock);
+ pthread_mutex_destroy(&info->filter_mutex);
+ pthread_mutex_destroy(&info->consumed_pfn_mutex);
+ pthread_mutex_destroy(&info->current_pfn_mutex);
+
+ return ret;
+}
+
int
write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_page,
struct page_desc *pd_zero, off_t *offset_data, struct cycle *cycle)
diff --git a/makedumpfile.h b/makedumpfile.h
index 4b0709c..5dbea60 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -431,8 +431,15 @@ do { \
/*
* Macro for getting parallel info.
*/
+#define FD_MEMORY_PARALLEL(i) info->parallel_info[i].fd_memory
#define FD_BITMAP_MEMORY_PARALLEL(i) info->parallel_info[i].fd_bitmap_memory
#define FD_BITMAP_PARALLEL(i) info->parallel_info[i].fd_bitmap
+#define BUF_PARALLEL(i) info->parallel_info[i].buf
+#define BUF_OUT_PARALLEL(i) info->parallel_info[i].buf_out
+#define MMAP_CACHE_PARALLEL(i) info->parallel_info[i].mmap_cache
+#ifdef USELZO
+#define WRKMEM_PARALLEL(i) info->parallel_info[i].wrkmem
+#endif
/*
* kernel version
*
@@ -964,12 +971,40 @@ typedef unsigned long long int ulonglong;
/*
* for parallel process
*/
+
+#define WAIT_TIME (60 * 10)
+#define PTHREAD_FAIL ((void *)-2)
+
struct mmap_cache {
char *mmap_buf;
off_t mmap_start_offset;
off_t mmap_end_offset;
};
+struct page_data
+{
+ mdf_pfn_t pfn;
+ int dumpable;
+ int zero;
+ unsigned int flags;
+ long size;
+ unsigned char *buf;
+ pthread_mutex_t mutex;
+ /*
+ * whether the page_data is ready to be consumed
+ */
+ int ready;
+};
+
+struct thread_args {
+ int thread_num;
+ unsigned long len_buf_out;
+ mdf_pfn_t start_pfn, end_pfn;
+ int page_data_num;
+ struct cycle *cycle;
+ struct page_data *page_data_buf;
+};
+
/*
* makedumpfile header
* For re-arranging the dump data on different architecture, all the
@@ -1250,7 +1285,17 @@ struct DumpInfo {
/*
* for parallel process
*/
+ int num_threads;
+ int num_buffers;
+ pthread_t **threads;
+ struct thread_args *kdump_thread_args;
+ struct page_data *page_data_buf;
pthread_rwlock_t usemmap_rwlock;
+ mdf_pfn_t current_pfn;
+ pthread_mutex_t current_pfn_mutex;
+ mdf_pfn_t consumed_pfn;
+ pthread_mutex_t consumed_pfn_mutex;
+ pthread_mutex_t filter_mutex;
};
extern struct DumpInfo *info;
--
1.7.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 08/10] Initial and free data used for parallel process
2015-06-19 8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
` (6 preceding siblings ...)
2015-06-19 8:56 ` [PATCH v2 07/10] Add write_kdump_pages_parallel to allow parallel process Zhou Wenjian
@ 2015-06-19 8:56 ` Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 09/10] Make makedumpfile available to read and compress pages parallelly Zhou Wenjian
` (2 subsequent siblings)
10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19 8:56 UTC (permalink / raw)
To: kexec; +Cc: Qiao Nuohan
From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
This patch is used to initialize/free the data used for the parallel process,
and the memory limit is taken into account in these functions.
Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
makedumpfile.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
makedumpfile.h | 1 +
2 files changed, 203 insertions(+), 0 deletions(-)
diff --git a/makedumpfile.c b/makedumpfile.c
index ad38bf4..04d5e92 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -1432,6 +1432,23 @@ open_dump_bitmap(void)
SPLITTING_FD_BITMAP(i) = fd;
}
}
+
+ if (info->num_threads) {
+ /*
+ * Reserve file descriptors of bitmap for creating dumpfiles
+ * parallelly, because a bitmap file will be unlinked just after
+ * this and it is not possible to open a bitmap file later.
+ */
+ for (i = 0; i < info->num_threads; i++) {
+ if ((fd = open(info->name_bitmap, O_RDONLY)) < 0) {
+ ERRMSG("Can't open the bitmap file(%s). %s\n",
+ info->name_bitmap, strerror(errno));
+ return FALSE;
+ }
+ FD_BITMAP_PARALLEL(i) = fd;
+ }
+ }
+
unlink(info->name_bitmap);
return TRUE;
@@ -3459,6 +3476,191 @@ calibrate_machdep_info(void)
}
int
+initial_for_parallel()
+{
+ unsigned long len_buf_out;
+ unsigned long page_data_buf_size;
+ unsigned long limit_size;
+ int page_data_num;
+ int i;
+
+ len_buf_out = calculate_len_buf_out(info->page_size);
+
+ /*
+ * allocate memory for threads
+ */
+ if ((info->threads = malloc(sizeof(pthread_t *) * info->num_threads))
+ == NULL) {
+ MSG("Can't allocate memory for threads. %s\n",
+ strerror(errno));
+ return FALSE;
+ }
+ memset(info->threads, 0, sizeof(pthread_t *) * info->num_threads);
+
+ if ((info->kdump_thread_args =
+ malloc(sizeof(struct thread_args) * info->num_threads))
+ == NULL) {
+ MSG("Can't allocate memory for arguments of threads. %s\n",
+ strerror(errno));
+ return FALSE;
+ }
+ memset(info->kdump_thread_args, 0, sizeof(struct thread_args) * info->num_threads);
+
+ for (i = 0; i < info->num_threads; i++) {
+ if ((info->threads[i] = malloc(sizeof(pthread_t))) == NULL) {
+ MSG("Can't allocate memory for thread %d. %s",
+ i, strerror(errno));
+ return FALSE;
+ }
+
+ if ((BUF_PARALLEL(i) = malloc(info->page_size)) == NULL) {
+ MSG("Can't allocate memory for the memory buffer. %s\n",
+ strerror(errno));
+ return FALSE;
+ }
+
+ if ((BUF_OUT_PARALLEL(i) = malloc(len_buf_out)) == NULL) {
+ MSG("Can't allocate memory for the compression buffer. %s\n",
+ strerror(errno));
+ return FALSE;
+ }
+
+ if ((MMAP_CACHE_PARALLEL(i) = malloc(sizeof(struct mmap_cache))) == NULL) {
+ MSG("Can't allocate memory for mmap_cache. %s\n",
+ strerror(errno));
+ return FALSE;
+ }
+
+ /*
+ * initial for mmap_cache
+ */
+ MMAP_CACHE_PARALLEL(i)->mmap_buf = MAP_FAILED;
+ MMAP_CACHE_PARALLEL(i)->mmap_start_offset = 0;
+ MMAP_CACHE_PARALLEL(i)->mmap_end_offset = 0;
+
+#ifdef USELZO
+ if ((WRKMEM_PARALLEL(i) = malloc(LZO1X_1_MEM_COMPRESS)) == NULL) {
+ MSG("Can't allocate memory for the working memory. %s\n",
+ strerror(errno));
+ return FALSE;
+ }
+#endif
+ }
+
+ /*
+ * get a safe number of page_data
+ */
+ page_data_buf_size = MAX(len_buf_out, info->page_size);
+
+ limit_size = (get_free_memory_size()
+ - MAP_REGION * info->num_threads) * 0.6;
+
+ page_data_num = limit_size / page_data_buf_size;
+
+ if (info->num_buffers != 0)
+ info->num_buffers = MIN(info->num_buffers, page_data_num);
+ else
+ info->num_buffers = MIN(PAGE_DATA_NUM, page_data_num);
+
+ DEBUG_MSG("Number of struct page_data for produce/consume: %d\n",
+ info->num_buffers);
+
+ /*
+ * allocate memory for page_data
+ */
+ if ((info->page_data_buf = malloc(sizeof(struct page_data) * info->num_buffers))
+ == NULL) {
+ MSG("Can't allocate memory for page_data_buf. %s\n",
+ strerror(errno));
+ return FALSE;
+ }
+ memset(info->page_data_buf, 0, sizeof(struct page_data) * info->num_buffers);
+
+ for (i = 0; i < info->num_buffers; i++) {
+ if ((info->page_data_buf[i].buf = malloc(page_data_buf_size)) == NULL) {
+ MSG("Can't allocate memory for buf of page_data_buf. %s\n",
+ strerror(errno));
+ return FALSE;
+ }
+ }
+
+ /*
+ * initial fd_memory for threads
+ */
+ for (i = 0; i < info->num_threads; i++) {
+ if ((FD_MEMORY_PARALLEL(i) = open(info->name_memory, O_RDONLY))
+ < 0) {
+ ERRMSG("Can't open the dump memory(%s). %s\n",
+ info->name_memory, strerror(errno));
+ return FALSE;
+ }
+
+ if ((FD_BITMAP_MEMORY_PARALLEL(i) =
+ open(info->name_memory, O_RDONLY)) < 0) {
+ ERRMSG("Can't open the dump memory(%s). %s\n",
+ info->name_memory, strerror(errno));
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+void
+free_for_parallel()
+{
+ int i;
+
+ if (info->threads != NULL) {
+ for (i = 0; i < info->num_threads; i++) {
+ if (info->threads[i] != NULL)
+ free(info->threads[i]);
+
+ if (BUF_PARALLEL(i) != NULL)
+ free(BUF_PARALLEL(i));
+
+ if (BUF_OUT_PARALLEL(i) != NULL)
+ free(BUF_OUT_PARALLEL(i));
+
+ if (MMAP_CACHE_PARALLEL(i) != NULL) {
+ if (MMAP_CACHE_PARALLEL(i)->mmap_buf !=
+ MAP_FAILED)
+ munmap(MMAP_CACHE_PARALLEL(i)->mmap_buf,
+ MMAP_CACHE_PARALLEL(i)->mmap_end_offset
+ - MMAP_CACHE_PARALLEL(i)->mmap_start_offset);
+
+ free(MMAP_CACHE_PARALLEL(i));
+ }
+#ifdef USELZO
+ if (WRKMEM_PARALLEL(i) != NULL)
+ free(WRKMEM_PARALLEL(i));
+#endif
+
+ }
+ free(info->threads);
+ }
+
+ if (info->kdump_thread_args != NULL)
+ free(info->kdump_thread_args);
+
+ if (info->page_data_buf != NULL) {
+ for (i = 0; i < info->num_buffers; i++) {
+ if (info->page_data_buf[i].buf != NULL)
+ free(info->page_data_buf[i].buf);
+ }
+ free(info->page_data_buf);
+ }
+
+ for (i = 0; i < info->num_threads; i++) {
+ if (FD_MEMORY_PARALLEL(i) > 0)
+ close(FD_MEMORY_PARALLEL(i));
+
+ if (FD_BITMAP_MEMORY_PARALLEL(i) > 0)
+ close(FD_BITMAP_MEMORY_PARALLEL(i));
+ }
+}
+
+int
initial(void)
{
off_t offset;
diff --git a/makedumpfile.h b/makedumpfile.h
index 5dbea60..d0760d9 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -972,6 +972,7 @@ typedef unsigned long long int ulonglong;
* for parallel process
*/
+#define PAGE_DATA_NUM (50)
#define WAIT_TIME (60 * 10)
#define PTHREAD_FAIL ((void *)-2)
--
1.7.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 09/10] Make makedumpfile available to read and compress pages parallelly
2015-06-19 8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
` (7 preceding siblings ...)
2015-06-19 8:56 ` [PATCH v2 08/10] Initial and free data used for " Zhou Wenjian
@ 2015-06-19 8:56 ` Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 10/10] Add usage and manual about multiple threads process Zhou Wenjian
2015-06-25 2:25 ` [PATCH v2 00/10] makedumpfile: parallel processing "Zhou, Wenjian/周文剑"
10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19 8:56 UTC (permalink / raw)
To: kexec; +Cc: Qiao Nuohan
From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
With this patch, it becomes possible to use multiple threads to read
and compress pages. This parallel processing will save time.
Currently, sadump and xen kdump are not supported.
Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
makedumpfile.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
makedumpfile.h | 2 +
2 files changed, 68 insertions(+), 2 deletions(-)
diff --git a/makedumpfile.c b/makedumpfile.c
index 04d5e92..8ffe174 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -3857,6 +3857,27 @@ out:
DEBUG_MSG("Buffer size for the cyclic mode: %ld\n", info->bufsize_cyclic);
}
+ if (info->num_threads) {
+ if (is_xen_memory()) {
+ MSG("'--num-threads' option is disable,\n");
+ MSG("because %s is Xen's memory core image.\n",
+ info->name_memory);
+ return FALSE;
+ }
+
+ if (info->flag_sadump) {
+ MSG("'--num-threads' option is disable,\n");
+ MSG("because %s is sadump %s format.\n",
+ info->name_memory, sadump_format_type_name());
+ return FALSE;
+ }
+
+ if (!initial_for_parallel()) {
+ MSG("Fail to initial for parallel process.\n");
+ return FALSE;
+ }
+ }
+
if (!is_xen_memory() && !cache_init())
return FALSE;
@@ -7906,9 +7927,16 @@ write_kdump_pages_and_bitmap_cyclic(struct cache_data *cd_header, struct cache_d
if (!write_kdump_bitmap2(&cycle))
return FALSE;
- if (!write_kdump_pages_cyclic(cd_header, cd_page, &pd_zero,
+ if (info->num_threads) {
+ if (!write_kdump_pages_parallel_cyclic(cd_header,
+ cd_page, &pd_zero,
+ &offset_data, &cycle))
+ return FALSE;
+ } else {
+ if (!write_kdump_pages_cyclic(cd_header, cd_page, &pd_zero,
&offset_data, &cycle))
- return FALSE;
+ return FALSE;
+ }
}
free_bitmap2_buffer();
@@ -9875,6 +9903,18 @@ check_param_for_creating_dumpfile(int argc, char *argv[])
if (info->flag_sadump_diskset && !sadump_is_supported_arch())
return FALSE;
+ if (info->num_threads) {
+ if (info->flag_split) {
+ MSG("--num-threads cannot used with --split.\n");
+ return FALSE;
+ }
+
+ if (info->flag_elf_dumpfile) {
+ MSG("--num-threads cannot used with ELF format.\n");
+ return FALSE;
+ }
+ }
+
if ((argc == optind + 2) && !info->flag_flatten
&& !info->flag_split
&& !info->flag_sadump_diskset) {
@@ -9939,6 +9979,18 @@ check_param_for_creating_dumpfile(int argc, char *argv[])
} else
return FALSE;
+ if (info->num_threads) {
+ if ((info->parallel_info =
+ malloc(sizeof(parallel_info_t) * info->num_threads))
+ == NULL) {
+ MSG("Can't allocate memory for parallel_info.\n");
+ return FALSE;
+ }
+
+ memset(info->parallel_info, 0, sizeof(parallel_info_t)
+ * info->num_threads);
+ }
+
return TRUE;
}
@@ -10255,6 +10307,8 @@ static struct option longopts[] = {
{"mem-usage", no_argument, NULL, OPT_MEM_USAGE},
{"splitblock-size", required_argument, NULL, OPT_SPLITBLOCK_SIZE},
{"work-dir", required_argument, NULL, OPT_WORKING_DIR},
+ {"num-threads", required_argument, NULL, OPT_NUM_THREADS},
+ {"num-buffers", required_argument, NULL, OPT_NUM_BUFFERS},
{0, 0, 0, 0}
};
@@ -10399,6 +10453,12 @@ main(int argc, char *argv[])
case OPT_WORKING_DIR:
info->working_dir = optarg;
break;
+ case OPT_NUM_THREADS:
+ info->num_threads = atoi(optarg);
+ break;
+ case OPT_NUM_BUFFERS:
+ info->num_buffers = atoi(optarg);
+ break;
case '?':
MSG("Commandline parameter is invalid.\n");
MSG("Try `makedumpfile --help' for more information.\n");
@@ -10542,6 +10602,8 @@ out:
else if (!info->flag_mem_usage)
MSG("makedumpfile Completed.\n");
+ free_for_parallel();
+
if (info) {
if (info->dh_memory)
free(info->dh_memory);
@@ -10569,6 +10631,8 @@ out:
free(info->p2m_mfn_frame_list);
if (info->page_buf != NULL)
free(info->page_buf);
+ if (info->parallel_info != NULL)
+ free(info->parallel_info);
free(info);
if (splitblock) {
diff --git a/makedumpfile.h b/makedumpfile.h
index d0760d9..9dfe5b6 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -2032,6 +2032,8 @@ struct elf_prstatus {
#define OPT_MEM_USAGE OPT_START+13
#define OPT_SPLITBLOCK_SIZE OPT_START+14
#define OPT_WORKING_DIR OPT_START+15
+#define OPT_NUM_THREADS OPT_START+16
+#define OPT_NUM_BUFFERS OPT_START+17
/*
* Function Prototype.
--
1.7.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 10/10] Add usage and manual about multiple threads process
2015-06-19 8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
` (8 preceding siblings ...)
2015-06-19 8:56 ` [PATCH v2 09/10] Make makedumpfile available to read and compress pages parallelly Zhou Wenjian
@ 2015-06-19 8:56 ` Zhou Wenjian
2015-06-25 2:25 ` [PATCH v2 00/10] makedumpfile: parallel processing "Zhou, Wenjian/周文剑"
10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19 8:56 UTC (permalink / raw)
To: kexec; +Cc: Qiao Nuohan
From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
makedumpfile.8 | 24 ++++++++++++++++++++++++
print_info.c | 16 ++++++++++++++++
2 files changed, 40 insertions(+), 0 deletions(-)
diff --git a/makedumpfile.8 b/makedumpfile.8
index 2d38cd0..b400a14 100644
--- a/makedumpfile.8
+++ b/makedumpfile.8
@@ -12,6 +12,8 @@ makedumpfile \- make a small dumpfile of kdump
.br
\fBmakedumpfile\fR \-\-split [\fIOPTION\fR] [\-x \fIVMLINUX\fR|\-i \fIVMCOREINFO\fR] \fIVMCORE\fR \fIDUMPFILE1\fR \fIDUMPFILE2\fR [\fIDUMPFILE3\fR ..]
.br
+\fBmakedumpfile\fR [\fIOPTION\fR] [\-x \fIVMLINUX\fR|\-i \fIVMCOREINFO\fR] \-\-num\-threads \fITHREADNUM\fR [\-\-num\-buffers \fIBUFNUM\fR] \fIVMCORE\fR \fIDUMPFILE\fR
+.br
\fBmakedumpfile\fR \-\-reassemble \fIDUMPFILE1\fR \fIDUMPFILE2\fR [\fIDUMPFILE3\fR ..] \fIDUMPFILE\fR
.br
\fBmakedumpfile\fR \-g \fIVMCOREINFO\fR \-x \fIVMLINUX\fR
@@ -371,6 +373,28 @@ the kdump\-compressed format.
# makedumpfile \-\-split \-d 31 \-x vmlinux /proc/vmcore dumpfile1 dumpfile2
.TP
+\fB\-\-num\-threads\fR \fITHREADNUM\fR
+Using multiple threads to read and compress data of each page in parallel.
+And it will reduces time for saving \fIDUMPFILE\fR.
+This feature only supports creating \fIDUMPFILE\fR in kdump\-comressed
+format from \fIVMCORE\fR in kdump\-compressed format or elf format.
+.br
+.B Example:
+.br
+# makedumpfile \-d 31 \-\-num\-threads 4 /proc/vmcore dumpfile
+
+.TP
+\fB\-\-num\-buffers\fR \fIBUFNUM\fR
+This option is used for multiple threads process, please check \-\-num\-threads
+option. Multiple threads process will need buffers to store generated page
+data by threads temporarily, and this option is used to specify the number
+of pages can be stored.
+.br
+.B Example:
+.br
+# makedumpfile \-d 31 \-\-num\-threads 4 \-\-num\-buffers 30 /proc/vmcore dumpfile
+
+.TP
\fB\-\-reassemble\fR
Reassemble multiple \fIDUMPFILE\fRs, which are created by \-\-split option,
into one \fIDUMPFILE\fR. dumpfile1 and dumpfile2 are reassembled into dumpfile
diff --git a/print_info.c b/print_info.c
index 9c36bec..e8a6b40 100644
--- a/print_info.c
+++ b/print_info.c
@@ -76,6 +76,10 @@ print_usage(void)
MSG(" # makedumpfile --split [OPTION] [-x VMLINUX|-i VMCOREINFO] VMCORE DUMPFILE1\n");
MSG(" DUMPFILE2 [DUMPFILE3 ..]\n");
MSG("\n");
+ MSG(" Using multiple threads to create DUMPFILE in parallel:\n");
+ MSG(" # makedumpfile [OPTION] [-x VMLINUX|-i VMCOREINFO] --num-threads THREADNUM\n");
+ MSG(" [--num-buffers BUFNUM] VMCORE DUMPFILE1\n");
+ MSG("\n");
MSG(" Reassemble multiple DUMPFILEs:\n");
MSG(" # makedumpfile --reassemble DUMPFILE1 DUMPFILE2 [DUMPFILE3 ..] DUMPFILE\n");
MSG("\n");
@@ -184,6 +188,18 @@ print_usage(void)
MSG(" by the number of DUMPFILEs.\n");
MSG(" This feature supports only the kdump-compressed format.\n");
MSG("\n");
+ MSG(" [--num-threads THREADNUM]:\n");
+ MSG(" Using multiple threads to read and compress data of each page in parallel.\n");
+ MSG(" And it will reduces time for saving DUMPFILE.\n");
+ MSG(" This feature only supports creating DUMPFILE in kdump-comressed format from\n");
+ MSG(" VMCORE in kdump-compressed format or elf format.\n");
+ MSG("\n");
+ MSG(" [--num-buffers BUFNUM]:\n");
+ MSG(" This option is used for multiple threads process, please check --num-threads\n");
+ MSG(" option. Multiple threads process will need buffers to store generated page\n");
+ MSG(" data by threads temporarily, and this option is used to specify the number\n");
+ MSG(" of pages can be stored.\n");
+ MSG("\n");
MSG(" [--reassemble]:\n");
MSG(" Reassemble multiple DUMPFILEs, which are created by --split option,\n");
MSG(" into one DUMPFILE. dumpfile1 and dumpfile2 are reassembled into dumpfile.\n");
--
1.7.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 00/10] makedumpfile: parallel processing
2015-06-19 8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
` (9 preceding siblings ...)
2015-06-19 8:56 ` [PATCH v2 10/10] Add usage and manual about multiple threads process Zhou Wenjian
@ 2015-06-25 2:25 ` "Zhou, Wenjian/周文剑"
2015-06-26 7:07 ` Atsushi Kumagai
10 siblings, 1 reply; 19+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2015-06-25 2:25 UTC (permalink / raw)
To: Atsushi Kumagai; +Cc: kexec
Hello Atsushi Kumagai,
I tested this patch set on several machines and the following are the benchmark results.
These tables show the time that makedumpfile spends; the unit is seconds.
"core-data" in the table means the content of the vmcore.
For example:
core-data's value is 256. It means that in the vmcore, 256 * 8 bits of each page
are set to 1.
threads-num in the table means how many threads are used. 0 means the original
single-threaded implementation.
"-l" in the table means producing lzo format vmcore
"-c" in the table means producing kdump-compressed format vmcore
###################################
- System: PRIMERGY RX300 S6
- CPU: Intel(R) Xeon(R) CPU x5660
- memory: 16GB
###################################
************ makedumpfile -d 0 ******************
core-data 0 256 512 768 1024 1280 1536 1792 2048 2304 2560 2816 3072 3328 3584 3840
threads-num
-l
0 11 112 163 168 167 167 167 167 170 169 167 166 167 168 169 113
4 5 111 158 166 167 167 167 166 169 170 166 166 167 167 158 111
8 5 111 158 167 169 170 167 169 169 170 167 168 168 168 159 111
12 6 111 158 168 167 167 169 168 170 169 167 168 167 167 161 114
-c
0 54 544 643 666 589 517 468 405 408 429 491 528 592 676 654 527
4 60 179 210 216 206 220 209 214 211 222 220 208 209 210 245 177
8 43 113 146 169 179 215 195 211 199 195 216 197 185 165 158 109
12 44 106 142 162 169 171 168 173 174 171 200 173 177 165 139 107
************ makedumpfile -d 1 ******************
core-data 0 256 512 768 1024 1280 1536 1792 2048 2304 2560 2816 3072 3328 3584 3840
threads-num
-l
0 10 137 163 167 167 167 167 167 171 170 166 166 166 168 169 138
4 5 111 158 166 167 166 167 166 170 171 166 167 166 166 159 111
8 5 114 159 167 167 169 167 167 169 169 169 167 168 167 160 111
12 6 113 159 168 167 168 167 168 169 170 168 168 167 168 159 112
-c
0 53 522 645 663 591 512 470 401 412 413 479 532 587 666 648 524
4 57 205 244 216 208 217 216 225 221 225 223 217 213 209 246 174
8 41 118 154 176 191 213 219 223 212 210 222 207 198 176 164 118
12 43 111 148 174 189 194 191 182 178 173 204 196 194 170 150 112
************ makedumpfile -d 7 ******************
core-data 0 256 512 768 1024 1280 1536 1792 2048 2304 2560 2816 3072 3328 3584 3840
threads-num
-l
0 10 144 199 168 167 167 167 167 171 169 167 166 166 168 169 144
4 5 110 158 167 167 167 166 166 170 169 166 166 166 166 159 110
8 5 111 159 167 166 170 167 166 169 169 168 167 167 167 159 110
12 6 111 159 167 167 166 168 167 169 169 168 167 167 167 160 111
-c
0 52 523 642 659 592 511 490 402 410 422 464 525 591 666 647 529
4 58 177 210 213 208 224 217 228 223 229 227 216 210 212 215 175
8 41 116 152 178 191 213 219 225 216 211 221 211 196 180 158 116
12 41 111 151 176 192 187 192 190 174 175 218 186 189 172 151 112
************ makedumpfile -d 31 ******************
core-data 0 256 512 768 1024 1280 1536 1792 2048 2304 2560 2816 3072 3328 3584 3840
threads-num
-l
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 2 2 2 3 3 3 2 3 3 3 3 2 3 3 3 3
8 2 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3
12 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
-c
0 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
4 4 5 5 4 5 5 5 5 4 5 5 5 4 4 5 5
8 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4
12 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
###################################
- System: PRIMERGY RX300 S7
- CPU: Intel(R) Xeon(R) CPU E5-2620
- memory: 32GB
###################################
************ makedumpfile -d 0 ******************
core-data 0 256 512 768 1024 1280 1536 1792
threads-num
-l
0 23 211 246 255 252 252 253 257
4 14 168 234 250 253 251 251 252
12 17 165 237 250 251 251 253 253
-c
0 117 1210 1620 1699 1443 1294 1235 1051
4 161 438 525 543 506 465 484 415
12 131 211 245 250 246 248 260 265
************ makedumpfile -d 7 ******************
core-data 0 256 512 768 1024 1280 1536 1792
threads-num
-l
0 24 215 253 256 255 254 257 255
4 14 165 235 247 249 250 252 253
12 16 169 237 251 255 253 252 250
-c
0 120 1202 1625 1698 1434 1274 1223 1040
4 157 438 533 542 507 461 479 400
12 132 204 242 248 242 244 257 263
###################################
- System: PRIMEQUEST 1800E
- CPU: Intel(R) Xeon(R) CPU E7540
- memory: 32GB
###################################
************ makedumpfile -d 0 ******************
core-data 0 256 512 768 1024 1280 1536 1792
threads-num
-l
0 34 282 245 179 179 179 179 180
4 63 143 224 230 220 212 207 204
8 65 129 200 225 235 235 225 220
12 67 149 186 211 222 229 237 236
-c
0 158 1505 2119 2129 1707 1483 1440 1273
4 207 589 672 673 636 564 536 514
8 176 327 377 387 367 336 314 291
12 191 272 295 306 288 259 257 240
************ makedumpfile -d 7 ******************
core-data 0 256 512 768 1024 1280 1536 1792
threads-num
-l
0 34 270 248 187 188 187 187 187
4 63 154 186 188 189 189 190 190
8 64 131 220 218 197 186 187 188
12 65 159 198 232 229 207 190 188
-c
0 154 1508 2089 2133 1792 1660 1462 1312
4 203 594 684 701 627 592 535 503
8 172 326 377 393 366 334 313 286
12 182 273 295 308 283 258 249 237
************ makedumpfile -d 31 ******************
core-data 0 256 512 768 1024 1280 1536 1792
threads-num
-l
0 2 1 1 2 1 2 2 2
4 48 48 48 48 49 48 48 49
8 48 49 48 49 48 47 49 48
12 49 50 49 49 49 48 50 49
-c
0 10 9 10 10 9 10 9 9
4 52 53 52 52 53 52 53 52
8 51 51 52 52 52 51 51 52
12 53 52 52 53 52 51 52 52
On 06/19/2015 04:56 PM, Zhou Wenjian wrote:
> This patch set implements parallel processing by means of multiple threads.
> With this patch set, it is available to use multiple threads to read
> and compress pages. This parallel process will save time.
> This feature only supports creating dumpfile in kdump-compressed format from
> vmcore in kdump-compressed format or elf format. Currently, sadump and
> xen kdump are not supported.
>
> Qiao Nuohan (10):
> Add readpage_kdump_compressed_parallel
> Add mappage_elf_parallel
> Add readpage_elf_parallel
> Add read_pfn_parallel
> Add function to initial bitmap for parallel use
> Add filter_data_buffer_parallel
> Add write_kdump_pages_parallel to allow parallel process
> Initial and free data used for parallel process
> Make makedumpfile available to read and compress pages parallelly
> Add usage and manual about multiple threads process
>
> Makefile | 2 +
> erase_info.c | 29 ++-
> erase_info.h | 2 +
> makedumpfile.8 | 24 ++
> makedumpfile.c | 1096 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> makedumpfile.h | 80 ++++
> print_info.c | 16 +
> 7 files changed, 1246 insertions(+), 3 deletions(-)
>
>
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
--
Thanks
Zhou Wenjian
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 19+ messages in thread
* RE: [PATCH v2 00/10] makedumpfile: parallel processing
2015-06-25 2:25 ` [PATCH v2 00/10] makedumpfile: parallel processing "Zhou, Wenjian/周文剑"
@ 2015-06-26 7:07 ` Atsushi Kumagai
2015-06-26 7:27 ` "Zhou, Wenjian/周文剑"
0 siblings, 1 reply; 19+ messages in thread
From: Atsushi Kumagai @ 2015-06-26 7:07 UTC (permalink / raw)
To: zhouwj-fnst; +Cc: kexec
Hello Zhou,
>Hello Atsushi Kumagai,
>
>I test this patch set in several machines and the following is the benchmark.
Thanks for your report, it looks good as before.
I also did simple test on kernel 3.19 on a 5GB Virtual Machine,
but I can't get such good result as below:
/ # time makedumpfile -c --num-threads 0 /proc/vmcore /mnt/dumpfile
Copying data : [100.0 %] /
The dumpfile is saved to /mnt/dumpfile.
makedumpfile Completed.
real 0m 44.40s
user 0m 43.62s
sys 0m 0.69s
/ #
/ # time makedumpfile -c --num-threads 4 /proc/vmcore /mnt/dumpfile
Copying data : [100.0 %] -
Copying data : [100.0 %] /
The dumpfile is saved to /mnt/dumpfile.
makedumpfile Completed.
real 5m 29.54s
user 6m 8.18s
sys 16m 33.25s
/ #
There is a big performance degradation.
Do you have any ideas why this happens ?
Thanks
Atsushi Kumagai
>These tables show the time that makedumpfile spends. And the unit is second.
>
>"core-data" in the table means the context in the vmcore.
> For example:
> core-data's value is 256. It means that in the vmcore, 256 * 8 bits of each page
> are set to 1.
>
>threads-num in the table means how many threads are used. 0 means original single thread
>implementation.
>
>"-l" in the table means producing lzo format vmcore
>
>"-c" in the table means producing kdump-compressed format vmcore
>
>###################################
>- System: PRIMERGY RX300 S6
>- CPU: Intel(R) Xeon(R) CPU x5660
>- memory: 16GB
>###################################
>************ makedumpfile -d 0 ******************
> core-data 0 256 512 768 1024 1280 1536 1792 2048 2304
>2560 2816 3072 3328 3584 3840
> threads-num
>-l
> 0 11 112 163 168 167 167 167 167 170 169 167
>166 167 168 169 113
> 4 5 111 158 166 167 167 167 166 169 170 166
>166 167 167 158 111
> 8 5 111 158 167 169 170 167 169 169 170 167
>168 168 168 159 111
> 12 6 111 158 168 167 167 169 168 170 169 167
>168 167 167 161 114
>-c
> 0 54 544 643 666 589 517 468 405 408 429 491
>528 592 676 654 527
> 4 60 179 210 216 206 220 209 214 211 222 220
>208 209 210 245 177
> 8 43 113 146 169 179 215 195 211 199 195 216
>197 185 165 158 109
> 12 44 106 142 162 169 171 168 173 174 171 200
>173 177 165 139 107
>
>************ makedumpfile -d 1 ******************
> core-data 0 256 512 768 1024 1280 1536 1792 2048 2304
>2560 2816 3072 3328 3584 3840
> threads-num
>-l
> 0 10 137 163 167 167 167 167 167 171 170 166
>166 166 168 169 138
> 4 5 111 158 166 167 166 167 166 170 171 166
>167 166 166 159 111
> 8 5 114 159 167 167 169 167 167 169 169 169
>167 168 167 160 111
> 12 6 113 159 168 167 168 167 168 169 170 168
>168 167 168 159 112
>-c
> 0 53 522 645 663 591 512 470 401 412 413 479
>532 587 666 648 524
> 4 57 205 244 216 208 217 216 225 221 225 223
>217 213 209 246 174
> 8 41 118 154 176 191 213 219 223 212 210 222
>207 198 176 164 118
> 12 43 111 148 174 189 194 191 182 178 173 204
>196 194 170 150 112
>
>************ makedumpfile -d 7 ******************
> core-data 0 256 512 768 1024 1280 1536 1792 2048 2304
>2560 2816 3072 3328 3584 3840
> threads-num
>-l
> 0 10 144 199 168 167 167 167 167 171 169 167
>166 166 168 169 144
> 4 5 110 158 167 167 167 166 166 170 169 166
>166 166 166 159 110
> 8 5 111 159 167 166 170 167 166 169 169 168
>167 167 167 159 110
> 12 6 111 159 167 167 166 168 167 169 169 168
>167 167 167 160 111
>-c
> 0 52 523 642 659 592 511 490 402 410 422 464
>525 591 666 647 529
> 4 58 177 210 213 208 224 217 228 223 229 227
>216 210 212 215 175
> 8 41 116 152 178 191 213 219 225 216 211 221
>211 196 180 158 116
> 12 41 111 151 176 192 187 192 190 174 175 218
>186 189 172 151 112
>
>************ makedumpfile -d 31 ******************
> core-data 0 256 512 768 1024 1280 1536 1792 2048 2304
>2560 2816 3072 3328 3584 3840
> threads-num
>-l
> 0 0 0 0 0 0 0 0 0 0 0 0
>0 0 0 0 0
> 4 2 2 2 3 3 3 2 3 3 3 3
>2 3 3 3 3
> 8 2 3 3 2 3 3 3 3 3 3 3
>3 3 3 3 3
> 12 2 3 3 3 3 3 3 3 3 3 3
>3 3 3 3 3
>-c
> 0 3 3 3 3 3 3 3 3 3 3 3
>3 3 3 3 3
> 4 4 5 5 4 5 5 5 5 4 5 5
>5 4 4 5 5
> 8 4 4 4 3 4 4 4 4 4 4 4
>4 4 4 4 4
> 12 4 4 4 4 4 4 4 4 4 4 4
>4 4 4 4 4
>
>###################################
>- System: PRIMERGY RX300 S7
>- CPU: Intel(R) Xeon(R) CPU E5-2620
>- memory: 32GB
>###################################
>************ makedumpfile -d 0 ******************
> core-data 0 256 512 768 1024 1280 1536 1792
> threads-num
>-l
> 0 23 211 246 255 252 252 253 257
> 4 14 168 234 250 253 251 251 252
> 12 17 165 237 250 251 251 253 253
>-c
> 0 117 1210 1620 1699 1443 1294 1235 1051
> 4 161 438 525 543 506 465 484 415
> 12 131 211 245 250 246 248 260 265
>
>************ makedumpfile -d 7 ******************
> core-data 0 256 512 768 1024 1280 1536 1792
> threads-num
>-l
> 0 24 215 253 256 255 254 257 255
> 4 14 165 235 247 249 250 252 253
> 12 16 169 237 251 255 253 252 250
>-c
> 0 120 1202 1625 1698 1434 1274 1223 1040
> 4 157 438 533 542 507 461 479 400
> 12 132 204 242 248 242 244 257 263
>
>###################################
>- System: PRIMEQUEST 1800E
>- CPU: Intel(R) Xeon(R) CPU E7540
>- memory: 32GB
>###################################
>************ makedumpfile -d 0 ******************
> core-data 0 256 512 768 1024 1280 1536 1792
> threads-num
>-l
> 0 34 282 245 179 179 179 179 180
> 4 63 143 224 230 220 212 207 204
> 8 65 129 200 225 235 235 225 220
> 12 67 149 186 211 222 229 237 236
>-c
> 0 158 1505 2119 2129 1707 1483 1440 1273
> 4 207 589 672 673 636 564 536 514
> 8 176 327 377 387 367 336 314 291
> 12 191 272 295 306 288 259 257 240
>
>************ makedumpfile -d 7 ******************
> core-data 0 256 512 768 1024 1280 1536 1792
> threads-num
>-l
> 0 34 270 248 187 188 187 187 187
> 4 63 154 186 188 189 189 190 190
> 8 64 131 220 218 197 186 187 188
> 12 65 159 198 232 229 207 190 188
>-c
> 0 154 1508 2089 2133 1792 1660 1462 1312
> 4 203 594 684 701 627 592 535 503
> 8 172 326 377 393 366 334 313 286
> 12 182 273 295 308 283 258 249 237
>
>************ makedumpfile -d 31 ******************
> core-data 0 256 512 768 1024 1280 1536 1792
> threads-num
>-l
> 0 2 1 1 2 1 2 2 2
> 4 48 48 48 48 49 48 48 49
> 8 48 49 48 49 48 47 49 48
> 12 49 50 49 49 49 48 50 49
>-c
> 0 10 9 10 10 9 10 9 9
> 4 52 53 52 52 53 52 53 52
> 8 51 51 52 52 52 51 51 52
> 12 53 52 52 53 52 51 52 52
>
>
>
>
>On 06/19/2015 04:56 PM, Zhou Wenjian wrote:
>> This patch set implements parallel processing by means of multiple threads.
>> With this patch set, it is available to use multiple threads to read
>> and compress pages. This parallel process will save time.
>> This feature only supports creating dumpfile in kdump-compressed format from
>> vmcore in kdump-compressed format or elf format. Currently, sadump and
>> xen kdump are not supported.
>>
>> Qiao Nuohan (10):
>> Add readpage_kdump_compressed_parallel
>> Add mappage_elf_parallel
>> Add readpage_elf_parallel
>> Add read_pfn_parallel
>> Add function to initial bitmap for parallel use
>> Add filter_data_buffer_parallel
>> Add write_kdump_pages_parallel to allow parallel process
>> Initial and free data used for parallel process
>> Make makedumpfile available to read and compress pages parallelly
>> Add usage and manual about multiple threads process
>>
>> Makefile | 2 +
>> erase_info.c | 29 ++-
>> erase_info.h | 2 +
>> makedumpfile.8 | 24 ++
>> makedumpfile.c | 1096 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> makedumpfile.h | 80 ++++
>> print_info.c | 16 +
>> 7 files changed, 1246 insertions(+), 3 deletions(-)
>>
>>
>> _______________________________________________
>> kexec mailing list
>> kexec@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/kexec
>
>
>--
>Thanks
>Zhou Wenjian
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2 00/10] makedumpfile: parallel processing
2015-06-26 7:07 ` Atsushi Kumagai
@ 2015-06-26 7:27 ` "Zhou, Wenjian/周文剑"
2015-06-26 7:49 ` Atsushi Kumagai
0 siblings, 1 reply; 19+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2015-06-26 7:27 UTC (permalink / raw)
To: Atsushi Kumagai; +Cc: kexec
On 06/26/2015 03:07 PM, Atsushi Kumagai wrote:
> Hello Zhou,
>
>> >Hello Atsushi Kumagai,
>> >
>> >I test this patch set in several machines and the following is the benchmark.
> Thanks for your report, it looks good as before.
> I also did simple test on kernel 3.19 on a 5GB Virtual Machine,
> but I can't get such good result as below:
>
> / # time makedumpfile -c --num-threads 0 /proc/vmcore /mnt/dumpfile
> Copying data : [100.0 %] /
>
> The dumpfile is saved to /mnt/dumpfile.
>
> makedumpfile Completed.
> real 0m 44.40s
> user 0m 43.62s
> sys 0m 0.69s
> / #
>
> / # time makedumpfile -c --num-threads 4 /proc/vmcore /mnt/dumpfile
> Copying data : [100.0 %] -
> Copying data : [100.0 %] /
>
> The dumpfile is saved to /mnt/dumpfile.
>
> makedumpfile Completed.
> real 5m 29.54s
> user 6m 8.18s
> sys 16m 33.25s
> / #
>
>
> There is a big performance degradation.
> Do you have any ideas why this happens ?
Does your Virtual Machine have more than 2 processors?
If so, check the value of nr_cpus in KDUMP_COMMANDLINE_APPEND in /etc/sysconfig/kdump.
It should also be set to a number larger than 2.
For "--num-threads 4", the most suitable number of processors is 5.
--
Thanks
Zhou Wenjian
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 19+ messages in thread
* RE: [PATCH v2 00/10] makedumpfile: parallel processing
2015-06-26 7:27 ` "Zhou, Wenjian/周文剑"
@ 2015-06-26 7:49 ` Atsushi Kumagai
2015-06-29 6:19 ` "Zhou, Wenjian/周文剑"
0 siblings, 1 reply; 19+ messages in thread
From: Atsushi Kumagai @ 2015-06-26 7:49 UTC (permalink / raw)
To: zhouwj-fnst; +Cc: kexec
>On 06/26/2015 03:07 PM, Atsushi Kumagai wrote:
>> Hello Zhou,
>>
>>> >Hello Atsushi Kumagai,
>>> >
>>> >I test this patch set in several machines and the following is the benchmark.
>> Thanks for your report, it looks good as before.
>> I also did simple test on kernel 3.19 on a 5GB Virtual Machine,
>> but I can't get such good result as below:
>>
>> / # time makedumpfile -c --num-threads 0 /proc/vmcore /mnt/dumpfile
>> Copying data : [100.0 %] /
>>
>> The dumpfile is saved to /mnt/dumpfile.
>>
>> makedumpfile Completed.
>> real 0m 44.40s
>> user 0m 43.62s
>> sys 0m 0.69s
>> / #
>>
>> / # time makedumpfile -c --num-threads 4 /proc/vmcore /mnt/dumpfile
>> Copying data : [100.0 %] -
>> Copying data : [100.0 %] /
>>
>> The dumpfile is saved to /mnt/dumpfile.
>>
>> makedumpfile Completed.
>> real 5m 29.54s
>> user 6m 8.18s
>> sys 16m 33.25s
>> / #
>>
>>
>> There is a big performance degradation.
>> Do you have any ideas why this happens ?
>
>Does your Virtual Machine have more than 2 processors?
>If so, check the value:nr_cpus in KDUMP_COMMANDLINE_APPEND of /etc/sysconfig/kdump.
>It should be also set to the number larger than 2.
>
>To "--num-threads 4", the most suitable number of processors is 5.
I attached 5 processors to the VM and I confirmed that all threads
consumed full cpu time by top(1) on the host:
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
17614 qemu 20 0 5792m 4.9g 5652 R 435.1 72.8 29:02.17 qemu-kvm
So I think the performance must be improved...
Thanks
Atsushi Kumagai
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2 00/10] makedumpfile: parallel processing
2015-06-26 7:49 ` Atsushi Kumagai
@ 2015-06-29 6:19 ` "Zhou, Wenjian/周文剑"
2015-06-30 9:06 ` Atsushi Kumagai
0 siblings, 1 reply; 19+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2015-06-29 6:19 UTC (permalink / raw)
To: Atsushi Kumagai; +Cc: kexec
[-- Attachment #1: Type: text/plain, Size: 613 bytes --]
On 06/26/2015 03:49 PM, Atsushi Kumagai wrote:
> I attached 5 processors to the VM and I confirmed that all threads
> consumed full cpu time by top(1) on the host:
>
> PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
> 17614 qemu 20 0 5792m 4.9g 5652 R 435.1 72.8 29:02.17 qemu-kvm
>
> So I think the performance must be improved...
Since I can't reproduce that result on any of the machines here, could you test it with the patch "time"
and show me the output?
Using "./makedumpfile -c --num-threads 4 /proc/vmcore dumpfile" is OK.
The attached file is the patch "time".
--
Thanks
Zhou Wenjian
[-- Attachment #2: time --]
[-- Type: text/plain, Size: 9092 bytes --]
--- makedumpfile.ori 2015-06-28 23:32:36.187000000 -0400
+++ makedumpfile.c 2015-06-29 01:51:55.884000000 -0400
@@ -26,6 +26,46 @@
#include <limits.h>
#include <assert.h>
+unsigned long write_kdump_pages_parallel_cyclic_time = 0;
+struct timeval write_kdump_pages_parallel_cyclic1, write_kdump_pages_parallel_cyclic2;
+unsigned long loop_time = 0;
+unsigned long consume_time = 0;
+struct timeval loop1, loop2;
+unsigned long check_time = 0;
+struct timeval check1, check2;
+struct timeval write1, write2;
+unsigned long write_time;
+struct timeval lock1, lock2;
+unsigned long lock_time;
+struct timeval hit1, hit2;
+unsigned long hit_time;
+struct timeval find1, find2;
+unsigned long find_time;
+struct timeval timea1, timea2;
+unsigned long timea;
+struct timeval timeb1, timeb2;
+unsigned long read_time[4];
+struct timeval read1[4], read2[4];
+unsigned long lock_current_time[4];
+struct timeval lock_current1[4], lock_current2[4];
+unsigned long found_time[4];
+struct timeval found1[4], found2[4];
+unsigned long lock_consumed_time[4];
+struct timeval lock_consumed1[4], lock_consumed2[4];
+unsigned long compress_time[4];
+struct timeval compress_time1[4], compress_time2[4];
+unsigned long timeb;
+unsigned long count1 = 0;
+unsigned long count2 = 0;
+unsigned long count3 = 0;
+unsigned long count4 = 0;
+unsigned long count5 = 0;
+unsigned long count6 = 0;
+unsigned long count7 = 0;
+unsigned long count8 = 0;
+unsigned long count9 = 0;
+
+
struct symbol_table symbol_table;
struct size_table size_table;
struct offset_table offset_table;
@@ -6944,6 +6984,7 @@
unsigned long len_buf_out_snappy =
snappy_max_compressed_length(info->page_size);
#endif
+int thread_num = kdump_thread_args->thread_num;
buf = BUF_PARALLEL(kdump_thread_args->thread_num);
buf_out = BUF_OUT_PARALLEL(kdump_thread_args->thread_num);
@@ -6964,17 +7005,20 @@
while (1) {
/* get next pfn */
+gettimeofday(&lock_current1[thread_num], NULL);
pthread_mutex_lock(&info->current_pfn_mutex);
pfn = info->current_pfn;
info->current_pfn++;
pthread_mutex_unlock(&info->current_pfn_mutex);
-
+gettimeofday(&lock_current2[thread_num], NULL);
+lock_current_time[thread_num] += (lock_current2[thread_num].tv_sec - lock_current1[thread_num].tv_sec) * 1000000 + (lock_current2[thread_num].tv_usec - lock_current1[thread_num].tv_usec);
if (pfn >= kdump_thread_args->end_pfn)
break;
index = -1;
found = FALSE;
+gettimeofday(&found1[thread_num], NULL);
while (found == FALSE) {
/*
* need a cancellation point here
@@ -6983,15 +7027,23 @@
index = pfn % page_data_num;
- if (page_data_buf[index].ready != 0)
+ if (page_data_buf[index].ready != 0){
continue;
+}
- if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
+ if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0){
continue;
+}
- if (page_data_buf[index].ready != 0)
+ if (page_data_buf[index].ready != 0){
+count1++;
goto unlock;
+}
+gettimeofday(&found2[thread_num], NULL);
+found_time[thread_num] += (found2[thread_num].tv_sec - found1[thread_num].tv_sec) * 1000000 + (found2[thread_num].tv_usec - found1[thread_num].tv_usec);
+gettimeofday(&found1[thread_num], NULL);
+gettimeofday(&lock_consumed1[thread_num], NULL);
pthread_mutex_lock(&info->consumed_pfn_mutex);
if ((long)page_data_buf[index].pfn >
(long)info->consumed_pfn)
@@ -6999,11 +7051,15 @@
consumed_pfn = info->consumed_pfn;
pthread_mutex_unlock(&info->consumed_pfn_mutex);
+gettimeofday(&lock_consumed2[thread_num], NULL);
+lock_consumed_time[thread_num] += (lock_consumed2[thread_num].tv_sec - lock_consumed1[thread_num].tv_sec) * 1000000 + (lock_consumed2[thread_num].tv_usec - lock_consumed1[thread_num].tv_usec);
/*
* leave space for slow producer
*/
- if ((long)pfn - (long)consumed_pfn > page_data_num)
+ if ((long)pfn - (long)consumed_pfn > page_data_num){
+count2++;
goto unlock;
+}
found = TRUE;
@@ -7025,11 +7081,16 @@
page_data_buf[index].dumpable = TRUE;
+gettimeofday(&read1[thread_num], NULL);
if (!read_pfn_parallel(fd_memory, pfn, buf,
&bitmap_memory_parallel,
mmap_cache))
goto fail;
+gettimeofday(&read2[thread_num], NULL);
+read_time[thread_num] += (read2[thread_num].tv_sec - read1[thread_num].tv_sec) * 1000000 + (read2[thread_num].tv_usec - read1[thread_num].tv_usec);
+
+gettimeofday(&compress_time1[thread_num], NULL);
filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
info->page_size,
&info->filter_mutex);
@@ -7090,6 +7151,8 @@
page_data_buf[index].size = info->page_size;
memcpy(page_data_buf[index].buf, buf, info->page_size);
}
+gettimeofday(&compress_time2[thread_num], NULL);
+compress_time[thread_num] += (compress_time2[thread_num].tv_sec - compress_time1[thread_num].tv_sec) * 1000000 + (compress_time2[thread_num].tv_usec - compress_time1[thread_num].tv_usec);
unlock:
pthread_mutex_unlock(&page_data_buf[index].mutex);
@@ -7220,6 +7283,7 @@
gettimeofday(&last, NULL);
while (consuming_pfn < end_pfn) {
+gettimeofday(&loop1, NULL);
index = consuming_pfn % page_data_num;
gettimeofday(&new, NULL);
@@ -7232,17 +7296,26 @@
* check pfn first without mutex locked to reduce the time
* trying to lock the mutex
*/
- if (page_data_buf[index].pfn != consuming_pfn)
+ if (page_data_buf[index].pfn != consuming_pfn){
+gettimeofday(&loop2, NULL);
+loop_time += (loop2.tv_sec - loop1.tv_sec) * 1000000 + (loop2.tv_usec - loop1.tv_usec);
continue;
+}
+gettimeofday(&find1, NULL);
+gettimeofday(&lock1, NULL);
pthread_mutex_lock(&page_data_buf[index].mutex);
+gettimeofday(&lock2, NULL);
+lock_time += (lock2.tv_sec - lock1.tv_sec) * 1000000 + (lock2.tv_usec - lock1.tv_usec);
+
/* check whether the found one is ready to be consumed */
if (page_data_buf[index].pfn != consuming_pfn ||
page_data_buf[index].ready != 1) {
goto unlock;
}
+gettimeofday(&hit1, NULL);
if ((num_dumped % per) == 0)
print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
@@ -7256,6 +7329,7 @@
num_dumped++;
+gettimeofday(&write1, NULL);
if (page_data_buf[index].zero == TRUE) {
if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
goto out;
@@ -7278,8 +7352,17 @@
goto out;
}
+
+gettimeofday(&write2, NULL);
+write_time = (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
+
unlock:
pthread_mutex_unlock(&page_data_buf[index].mutex);
+gettimeofday(&hit2, NULL);
+gettimeofday(&find2, NULL);
+hit_time += (hit2.tv_sec - hit1.tv_sec) * 1000000 + (hit2.tv_usec - hit1.tv_usec);
+find_time += (find2.tv_sec - find1.tv_sec) * 1000000 + (find2.tv_usec - find1.tv_usec);
+
}
ret = TRUE;
@@ -10314,6 +10397,11 @@
int
main(int argc, char *argv[])
{
+ unsigned long total_time = 0;
+ struct timeval start_time, end_time;
+ gettimeofday(&start_time, NULL);
+
+
int i, opt, flag_debug = FALSE;
if ((info = calloc(1, sizeof(struct DumpInfo))) == NULL) {
@@ -10328,7 +10416,6 @@
goto out;
}
initialize_tables();
-
/*
* By default, makedumpfile assumes that multi-cycle processing is
* necessary to work in constant memory space.
@@ -10642,5 +10729,32 @@
}
free_elf_info();
+ gettimeofday(&end_time, NULL);
+ total_time = (end_time.tv_sec - start_time.tv_sec) * 1000000 + (end_time.tv_usec - start_time.tv_usec);
+ MSG("lock time: %lds%ldus\n", lock_time / 1000000, lock_time % 1000000);
+ MSG("write time: %lds%ldus\n", write_time / 1000000, write_time % 1000000);
+ MSG("hit time: %lds%ldus\n", hit_time / 1000000, hit_time % 1000000);
+ MSG("find time: %lds%ldus\n", find_time / 1000000, find_time % 1000000);
+ MSG("loop_time: %lds%ldus\n", loop_time / 1000000, loop_time % 1000000);
+ MSG("thread consume_time: %lds%ldus\n", consume_time / 1000000, consume_time % 1000000);
+ MSG("thread timea: %lds%ldus\n", timea / 1000000, timea % 1000000);
+ MSG("thread timeb: %lds%ldus\n", timeb / 1000000, timeb % 1000000);
+for (i = 0; i < 4; i++){
+ MSG("read_time[%d]: %lds%ldus\n", i, read_time[i] / 1000000, read_time[i] % 1000000);
+ MSG("lock_current_time[%d]: %lds%ldus\n", i, lock_current_time[i] / 1000000, lock_current_time[i] % 1000000);
+ MSG("found_time[%d]: %lds%ldus\n", i, found_time[i] / 1000000, found_time[i] % 1000000);
+ MSG("lock_consumed_time[%d]: %lds%ldus\n", i, lock_consumed_time[i] / 1000000, lock_consumed_time[i] % 1000000);
+ MSG("compress_time[%d]: %lds%ldus\n", i, compress_time[i] / 1000000, compress_time[i] % 1000000);
+}
+ MSG("count1: %ld\n", count1);
+ MSG("count2: %ld\n", count2);
+ MSG("count3: %ld\n", count3);
+ MSG("count4: %ld\n", count4);
+ MSG("count4: %ld\n", count5);
+ MSG("count4: %ld\n", count6);
+ MSG("count4: %ld\n", count7);
+ MSG("exec time: %lds%ldus\n", total_time / 1000000, total_time % 1000000);
+
+
return retcd;
}
[-- Attachment #3: Type: text/plain, Size: 143 bytes --]
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 19+ messages in thread
* RE: [PATCH v2 00/10] makedumpfile: parallel processing
2015-06-29 6:19 ` "Zhou, Wenjian/周文剑"
@ 2015-06-30 9:06 ` Atsushi Kumagai
2015-07-06 13:19 ` "Zhou, Wenjian/周文剑"
0 siblings, 1 reply; 19+ messages in thread
From: Atsushi Kumagai @ 2015-06-30 9:06 UTC (permalink / raw)
To: zhouwj-fnst; +Cc: kexec
>On 06/26/2015 03:49 PM, Atsushi Kumagai wrote:
>> I attached 5 processors to the VM and I confirmed that all threads
>> consumed full cpu time by top(1) on the host:
>>
>> PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
>> 17614 qemu 20 0 5792m 4.9g 5652 R 435.1 72.8 29:02.17 qemu-kvm
>>
>> So I think the performance must be improved...
>
>Since I can't get that result in all machines here, could you test it with the patch:time
>and show me the output?
>Using "./makedumpfile -c --num-threads 4 /proc/vmcore dumpfile" is OK.
>
>The attachment is the patch time.
Here is the result:
/ # makedumpfile -c --num-threads 4 /proc/vmcore /mnt/dumpfile
Copying data : [100.0 %] |
Copying data : [100.0 %] \
The dumpfile is saved to /mnt/dumpfile.
makedumpfile Completed.
lock time: 310s935500us
write time: 3s970037us
hit time: 6s316043us
find time: 317s926654us
loop_time: 37s321800us
thread consume_time: 0s0us
thread timea: 0s0us
thread timeb: 0s0us
read_time[0]: 8s637011us
lock_current_time[0]: 0s284428us
found_time[0]: 60s366795us
lock_consumed_time[0]: 2s782596us
compress_time[0]: 301s427073us
read_time[1]: 8s435914us
lock_current_time[1]: 0s271680us
found_time[1]: 60s329026us
lock_consumed_time[1]: 2s849061us
compress_time[1]: 302s98620us
read_time[2]: 8s380550us
lock_current_time[2]: 0s270388us
found_time[2]: 60s209376us
lock_consumed_time[2]: 3s297574us
compress_time[2]: 301s486768us
read_time[3]: 8s550662us
lock_current_time[3]: 0s278997us
found_time[3]: 60s476702us
lock_consumed_time[3]: 3s49184us
compress_time[3]: 301s718390us
count1: 172
count2: 70921401
count3: 0
count4: 0
count5: 0
count6: 0
count7: 0
exec time: 380s125494us
BTW, I fixed a small mistake before testing like:
- write_time = (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
+ write_time += (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
Thanks
Atsushi Kumagai
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2 00/10] makedumpfile: parallel processing
2015-06-30 9:06 ` Atsushi Kumagai
@ 2015-07-06 13:19 ` "Zhou, Wenjian/周文剑"
2015-07-08 8:10 ` Atsushi Kumagai
0 siblings, 1 reply; 19+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2015-07-06 13:19 UTC (permalink / raw)
To: Atsushi Kumagai; +Cc: kexec
[-- Attachment #1: Type: text/plain, Size: 3208 bytes --]
Hello Atsushi Kumagai,
I have tried a lot, and I think the big performance degradation only
occurs on certain CPUs.
I have thought of two possible reasons, and I need your help to confirm which is
the real one.
The following tests can also be run using a dumpfile instead of /proc/vmcore.
Test 1: determine whether it is caused by multi-threading.
apply patch: test1
command1: ./makedumpfile -c /proc/vmcore vmcore --num-threads 1
command2: ./makedumpfile -c /proc/vmcore vmcore --num-threads 8
It is better to do some tests with -l too.
command1: ./makedumpfile -l /proc/vmcore vmcore
command2: ./makedumpfile -l /proc/vmcore vmcore --num-threads 1
command3: ./makedumpfile -l /proc/vmcore vmcore --num-threads 8
Test 2: determine whether it is caused by doing compression in threads
2.1:
apply patch: test2.1
command: ./makedumpfile -c /proc/vmcore vmcore --num-threads 1
2.2:
apply patch: test2.2
command: ./makedumpfile -c /proc/vmcore vmcore --num-threads 1
Thanks a lot.
BTW, could you show me the CPU name, zlib version, and glibc version?
--
Thanks
Zhou Wenjian
On 06/30/2015 05:06 PM, Atsushi Kumagai wrote:
>> On 06/26/2015 03:49 PM, Atsushi Kumagai wrote:
>>> I attached 5 processors to the VM and I confirmed that all threads
>>> consumed full cpu time by top(1) on the host:
>>>
>>> PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
>>> 17614 qemu 20 0 5792m 4.9g 5652 R 435.1 72.8 29:02.17 qemu-kvm
>>>
>>> So I think the performance must be improved...
>>
>> Since I can't get that result in all machines here, could you test it with the patch:time
>> and show me the output?
>> Using "./makedumpfile -c --num-threads 4 /proc/vmcore dumpfile" is OK.
>>
>> The attachment is the patch time.
>
> Here is the result:
>
> / # makedumpfile -c --num-threads 4 /proc/vmcore /mnt/dumpfile
> Copying data : [100.0 %] |
> Copying data : [100.0 %] \
>
> The dumpfile is saved to /mnt/dumpfile.
>
> makedumpfile Completed.
> lock time: 310s935500us
> write time: 3s970037us
> hit time: 6s316043us
> find time: 317s926654us
> loop_time: 37s321800us
> thread consume_time: 0s0us
> thread timea: 0s0us
> thread timeb: 0s0us
> read_time[0]: 8s637011us
> lock_current_time[0]: 0s284428us
> found_time[0]: 60s366795us
> lock_consumed_time[0]: 2s782596us
> compress_time[0]: 301s427073us
> read_time[1]: 8s435914us
> lock_current_time[1]: 0s271680us
> found_time[1]: 60s329026us
> lock_consumed_time[1]: 2s849061us
> compress_time[1]: 302s98620us
> read_time[2]: 8s380550us
> lock_current_time[2]: 0s270388us
> found_time[2]: 60s209376us
> lock_consumed_time[2]: 3s297574us
> compress_time[2]: 301s486768us
> read_time[3]: 8s550662us
> lock_current_time[3]: 0s278997us
> found_time[3]: 60s476702us
> lock_consumed_time[3]: 3s49184us
> compress_time[3]: 301s718390us
> count1: 172
> count2: 70921401
> count3: 0
> count4: 0
> count5: 0
> count6: 0
> count7: 0
> exec time: 380s125494us
>
>
> BTW, I fixed a small mistake before testing like:
>
> - write_time = (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
> + write_time += (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
>
[-- Attachment #2: test1 --]
[-- Type: text/plain, Size: 7560 bytes --]
--- makedumpfile.ori 2015-07-06 11:56:11.300000000 -0400
+++ makedumpfile.c 2015-07-06 11:56:28.127000000 -0400
@@ -26,6 +26,42 @@
#include <limits.h>
#include <assert.h>
+unsigned long write_kdump_pages_parallel_cyclic_time = 0;
+struct timeval write_kdump_pages_parallel_cyclic1, write_kdump_pages_parallel_cyclic2;
+unsigned long loop_time = 0;
+unsigned long consume_time = 0;
+struct timeval loop1, loop2;
+unsigned long check_time = 0;
+struct timeval check1, check2;
+struct timeval write1, write2;
+unsigned long write_time;
+struct timeval lock1, lock2;
+unsigned long lock_time;
+struct timeval hit1, hit2;
+unsigned long hit_time;
+struct timeval find1, find2;
+unsigned long find_time;
+struct timeval timea1, timea2;
+unsigned long timea;
+struct timeval timeb1, timeb2;
+unsigned long filter_time[20];
+struct timeval filter1[20], filter2[20];
+unsigned long cp_time[20];
+struct timeval cp1[20], cp2[20];
+unsigned long compress_time[20];
+struct timeval compress_time1[20], compress_time2[20];
+unsigned long timeb;
+unsigned long count1 = 0;
+unsigned long count2 = 0;
+unsigned long count3 = 0;
+unsigned long count4 = 0;
+unsigned long count5 = 0;
+unsigned long count6 = 0;
+unsigned long count7 = 0;
+unsigned long count8 = 0;
+unsigned long count9 = 0;
+
+
struct symbol_table symbol_table;
struct size_table size_table;
struct offset_table offset_table;
@@ -6919,6 +6955,17 @@
return TRUE;
}
+int compress2015(unsigned char *buf_out, unsigned long *size_out, unsigned char *buf, int page_size, int a, int thread_num)
+{
+int ret;
+gettimeofday(&compress_time1[thread_num], NULL);
+ret = compress2(buf_out, size_out, buf, info->page_size, Z_BEST_SPEED);
+gettimeofday(&compress_time2[thread_num], NULL);
+compress_time[thread_num] += (compress_time2[thread_num].tv_sec - compress_time1[thread_num].tv_sec) * 1000000 + (compress_time2[thread_num].tv_usec - compress_time1[thread_num].tv_usec);
+
+return ret;
+}
+
void *
kdump_thread_function_cyclic(void *arg) {
void *retval = PTHREAD_FAIL;
@@ -6944,6 +6991,7 @@
unsigned long len_buf_out_snappy =
snappy_max_compressed_length(info->page_size);
#endif
+int thread_num = kdump_thread_args->thread_num;
buf = BUF_PARALLEL(kdump_thread_args->thread_num);
buf_out = BUF_OUT_PARALLEL(kdump_thread_args->thread_num);
@@ -7030,9 +7078,13 @@
mmap_cache))
goto fail;
+
+gettimeofday(&filter1[thread_num], NULL);
filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
info->page_size,
&info->filter_mutex);
+gettimeofday(&filter2[thread_num], NULL);
+filter_time[thread_num] += (filter2[thread_num].tv_sec - filter1[thread_num].tv_sec) * 1000000 + (filter2[thread_num].tv_usec - filter1[thread_num].tv_usec);
if ((info->dump_level & DL_EXCLUDE_ZERO)
&& is_zero_page(buf, info->page_size)) {
@@ -7048,14 +7100,17 @@
size_out = kdump_thread_args->len_buf_out;
if ((info->flag_compress & DUMP_DH_COMPRESSED_ZLIB)
&& ((size_out = kdump_thread_args->len_buf_out),
- compress2(buf_out, &size_out, buf,
+ compress2015(buf_out, &size_out, buf,
info->page_size,
- Z_BEST_SPEED) == Z_OK)
+ Z_BEST_SPEED, thread_num) == Z_OK)
&& (size_out < info->page_size)) {
page_data_buf[index].flags =
DUMP_DH_COMPRESSED_ZLIB;
page_data_buf[index].size = size_out;
+gettimeofday(&cp1[thread_num], NULL);
memcpy(page_data_buf[index].buf, buf_out, size_out);
+gettimeofday(&cp2[thread_num], NULL);
+cp_time[thread_num] += (cp2[thread_num].tv_sec - cp1[thread_num].tv_sec) * 1000000 + (cp2[thread_num].tv_usec - cp1[thread_num].tv_usec);
#ifdef USELZO
} else if (info->flag_lzo_support
&& (info->flag_compress
@@ -7220,6 +7275,7 @@
gettimeofday(&last, NULL);
while (consuming_pfn < end_pfn) {
+gettimeofday(&loop1, NULL);
index = consuming_pfn % page_data_num;
gettimeofday(&new, NULL);
@@ -7232,17 +7288,26 @@
* check pfn first without mutex locked to reduce the time
* trying to lock the mutex
*/
- if (page_data_buf[index].pfn != consuming_pfn)
+ if (page_data_buf[index].pfn != consuming_pfn){
+gettimeofday(&loop2, NULL);
+loop_time += (loop2.tv_sec - loop1.tv_sec) * 1000000 + (loop2.tv_usec - loop1.tv_usec);
continue;
+}
+gettimeofday(&find1, NULL);
+gettimeofday(&lock1, NULL);
pthread_mutex_lock(&page_data_buf[index].mutex);
+gettimeofday(&lock2, NULL);
+lock_time += (lock2.tv_sec - lock1.tv_sec) * 1000000 + (lock2.tv_usec - lock1.tv_usec);
+
/* check whether the found one is ready to be consumed */
if (page_data_buf[index].pfn != consuming_pfn ||
page_data_buf[index].ready != 1) {
goto unlock;
}
+gettimeofday(&hit1, NULL);
if ((num_dumped % per) == 0)
print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
@@ -7256,6 +7321,7 @@
num_dumped++;
+gettimeofday(&write1, NULL);
if (page_data_buf[index].zero == TRUE) {
if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
goto out;
@@ -7278,8 +7344,17 @@
goto out;
}
+
+gettimeofday(&write2, NULL);
+write_time += (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
+
unlock:
pthread_mutex_unlock(&page_data_buf[index].mutex);
+gettimeofday(&hit2, NULL);
+gettimeofday(&find2, NULL);
+hit_time += (hit2.tv_sec - hit1.tv_sec) * 1000000 + (hit2.tv_usec - hit1.tv_usec);
+find_time += (find2.tv_sec - find1.tv_sec) * 1000000 + (find2.tv_usec - find1.tv_usec);
+
}
ret = TRUE;
@@ -7438,8 +7513,8 @@
size_out = len_buf_out;
if ((info->flag_compress & DUMP_DH_COMPRESSED_ZLIB)
&& ((size_out = len_buf_out),
- compress2(buf_out, &size_out, buf, info->page_size,
- Z_BEST_SPEED) == Z_OK)
+ compress2015(buf_out, &size_out, buf, info->page_size,
+ Z_BEST_SPEED, 0) == Z_OK)
&& (size_out < info->page_size)) {
pd.flags = DUMP_DH_COMPRESSED_ZLIB;
pd.size = size_out;
@@ -10314,6 +10389,11 @@
int
main(int argc, char *argv[])
{
+ unsigned long total_time = 0;
+ struct timeval start_time, end_time;
+ gettimeofday(&start_time, NULL);
+
+
int i, opt, flag_debug = FALSE;
if ((info = calloc(1, sizeof(struct DumpInfo))) == NULL) {
@@ -10328,7 +10408,6 @@
goto out;
}
initialize_tables();
-
/*
* By default, makedumpfile assumes that multi-cycle processing is
* necessary to work in constant memory space.
@@ -10642,5 +10721,21 @@
}
free_elf_info();
+ gettimeofday(&end_time, NULL);
+ total_time = (end_time.tv_sec - start_time.tv_sec) * 1000000 + (end_time.tv_usec - start_time.tv_usec);
+ MSG("lock time: %lds%ldus\n", lock_time / 1000000, lock_time % 1000000);
+ MSG("write time: %lds%ldus\n", write_time / 1000000, write_time % 1000000);
+ MSG("hit time: %lds%ldus\n", hit_time / 1000000, hit_time % 1000000);
+ MSG("find time: %lds%ldus\n", find_time / 1000000, find_time % 1000000);
+ MSG("loop_time: %lds%ldus\n", loop_time / 1000000, loop_time % 1000000);
+ MSG("thread consume_time: %lds%ldus\n", consume_time / 1000000, consume_time % 1000000);
+for (i = 0; i < info->num_threads; i++){
+ MSG("filter_time[%d]: %lds%ldus\n", i, filter_time[i] / 1000000, filter_time[i] % 1000000);
+ MSG("cp_time[%d]: %lds%ldus\n", i, cp_time[i] / 1000000, cp_time[i] % 1000000);
+ MSG("compress_time[%d]: %lds%ldus\n", i, compress_time[i] / 1000000, compress_time[i] % 1000000);
+}
+ MSG("exec time: %lds%ldus\n", total_time / 1000000, total_time % 1000000);
+
+
return retcd;
}
[-- Attachment #3: test2.1 --]
[-- Type: application/x-troff-man, Size: 8159 bytes --]
[-- Attachment #4: test2.2 --]
[-- Type: application/x-troff-man, Size: 8501 bytes --]
[-- Attachment #5: Type: text/plain, Size: 143 bytes --]
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 19+ messages in thread
* RE: [PATCH v2 00/10] makedumpfile: parallel processing
2015-07-06 13:19 ` "Zhou, Wenjian/周文剑"
@ 2015-07-08 8:10 ` Atsushi Kumagai
0 siblings, 0 replies; 19+ messages in thread
From: Atsushi Kumagai @ 2015-07-08 8:10 UTC (permalink / raw)
To: zhouwj-fnst; +Cc: kexec
[-- Attachment #1: Type: text/plain, Size: 6868 bytes --]
>Hello Atsushi Kumagai,
>
>I have tried a lot, and I think the big performance degradation only
>occurs on specific CPUs.
>I thought about two reasons, and I need your help to confirm which is
>the real one.
>
>The following tests will also be OK by using dumpfile instead of /proc/vmcore
>
>Test 1: determine whether it is caused by multi-threading.
>apply patch: test1
>command1: ./makedumpfile -c /proc/vmcore vmcore --num-threads 1
>command2: ./makedumpfile -c /proc/vmcore vmcore --num-threads 8
>
>It would be better to do some tests with -l too.
>command1: ./makedumpfile -l /proc/vmcore vmcore
>command2: ./makedumpfile -l /proc/vmcore vmcore --num-threads 1
>command3: ./makedumpfile -l /proc/vmcore vmcore --num-threads 8
>
>Test 2: determine whether it is caused by doing compression in a thread
>2.1:
> apply patch: test2.1
> command: ./makedumpfile -c /proc/vmcore vmcore --num-threads 1
>2.2:
> apply patch: test2.2
> command: ./makedumpfile -c /proc/vmcore vmcore --num-threads 1
>
>Thanks a lot.
>
>BTW, could you show me the cpu name, zlib version and glibc version ?
Here is the my environments, I tested on 1st kernel on a physical
machine from this time:
CPU: Intel(R) Xeon(R) CPU E7- 8870 @ 2.40GHz (80 cores)
zlib: 1.2.3-29
glibc: 2.12-1.149
Here is the summary of the result, the whole logs are attached:
Test1:
| threads | compress time | exec time |
| 1 | 82.75 | 95.21 |
| 8 | 34.1~34.8 | 40.32 |
Test1(lzo):
| threads | compress time | exec time |
| - | - | 30.33 |
| 1 | - | 33.10 |
| 8 | - | 28.77 |
Test2-1:
| threads | compress time | exec time |
| 1 | 76.12 | 82.13 |
Test2-2:
| threads | compress time | exec time |
| 1 | 41.97 | 51.46 |
There are better results than on virtual machines, but still worse than
without creating thread.
Additionally, I collected performance logs by perf for debugging as below:
Test2-1:
38.93% makedumpfile-2- libz.so.1.2.3 [.] 0x0000000000006e30 // compress2(?)
5.16% makedumpfile-2- [kernel.kallsyms] [k] page_fault
5.02% makedumpfile-2- [kernel.kallsyms] [k] clear_page_c
4.96% makedumpfile-2- libc-2.12.so [.] __memset_sse2
4.64% makedumpfile-2- [kernel.kallsyms] [k] tick_program_event
3.77% makedumpfile-2- libz.so.1.2.3 [.] adler32
2.08% makedumpfile-2- libc-2.12.so [.] memcpy
1.95% makedumpfile-2- [kernel.kallsyms] [k] __alloc_pages_nodemask
1.54% makedumpfile-2- [kernel.kallsyms] [k] get_page_from_freelist
1.53% makedumpfile-2- [kernel.kallsyms] [k] tick_dev_program_event
1.32% makedumpfile-2- [kernel.kallsyms] [k] __mem_cgroup_commit_charge
1.08% makedumpfile-2- [kernel.kallsyms] [k] hrtimer_interrupt
0.94% makedumpfile-2- [kernel.kallsyms] [k] release_pages
0.85% makedumpfile-2- [kernel.kallsyms] [k] alloc_pages_vma
...
Test2-2:
66.46% makedumpfile-2- libz.so.1.2.3 [.] 0x000000000000564e // compress2(?)
6.71% makedumpfile-2- libz.so.1.2.3 [.] adler32
5.56% makedumpfile-2- libc-2.12.so [.] __memset_sse2
3.44% makedumpfile-2- libc-2.12.so [.] memcpy
2.85% makedumpfile-2- [kernel.kallsyms] [k] tick_dev_program_event
1.30% makedumpfile-2- libz.so.1.2.3 [.] _tr_flush_block
0.95% makedumpfile-2- [kernel.kallsyms] [k] tick_program_event
0.92% makedumpfile-2- libz.so.1.2.3 [.] _tr_init
0.69% makedumpfile-2- [kernel.kallsyms] [k] hrtimer_interrupt
0.63% makedumpfile-2- [kernel.kallsyms] [k] page_fault
0.34% makedumpfile-2- libpthread-2.12.so [.] __lll_unlock_wake
0.33% makedumpfile-2- libc-2.12.so [.] _int_malloc
0.33% makedumpfile-2- libc-2.12.so [.] _int_free
In the case of Test2-1(using pthread_create), longer cpu time was spent for some
kernel functions like page_fault() than Test2-2(calling kdump_thread_function_cyclic()
directly), it may be a one of the cause of the degradation. I haven't found the reason
yet, this is just for your information.
Thanks
Atsushi Kumagai
>--
>Thanks
>Zhou Wenjian
>
>On 06/30/2015 05:06 PM, Atsushi Kumagai wrote:
>>> On 06/26/2015 03:49 PM, Atsushi Kumagai wrote:
>>>> I attached 5 processors to the VM and I confirmed that all threads
>>>> consumed full cpu time by top(1) on the host:
>>>>
>>>> PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
>>>> 17614 qemu 20 0 5792m 4.9g 5652 R 435.1 72.8 29:02.17 qemu-kvm
>>>>
>>>> So I think the performance must be improved...
>>>
>>> Since I can't get that result on any of the machines here, could you test it with the attached patch "time"
>>> and show me the output?
>>> Using "./makedumpfile -c --num-threads 4 /proc/vmcore dumpfile" is OK.
>>>
>>> The attachment is the patch time.
>>
>> Here is the result:
>>
>> / # makedumpfile -c --num-threads 4 /proc/vmcore /mnt/dumpfile
>> Copying data : [100.0 %] |
>> Copying data : [100.0 %] \
>>
>> The dumpfile is saved to /mnt/dumpfile.
>>
>> makedumpfile Completed.
>> lock time: 310s935500us
>> write time: 3s970037us
>> hit time: 6s316043us
>> find time: 317s926654us
>> loop_time: 37s321800us
>> thread consume_time: 0s0us
>> thread timea: 0s0us
>> thread timeb: 0s0us
>> read_time[0]: 8s637011us
>> lock_current_time[0]: 0s284428us
>> found_time[0]: 60s366795us
>> lock_consumed_time[0]: 2s782596us
>> compress_time[0]: 301s427073us
>> read_time[1]: 8s435914us
>> lock_current_time[1]: 0s271680us
>> found_time[1]: 60s329026us
>> lock_consumed_time[1]: 2s849061us
>> compress_time[1]: 302s98620us
>> read_time[2]: 8s380550us
>> lock_current_time[2]: 0s270388us
>> found_time[2]: 60s209376us
>> lock_consumed_time[2]: 3s297574us
>> compress_time[2]: 301s486768us
>> read_time[3]: 8s550662us
>> lock_current_time[3]: 0s278997us
>> found_time[3]: 60s476702us
>> lock_consumed_time[3]: 3s49184us
>> compress_time[3]: 301s718390us
>> count1: 172
>> count2: 70921401
>> count3: 0
>> count4: 0
>> count5: 0
>> count6: 0
>> count7: 0
>> exec time: 380s125494us
>>
>>
>> BTW, I fixed a small mistake before testing like:
>>
>> - write_time = (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
>> + write_time += (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
>>
[-- Attachment #2: test1.log --]
[-- Type: application/octet-stream, Size: 8195 bytes --]
# ./makedumpfile -c --num-thread 1 vmcore-3.19 dumpfile
Checking for memory holes : [ 0.0 %] /Checking for memory holes : [100.0 %] |Copying data : [ 0.5 %] \Copying data : [ 1.1 %] -Copying data : [ 1.5 %] /Copying data : [ 2.0 %] |Copying data : [ 2.5 %] \Copying data : [ 2.9 %] -Copying data : [ 3.4 %] /Copying data : [ 3.9 %] |Copying data : [ 4.4 %] \Copying data : [ 4.9 %] -Copying data : [ 5.4 %] /Copying data : [ 6.0 %] |Copying data : [ 6.5 %] \Copying data : [ 7.1 %] -Copying data : [ 7.7 %] /Copying data : [ 8.3 %] |Copying data : [ 8.8 %] \Copying data : [ 9.4 %] -Copying data : [ 10.1 %] /Copying data : [ 11.6 %] |Copying data : [ 13.1 %] \Copying data : [ 14.7 %] -Copying data : [ 16.2 %] /Copying data : [ 17.8 %] |Copying data : [ 19.3 %] \Copying data : [ 20.7 %] -Copying data : [ 22.0 %] /Copying data : [ 23.6 %] |Copying data : [ 25.1 %] \Copying data : [ 26.6 %] -Copying data : [ 28.2 %] /Copying data : [ 29.7 %] |Copying data : [ 30.5 %] \Copying data : [ 32.0 %] -Copying data : [ 33.5 %] /Copying data : [ 35.0 %] |Copying data : [ 36.6 %] \Copying data : [ 38.1 %] -Copying data : [ 39.5 %] /Copying data : [ 40.2 %] |Copying data : [ 41.7 %] \Copying data : [ 43.3 %] -Copying data : [ 44.8 %] /Copying data : [ 46.3 %] |Copying data : [ 47.8 %] \Copying data : [ 49.4 %] -Copying data : [ 50.9 %] /Copying data : [ 52.4 %] |Copying data : [ 53.9 %] \Copying data : [ 55.5 %] -Copying data : [ 57.0 %] /Copying data : [ 58.5 %] |Copying data : [ 60.0 %] \Copying data : [ 61.6 %] -Copying data : [ 63.1 %] /Copying data : [ 64.6 %] |Copying data : [ 66.1 %] \Copying data : [ 67.5 %] -Copying data : [ 69.0 %] /Copying data : [ 70.6 %] |Copying data : [ 72.1 %] \Copying data : [ 73.7 %] -Copying data : [ 75.2 %] /Copying data : [ 76.3 %] |Copying data : [ 77.0 %] \Copying data : [ 77.6 %] -Copying data : [ 78.2 %] /Copying data : [ 78.8 %] |Copying data : [ 79.3 %] \Copying data : [ 79.8 %] -Copying data : [ 80.4 %] /Copying data : [ 80.9 %] |Copying data : [ 81.5 %] \Copying data : [ 82.0 %] -Copying data : [ 
82.5 %] /Copying data : [ 83.0 %] |Copying data : [ 83.6 %] \Copying data : [ 84.1 %] -Copying data : [ 84.7 %] /Copying data : [ 85.3 %] |Copying data : [ 86.0 %] \Copying data : [ 86.5 %] -Copying data : [ 87.1 %] /Copying data : [ 87.8 %] |Copying data : [ 89.3 %] \Copying data : [ 90.8 %] -Copying data : [ 92.4 %] /Copying data : [ 94.0 %] |Copying data : [ 95.5 %] \Copying data : [ 96.7 %] -Copying data : [ 97.3 %] /Copying data : [ 97.8 %] |Copying data : [ 98.1 %] \Copying data : [ 98.9 %] -Copying data : [ 99.7 %] /Copying data : [100.0 %] |
Copying data : [100.0 %] \
The dumpfile is saved to dumpfile.
makedumpfile Completed.
lock time: 90s646416us
write time: 1s352335us
hit time: 3s624169us
find time: 94s712561us
loop_time: 0s70230us
thread consume_time: 0s0us
filter_time[0]: 0s154772us
cp_time[0]: 0s359576us
compress_time[0]: 82s759306us
exec time: 95s208148us
#
********
# ./makedumpfile -c --num-thread 8 vmcore-3.19 dumpfile
Checking for memory holes : [ 0.0 %] /Checking for memory holes : [100.0 %] |Copying data : [ 1.8 %] \Copying data : [ 4.2 %] -Copying data : [ 6.7 %] /Copying data : [ 9.5 %] |Copying data : [ 12.2 %] \Copying data : [ 14.9 %] -Copying data : [ 17.7 %] /Copying data : [ 20.4 %] |Copying data : [ 23.1 %] \Copying data : [ 25.8 %] -Copying data : [ 28.5 %] /Copying data : [ 31.2 %] |Copying data : [ 33.9 %] \Copying data : [ 36.7 %] -Copying data : [ 39.5 %] /Copying data : [ 42.3 %] |Copying data : [ 45.0 %] \Copying data : [ 47.7 %] -Copying data : [ 50.3 %] /Copying data : [ 53.1 %] |Copying data : [ 55.8 %] \Copying data : [ 58.5 %] -Copying data : [ 61.2 %] /Copying data : [ 63.8 %] |Copying data : [ 66.1 %] \Copying data : [ 68.8 %] -Copying data : [ 71.7 %] /Copying data : [ 74.5 %] |Copying data : [ 77.2 %] \Copying data : [ 79.7 %] -Copying data : [ 82.2 %] /Copying data : [ 84.7 %] |Copying data : [ 87.3 %] \Copying data : [ 88.2 %] -Copying data : [ 88.3 %] /Copying data : [ 89.7 %] |Copying data : [ 92.4 %] \Copying data : [ 95.0 %] -Copying data : [ 97.5 %] /Copying data : [ 98.6 %] |Copying data : [100.0 %] \
Copying data : [100.0 %] -
The dumpfile is saved to dumpfile.
makedumpfile Completed.
lock time: 37s702803us
write time: 1s156990us
hit time: 2s3878us
find time: 39s988815us
loop_time: 0s37889us
thread consume_time: 0s0us
filter_time[0]: 0s57096us
cp_time[0]: 0s50824us
compress_time[0]: 34s102436us
filter_time[1]: 0s53229us
cp_time[1]: 0s43997us
compress_time[1]: 34s125652us
filter_time[2]: 0s56553us
cp_time[2]: 0s50431us
compress_time[2]: 34s54656us
filter_time[3]: 0s55513us
cp_time[3]: 0s51893us
compress_time[3]: 34s79953us
filter_time[4]: 0s58375us
cp_time[4]: 0s55276us
compress_time[4]: 34s74430us
filter_time[5]: 0s53602us
cp_time[5]: 0s55498us
compress_time[5]: 34s62992us
filter_time[6]: 0s57965us
cp_time[6]: 0s52278us
compress_time[6]: 34s124603us
filter_time[7]: 0s56307us
cp_time[7]: 0s52477us
compress_time[7]: 34s49827us
exec time: 40s316905us
#
[-- Attachment #3: test1-lzo.log --]
[-- Type: application/octet-stream, Size: 6548 bytes --]
# ./makedumpfile -l vmcore-3.19 dumpfile
Checking for memory holes : [ 0.0 %] /Checking for memory holes : [100.0 %] |Copying data : [ 0.7 %] \Copying data : [ 2.9 %] -Copying data : [ 5.2 %] /Copying data : [ 7.8 %] |Copying data : [ 10.3 %] \Copying data : [ 14.2 %] -Copying data : [ 18.0 %] /Copying data : [ 21.7 %] |Copying data : [ 25.4 %] \Copying data : [ 29.1 %] -Copying data : [ 32.9 %] /Copying data : [ 36.6 %] |Copying data : [ 40.2 %] \Copying data : [ 43.9 %] -Copying data : [ 47.6 %] /Copying data : [ 51.3 %] |Copying data : [ 54.8 %] \Copying data : [ 58.6 %] -Copying data : [ 62.3 %] /Copying data : [ 65.9 %] |Copying data : [ 69.4 %] \Copying data : [ 73.2 %] -Copying data : [ 76.7 %] /Copying data : [ 79.4 %] |Copying data : [ 81.8 %] \Copying data : [ 84.2 %] -Copying data : [ 86.8 %] /Copying data : [ 90.0 %] |Copying data : [ 93.7 %] \Copying data : [ 97.1 %] -Copying data : [100.0 %] /Copying data : [100.0 %] |
The dumpfile is saved to dumpfile.
makedumpfile Completed.
lock time: 0s0us
write time: 0s0us
hit time: 0s0us
find time: 0s0us
loop_time: 0s0us
thread consume_time: 0s0us
exec time: 30s331895us
#
******
# ./makedumpfile -l --num-thread 1 vmcore-3.19 dumpfile
Checking for memory holes : [ 0.0 %] /Checking for memory holes : [100.0 %] |Copying data : [ 0.1 %] \Copying data : [ 2.1 %] -Copying data : [ 3.9 %] /Copying data : [ 6.2 %] |Copying data : [ 8.6 %] \Copying data : [ 11.3 %] -Copying data : [ 15.2 %] /Copying data : [ 18.9 %] |Copying data : [ 22.6 %] \Copying data : [ 26.3 %] -Copying data : [ 30.0 %] /Copying data : [ 33.8 %] |Copying data : [ 37.5 %] \Copying data : [ 41.2 %] -Copying data : [ 44.9 %] /Copying data : [ 48.7 %] |Copying data : [ 52.4 %] \Copying data : [ 56.1 %] -Copying data : [ 59.9 %] /Copying data : [ 63.6 %] |Copying data : [ 67.3 %] \Copying data : [ 71.0 %] -Copying data : [ 74.7 %] /Copying data : [ 77.3 %] |Copying data : [ 77.8 %] \Copying data : [ 78.2 %] -Copying data : [ 80.3 %] /Copying data : [ 82.4 %] |Copying data : [ 84.7 %] \Copying data : [ 87.1 %] -Copying data : [ 89.9 %] /Copying data : [ 93.5 %] |Copying data : [ 96.9 %] \Copying data : [ 99.9 %] -Copying data : [100.0 %] /
Copying data : [100.0 %] |
The dumpfile is saved to dumpfile.
makedumpfile Completed.
lock time: 29s572487us
write time: 1s418602us
hit time: 2s736059us
find time: 32s664560us
loop_time: 0s77047us
thread consume_time: 0s0us
filter_time[0]: 0s100350us
cp_time[0]: 0s0us
compress_time[0]: 0s0us
exec time: 33s101991us
#
******
# echo 3 > /proc/sys/vm/drop_caches ; rm -rf dumpfile; ./makedumpfile -l --num-thread 8 vmcore-3.19 dumpfile
Checking for memory holes : [ 0.0 %] /Checking for memory holes : [100.0 %] |Copying data : [ 2.4 %] \Copying data : [ 5.3 %] -Copying data : [ 9.0 %] /Copying data : [ 12.6 %] |Copying data : [ 16.4 %] \Copying data : [ 19.4 %] -Copying data : [ 22.7 %] /Copying data : [ 26.2 %] |Copying data : [ 29.8 %] \Copying data : [ 33.5 %] -Copying data : [ 37.2 %] /Copying data : [ 40.7 %] |Copying data : [ 44.4 %] \Copying data : [ 48.0 %] -Copying data : [ 51.2 %] /Copying data : [ 54.8 %] |Copying data : [ 58.5 %] \Copying data : [ 62.2 %] -Copying data : [ 65.5 %] /Copying data : [ 69.2 %] |Copying data : [ 72.8 %] \Copying data : [ 76.5 %] -Copying data : [ 80.2 %] /Copying data : [ 84.1 %] |Copying data : [ 87.3 %] \Copying data : [ 90.0 %] -Copying data : [ 93.4 %] /Copying data : [ 97.0 %] |Copying data : [100.0 %] \
Copying data : [100.0 %] -
The dumpfile is saved to dumpfile.
makedumpfile Completed.
lock time: 26s564655us
write time: 1s153599us
hit time: 1s546357us
find time: 28s365307us
loop_time: 0s84746us
thread consume_time: 0s0us
filter_time[0]: 0s68056us
cp_time[0]: 0s0us
compress_time[0]: 0s0us
filter_time[1]: 0s69492us
cp_time[1]: 0s0us
compress_time[1]: 0s0us
filter_time[2]: 0s68802us
cp_time[2]: 0s0us
compress_time[2]: 0s0us
filter_time[3]: 0s69148us
cp_time[3]: 0s0us
compress_time[3]: 0s0us
filter_time[4]: 0s69572us
cp_time[4]: 0s0us
compress_time[4]: 0s0us
filter_time[5]: 0s68262us
cp_time[5]: 0s0us
compress_time[5]: 0s0us
filter_time[6]: 0s68632us
cp_time[6]: 0s0us
compress_time[6]: 0s0us
filter_time[7]: 0s70118us
cp_time[7]: 0s0us
compress_time[7]: 0s0us
exec time: 28s771737us
#
[-- Attachment #4: test2-1.log --]
[-- Type: application/octet-stream, Size: 404 bytes --]
# ./makedumpfile -c --num-thread 1 vmcore-3.19 dumpfile
Checking for memory holes : [ 0.0 %] /
Checking for memory holes : [100.0 %] |
makedumpfile Failed.
lock time: 0s0us
write time: 0s0us
hit time: 0s0us
find time: 0s0us
loop_time: 0s0us
thread consume_time: 0s0us
filter_time[0]: 0s136256us
cp_time[0]: 0s121550us
compress_time[0]: 76s118949us
exec time: 82s12723us
#
[-- Attachment #5: test2-2.log --]
[-- Type: application/octet-stream, Size: 404 bytes --]
# ./makedumpfile -c --num-thread 1 vmcore-3.19 dumpfile
Checking for memory holes : [ 0.0 %] /
Checking for memory holes : [100.0 %] |
makedumpfile Failed.
lock time: 0s0us
write time: 0s0us
hit time: 0s0us
find time: 0s0us
loop_time: 0s0us
thread consume_time: 0s0us
filter_time[0]: 0s91555us
cp_time[0]: 0s74878us
compress_time[0]: 41s973805us
exec time: 51s460377us
#
[-- Attachment #6: Type: text/plain, Size: 143 bytes --]
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2015-07-08 8:17 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-19 8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 01/10] Add readpage_kdump_compressed_parallel Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 02/10] Add mappage_elf_parallel Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 03/10] Add readpage_elf_parallel Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 04/10] Add read_pfn_parallel Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 05/10] Add function to initial bitmap for parallel use Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 06/10] Add filter_data_buffer_parallel Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 07/10] Add write_kdump_pages_parallel to allow parallel process Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 08/10] Initial and free data used for " Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 09/10] Make makedumpfile available to read and compress pages parallelly Zhou Wenjian
2015-06-19 8:56 ` [PATCH v2 10/10] Add usage and manual about multiple threads process Zhou Wenjian
2015-06-25 2:25 ` [PATCH v2 00/10] makedumpfile: parallel processing "Zhou, Wenjian/周文剑"
2015-06-26 7:07 ` Atsushi Kumagai
2015-06-26 7:27 ` "Zhou, Wenjian/周文剑"
2015-06-26 7:49 ` Atsushi Kumagai
2015-06-29 6:19 ` "Zhou, Wenjian/周文剑"
2015-06-30 9:06 ` Atsushi Kumagai
2015-07-06 13:19 ` "Zhou, Wenjian/周文剑"
2015-07-08 8:10 ` Atsushi Kumagai
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.