* [PATCH v2 00/10] makedumpfile: parallel processing
@ 2015-06-19  8:56 Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 01/10] Add readpage_kdump_compressed_parallel Zhou Wenjian
                   ` (10 more replies)
  0 siblings, 11 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19  8:56 UTC (permalink / raw)
  To: kexec

This patch set implements parallel processing by means of multiple threads.
With it, multiple threads can be used to read and compress pages, which
reduces the time needed to create the dumpfile.
This feature only supports creating a dumpfile in kdump-compressed format
from a vmcore in kdump-compressed or ELF format. Currently, sadump and
Xen kdump are not supported.
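
For example, with the options added at the end of this series (see the
manual addition in patch 10), a dump can be taken with four threads:

  # makedumpfile -d 31 --num-threads 4 /proc/vmcore dumpfile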

Qiao Nuohan (10):
  Add readpage_kdump_compressed_parallel
  Add mappage_elf_parallel
  Add readpage_elf_parallel
  Add read_pfn_parallel
  Add function to initial bitmap for parallel use
  Add filter_data_buffer_parallel
  Add write_kdump_pages_parallel to allow parallel process
  Initial and free data used for parallel process
  Make makedumpfile available to read and compress pages parallelly
  Add usage and manual about multiple threads process

 Makefile       |    2 +
 erase_info.c   |   29 ++-
 erase_info.h   |    2 +
 makedumpfile.8 |   24 ++
 makedumpfile.c | 1096 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 makedumpfile.h |   80 ++++
 print_info.c   |   16 +
 7 files changed, 1246 insertions(+), 3 deletions(-)



* [PATCH v2 01/10] Add readpage_kdump_compressed_parallel
  2015-06-19  8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
@ 2015-06-19  8:56 ` Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 02/10] Add mappage_elf_parallel Zhou Wenjian
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19  8:56 UTC (permalink / raw)
  To: kexec; +Cc: Qiao Nuohan

From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>

readpage_kdump_compressed_parallel enables reading pages from a vmcore
in kdump-compressed format in parallel. fd_memory and bitmap_memory
must be initialized and handed to each thread individually to avoid
conflicts.

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
Signed-off-by: Zhou wenjian <zhouwj-fnst@cn.fujitsu.com>
---
 makedumpfile.c |  137 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 137 insertions(+), 0 deletions(-)
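
The intended per-thread calling pattern (wired up in patch 07) looks
roughly like the sketch below, assuming the helpers added later in the
series (FD_MEMORY_PARALLEL from patch 07, initialize_bitmap_memory_parallel
from patch 05):

	/* per-thread state: nothing here is shared with other threads */
	int fd_memory = FD_MEMORY_PARALLEL(thread_num);
	struct dump_bitmap bitmap_memory_parallel = {0};

	bitmap_memory_parallel.buf = malloc(BUFSIZE_BITMAP);
	initialize_bitmap_memory_parallel(&bitmap_memory_parallel, thread_num);

	if (!readpage_kdump_compressed_parallel(fd_memory, paddr, buf,
						&bitmap_memory_parallel))
		ERRMSG("Can't get the page data.\n");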

diff --git a/makedumpfile.c b/makedumpfile.c
index cc71f20..3657d4f 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -251,6 +251,20 @@ pfn_to_pos(mdf_pfn_t pfn)
 	return desc_pos;
 }
 
+unsigned long
+pfn_to_pos_parallel(mdf_pfn_t pfn, struct dump_bitmap* bitmap_memory_parallel)
+{
+	unsigned long desc_pos;
+	mdf_pfn_t i;
+
+	desc_pos = info->valid_pages[pfn / BITMAP_SECT_LEN];
+	for (i = round(pfn, BITMAP_SECT_LEN); i < pfn; i++)
+		if (is_dumpable(bitmap_memory_parallel, i, NULL))
+			desc_pos++;
+
+	return desc_pos;
+}
+
 int
 read_page_desc(unsigned long long paddr, page_desc_t *pd)
 {
@@ -293,6 +307,50 @@ read_page_desc(unsigned long long paddr, page_desc_t *pd)
 	return TRUE;
 }
 
+int
+read_page_desc_parallel(int fd_memory, unsigned long long paddr,
+			page_desc_t *pd,
+			struct dump_bitmap* bitmap_memory_parallel)
+{
+	struct disk_dump_header *dh;
+	unsigned long desc_pos;
+	mdf_pfn_t pfn;
+	off_t offset;
+
+	/*
+	 * Find page descriptor
+	 */
+	dh = info->dh_memory;
+	offset
+	    = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size + dh->bitmap_blocks)
+		* dh->block_size;
+	pfn = paddr_to_pfn(paddr);
+	desc_pos = pfn_to_pos_parallel(pfn, bitmap_memory_parallel);
+	offset += (off_t)desc_pos * sizeof(page_desc_t);
+	if (lseek(fd_memory, offset, SEEK_SET) < 0) {
+		ERRMSG("Can't seek %s. %s\n",
+				 info->name_memory, strerror(errno));
+		return FALSE;
+	}
+
+	/*
+	 * Read page descriptor
+	 */
+	if (read(fd_memory, pd, sizeof(*pd)) != sizeof(*pd)) {
+		ERRMSG("Can't read %s. %s\n",
+				info->name_memory, strerror(errno));
+		return FALSE;
+	}
+
+	/*
+	 * Sanity check
+	 */
+	if (pd->size > dh->block_size)
+		return FALSE;
+
+	return TRUE;
+}
+
 static void
 unmap_cache(struct cache_entry *entry)
 {
@@ -589,6 +647,85 @@ readpage_kdump_compressed(unsigned long long paddr, void *bufptr)
 	return TRUE;
 }
 
+static int
+readpage_kdump_compressed_parallel(int fd_memory, unsigned long long paddr,
+				   void *bufptr,
+				   struct dump_bitmap* bitmap_memory_parallel)
+{
+	page_desc_t pd;
+	char buf[info->page_size], *rdbuf;
+	int ret;
+	unsigned long retlen;
+
+	if (!is_dumpable(bitmap_memory_parallel, paddr_to_pfn(paddr), NULL)) {
+		ERRMSG("pfn(%llx) is excluded from %s.\n",
+				paddr_to_pfn(paddr), info->name_memory);
+		return FALSE;
+	}
+
+	if (!read_page_desc_parallel(fd_memory, paddr, &pd,
+						bitmap_memory_parallel)) {
+		ERRMSG("Can't read page_desc: %llx\n", paddr);
+		return FALSE;
+	}
+
+	if (lseek(fd_memory, pd.offset, SEEK_SET) < 0) {
+		ERRMSG("Can't seek %s. %s\n",
+				info->name_memory, strerror(errno));
+		return FALSE;
+	}
+
+	/*
+	 * Read page data
+	 */
+	rdbuf = pd.flags & (DUMP_DH_COMPRESSED_ZLIB | DUMP_DH_COMPRESSED_LZO |
+		DUMP_DH_COMPRESSED_SNAPPY) ? buf : bufptr;
+	if (read(fd_memory, rdbuf, pd.size) != pd.size) {
+		ERRMSG("Can't read %s. %s\n",
+				info->name_memory, strerror(errno));
+		return FALSE;
+	}
+
+	if (pd.flags & DUMP_DH_COMPRESSED_ZLIB) {
+		retlen = info->page_size;
+		ret = uncompress((unsigned char *)bufptr, &retlen,
+					(unsigned char *)buf, pd.size);
+		if ((ret != Z_OK) || (retlen != info->page_size)) {
+			ERRMSG("Uncompress failed: %d\n", ret);
+			return FALSE;
+		}
+#ifdef USELZO
+	} else if (info->flag_lzo_support
+		   && (pd.flags & DUMP_DH_COMPRESSED_LZO)) {
+		retlen = info->page_size;
+		ret = lzo1x_decompress_safe((unsigned char *)buf, pd.size,
+					    (unsigned char *)bufptr, &retlen,
+					    LZO1X_MEM_DECOMPRESS);
+		if ((ret != LZO_E_OK) || (retlen != info->page_size)) {
+			ERRMSG("Uncompress failed: %d\n", ret);
+			return FALSE;
+		}
+#endif
+#ifdef USESNAPPY
+	} else if ((pd.flags & DUMP_DH_COMPRESSED_SNAPPY)) {
+
+		ret = snappy_uncompressed_length(buf, pd.size, (size_t *)&retlen);
+		if (ret != SNAPPY_OK) {
+			ERRMSG("Uncompress failed: %d\n", ret);
+			return FALSE;
+		}
+
+		ret = snappy_uncompress(buf, pd.size, bufptr, (size_t *)&retlen);
+		if ((ret != SNAPPY_OK) || (retlen != info->page_size)) {
+			ERRMSG("Uncompress failed: %d\n", ret);
+			return FALSE;
+		}
+#endif
+	}
+
+	return TRUE;
+}
+
 int
 readmem(int type_addr, unsigned long long addr, void *bufptr, size_t size)
 {
-- 
1.7.1



* [PATCH v2 02/10] Add mappage_elf_parallel
  2015-06-19  8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 01/10] Add readpage_kdump_compressed_parallel Zhou Wenjian
@ 2015-06-19  8:56 ` Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 03/10] Add readpage_elf_parallel Zhou Wenjian
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19  8:56 UTC (permalink / raw)
  To: kexec; +Cc: Qiao Nuohan

From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>

mappage_elf_parallel enables mmap()ing an ELF-format vmcore into memory
in parallel. A later patch will use the mmapped memory to get the data
of each page. fd_memory and mmap_cache must be initialized and handed
to each thread individually to avoid conflicts.

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
 makedumpfile.c |   97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 makedumpfile.h |   14 ++++++++
 2 files changed, 111 insertions(+), 0 deletions(-)
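
Callers (see read_pfn_parallel in patch 04) use the returned pointer
roughly as sketched below; each thread owns its mmap_cache, which must
start out empty (mmap_buf == MAP_FAILED, as set up in patch 08):

	struct mmap_cache *mmap_cache = MMAP_CACHE_PARALLEL(thread_num);
	char *mapbuf;

	mapbuf = mappage_elf_parallel(fd_memory, pgaddr, mmap_cache);
	if (mapbuf) {
		/* the whole page lies inside the current mapping */
		memcpy(buf, mapbuf, info->page_size);
	} else {
		/* NULL: fractional page, or mmap() has been disabled;
		 * fall back to readpage_elf_parallel() (patch 03) */
	}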

diff --git a/makedumpfile.c b/makedumpfile.c
index 3657d4f..d1b4bc2 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -394,6 +394,46 @@ update_mmap_range(off_t offset, int initial) {
 }
 
 static int
+update_mmap_range_parallel(int fd_memory, off_t offset,
+			   struct mmap_cache *mmap_cache)
+{
+	off_t start_offset, end_offset;
+	off_t map_size;
+	off_t max_offset = get_max_file_offset();
+	off_t pt_load_end = offset_to_pt_load_end(offset);
+
+	/*
+	 * mmap_buf must be cleaned
+	 */
+	if (mmap_cache->mmap_buf != MAP_FAILED)
+		munmap(mmap_cache->mmap_buf, mmap_cache->mmap_end_offset
+					     - mmap_cache->mmap_start_offset);
+
+	/*
+	 * offset for mmap() must be page aligned.
+	 */
+	start_offset = roundup(offset, info->page_size);
+	end_offset = MIN(max_offset, round(pt_load_end, info->page_size));
+
+	if (!pt_load_end || (end_offset - start_offset) <= 0)
+		return FALSE;
+
+	map_size = MIN(end_offset - start_offset, info->mmap_region_size);
+
+	mmap_cache->mmap_buf = mmap(NULL, map_size, PROT_READ, MAP_PRIVATE,
+					fd_memory, start_offset);
+
+	if (mmap_cache->mmap_buf == MAP_FAILED) {
+		return FALSE;
+	}
+
+	mmap_cache->mmap_start_offset = start_offset;
+	mmap_cache->mmap_end_offset = start_offset + map_size;
+
+	return TRUE;
+}
+
+static int
 is_mapped_with_mmap(off_t offset) {
 
 	if (info->flag_usemmap == MMAP_ENABLE
@@ -404,6 +444,15 @@ is_mapped_with_mmap(off_t offset) {
 		return FALSE;
 }
 
+static int
+is_mapped_with_mmap_parallel(off_t offset, struct mmap_cache *mmap_cache) {
+	if (offset >= mmap_cache->mmap_start_offset
+	    && offset < mmap_cache->mmap_end_offset)
+		return TRUE;
+	else
+		return FALSE;
+}
+
 int
 initialize_mmap(void) {
 	unsigned long long phys_start;
@@ -458,6 +507,54 @@ mappage_elf(unsigned long long paddr)
 	return info->mmap_buf + (offset - info->mmap_start_offset);
 }
 
+static char *
+mappage_elf_parallel(int fd_memory, unsigned long long paddr,
+		     struct mmap_cache *mmap_cache)
+{
+	off_t offset, offset2;
+	int flag_usemmap;
+
+	pthread_rwlock_rdlock(&info->usemmap_rwlock);
+	flag_usemmap = info->flag_usemmap;
+	pthread_rwlock_unlock(&info->usemmap_rwlock);
+	if (flag_usemmap != MMAP_ENABLE)
+		return NULL;
+
+	offset = paddr_to_offset(paddr);
+	if (!offset || page_is_fractional(offset))
+		return NULL;
+
+	offset2 = paddr_to_offset(paddr + info->page_size - 1);
+	if (!offset2)
+		return NULL;
+
+	if (offset2 - offset != info->page_size - 1)
+		return NULL;
+
+	if (!is_mapped_with_mmap_parallel(offset, mmap_cache) &&
+	    !update_mmap_range_parallel(fd_memory, offset, mmap_cache)) {
+		ERRMSG("Can't read the dump memory(%s) with mmap().\n",
+		       info->name_memory);
+
+		ERRMSG("This kernel might have some problems about mmap().\n");
+		ERRMSG("read() will be used instead of mmap() from now.\n");
+
+		/*
+		 * Fall back to read().
+		 */
+		pthread_rwlock_wrlock(&info->usemmap_rwlock);
+		info->flag_usemmap = MMAP_DISABLE;
+		pthread_rwlock_unlock(&info->usemmap_rwlock);
+		return NULL;
+	}
+
+	if (offset < mmap_cache->mmap_start_offset ||
+	    offset + info->page_size > mmap_cache->mmap_end_offset)
+		return NULL;
+
+	return mmap_cache->mmap_buf + (offset - mmap_cache->mmap_start_offset);
+}
+
 static int
 read_from_vmcore(off_t offset, void *bufptr, unsigned long size)
 {
diff --git a/makedumpfile.h b/makedumpfile.h
index 3d6661f..bff134e 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -42,6 +42,7 @@
 #include "dwarf_info.h"
 #include "diskdump_mod.h"
 #include "sadump_mod.h"
+#include <pthread.h>
 
 /*
  * Result of command
@@ -956,6 +957,15 @@ typedef unsigned long int ulong;
 typedef unsigned long long int ulonglong;
 
 /*
+ * for parallel process
+ */
+struct mmap_cache {
+	char	*mmap_buf;
+	off_t	mmap_start_offset;
+	off_t   mmap_end_offset;
+};
+
+/*
  * makedumpfile header
  *   For re-arranging the dump data on different architecture, all the
  *   variables are defined by 64bits. The size of signature is aligned
@@ -1219,6 +1229,10 @@ struct DumpInfo {
 	 * for cyclic_splitting mode, setup splitblock_size
 	 */
 	long long splitblock_size;
+	/*
+	 * for parallel process
+	 */
+	pthread_rwlock_t usemmap_rwlock;
 };
 extern struct DumpInfo		*info;
 
-- 
1.7.1



* [PATCH v2 03/10] Add readpage_elf_parallel
  2015-06-19  8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 01/10] Add readpage_kdump_compressed_parallel Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 02/10] Add mappage_elf_parallel Zhou Wenjian
@ 2015-06-19  8:56 ` Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 04/10] Add read_pfn_parallel Zhou Wenjian
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19  8:56 UTC (permalink / raw)
  To: kexec; +Cc: Qiao Nuohan

From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>

readpage_elf_parallel enables reading pages from an ELF-format vmcore
in parallel. fd_memory must be initialized and handed to each thread
individually to avoid conflicts.

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
 makedumpfile.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 98 insertions(+), 0 deletions(-)
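
As a worked example of the fractional-page handling (using the numbers
from the comment in the code): with a 16k page size, pfn N starts at
pfn_to_paddr(pfn:N) = 0x40ffda4000, but the PT_LOAD segment only begins
at phys_start = 0x40ffda7000, so frac_head = 0x3000; the first 0x3000
bytes of bufptr are zero-filled and only the remaining bytes are read
from the vmcore.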

diff --git a/makedumpfile.c b/makedumpfile.c
index d1b4bc2..44c78b4 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -575,6 +575,27 @@ read_from_vmcore(off_t offset, void *bufptr, unsigned long size)
 	return TRUE;
 }
 
+static int
+read_from_vmcore_parallel(int fd_memory, off_t offset, void *bufptr,
+			  unsigned long size)
+{
+	const off_t failed = (off_t)-1;
+
+	if (lseek(fd_memory, offset, SEEK_SET) == failed) {
+		ERRMSG("Can't seek the dump memory(%s). (offset: %llx) %s\n",
+		       info->name_memory, (unsigned long long)offset, strerror(errno));
+		return FALSE;
+	}
+
+	if (read(fd_memory, bufptr, size) != size) {
+		ERRMSG("Can't read the dump memory(%s). %s\n",
+		       info->name_memory, strerror(errno));
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
 /*
  * This function is specific for reading page from ELF.
  *
@@ -669,6 +690,83 @@ readpage_elf(unsigned long long paddr, void *bufptr)
 }
 
 static int
+readpage_elf_parallel(int fd_memory, unsigned long long paddr, void *bufptr)
+{
+	off_t offset1, offset2;
+	size_t size1, size2;
+	unsigned long long phys_start, phys_end, frac_head = 0;
+
+	offset1 = paddr_to_offset(paddr);
+	offset2 = paddr_to_offset(paddr + info->page_size);
+	phys_start = paddr;
+	phys_end = paddr + info->page_size;
+
+	/*
+	 * Check the case phys_start isn't aligned by page size like below:
+	 *
+	 *                           phys_start
+	 *                           = 0x40ffda7000
+	 *         |<-- frac_head -->|------------- PT_LOAD -------------
+	 *     ----+-----------------------+---------------------+----
+	 *         |         pfn:N         |       pfn:N+1       | ...
+	 *     ----+-----------------------+---------------------+----
+	 *         |
+	 *     pfn_to_paddr(pfn:N)               # page size = 16k
+	 *     = 0x40ffda4000
+	 */
+	if (!offset1) {
+		phys_start = page_head_to_phys_start(paddr);
+		offset1 = paddr_to_offset(phys_start);
+		frac_head = phys_start - paddr;
+		memset(bufptr, 0, frac_head);
+	}
+
+	/*
+	 * Check the case phys_end isn't aligned by page size like the
+	 * phys_start's case.
+	 */
+	if (!offset2) {
+		phys_end = page_head_to_phys_end(paddr);
+		offset2 = paddr_to_offset(phys_end);
+		memset(bufptr + (phys_end - paddr), 0, info->page_size
+							- (phys_end - paddr));
+	}
+
+	/*
+	 * Check the separated page on different PT_LOAD segments.
+	 */
+	if (offset1 + (phys_end - phys_start) == offset2) {
+		size1 = phys_end - phys_start;
+	} else {
+		for (size1 = 1; size1 < info->page_size - frac_head; size1++) {
+			offset2 = paddr_to_offset(phys_start + size1);
+			if (offset1 + size1 != offset2)
+				break;
+		}
+	}
+
+	if(!read_from_vmcore_parallel(fd_memory, offset1, bufptr + frac_head,
+								size1)) {
+		ERRMSG("Can't read the dump memory(%s).\n",
+		       info->name_memory);
+		return FALSE;
+	}
+
+	if (size1 + frac_head != info->page_size) {
+		size2 = phys_end - (phys_start + size1);
+
+		if(!read_from_vmcore_parallel(fd_memory, offset2,
+					bufptr + frac_head + size1, size2)) {
+			ERRMSG("Can't read the dump memory(%s).\n",
+			       info->name_memory);
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+static int
 readpage_kdump_compressed(unsigned long long paddr, void *bufptr)
 {
 	page_desc_t pd;
-- 
1.7.1



* [PATCH v2 04/10] Add read_pfn_parallel
  2015-06-19  8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
                   ` (2 preceding siblings ...)
  2015-06-19  8:56 ` [PATCH v2 03/10] Add readpage_elf_parallel Zhou Wenjian
@ 2015-06-19  8:56 ` Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 05/10] Add function to initial bitmap for parallel use Zhou Wenjian
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19  8:56 UTC (permalink / raw)
  To: kexec; +Cc: Qiao Nuohan

From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>

read_pfn_parallel enables reading pages from a vmcore in parallel.
The supported formats are kdump-compressed and ELF; reading ELF via
mmap() is also supported.

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
 Makefile       |    2 ++
 makedumpfile.c |   34 ++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 0 deletions(-)
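
In the worker threads added in patch 07, this becomes the single entry
point for fetching a page; a rough sketch of the call site there:

	if (!read_pfn_parallel(fd_memory, pfn, buf,
			       &bitmap_memory_parallel, mmap_cache))
		goto fail;

	filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
				    info->page_size,
				    &info->filter_mutex);	/* patch 06 */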

diff --git a/Makefile b/Makefile
index fc21a3f..b1daf5b 100644
--- a/Makefile
+++ b/Makefile
@@ -67,6 +67,8 @@ LIBS := -lsnappy $(LIBS)
 CFLAGS += -DUSESNAPPY
 endif
 
+LIBS := -lpthread $(LIBS)
+
 all: makedumpfile
 
 $(OBJ_PART): $(SRC_PART)
diff --git a/makedumpfile.c b/makedumpfile.c
index 44c78b4..e15855b 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -6349,6 +6349,40 @@ read_pfn(mdf_pfn_t pfn, unsigned char *buf)
 }
 
 int
+read_pfn_parallel(int fd_memory, mdf_pfn_t pfn, unsigned char *buf,
+		  struct dump_bitmap* bitmap_memory_parallel,
+		  struct mmap_cache *mmap_cache)
+{
+	unsigned long long paddr;
+	unsigned long long pgaddr;
+
+	paddr = pfn_to_paddr(pfn);
+
+	pgaddr = PAGEBASE(paddr);
+
+	if (info->flag_refiltering) {
+		if (!readpage_kdump_compressed_parallel(fd_memory, pgaddr, buf,
+						      bitmap_memory_parallel)) {
+			ERRMSG("Can't get the page data.\n");
+			return FALSE;
+		}
+	} else {
+		char *mapbuf = mappage_elf_parallel(fd_memory, pgaddr,
+						    mmap_cache);
+		if (mapbuf) {
+			memcpy(buf, mapbuf, info->page_size);
+		} else {
+			if (!readpage_elf_parallel(fd_memory, pgaddr, buf)) {
+				ERRMSG("Can't get the page data.\n");
+				return FALSE;
+			}
+		}
+	}
+
+	return TRUE;
+}
+
+int
 get_loads_dumpfile_cyclic(void)
 {
 	int i, phnum, num_new_load = 0;
-- 
1.7.1



* [PATCH v2 05/10] Add function to initial bitmap for parallel use
  2015-06-19  8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
                   ` (3 preceding siblings ...)
  2015-06-19  8:56 ` [PATCH v2 04/10] Add read_pfn_parallel Zhou Wenjian
@ 2015-06-19  8:56 ` Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 06/10] Add filter_data_buffer_parallel Zhou Wenjian
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19  8:56 UTC (permalink / raw)
  To: kexec; +Cc: Qiao Nuohan

From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>

initialize_bitmap_memory_parallel and initialize_2nd_bitmap_parallel
are used by the parallel processing code to avoid conflicts on the bitmaps.

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
 makedumpfile.c |   20 ++++++++++++++++++++
 makedumpfile.h |   18 ++++++++++++++++++
 2 files changed, 38 insertions(+), 0 deletions(-)
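
Each worker thread in patch 07 calls these once, with buffers of its
own; a sketch:

	struct dump_bitmap bitmap_parallel = {0};

	if (info->fd_bitmap) {
		bitmap_parallel.buf = malloc(BUFSIZE_BITMAP);
		initialize_2nd_bitmap_parallel(&bitmap_parallel, thread_num);
	}
	/* ... later: is_dumpable(&bitmap_parallel, pfn, cycle) ... */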

diff --git a/makedumpfile.c b/makedumpfile.c
index e15855b..9c5da35 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -3411,6 +3411,16 @@ initialize_bitmap_memory(void)
 	return TRUE;
 }
 
+void
+initialize_bitmap_memory_parallel(struct dump_bitmap *bitmap, int thread_num)
+{
+	bitmap->fd = FD_BITMAP_MEMORY_PARALLEL(thread_num);
+	bitmap->file_name = info->name_memory;
+	bitmap->no_block = -1;
+	memset(bitmap->buf, 0, BUFSIZE_BITMAP);
+	bitmap->offset = info->bitmap_memory->offset;
+}
+
 int
 calibrate_machdep_info(void)
 {
@@ -3725,6 +3735,16 @@ initialize_2nd_bitmap(struct dump_bitmap *bitmap)
 	bitmap->offset = info->len_bitmap / 2;
 }
 
+void
+initialize_2nd_bitmap_parallel(struct dump_bitmap *bitmap, int thread_num)
+{
+	bitmap->fd = FD_BITMAP_PARALLEL(thread_num);
+	bitmap->file_name = info->name_bitmap;
+	bitmap->no_block = -1;
+	memset(bitmap->buf, 0, BUFSIZE_BITMAP);
+	bitmap->offset = info->len_bitmap / 2;
+}
+
 int
 set_bitmap_file(struct dump_bitmap *bitmap, mdf_pfn_t pfn, int val)
 {
diff --git a/makedumpfile.h b/makedumpfile.h
index bff134e..4b0709c 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -429,6 +429,11 @@ do { \
 #define SPLITTING_SIZE_EI(i)	info->splitting_info[i].size_eraseinfo
 
 /*
+ * Macro for getting parallel info.
+ */
+#define FD_BITMAP_MEMORY_PARALLEL(i)	info->parallel_info[i].fd_bitmap_memory
+#define FD_BITMAP_PARALLEL(i)		info->parallel_info[i].fd_bitmap
+/*
  * kernel version
  *
  * NOTE: the format of kernel_version is as follows
@@ -1000,6 +1005,18 @@ struct splitting_info {
 	unsigned long		size_eraseinfo;
 } splitting_info_t;
 
+struct parallel_info {
+	int			fd_memory;
+	int 			fd_bitmap_memory;
+	int			fd_bitmap;
+	unsigned char		*buf;
+	unsigned char 		*buf_out;
+	struct mmap_cache	*mmap_cache;
+#ifdef USELZO
+	lzo_bytep		wrkmem;
+#endif
+} parallel_info_t;
+
 struct ppc64_vmemmap {
 	unsigned long		phys;
 	unsigned long		virt;
@@ -1136,6 +1153,7 @@ struct DumpInfo {
 	char			*name_dumpfile;
 	int			num_dumpfile;
 	struct splitting_info	*splitting_info;
+	struct parallel_info	*parallel_info;
 
 	/*
 	 * bitmap info:
-- 
1.7.1



* [PATCH v2 06/10] Add filter_data_buffer_parallel
  2015-06-19  8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
                   ` (4 preceding siblings ...)
  2015-06-19  8:56 ` [PATCH v2 05/10] Add function to initial bitmap for parallel use Zhou Wenjian
@ 2015-06-19  8:56 ` Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 07/10] Add write_kdump_pages_parallel to allow parallel process Zhou Wenjian
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19  8:56 UTC (permalink / raw)
  To: kexec; +Cc: Qiao Nuohan

From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>

filter_data_buffer_parallel enables filtering a buffer in parallel.

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
 erase_info.c |   29 ++++++++++++++++++++++++++++-
 erase_info.h |    2 ++
 2 files changed, 30 insertions(+), 1 deletions(-)
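
The mutex passed in serializes only the lookup in the shared filter_info
list (extract_filter_info()); the memset() on each thread's private
buffer still runs in parallel. Patch 07 calls it like this (a sketch):

	filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
				    info->page_size, &info->filter_mutex);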

diff --git a/erase_info.c b/erase_info.c
index e0e0f71..0b253d7 100644
--- a/erase_info.c
+++ b/erase_info.c
@@ -2328,7 +2328,6 @@ extract_filter_info(unsigned long long start_paddr,
 	return TRUE;
 }
 
-
 /*
  * External functions.
  */
@@ -2413,6 +2412,34 @@ filter_data_buffer(unsigned char *buf, unsigned long long paddr,
 	}
 }
 
+/*
+ * Filter buffer if the physical address is in filter_info.
+ */
+void
+filter_data_buffer_parallel(unsigned char *buf, unsigned long long paddr,
+					size_t size, pthread_mutex_t *mutex)
+{
+	struct filter_info fl_info;
+	unsigned char *buf_ptr;
+	int found = FALSE;
+
+	while (TRUE) {
+		pthread_mutex_lock(mutex);
+		found = extract_filter_info(paddr, paddr + size, &fl_info);
+		pthread_mutex_unlock(mutex);
+
+		if (found) {
+			buf_ptr = buf + (fl_info.paddr - paddr);
+			if (fl_info.nullify)
+				memset(buf_ptr, 0, fl_info.size);
+			else
+				memset(buf_ptr, fl_info.erase_ch, fl_info.size);
+		} else {
+			break;
+		}
+	}
+}
+
 unsigned long
 get_size_eraseinfo(void)
 {
diff --git a/erase_info.h b/erase_info.h
index 4d4957e..b363a40 100644
--- a/erase_info.h
+++ b/erase_info.h
@@ -60,6 +60,8 @@ extern unsigned long		num_erase_info;
 int gather_filter_info(void);
 void clear_filter_info(void);
 void filter_data_buffer(unsigned char *buf, unsigned long long paddr, size_t size);
+void filter_data_buffer_parallel(unsigned char *buf, unsigned long long paddr,
+					size_t size, pthread_mutex_t *mutex);
 unsigned long get_size_eraseinfo(void);
 int update_filter_info_raw(unsigned long long, int, int);
 
-- 
1.7.1



* [PATCH v2 07/10] Add write_kdump_pages_parallel to allow parallel process
  2015-06-19  8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
                   ` (5 preceding siblings ...)
  2015-06-19  8:56 ` [PATCH v2 06/10] Add filter_data_buffer_parallel Zhou Wenjian
@ 2015-06-19  8:56 ` Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 08/10] Initial and free data used for " Zhou Wenjian
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19  8:56 UTC (permalink / raw)
  To: kexec; +Cc: Qiao Nuohan

From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>

Use several threads to read and compress pages, and one thread to write
the produced pages into the dumpfile. The produced pages are stored in
a buffer, from which the consumer thread takes them.

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
Signed-off-by: Zhou wenjian <zhouwj-fnst@cn.fujitsu.com>
---
 makedumpfile.c |  440 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 makedumpfile.h |   45 ++++++
 2 files changed, 485 insertions(+), 0 deletions(-)
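
The buffer behaves as a ring indexed by pfn; a sketch of the invariant
that both sides rely on, assuming page_data_num slots:

	index = pfn % page_data_num;		/* producer */
	index = consuming_pfn % page_data_num;	/* consumer */

	/*
	 * A producer more than page_data_num pfns ahead of the consumer
	 * would wrap onto a slot that has not been written out yet, so
	 * a producer backs off while
	 *	(long)pfn - (long)consumed_pfn > page_data_num
	 * and retries the slot later.
	 */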

diff --git a/makedumpfile.c b/makedumpfile.c
index 9c5da35..ad38bf4 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -235,6 +235,31 @@ is_in_same_page(unsigned long vaddr1, unsigned long vaddr2)
 	return FALSE;
 }
 
+static inline unsigned long
+calculate_len_buf_out(long page_size)
+{
+	unsigned long len_buf_out_zlib, len_buf_out_lzo, len_buf_out_snappy;
+	unsigned long len_buf_out;
+
+	len_buf_out_zlib = len_buf_out_lzo = len_buf_out_snappy = 0;
+
+#ifdef USELZO
+	len_buf_out_lzo = page_size + page_size / 16 + 64 + 3;
+#endif
+
+#ifdef USESNAPPY
+	len_buf_out_snappy = snappy_max_compressed_length(page_size);
+#endif
+
+	len_buf_out_zlib = compressBound(page_size);
+
+	len_buf_out = MAX(len_buf_out_zlib,
+			  MAX(len_buf_out_lzo,
+			      len_buf_out_snappy));
+
+	return len_buf_out;
+}
+
 #define BITMAP_SECT_LEN 4096
 static inline int is_dumpable(struct dump_bitmap *, mdf_pfn_t, struct cycle *cycle);
 unsigned long
@@ -6671,6 +6696,421 @@ write_elf_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_page)
 	return TRUE;
 }
 
+void *
+kdump_thread_function_cyclic(void *arg) {
+	void *retval = PTHREAD_FAIL;
+	struct thread_args *kdump_thread_args = (struct thread_args *)arg;
+	struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
+	struct cycle *cycle = kdump_thread_args->cycle;
+	int page_data_num = kdump_thread_args->page_data_num;
+	mdf_pfn_t pfn;
+	mdf_pfn_t consumed_pfn;
+	int index;
+	int found;
+	int dumpable;
+	int fd_memory = 0;
+	struct dump_bitmap bitmap_parallel = {0};
+	struct dump_bitmap bitmap_memory_parallel = {0};
+	unsigned char *buf = NULL, *buf_out = NULL;
+	struct mmap_cache *mmap_cache =
+			MMAP_CACHE_PARALLEL(kdump_thread_args->thread_num);
+	unsigned long size_out;
+#ifdef USELZO
+	lzo_bytep wrkmem = WRKMEM_PARALLEL(kdump_thread_args->thread_num);
+#endif
+#ifdef USESNAPPY
+	unsigned long len_buf_out_snappy =
+				snappy_max_compressed_length(info->page_size);
+#endif
+
+	buf = BUF_PARALLEL(kdump_thread_args->thread_num);
+	buf_out = BUF_OUT_PARALLEL(kdump_thread_args->thread_num);
+
+	fd_memory = FD_MEMORY_PARALLEL(kdump_thread_args->thread_num);
+
+	if (info->fd_bitmap) {
+		bitmap_parallel.buf = malloc(BUFSIZE_BITMAP);
+		initialize_2nd_bitmap_parallel(&bitmap_parallel,
+					kdump_thread_args->thread_num);
+	}
+
+	if (info->flag_refiltering) {
+		bitmap_memory_parallel.buf = malloc(BUFSIZE_BITMAP);
+		initialize_bitmap_memory_parallel(&bitmap_memory_parallel,
+						kdump_thread_args->thread_num);
+	}
+
+	while (1) {
+		/* get next pfn */
+		pthread_mutex_lock(&info->current_pfn_mutex);
+		pfn = info->current_pfn;
+		info->current_pfn++;
+		pthread_mutex_unlock(&info->current_pfn_mutex);
+
+		if (pfn >= kdump_thread_args->end_pfn)
+			break;
+
+		index = -1;
+		found = FALSE;
+
+		while (found == FALSE) {
+			/*
+			 * need a cancellation point here
+			 */
+			sleep(0);
+
+			index = pfn % page_data_num;
+
+			if (page_data_buf[index].ready != 0)
+				continue;
+
+			if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
+				continue;
+
+			if (page_data_buf[index].ready != 0)
+				goto unlock;
+
+			pthread_mutex_lock(&info->consumed_pfn_mutex);
+			if ((long)page_data_buf[index].pfn >
+						(long)info->consumed_pfn)
+				info->consumed_pfn = page_data_buf[index].pfn;
+			consumed_pfn = info->consumed_pfn;
+			pthread_mutex_unlock(&info->consumed_pfn_mutex);
+
+			/*
+			 * leave space for slow producer
+			 */
+			if ((long)pfn - (long)consumed_pfn > page_data_num)
+				goto unlock;
+
+			found = TRUE;
+
+			page_data_buf[index].pfn = pfn;
+			page_data_buf[index].ready = 1;
+
+			if (!info->fd_bitmap)
+				dumpable = is_dumpable(info->bitmap2,
+						pfn - kdump_thread_args->start_pfn,
+						cycle);
+			else
+				dumpable = is_dumpable(&bitmap_parallel,
+						pfn - kdump_thread_args->start_pfn,
+						cycle);
+			if (!dumpable) {
+				page_data_buf[index].dumpable = FALSE;
+				goto unlock;
+			}
+
+			page_data_buf[index].dumpable = TRUE;
+
+			if (!read_pfn_parallel(fd_memory, pfn, buf,
+					       &bitmap_memory_parallel,
+					       mmap_cache))
+					goto fail;
+
+			filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
+							info->page_size,
+							&info->filter_mutex);
+
+			if ((info->dump_level & DL_EXCLUDE_ZERO)
+			    && is_zero_page(buf, info->page_size)) {
+				page_data_buf[index].zero = TRUE;
+				goto unlock;
+			}
+
+			page_data_buf[index].zero = FALSE;
+
+			/*
+			 * Compress the page data.
+			 */
+			size_out = kdump_thread_args->len_buf_out;
+			if ((info->flag_compress & DUMP_DH_COMPRESSED_ZLIB)
+			    && ((size_out = kdump_thread_args->len_buf_out),
+				compress2(buf_out, &size_out, buf,
+					  info->page_size,
+					  Z_BEST_SPEED) == Z_OK)
+			    && (size_out < info->page_size)) {
+				page_data_buf[index].flags =
+							DUMP_DH_COMPRESSED_ZLIB;
+				page_data_buf[index].size  = size_out;
+				memcpy(page_data_buf[index].buf, buf_out, size_out);
+#ifdef USELZO
+			} else if (info->flag_lzo_support
+				   && (info->flag_compress
+				       & DUMP_DH_COMPRESSED_LZO)
+				   && ((size_out = info->page_size),
+				       lzo1x_1_compress(buf, info->page_size,
+							buf_out, &size_out,
+							wrkmem) == LZO_E_OK)
+				   && (size_out < info->page_size)) {
+				page_data_buf[index].flags =
+							DUMP_DH_COMPRESSED_LZO;
+				page_data_buf[index].size  = size_out;
+				memcpy(page_data_buf[index].buf, buf_out, size_out);
+#endif
+#ifdef USESNAPPY
+			} else if ((info->flag_compress
+				    & DUMP_DH_COMPRESSED_SNAPPY)
+				   && ((size_out = len_buf_out_snappy),
+				       snappy_compress((char *)buf,
+						       info->page_size,
+						       (char *)buf_out,
+						       (size_t *)&size_out)
+				       == SNAPPY_OK)
+				   && (size_out < info->page_size)) {
+				page_data_buf[index].flags =
+						DUMP_DH_COMPRESSED_SNAPPY;
+				page_data_buf[index].size  = size_out;
+				memcpy(page_data_buf[index].buf, buf_out, size_out);
+#endif
+			} else {
+				page_data_buf[index].flags = 0;
+				page_data_buf[index].size  = info->page_size;
+				memcpy(page_data_buf[index].buf, buf, info->page_size);
+			}
+unlock:
+			pthread_mutex_unlock(&page_data_buf[index].mutex);
+
+		}
+	}
+
+	retval = NULL;
+
+fail:
+	if (bitmap_memory_parallel.fd > 0)
+		close(bitmap_memory_parallel.fd);
+	if (bitmap_parallel.buf != NULL)
+		free(bitmap_parallel.buf);
+	if (bitmap_memory_parallel.buf != NULL)
+		free(bitmap_memory_parallel.buf);
+
+	pthread_exit(retval);
+}
+
+int
+write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
+				  struct cache_data *cd_page,
+				  struct page_desc *pd_zero,
+				  off_t *offset_data, struct cycle *cycle)
+{
+	int ret = FALSE;
+	int res;
+	unsigned long len_buf_out;
+	mdf_pfn_t per;
+	mdf_pfn_t start_pfn, end_pfn;
+	struct page_desc pd;
+	struct timeval tv_start;
+	struct timeval last, new;
+	unsigned long long consuming_pfn;
+	pthread_t **threads = NULL;
+	struct thread_args *kdump_thread_args = NULL;
+	void *thread_result;
+	int page_data_num;
+	struct page_data *page_data_buf = NULL;
+	int i;
+	int index;
+
+	if (info->flag_elf_dumpfile)
+		return FALSE;
+
+	res = pthread_mutex_init(&info->current_pfn_mutex, NULL);
+	if (res != 0) {
+		ERRMSG("Can't initialize current_pfn_mutex. %s\n",
+				strerror(res));
+		goto out;
+	}
+
+	res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL);
+	if (res != 0) {
+		ERRMSG("Can't initialize consumed_pfn_mutex. %s\n",
+				strerror(res));
+		goto out;
+	}
+
+	res = pthread_mutex_init(&info->filter_mutex, NULL);
+	if (res != 0) {
+		ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res));
+		goto out;
+	}
+
+	res = pthread_rwlock_init(&info->usemmap_rwlock, NULL);
+	if (res != 0) {
+		ERRMSG("Can't initialize usemmap_rwlock. %s\n", strerror(res));
+		goto out;
+	}
+
+	len_buf_out = calculate_len_buf_out(info->page_size);
+
+	per = info->num_dumpable / 10000;
+	per = per ? per : 1;
+
+	gettimeofday(&tv_start, NULL);
+
+	start_pfn = cycle->start_pfn;
+	end_pfn   = cycle->end_pfn;
+
+	info->current_pfn = start_pfn;
+	info->consumed_pfn = start_pfn - 1;
+
+	threads = info->threads;
+	kdump_thread_args = info->kdump_thread_args;
+
+	page_data_num = info->num_buffers;
+	page_data_buf = info->page_data_buf;
+
+	for (i = 0; i < page_data_num; i++) {
+		/*
+		 * producer will use pfn in page_data_buf to decide the
+		 * consumed pfn
+		 */
+		page_data_buf[i].pfn = start_pfn - 1;
+		page_data_buf[i].ready = 0;
+		res = pthread_mutex_init(&page_data_buf[i].mutex, NULL);
+		if (res != 0) {
+			ERRMSG("Can't initialize mutex of page_data_buf. %s\n",
+					strerror(res));
+			goto out;
+		}
+	}
+
+	for (i = 0; i < info->num_threads; i++) {
+		kdump_thread_args[i].thread_num = i;
+		kdump_thread_args[i].len_buf_out = len_buf_out;
+		kdump_thread_args[i].start_pfn = start_pfn;
+		kdump_thread_args[i].end_pfn = end_pfn;
+		kdump_thread_args[i].page_data_num = page_data_num;
+		kdump_thread_args[i].page_data_buf = page_data_buf;
+		kdump_thread_args[i].cycle = cycle;
+
+		res = pthread_create(threads[i], NULL,
+				     kdump_thread_function_cyclic,
+				     (void *)&kdump_thread_args[i]);
+		if (res != 0) {
+			ERRMSG("Can't create thread %d. %s\n",
+					i, strerror(res));
+			goto out;
+		}
+	}
+
+	consuming_pfn = start_pfn;
+	index = -1;
+
+	gettimeofday(&last, NULL);
+
+	while (consuming_pfn < end_pfn) {
+		index = consuming_pfn % page_data_num;
+
+		gettimeofday(&new, NULL);
+		if (new.tv_sec - last.tv_sec > WAIT_TIME) {
+			ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn);
+			goto out;
+		}
+
+		/*
+		 * check pfn first without mutex locked to reduce the time
+		 * trying to lock the mutex
+		 */
+		if (page_data_buf[index].pfn != consuming_pfn)
+			continue;
+
+		pthread_mutex_lock(&page_data_buf[index].mutex);
+
+		/* check whether the found one is ready to be consumed */
+		if (page_data_buf[index].pfn != consuming_pfn ||
+		    page_data_buf[index].ready != 1) {
+			goto unlock;
+		}
+
+		if ((num_dumped % per) == 0)
+			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
+
+		/* next pfn is found, refresh last here */
+		last = new;
+		consuming_pfn++;
+		page_data_buf[index].ready = 0;
+
+		if (page_data_buf[index].dumpable == FALSE)
+			goto unlock;
+
+		num_dumped++;
+
+		if (page_data_buf[index].zero == TRUE) {
+			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
+				goto out;
+			pfn_zero++;
+		} else {
+			pd.flags      = page_data_buf[index].flags;
+			pd.size       = page_data_buf[index].size;
+			pd.page_flags = 0;
+			pd.offset     = *offset_data;
+			*offset_data  += pd.size;
+			/*
+			 * Write the page header.
+			 */
+			if (!write_cache(cd_header, &pd, sizeof(page_desc_t)))
+				goto out;
+			/*
+			 * Write the page data.
+			 */
+			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
+				goto out;
+
+		}
+unlock:
+		pthread_mutex_unlock(&page_data_buf[index].mutex);
+	}
+
+	ret = TRUE;
+	/*
+	 * print [100 %]
+	 */
+	print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
+	print_execution_time(PROGRESS_COPY, &tv_start);
+	PROGRESS_MSG("\n");
+
+out:
+	if (threads != NULL) {
+		for (i = 0; i < info->num_threads; i++) {
+			if (threads[i] != NULL) {
+				res = pthread_cancel(*threads[i]);
+				if (res != 0 && res != ESRCH)
+					ERRMSG("Can't cancel thread %d. %s\n",
+							i, strerror(res));
+			}
+		}
+
+		for (i = 0; i < info->num_threads; i++) {
+			if (threads[i] != NULL) {
+				res = pthread_join(*threads[i], &thread_result);
+				if (res != 0)
+					ERRMSG("Can't join with thread %d. %s\n",
+							i, strerror(res));
+
+				if (thread_result == PTHREAD_CANCELED)
+					DEBUG_MSG("Thread %d is cancelled.\n", i);
+				else if (thread_result == PTHREAD_FAIL)
+					DEBUG_MSG("Thread %d fails.\n", i);
+				else
+					DEBUG_MSG("Thread %d finishes.\n", i);
+
+			}
+		}
+	}
+
+	if (page_data_buf != NULL) {
+		for (i = 0; i < page_data_num; i++) {
+			pthread_mutex_destroy(&page_data_buf[i].mutex);
+		}
+	}
+
+	pthread_rwlock_destroy(&info->usemmap_rwlock);
+	pthread_mutex_destroy(&info->filter_mutex);
+	pthread_mutex_destroy(&info->consumed_pfn_mutex);
+	pthread_mutex_destroy(&info->current_pfn_mutex);
+
+	return ret;
+}
+
 int
 write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_page,
 			 struct page_desc *pd_zero, off_t *offset_data, struct cycle *cycle)
diff --git a/makedumpfile.h b/makedumpfile.h
index 4b0709c..5dbea60 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -431,8 +431,15 @@ do { \
 /*
  * Macro for getting parallel info.
  */
+#define FD_MEMORY_PARALLEL(i)		info->parallel_info[i].fd_memory
 #define FD_BITMAP_MEMORY_PARALLEL(i)	info->parallel_info[i].fd_bitmap_memory
 #define FD_BITMAP_PARALLEL(i)		info->parallel_info[i].fd_bitmap
+#define BUF_PARALLEL(i)			info->parallel_info[i].buf
+#define BUF_OUT_PARALLEL(i)		info->parallel_info[i].buf_out
+#define MMAP_CACHE_PARALLEL(i)		info->parallel_info[i].mmap_cache
+#ifdef USELZO
+#define WRKMEM_PARALLEL(i)		info->parallel_info[i].wrkmem
+#endif
 /*
  * kernel version
  *
@@ -964,12 +971,40 @@ typedef unsigned long long int ulonglong;
 /*
  * for parallel process
  */
+
+#define WAIT_TIME	(60 * 10)
+#define PTHREAD_FAIL	((void *)-2)
+
 struct mmap_cache {
 	char	*mmap_buf;
 	off_t	mmap_start_offset;
 	off_t   mmap_end_offset;
 };
 
+struct page_data
+{
+	mdf_pfn_t pfn;
+	int dumpable;
+	int zero;
+	unsigned int flags;
+	long size;
+	unsigned char *buf;
+	pthread_mutex_t mutex;
+	/*
+	 * whether the page_data is ready to be consumed
+	 */
+	int ready;
+};
+
+struct thread_args {
+	int thread_num;
+	unsigned long len_buf_out;
+	mdf_pfn_t start_pfn, end_pfn;
+	int page_data_num;
+	struct cycle *cycle;
+	struct page_data *page_data_buf;
+};
+
 /*
  * makedumpfile header
  *   For re-arranging the dump data on different architecture, all the
@@ -1250,7 +1285,17 @@ struct DumpInfo {
 	/*
 	 * for parallel process
 	 */
+	int num_threads;
+	int num_buffers;
+	pthread_t **threads;
+	struct thread_args *kdump_thread_args;
+	struct page_data *page_data_buf;
 	pthread_rwlock_t usemmap_rwlock;
+	mdf_pfn_t current_pfn;
+	pthread_mutex_t current_pfn_mutex;
+	mdf_pfn_t consumed_pfn;
+	pthread_mutex_t consumed_pfn_mutex;
+	pthread_mutex_t filter_mutex;
 };
 extern struct DumpInfo		*info;
 
-- 
1.7.1



* [PATCH v2 08/10] Initial and free data used for parallel process
  2015-06-19  8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
                   ` (6 preceding siblings ...)
  2015-06-19  8:56 ` [PATCH v2 07/10] Add write_kdump_pages_parallel to allow parallel process Zhou Wenjian
@ 2015-06-19  8:56 ` Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 09/10] Make makedumpfile available to read and compress pages parallelly Zhou Wenjian
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19  8:56 UTC (permalink / raw)
  To: kexec; +Cc: Qiao Nuohan

From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>

This patch initializes and frees the data used by the parallel process;
the available memory limit is also taken into account here.

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
 makedumpfile.c |  202 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 makedumpfile.h |    1 +
 2 files changed, 203 insertions(+), 0 deletions(-)
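
The number of produce/consume buffers is derived from the free memory;
a sketch of the arithmetic, with illustrative numbers only (MAP_REGION
being the space set aside for each thread's mmap region):

	limit_size    = (get_free_memory_size()
			 - MAP_REGION * info->num_threads) * 0.6;
	page_data_num = limit_size / page_data_buf_size;

	/*
	 * e.g. with roughly 1 GiB free and a 4 KiB page size,
	 * page_data_num is far larger than PAGE_DATA_NUM (50), so
	 * info->num_buffers ends up as 50 unless --num-buffers asks
	 * for fewer.
	 */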

diff --git a/makedumpfile.c b/makedumpfile.c
index ad38bf4..04d5e92 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -1432,6 +1432,23 @@ open_dump_bitmap(void)
 			SPLITTING_FD_BITMAP(i) = fd;
 		}
 	}
+
+	if (info->num_threads) {
+		/*
+		 * Reserve file descriptors of the bitmap file for creating
+		 * dumpfiles in parallel, because the bitmap file will be
+		 * unlinked just after this and cannot be opened again later.
+		 */
+		for (i = 0; i < info->num_threads; i++) {
+			if ((fd = open(info->name_bitmap, O_RDONLY)) < 0) {
+				ERRMSG("Can't open the bitmap file(%s). %s\n",
+				    info->name_bitmap, strerror(errno));
+				return FALSE;
+			}
+			FD_BITMAP_PARALLEL(i) = fd;
+		}
+	}
+
 	unlink(info->name_bitmap);
 
 	return TRUE;
@@ -3459,6 +3476,191 @@ calibrate_machdep_info(void)
 }
 
 int
+initial_for_parallel()
+{
+	unsigned long len_buf_out;
+	unsigned long page_data_buf_size;
+	unsigned long limit_size;
+	int page_data_num;
+	int i;
+
+	len_buf_out = calculate_len_buf_out(info->page_size);
+
+	/*
+	 * allocate memory for threads
+	 */
+	if ((info->threads = malloc(sizeof(pthread_t *) * info->num_threads))
+	    == NULL) {
+		MSG("Can't allocate memory for threads. %s\n",
+				strerror(errno));
+		return FALSE;
+	}
+	memset(info->threads, 0, sizeof(pthread_t *) * info->num_threads);
+
+	if ((info->kdump_thread_args =
+			malloc(sizeof(struct thread_args) * info->num_threads))
+	    == NULL) {
+		MSG("Can't allocate memory for arguments of threads. %s\n",
+				strerror(errno));
+		return FALSE;
+	}
+	memset(info->kdump_thread_args, 0, sizeof(struct thread_args) * info->num_threads);
+
+	for (i = 0; i < info->num_threads; i++) {
+		if ((info->threads[i] = malloc(sizeof(pthread_t))) == NULL) {
+			MSG("Can't allocate memory for thread %d. %s",
+					i, strerror(errno));
+			return FALSE;
+		}
+
+		if ((BUF_PARALLEL(i) = malloc(info->page_size)) == NULL) {
+			MSG("Can't allocate memory for the memory buffer. %s\n",
+					strerror(errno));
+			return FALSE;
+		}
+
+		if ((BUF_OUT_PARALLEL(i) = malloc(len_buf_out)) == NULL) {
+			MSG("Can't allocate memory for the compression buffer. %s\n",
+					strerror(errno));
+			return FALSE;
+		}
+
+		if ((MMAP_CACHE_PARALLEL(i) = malloc(sizeof(struct mmap_cache))) == NULL) {
+			MSG("Can't allocate memory for mmap_cache. %s\n",
+					strerror(errno));
+			return FALSE;
+		}
+
+		/*
+		 * initialize mmap_cache
+		 */
+		MMAP_CACHE_PARALLEL(i)->mmap_buf = MAP_FAILED;
+		MMAP_CACHE_PARALLEL(i)->mmap_start_offset = 0;
+		MMAP_CACHE_PARALLEL(i)->mmap_end_offset = 0;
+
+#ifdef USELZO
+		if ((WRKMEM_PARALLEL(i) = malloc(LZO1X_1_MEM_COMPRESS)) == NULL) {
+			MSG("Can't allocate memory for the working memory. %s\n",
+					strerror(errno));
+			return FALSE;
+		}
+#endif
+	}
+
+	/*
+	 * get a safe number of page_data
+	 */
+	page_data_buf_size = MAX(len_buf_out, info->page_size);
+
+	limit_size = (get_free_memory_size()
+		      - MAP_REGION * info->num_threads) * 0.6;
+
+	page_data_num = limit_size / page_data_buf_size;
+
+	if (info->num_buffers != 0)
+		info->num_buffers = MIN(info->num_buffers, page_data_num);
+	else
+		info->num_buffers = MIN(PAGE_DATA_NUM, page_data_num);
+
+	DEBUG_MSG("Number of struct page_data for produce/consume: %d\n",
+			info->num_buffers);
+
+	/*
+	 * allocate memory for page_data
+	 */
+	if ((info->page_data_buf = malloc(sizeof(struct page_data) * info->num_buffers))
+	    == NULL) {
+		MSG("Can't allocate memory for page_data_buf. %s\n",
+				strerror(errno));
+		return FALSE;
+	}
+	memset(info->page_data_buf, 0, sizeof(struct page_data) * info->num_buffers);
+
+	for (i = 0; i < info->num_buffers; i++) {
+		if ((info->page_data_buf[i].buf = malloc(page_data_buf_size)) == NULL) {
+			MSG("Can't allocate memory for buf of page_data_buf. %s\n",
+					strerror(errno));
+			return FALSE;
+		}
+	}
+
+	/*
+	 * initialize fd_memory for threads
+	 */
+	for (i = 0; i < info->num_threads; i++) {
+		if ((FD_MEMORY_PARALLEL(i) = open(info->name_memory, O_RDONLY))
+									< 0) {
+			ERRMSG("Can't open the dump memory(%s). %s\n",
+					info->name_memory, strerror(errno));
+			return FALSE;
+		}
+
+		if ((FD_BITMAP_MEMORY_PARALLEL(i) =
+				open(info->name_memory, O_RDONLY)) < 0) {
+			ERRMSG("Can't open the dump memory(%s). %s\n",
+					info->name_memory, strerror(errno));
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+void
+free_for_parallel()
+{
+	int i;
+
+	if (info->threads != NULL) {
+		for (i = 0; i < info->num_threads; i++) {
+			if (info->threads[i] != NULL)
+				free(info->threads[i]);
+
+			if (BUF_PARALLEL(i) != NULL)
+				free(BUF_PARALLEL(i));
+
+			if (BUF_OUT_PARALLEL(i) != NULL)
+				free(BUF_OUT_PARALLEL(i));
+
+			if (MMAP_CACHE_PARALLEL(i) != NULL) {
+				if (MMAP_CACHE_PARALLEL(i)->mmap_buf !=
+								MAP_FAILED)
+					munmap(MMAP_CACHE_PARALLEL(i)->mmap_buf,
+					       MMAP_CACHE_PARALLEL(i)->mmap_end_offset
+					       - MMAP_CACHE_PARALLEL(i)->mmap_start_offset);
+
+				free(MMAP_CACHE_PARALLEL(i));
+			}
+#ifdef USELZO
+			if (WRKMEM_PARALLEL(i) != NULL)
+				free(WRKMEM_PARALLEL(i));
+#endif
+
+		}
+		free(info->threads);
+	}
+
+	if (info->kdump_thread_args != NULL)
+		free(info->kdump_thread_args);
+
+	if (info->page_data_buf != NULL) {
+		for (i = 0; i < info->num_buffers; i++) {
+			if (info->page_data_buf[i].buf != NULL)
+				free(info->page_data_buf[i].buf);
+		}
+		free(info->page_data_buf);
+	}
+
+	for (i = 0; i < info->num_threads; i++) {
+		if (FD_MEMORY_PARALLEL(i) > 0)
+			close(FD_MEMORY_PARALLEL(i));
+
+		if (FD_BITMAP_MEMORY_PARALLEL(i) > 0)
+			close(FD_BITMAP_MEMORY_PARALLEL(i));
+	}
+}
+
+int
 initial(void)
 {
 	off_t offset;
diff --git a/makedumpfile.h b/makedumpfile.h
index 5dbea60..d0760d9 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -972,6 +972,7 @@ typedef unsigned long long int ulonglong;
  * for parallel process
  */
 
+#define PAGE_DATA_NUM	(50)
 #define WAIT_TIME	(60 * 10)
 #define PTHREAD_FAIL	((void *)-2)
 
-- 
1.7.1



* [PATCH v2 09/10] Make makedumpfile available to read and compress pages parallelly
  2015-06-19  8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
                   ` (7 preceding siblings ...)
  2015-06-19  8:56 ` [PATCH v2 08/10] Initial and free data used for " Zhou Wenjian
@ 2015-06-19  8:56 ` Zhou Wenjian
  2015-06-19  8:56 ` [PATCH v2 10/10] Add usage and manual about multiple threads process Zhou Wenjian
  2015-06-25  2:25 ` [PATCH v2 00/10] makedumpfile: parallel processing "Zhou, Wenjian/周文剑"
  10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19  8:56 UTC (permalink / raw)
  To: kexec; +Cc: Qiao Nuohan

From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>

With this patch, multiple threads can be used to read and compress
pages, which reduces the time needed to save the dumpfile.

Currently, sadump and Xen kdump are not supported.

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
 makedumpfile.c |   68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 makedumpfile.h |    2 +
 2 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/makedumpfile.c b/makedumpfile.c
index 04d5e92..8ffe174 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -3857,6 +3857,27 @@ out:
 		DEBUG_MSG("Buffer size for the cyclic mode: %ld\n", info->bufsize_cyclic);
 	}
 
+	if (info->num_threads) {
+		if (is_xen_memory()) {
+			MSG("'--num-threads' option is disable,\n");
+			MSG("because %s is Xen's memory core image.\n",
+							info->name_memory);
+			return FALSE;
+		}
+
+		if (info->flag_sadump) {
+			MSG("'--num-threads' option is disable,\n");
+			MSG("because %s is sadump %s format.\n",
+			    info->name_memory, sadump_format_type_name());
+			return FALSE;
+		}
+
+		if (!initial_for_parallel()) {
+			MSG("Fail to initial for parallel process.\n");
+			return FALSE;
+		}
+	}
+
 	if (!is_xen_memory() && !cache_init())
 		return FALSE;
 
@@ -7906,9 +7927,16 @@ write_kdump_pages_and_bitmap_cyclic(struct cache_data *cd_header, struct cache_d
 		if (!write_kdump_bitmap2(&cycle))
 			return FALSE;
 
-		if (!write_kdump_pages_cyclic(cd_header, cd_page, &pd_zero,
+		if (info->num_threads) {
+			if (!write_kdump_pages_parallel_cyclic(cd_header,
+							cd_page, &pd_zero,
+							&offset_data, &cycle))
+				return FALSE;
+		} else {
+			if (!write_kdump_pages_cyclic(cd_header, cd_page, &pd_zero,
 					&offset_data, &cycle))
-			return FALSE;
+				return FALSE;
+		}
 	}
 	free_bitmap2_buffer();
 
@@ -9875,6 +9903,18 @@ check_param_for_creating_dumpfile(int argc, char *argv[])
 	if (info->flag_sadump_diskset && !sadump_is_supported_arch())
 		return FALSE;
 
+	if (info->num_threads) {
+		if (info->flag_split) {
+			MSG("--num-threads cannot used with --split.\n");
+			return FALSE;
+		}
+
+		if (info->flag_elf_dumpfile) {
+			MSG("--num-threads cannot used with ELF format.\n");
+			return FALSE;
+		}
+	}
+
 	if ((argc == optind + 2) && !info->flag_flatten
 				 && !info->flag_split
 				 && !info->flag_sadump_diskset) {
@@ -9939,6 +9979,18 @@ check_param_for_creating_dumpfile(int argc, char *argv[])
 	} else
 		return FALSE;
 
+	if (info->num_threads) {
+		if ((info->parallel_info =
+		     malloc(sizeof(parallel_info_t) * info->num_threads))
+		    == NULL) {
+			MSG("Can't allocate memory for parallel_info.\n");
+			return FALSE;
+		}
+
+		memset(info->parallel_info, 0, sizeof(parallel_info_t)
+							* info->num_threads);
+	}
+
 	return TRUE;
 }
 
@@ -10255,6 +10307,8 @@ static struct option longopts[] = {
 	{"mem-usage", no_argument, NULL, OPT_MEM_USAGE},
 	{"splitblock-size", required_argument, NULL, OPT_SPLITBLOCK_SIZE},
 	{"work-dir", required_argument, NULL, OPT_WORKING_DIR},
+	{"num-threads", required_argument, NULL, OPT_NUM_THREADS},
+	{"num-buffers", required_argument, NULL, OPT_NUM_BUFFERS},
 	{0, 0, 0, 0}
 };
 
@@ -10399,6 +10453,12 @@ main(int argc, char *argv[])
 		case OPT_WORKING_DIR:
 			info->working_dir = optarg;
 			break;
+		case OPT_NUM_THREADS:
+			info->num_threads = atoi(optarg);
+			break;
+		case OPT_NUM_BUFFERS:
+			info->num_buffers = atoi(optarg);
+			break;
 		case '?':
 			MSG("Commandline parameter is invalid.\n");
 			MSG("Try `makedumpfile --help' for more information.\n");
@@ -10542,6 +10602,8 @@ out:
 	else if (!info->flag_mem_usage)
 		MSG("makedumpfile Completed.\n");
 
+	free_for_parallel();
+
 	if (info) {
 		if (info->dh_memory)
 			free(info->dh_memory);
@@ -10569,6 +10631,8 @@ out:
 			free(info->p2m_mfn_frame_list);
 		if (info->page_buf != NULL)
 			free(info->page_buf);
+		if (info->parallel_info != NULL)
+			free(info->parallel_info);
 		free(info);
 
 		if (splitblock) {
diff --git a/makedumpfile.h b/makedumpfile.h
index d0760d9..9dfe5b6 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -2032,6 +2032,8 @@ struct elf_prstatus {
 #define OPT_MEM_USAGE           OPT_START+13
 #define OPT_SPLITBLOCK_SIZE	OPT_START+14
 #define OPT_WORKING_DIR         OPT_START+15
+#define OPT_NUM_THREADS	OPT_START+16
+#define OPT_NUM_BUFFERS	OPT_START+17
 
 /*
  * Function Prototype.
-- 
1.7.1



* [PATCH v2 10/10] Add usage and manual about multiple threads process
  2015-06-19  8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
                   ` (8 preceding siblings ...)
  2015-06-19  8:56 ` [PATCH v2 09/10] Make makedumpfile available to read and compress pages parallelly Zhou Wenjian
@ 2015-06-19  8:56 ` Zhou Wenjian
  2015-06-25  2:25 ` [PATCH v2 00/10] makedumpfile: parallel processing "Zhou, Wenjian/周文剑"
  10 siblings, 0 replies; 19+ messages in thread
From: Zhou Wenjian @ 2015-06-19  8:56 UTC (permalink / raw)
  To: kexec; +Cc: Qiao Nuohan

From: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
 makedumpfile.8 |   24 ++++++++++++++++++++++++
 print_info.c   |   16 ++++++++++++++++
 2 files changed, 40 insertions(+), 0 deletions(-)

diff --git a/makedumpfile.8 b/makedumpfile.8
index 2d38cd0..b400a14 100644
--- a/makedumpfile.8
+++ b/makedumpfile.8
@@ -12,6 +12,8 @@ makedumpfile \- make a small dumpfile of kdump
 .br
 \fBmakedumpfile\fR \-\-split [\fIOPTION\fR] [\-x \fIVMLINUX\fR|\-i \fIVMCOREINFO\fR] \fIVMCORE\fR \fIDUMPFILE1\fR \fIDUMPFILE2\fR [\fIDUMPFILE3\fR ..]
 .br
+\fBmakedumpfile\fR [\fIOPTION\fR] [\-x \fIVMLINUX\fR|\-i \fIVMCOREINFO\fR] \-\-num\-threads \fITHREADNUM\fR [\-\-num\-buffers \fIBUFNUM\fR] \fIVMCORE\fR \fIDUMPFILE\fR
+.br
 \fBmakedumpfile\fR \-\-reassemble \fIDUMPFILE1\fR \fIDUMPFILE2\fR [\fIDUMPFILE3\fR ..] \fIDUMPFILE\fR
 .br
 \fBmakedumpfile\fR \-g \fIVMCOREINFO\fR \-x \fIVMLINUX\fR
@@ -371,6 +373,28 @@ the kdump\-compressed format.
 # makedumpfile \-\-split \-d 31 \-x vmlinux /proc/vmcore dumpfile1 dumpfile2
 
 .TP
+\fB\-\-num\-threads\fR \fITHREADNUM\fR
+Use multiple threads to read and compress the data of each page in
+parallel, which reduces the time needed to save \fIDUMPFILE\fR.
+This feature only supports creating \fIDUMPFILE\fR in kdump\-compressed
+format from \fIVMCORE\fR in kdump\-compressed or ELF format.
+.br
+.B Example:
+.br
+# makedumpfile \-d 31 \-\-num\-threads 4 /proc/vmcore dumpfile
+
+.TP
+\fB\-\-num\-buffers\fR \fIBUFNUM\fR
+This option is used together with the \-\-num\-threads option. Multi\-threaded
+processing needs buffers to temporarily store the page data generated by
+the threads, and this option specifies the number of pages that
+can be stored.
+.br
+.B Example:
+.br
+# makedumpfile \-d 31 \-\-num\-threads 4 \-\-num\-buffers 30 /proc/vmcore dumpfile
+
+.TP
 \fB\-\-reassemble\fR
 Reassemble multiple \fIDUMPFILE\fRs, which are created by \-\-split option,
 into one \fIDUMPFILE\fR. dumpfile1 and dumpfile2 are reassembled into dumpfile
diff --git a/print_info.c b/print_info.c
index 9c36bec..e8a6b40 100644
--- a/print_info.c
+++ b/print_info.c
@@ -76,6 +76,10 @@ print_usage(void)
 	MSG("  # makedumpfile --split [OPTION] [-x VMLINUX|-i VMCOREINFO] VMCORE DUMPFILE1\n");
 	MSG("    DUMPFILE2 [DUMPFILE3 ..]\n");
 	MSG("\n");
+	MSG("  Using multiple threads to create DUMPFILE in parallel:\n");
+	MSG("  # makedumpfile [OPTION] [-x VMLINUX|-i VMCOREINFO] --num-threads THREADNUM\n");
+	MSG("    [--num-buffers BUFNUM] VMCORE DUMPFILE1\n");
+	MSG("\n");
 	MSG("  Reassemble multiple DUMPFILEs:\n");
 	MSG("  # makedumpfile --reassemble DUMPFILE1 DUMPFILE2 [DUMPFILE3 ..] DUMPFILE\n");
 	MSG("\n");
@@ -184,6 +188,18 @@ print_usage(void)
 	MSG("      by the number of DUMPFILEs.\n");
 	MSG("      This feature supports only the kdump-compressed format.\n");
 	MSG("\n");
+	MSG("  [--num-threads THREADNUM]:\n");
+	MSG("      Using multiple threads to read and compress data of each page in parallel.\n");
+	MSG("      And it will reduces time for saving DUMPFILE.\n");
+	MSG("      This feature only supports creating DUMPFILE in kdump-comressed format from\n");
+	MSG("      VMCORE in kdump-compressed format or elf format.\n");
+	MSG("\n");
+	MSG("  [--num-buffers BUFNUM]:\n");
+	MSG("      This option is used for multiple threads process, please check --num-threads\n");
+	MSG("      option. Multiple threads process will need buffers to store generated page\n");
+	MSG("      data by threads temporarily, and this option is used to specify the number\n");
+	MSG("      of pages can be stored.\n");
+	MSG("\n");
 	MSG("  [--reassemble]:\n");
 	MSG("      Reassemble multiple DUMPFILEs, which are created by --split option,\n");
 	MSG("      into one DUMPFILE. dumpfile1 and dumpfile2 are reassembled into dumpfile.\n");
-- 
1.7.1



* Re: [PATCH v2 00/10] makedumpfile: parallel processing
  2015-06-19  8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
                   ` (9 preceding siblings ...)
  2015-06-19  8:56 ` [PATCH v2 10/10] Add usage and manual about multiple threads process Zhou Wenjian
@ 2015-06-25  2:25 ` "Zhou, Wenjian/周文剑"
  2015-06-26  7:07   ` Atsushi Kumagai
  10 siblings, 1 reply; 19+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2015-06-25  2:25 UTC (permalink / raw)
  To: Atsushi Kumagai; +Cc: kexec

Hello Atsushi Kumagai,

I tested this patch set on several machines and the following are the benchmark results.

These tables show the time that makedumpfile spends; the unit is seconds.

"core-data" in the table means the context in the vmcore.
     For example:
     core-data's value is 256. It means that in the vmcore, 256 * 8 bits of each page
     are set to 1.

threads-num in the table means how many threads are used. 0 means original single thread
implementation.

"-l" in the table means producing lzo format vmcore

"-c" in the table means producing kdump-compressed format vmcore

###################################
- System: PRIMERGY RX300 S6
- CPU: Intel(R) Xeon(R) CPU x5660
- memory: 16GB
###################################
************ makedumpfile -d 0 ******************
                 core-data       0       256     512     768     1024    1280    1536    1792    2048    2304    2560    2816    3072    3328    3584    3840
         threads-num
-l
         0                       11      112     163     168     167     167     167     167     170     169     167     166     167     168     169     113
         4                       5       111     158     166     167     167     167     166     169     170     166     166     167     167     158     111
         8                       5       111     158     167     169     170     167     169     169     170     167     168     168     168     159     111
         12                      6       111     158     168     167     167     169     168     170     169     167     168     167     167     161     114
-c
         0                       54      544     643     666     589     517     468     405     408     429     491     528     592     676     654     527
         4                       60      179     210     216     206     220     209     214     211     222     220     208     209     210     245     177
         8                       43      113     146     169     179     215     195     211     199     195     216     197     185     165     158     109
         12                      44      106     142     162     169     171     168     173     174     171     200     173     177     165     139     107

************ makedumpfile -d 1 ******************
                 core-data       0       256     512     768     1024    1280    1536    1792    2048    2304    2560    2816    3072    3328    3584    3840
         threads-num
-l
         0                       10      137     163     167     167     167     167     167     171     170     166     166     166     168     169     138
         4                       5       111     158     166     167     166     167     166     170     171     166     167     166     166     159     111
         8                       5       114     159     167     167     169     167     167     169     169     169     167     168     167     160     111
         12                      6       113     159     168     167     168     167     168     169     170     168     168     167     168     159     112
-c
         0                       53      522     645     663     591     512     470     401     412     413     479     532     587     666     648     524
         4                       57      205     244     216     208     217     216     225     221     225     223     217     213     209     246     174
         8                       41      118     154     176     191     213     219     223     212     210     222     207     198     176     164     118
         12                      43      111     148     174     189     194     191     182     178     173     204     196     194     170     150     112

************ makedumpfile -d 7 ******************
                 core-data       0       256     512     768     1024    1280    1536    1792    2048    2304    2560    2816    3072    3328    3584    3840
         threads-num
-l
         0                       10      144     199     168     167     167     167     167     171     169     167     166     166     168     169     144
         4                       5       110     158     167     167     167     166     166     170     169     166     166     166     166     159     110
         8                       5       111     159     167     166     170     167     166     169     169     168     167     167     167     159     110
         12                      6       111     159     167     167     166     168     167     169     169     168     167     167     167     160     111
-c
         0                       52      523     642     659     592     511     490     402     410     422     464     525     591     666     647     529
         4                       58      177     210     213     208     224     217     228     223     229     227     216     210     212     215     175
         8                       41      116     152     178     191     213     219     225     216     211     221     211     196     180     158     116
         12                      41      111     151     176     192     187     192     190     174     175     218     186     189     172     151     112

************ makedumpfile -d 31 ******************
                 core-data       0       256     512     768     1024    1280    1536    1792    2048    2304    2560    2816    3072    3328    3584    3840
         threads-num
-l
         0                       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
         4                       2       2       2       3       3       3       2       3       3       3       3       2       3       3       3       3
         8                       2       3       3       2       3       3       3       3       3       3       3       3       3       3       3       3
         12                      2       3       3       3       3       3       3       3       3       3       3       3       3       3       3       3
-c
         0                       3       3       3       3       3       3       3       3       3       3       3       3       3       3       3       3
         4                       4       5       5       4       5       5       5       5       4       5       5       5       4       4       5       5
         8                       4       4       4       3       4       4       4       4       4       4       4       4       4       4       4       4
         12                      4       4       4       4       4       4       4       4       4       4       4       4       4       4       4       4

###################################
- System: PRIMERGY RX300 S7
- CPU: Intel(R) Xeon(R) CPU E5-2620
- memory: 32GB
###################################
************ makedumpfile -d 0 ******************
                 core-data               0       256     512     768     1024    1280    1536    1792
         threads-num
-l
         0                               23      211     246     255     252     252     253     257
         4                               14      168     234     250     253     251     251     252
         12                              17      165     237     250     251     251     253     253
-c
         0                               117     1210    1620    1699    1443    1294    1235    1051
         4                               161     438     525     543     506     465     484     415
         12                              131     211     245     250     246     248     260     265

************ makedumpfile -d 7 ******************
                 core-data               0       256     512     768     1024    1280    1536    1792
         threads-num
-l
         0                               24      215     253     256     255     254     257     255
         4                               14      165     235     247     249     250     252     253
         12                              16      169     237     251     255     253     252     250
-c
         0                               120     1202    1625    1698    1434    1274    1223    1040
         4                               157     438     533     542     507     461     479     400
         12                              132     204     242     248     242     244     257     263

###################################
- System: PRIMEQUEST 1800E
- CPU: Intel(R) Xeon(R) CPU E7540
- memory: 32GB
###################################
************ makedumpfile -d 0 ******************
                 core-data               0       256     512     768     1024    1280    1536    1792
         threads-num
-l
         0                               34      282     245     179     179     179     179     180
         4                               63      143     224     230     220     212     207     204
         8                               65      129     200     225     235     235     225     220
         12                              67      149     186     211     222     229     237     236
-c
         0                               158     1505    2119    2129    1707    1483    1440    1273
         4                               207     589     672     673     636     564     536     514
         8                               176     327     377     387     367     336     314     291
         12                              191     272     295     306     288     259     257     240

************ makedumpfile -d 7 ******************
                 core-data               0       256     512     768     1024    1280    1536    1792
         threads-num
-l
         0                               34      270     248     187     188     187     187     187
         4                               63      154     186     188     189     189     190     190
         8                               64      131     220     218     197     186     187     188
         12                              65      159     198     232     229     207     190     188
-c
         0                               154     1508    2089    2133    1792    1660    1462    1312
         4                               203     594     684     701     627     592     535     503
         8                               172     326     377     393     366     334     313     286
         12                              182     273     295     308     283     258     249     237

************ makedumpfile -d 31 ******************
                 core-data               0       256     512     768     1024    1280    1536    1792
         threads-num
-l
         0                               2       1       1       2       1       2       2       2
         4                               48      48      48      48      49      48      48      49
         8                               48      49      48      49      48      47      49      48
         12                              49      50      49      49      49      48      50      49
-c
         0                               10      9       10      10      9       10      9       9
         4                               52      53      52      52      53      52      53      52
         8                               51      51      52      52      52      51      51      52
         12                              53      52      52      53      52      51      52      52




On 06/19/2015 04:56 PM, Zhou Wenjian wrote:
> This patch set implements parallel processing by means of multiple threads.
> With this patch set, it is available to use multiple threads to read
> and compress pages. This parallel process will save time.
> This feature only supports creating dumpfile in kdump-compressed format from
> vmcore in kdump-compressed format or elf format. Currently, sadump and
>   xen kdump are not supported.
>
> Qiao Nuohan (10):
>    Add readpage_kdump_compressed_parallel
>    Add mappage_elf_parallel
>    Add readpage_elf_parallel
>    Add read_pfn_parallel
>    Add function to initial bitmap for parallel use
>    Add filter_data_buffer_parallel
>    Add write_kdump_pages_parallel to allow parallel process
>    Initial and free data used for parallel process
>    Make makedumpfile available to read and compress pages parallelly
>    Add usage and manual about multiple threads process
>
>   Makefile       |    2 +
>   erase_info.c   |   29 ++-
>   erase_info.h   |    2 +
>   makedumpfile.8 |   24 ++
>   makedumpfile.c | 1096 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>   makedumpfile.h |   80 ++++
>   print_info.c   |   16 +
>   7 files changed, 1246 insertions(+), 3 deletions(-)
>
>
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec


-- 
Thanks
Zhou Wenjian


* RE: [PATCH v2 00/10] makedumpfile: parallel processing
  2015-06-25  2:25 ` [PATCH v2 00/10] makedumpfile: parallel processing "Zhou, Wenjian/周文剑"
@ 2015-06-26  7:07   ` Atsushi Kumagai
  2015-06-26  7:27     ` "Zhou, Wenjian/周文剑"
  0 siblings, 1 reply; 19+ messages in thread
From: Atsushi Kumagai @ 2015-06-26  7:07 UTC (permalink / raw)
  To: zhouwj-fnst; +Cc: kexec

Hello Zhou,

>Hello Atsushi Kumagai,
>
>I tested this patch set on several machines and the following are the benchmark results.

Thanks for your report, it looks good as before.
I also did a simple test with kernel 3.19 on a 5GB virtual machine,
but I can't get such a good result, as shown below:

/ # time makedumpfile -c --num-threads 0 /proc/vmcore /mnt/dumpfile
Copying data                       : [100.0 %] /

The dumpfile is saved to /mnt/dumpfile.

makedumpfile Completed.
real    0m 44.40s
user    0m 43.62s
sys     0m 0.69s
/ #

/ # time makedumpfile -c --num-threads 4 /proc/vmcore /mnt/dumpfile
Copying data                       : [100.0 %] -
Copying data                       : [100.0 %] /

The dumpfile is saved to /mnt/dumpfile.

makedumpfile Completed.
real    5m 29.54s
user    6m 8.18s
sys     16m 33.25s
/ #


There is a big performance degradation.
Do you have any ideas why this happens?


Thanks
Atsushi Kumagai


* Re: [PATCH v2 00/10] makedumpfile: parallel processing
  2015-06-26  7:07   ` Atsushi Kumagai
@ 2015-06-26  7:27     ` "Zhou, Wenjian/周文剑"
  2015-06-26  7:49       ` Atsushi Kumagai
  0 siblings, 1 reply; 19+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2015-06-26  7:27 UTC (permalink / raw)
  To: Atsushi Kumagai; +Cc: kexec

On 06/26/2015 03:07 PM, Atsushi Kumagai wrote:
> Hello Zhou,
>
>> >Hello Atsushi Kumagai,
>> >
>> >I tested this patch set on several machines and the following are the benchmark results.
> Thanks for your report, it looks good as before.
> I also did a simple test with kernel 3.19 on a 5GB virtual machine,
> but I can't get such a good result, as shown below:
>
> / # time makedumpfile -c --num-threads 0 /proc/vmcore /mnt/dumpfile
> Copying data                       : [100.0 %] /
>
> The dumpfile is saved to /mnt/dumpfile.
>
> makedumpfile Completed.
> real    0m 44.40s
> user    0m 43.62s
> sys     0m 0.69s
> / #
>
> / # time makedumpfile -c --num-threads 4 /proc/vmcore /mnt/dumpfile
> Copying data                       : [100.0 %] -
> Copying data                       : [100.0 %] /
>
> The dumpfile is saved to /mnt/dumpfile.
>
> makedumpfile Completed.
> real    5m 29.54s
> user    6m 8.18s
> sys     16m 33.25s
> / #
>
>
> There is a big performance degradation.
> Do you have any ideas why this happens?

Does your Virtual Machine have more than 2 processors?
If so, check the nr_cpus value in KDUMP_COMMANDLINE_APPEND of /etc/sysconfig/kdump.
It should also be set to a number larger than 2.

For "--num-threads 4", the most suitable number of processors is 5.

-- 
Thanks
Zhou Wenjian


* RE: [PATCH v2 00/10] makedumpfile: parallel processing
  2015-06-26  7:27     ` "Zhou, Wenjian/周文剑"
@ 2015-06-26  7:49       ` Atsushi Kumagai
  2015-06-29  6:19         ` "Zhou, Wenjian/周文剑"
  0 siblings, 1 reply; 19+ messages in thread
From: Atsushi Kumagai @ 2015-06-26  7:49 UTC (permalink / raw)
  To: zhouwj-fnst; +Cc: kexec

>On 06/26/2015 03:07 PM, Atsushi Kumagai wrote:
>> Hello Zhou,
>>
>>> >Hello Atsushi Kumagai,
>>> >
>>> >I tested this patch set on several machines and the following are the benchmark results.
>> Thanks for your report, it looks good as before.
>> I also did a simple test with kernel 3.19 on a 5GB virtual machine,
>> but I can't get such a good result, as shown below:
>>
>> / # time makedumpfile -c --num-threads 0 /proc/vmcore /mnt/dumpfile
>> Copying data                       : [100.0 %] /
>>
>> The dumpfile is saved to /mnt/dumpfile.
>>
>> makedumpfile Completed.
>> real    0m 44.40s
>> user    0m 43.62s
>> sys     0m 0.69s
>> / #
>>
>> / # time makedumpfile -c --num-threads 4 /proc/vmcore /mnt/dumpfile
>> Copying data                       : [100.0 %] -
>> Copying data                       : [100.0 %] /
>>
>> The dumpfile is saved to /mnt/dumpfile.
>>
>> makedumpfile Completed.
>> real    5m 29.54s
>> user    6m 8.18s
>> sys     16m 33.25s
>> / #
>>
>>
>> There is a big performance degradation.
>> Do you have any ideas why this happens?
>
>Does your Virtual Machine have more than 2 processors?
>If so, check the nr_cpus value in KDUMP_COMMANDLINE_APPEND of /etc/sysconfig/kdump.
>It should also be set to a number larger than 2.
>
>For "--num-threads 4", the most suitable number of processors is 5.

I attached 5 processors to the VM and I confirmed that all threads
consumed full cpu time by top(1) on the host:

    PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
  17614 qemu      20   0 5792m 4.9g 5652 R 435.1 72.8  29:02.17 qemu-kvm

So I think the performance must be improved...


Thanks
Atsushi Kumagai

* Re: [PATCH v2 00/10] makedumpfile: parallel processing
  2015-06-26  7:49       ` Atsushi Kumagai
@ 2015-06-29  6:19         ` "Zhou, Wenjian/周文剑"
  2015-06-30  9:06           ` Atsushi Kumagai
  0 siblings, 1 reply; 19+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2015-06-29  6:19 UTC (permalink / raw)
  To: Atsushi Kumagai; +Cc: kexec

[-- Attachment #1: Type: text/plain, Size: 613 bytes --]

On 06/26/2015 03:49 PM, Atsushi Kumagai wrote:
> I attached 5 processors to the VM and I confirmed that all threads
> consumed full cpu time by top(1) on the host:
>
>      PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
>    17614 qemu      20   0 5792m 4.9g 5652 R 435.1 72.8  29:02.17 qemu-kvm
>
> So I think the performance must be improved...

Since I can't reproduce that result on any machine here, could you test it with
the attached patch ("time") and show me the output?
Using "./makedumpfile -c --num-threads 4 /proc/vmcore dumpfile" is OK.

The attachment is the "time" patch.
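
The instrumentation in it is just pairs of gettimeofday() calls whose difference
is accumulated in microseconds, i.e. the shape below (distilled from the patch,
not a verbatim excerpt):

#include <sys/time.h>

/* Accumulated elapsed time, in microseconds, between two samples. */
static unsigned long elapsed_us(struct timeval *t1, struct timeval *t2)
{
	return (t2->tv_sec - t1->tv_sec) * 1000000UL
	       + (t2->tv_usec - t1->tv_usec);
}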

-- 
Thanks
Zhou Wenjian

[-- Attachment #2: time --]
[-- Type: text/plain, Size: 9092 bytes --]

--- makedumpfile.ori	2015-06-28 23:32:36.187000000 -0400
+++ makedumpfile.c	2015-06-29 01:51:55.884000000 -0400
@@ -26,6 +26,46 @@
 #include <limits.h>
 #include <assert.h>
 
+unsigned long write_kdump_pages_parallel_cyclic_time = 0;
+struct timeval write_kdump_pages_parallel_cyclic1, write_kdump_pages_parallel_cyclic2;
+unsigned long loop_time = 0;
+unsigned long consume_time = 0;
+struct timeval loop1, loop2;
+unsigned long check_time = 0;
+struct timeval check1, check2;
+struct timeval write1, write2;
+unsigned long write_time;
+struct timeval lock1, lock2;
+unsigned long lock_time;
+struct timeval hit1, hit2;
+unsigned long hit_time;
+struct timeval find1, find2;
+unsigned long find_time;
+struct timeval timea1, timea2;
+unsigned long timea;
+struct timeval timeb1, timeb2;
+unsigned long read_time[4];
+struct timeval read1[4], read2[4];
+unsigned long lock_current_time[4];
+struct timeval lock_current1[4], lock_current2[4];
+unsigned long found_time[4];
+struct timeval found1[4], found2[4];
+unsigned long lock_consumed_time[4];
+struct timeval lock_consumed1[4], lock_consumed2[4];
+unsigned long compress_time[4];
+struct timeval compress_time1[4], compress_time2[4];
+unsigned long timeb;
+unsigned long count1 = 0;
+unsigned long count2 = 0;
+unsigned long count3 = 0;
+unsigned long count4 = 0;
+unsigned long count5 = 0;
+unsigned long count6 = 0;
+unsigned long count7 = 0;
+unsigned long count8 = 0;
+unsigned long count9 = 0;
+
+
 struct symbol_table	symbol_table;
 struct size_table	size_table;
 struct offset_table	offset_table;
@@ -6944,6 +6984,7 @@
 	unsigned long len_buf_out_snappy =
 				snappy_max_compressed_length(info->page_size);
 #endif
+int thread_num = kdump_thread_args->thread_num;
 
 	buf = BUF_PARALLEL(kdump_thread_args->thread_num);
 	buf_out = BUF_OUT_PARALLEL(kdump_thread_args->thread_num);
@@ -6964,17 +7005,20 @@
 
 	while (1) {
 		/* get next pfn */
+gettimeofday(&lock_current1[thread_num], NULL);
 		pthread_mutex_lock(&info->current_pfn_mutex);
 		pfn = info->current_pfn;
 		info->current_pfn++;
 		pthread_mutex_unlock(&info->current_pfn_mutex);
-
+gettimeofday(&lock_current2[thread_num], NULL);
+lock_current_time[thread_num] += (lock_current2[thread_num].tv_sec - lock_current1[thread_num].tv_sec) * 1000000 + (lock_current2[thread_num].tv_usec - lock_current1[thread_num].tv_usec);
 		if (pfn >= kdump_thread_args->end_pfn)
 			break;
 
 		index = -1;
 		found = FALSE;
 
+gettimeofday(&found1[thread_num], NULL);
 		while (found == FALSE) {
 			/*
 			 * need a cancellation point here
@@ -6983,15 +7027,23 @@
 
 			index = pfn % page_data_num;
 
-			if (page_data_buf[index].ready != 0)
+			if (page_data_buf[index].ready != 0){
 				continue;
+}
 
-			if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
+			if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0){
 				continue;
+}
 
-			if (page_data_buf[index].ready != 0)
+			if (page_data_buf[index].ready != 0){
+count1++;
 				goto unlock;
+}
+gettimeofday(&found2[thread_num], NULL);
+found_time[thread_num] += (found2[thread_num].tv_sec - found1[thread_num].tv_sec) * 1000000 + (found2[thread_num].tv_usec - found1[thread_num].tv_usec);
+gettimeofday(&found1[thread_num], NULL);
 
+gettimeofday(&lock_consumed1[thread_num], NULL);
 			pthread_mutex_lock(&info->consumed_pfn_mutex);
 			if ((long)page_data_buf[index].pfn >
 						(long)info->consumed_pfn)
@@ -6999,11 +7051,15 @@
 			consumed_pfn = info->consumed_pfn;
 			pthread_mutex_unlock(&info->consumed_pfn_mutex);
 
+gettimeofday(&lock_consumed2[thread_num], NULL);
+lock_consumed_time[thread_num] += (lock_consumed2[thread_num].tv_sec - lock_consumed1[thread_num].tv_sec) * 1000000 + (lock_consumed2[thread_num].tv_usec - lock_consumed1[thread_num].tv_usec);
 			/*
 			 * leave space for slow producer
 			 */
-			if ((long)pfn - (long)consumed_pfn > page_data_num)
+			if ((long)pfn - (long)consumed_pfn > page_data_num){
+count2++;
 				goto unlock;
+}
 
 			found = TRUE;
 
@@ -7025,11 +7081,16 @@
 
 			page_data_buf[index].dumpable = TRUE;
 
+gettimeofday(&read1[thread_num], NULL);
 			if (!read_pfn_parallel(fd_memory, pfn, buf,
 					       &bitmap_memory_parallel,
 					       mmap_cache))
 					goto fail;
 
+gettimeofday(&read2[thread_num], NULL);
+read_time[thread_num] += (read2[thread_num].tv_sec - read1[thread_num].tv_sec) * 1000000 + (read2[thread_num].tv_usec - read1[thread_num].tv_usec);
+
+gettimeofday(&compress_time1[thread_num], NULL);
 			filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
 							info->page_size,
 							&info->filter_mutex);
@@ -7090,6 +7151,8 @@
 				page_data_buf[index].size  = info->page_size;
 				memcpy(page_data_buf[index].buf, buf, info->page_size);
 			}
+gettimeofday(&compress_time2[thread_num], NULL);
+compress_time[thread_num] += (compress_time2[thread_num].tv_sec - compress_time1[thread_num].tv_sec) * 1000000 + (compress_time2[thread_num].tv_usec - compress_time1[thread_num].tv_usec);
 unlock:
 			pthread_mutex_unlock(&page_data_buf[index].mutex);
 
@@ -7220,6 +7283,7 @@
 	gettimeofday(&last, NULL);
 
 	while (consuming_pfn < end_pfn) {
+gettimeofday(&loop1, NULL);
 		index = consuming_pfn % page_data_num;
 
 		gettimeofday(&new, NULL);
@@ -7232,17 +7296,26 @@
 		 * check pfn first without mutex locked to reduce the time
 		 * trying to lock the mutex
 		 */
-		if (page_data_buf[index].pfn != consuming_pfn)
+		if (page_data_buf[index].pfn != consuming_pfn){
+gettimeofday(&loop2, NULL);
+loop_time += (loop2.tv_sec - loop1.tv_sec) * 1000000 + (loop2.tv_usec - loop1.tv_usec);
 			continue;
+}
+gettimeofday(&find1, NULL);
+gettimeofday(&lock1, NULL);
 
 		pthread_mutex_lock(&page_data_buf[index].mutex);
 
+gettimeofday(&lock2, NULL);
+lock_time += (lock2.tv_sec - lock1.tv_sec) * 1000000 + (lock2.tv_usec - lock1.tv_usec);
+
 		/* check whether the found one is ready to be consumed */
 		if (page_data_buf[index].pfn != consuming_pfn ||
 		    page_data_buf[index].ready != 1) {
 			goto unlock;
 		}
 
+gettimeofday(&hit1, NULL);
 		if ((num_dumped % per) == 0)
 			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
 
@@ -7256,6 +7329,7 @@
 
 		num_dumped++;
 
+gettimeofday(&write1, NULL);
 		if (page_data_buf[index].zero == TRUE) {
 			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
 				goto out;
@@ -7278,8 +7352,17 @@
 				goto out;
 
 		}
+
+gettimeofday(&write2, NULL);
+write_time = (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
+
 unlock:
 		pthread_mutex_unlock(&page_data_buf[index].mutex);
+gettimeofday(&hit2, NULL);
+gettimeofday(&find2, NULL);
+hit_time += (hit2.tv_sec - hit1.tv_sec) * 1000000 + (hit2.tv_usec - hit1.tv_usec);
+find_time += (find2.tv_sec - find1.tv_sec) * 1000000 + (find2.tv_usec - find1.tv_usec);
+
 	}
 
 	ret = TRUE;
@@ -10314,6 +10397,11 @@
 int
 main(int argc, char *argv[])
 {
+        unsigned long total_time = 0;
+        struct timeval start_time, end_time;
+        gettimeofday(&start_time, NULL);
+
+
 	int i, opt, flag_debug = FALSE;
 
 	if ((info = calloc(1, sizeof(struct DumpInfo))) == NULL) {
@@ -10328,7 +10416,6 @@
 		goto out;
 	}
 	initialize_tables();
-
 	/*
 	 * By default, makedumpfile assumes that multi-cycle processing is
 	 * necessary to work in constant memory space.
@@ -10642,5 +10729,32 @@
 	}
 	free_elf_info();
 
+        gettimeofday(&end_time, NULL);
+        total_time = (end_time.tv_sec - start_time.tv_sec) * 1000000 + (end_time.tv_usec - start_time.tv_usec);
+        MSG("lock time: %lds%ldus\n", lock_time / 1000000, lock_time % 1000000);
+        MSG("write time: %lds%ldus\n", write_time / 1000000, write_time % 1000000);
+        MSG("hit time: %lds%ldus\n", hit_time / 1000000, hit_time % 1000000);
+        MSG("find time: %lds%ldus\n", find_time / 1000000, find_time % 1000000);
+        MSG("loop_time: %lds%ldus\n", loop_time / 1000000, loop_time % 1000000);
+        MSG("thread consume_time: %lds%ldus\n", consume_time / 1000000, consume_time % 1000000);
+        MSG("thread timea: %lds%ldus\n", timea / 1000000, timea % 1000000);
+        MSG("thread timeb: %lds%ldus\n", timeb / 1000000, timeb % 1000000);
+for (i = 0; i < 4; i++){
+        MSG("read_time[%d]: %lds%ldus\n", i,  read_time[i] / 1000000, read_time[i] % 1000000);
+        MSG("lock_current_time[%d]: %lds%ldus\n", i,  lock_current_time[i] / 1000000, lock_current_time[i] % 1000000);
+        MSG("found_time[%d]: %lds%ldus\n", i,  found_time[i] / 1000000, found_time[i] % 1000000);
+        MSG("lock_consumed_time[%d]: %lds%ldus\n", i,  lock_consumed_time[i] / 1000000, lock_consumed_time[i] % 1000000);
+        MSG("compress_time[%d]: %lds%ldus\n", i,  compress_time[i] / 1000000, compress_time[i] % 1000000);
+}
+        MSG("count1: %ld\n", count1);
+        MSG("count2: %ld\n", count2);
+        MSG("count3: %ld\n", count3);
+        MSG("count4: %ld\n", count4);
+        MSG("count4: %ld\n", count5);
+        MSG("count4: %ld\n", count6);
+        MSG("count4: %ld\n", count7);
+        MSG("exec time: %lds%ldus\n", total_time / 1000000, total_time % 1000000);
+
+
 	return retcd;
 }


* RE: [PATCH v2 00/10] makedumpfile: parallel processing
  2015-06-29  6:19         ` "Zhou, Wenjian/周文剑"
@ 2015-06-30  9:06           ` Atsushi Kumagai
  2015-07-06 13:19             ` "Zhou, Wenjian/周文剑"
  0 siblings, 1 reply; 19+ messages in thread
From: Atsushi Kumagai @ 2015-06-30  9:06 UTC (permalink / raw)
  To: zhouwj-fnst; +Cc: kexec

>On 06/26/2015 03:49 PM, Atsushi Kumagai wrote:
>> I attached 5 processors to the VM and I confirmed that all threads
>> consumed full cpu time by top(1) on the host:
>>
>>      PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
>>    17614 qemu      20   0 5792m 4.9g 5652 R 435.1 72.8  29:02.17 qemu-kvm
>>
>> So I think the performance must be improved...
>
>Since I can't reproduce that result on any machine here, could you test it with
>the attached patch ("time") and show me the output?
>Using "./makedumpfile -c --num-threads 4 /proc/vmcore dumpfile" is OK.
>
>The attachment is the "time" patch.

Here is the result:

/ # makedumpfile -c --num-threads 4 /proc/vmcore /mnt/dumpfile
Copying data                       : [100.0 %] |
Copying data                       : [100.0 %] \

The dumpfile is saved to /mnt/dumpfile.

makedumpfile Completed.
lock time: 310s935500us
write time: 3s970037us
hit time: 6s316043us
find time: 317s926654us
loop_time: 37s321800us
thread consume_time: 0s0us
thread timea: 0s0us
thread timeb: 0s0us
read_time[0]: 8s637011us
lock_current_time[0]: 0s284428us
found_time[0]: 60s366795us
lock_consumed_time[0]: 2s782596us
compress_time[0]: 301s427073us
read_time[1]: 8s435914us
lock_current_time[1]: 0s271680us
found_time[1]: 60s329026us
lock_consumed_time[1]: 2s849061us
compress_time[1]: 302s98620us
read_time[2]: 8s380550us
lock_current_time[2]: 0s270388us
found_time[2]: 60s209376us
lock_consumed_time[2]: 3s297574us
compress_time[2]: 301s486768us
read_time[3]: 8s550662us
lock_current_time[3]: 0s278997us
found_time[3]: 60s476702us
lock_consumed_time[3]: 3s49184us
compress_time[3]: 301s718390us
count1: 172
count2: 70921401
count3: 0
count4: 0
count5: 0
count6: 0
count7: 0
exec time: 380s125494us
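
For context, the large find/lock times above are spent busy-waiting on a ring of
page_data_num slots indexed by pfn % page_data_num. A minimal self-contained
sketch of that handoff pattern (simplified to one producer; names and sizes are
illustrative, this is not the makedumpfile code):

#include <pthread.h>
#include <stdio.h>

#define SLOTS 4                 /* stands in for page_data_num */
#define PFNS  64                /* stands in for the pfn range */

struct slot {
	pthread_mutex_t mutex;
	volatile long pfn;      /* pfn currently held by this slot */
	volatile int ready;     /* set by producer, cleared by consumer */
};

static struct slot buf[SLOTS];

/* Producer: fills slot pfn % SLOTS, spinning while the slot is still
 * unconsumed (the spin is what count2/found_time capture above). */
static void *producer(void *arg)
{
	long pfn = 0;

	while (pfn < PFNS) {
		struct slot *s = &buf[pfn % SLOTS];

		if (s->ready)
			continue;               /* busy-wait */
		if (pthread_mutex_trylock(&s->mutex))
			continue;
		if (!s->ready) {
			s->pfn = pfn;           /* "read + compress" would go here */
			s->ready = 1;
			pfn++;
		}
		pthread_mutex_unlock(&s->mutex);
	}
	return NULL;
}

int main(void)
{
	pthread_t tid;
	long pfn;
	int i;

	for (i = 0; i < SLOTS; i++)
		pthread_mutex_init(&buf[i].mutex, NULL);
	pthread_create(&tid, NULL, producer, NULL);

	/* Consumer: takes pfns strictly in order, spinning until the next
	 * one is ready (cf. loop_time/find_time above). */
	for (pfn = 0; pfn < PFNS; ) {
		struct slot *s = &buf[pfn % SLOTS];

		if (!s->ready || s->pfn != pfn)
			continue;               /* busy-wait */
		pthread_mutex_lock(&s->mutex);
		if (s->ready && s->pfn == pfn) {
			s->ready = 0;           /* "write to dumpfile" would go here */
			pfn++;
		}
		pthread_mutex_unlock(&s->mutex);
	}
	pthread_join(tid, NULL);
	printf("consumed %d pfns in order\n", PFNS);
	return 0;
}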


BTW, I fixed a small mistake before testing:

- write_time = (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
+ write_time += (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);


Thanks
Atsushi Kumagai

* Re: [PATCH v2 00/10] makedumpfile: parallel processing
  2015-06-30  9:06           ` Atsushi Kumagai
@ 2015-07-06 13:19             ` "Zhou, Wenjian/周文剑"
  2015-07-08  8:10               ` Atsushi Kumagai
  0 siblings, 1 reply; 19+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2015-07-06 13:19 UTC (permalink / raw)
  To: Atsushi Kumagai; +Cc: kexec

[-- Attachment #1: Type: text/plain, Size: 3208 bytes --]

Hello Atsushi Kumagai,

I have tried a lot, and I think the big performance degradation only
occurs on certain CPUs.
I can think of two reasons, and I need your help to confirm which is
the real one.

The following tests can also be run using a dumpfile instead of /proc/vmcore.

Test 1: determine whether it is caused by multi-threading.
apply patch: test1
command1: ./makedumpfile -c /proc/vmcore vmcore --num-threads 1
command2: ./makedumpfile -c /proc/vmcore vmcore --num-threads 8

It is better to do some tests with -l too.
command1: ./makedumpfile -l /proc/vmcore vmcore
command2: ./makedumpfile -l /proc/vmcore vmcore --num-threads 1
command3: ./makedumpfile -l /proc/vmcore vmcore --num-threads 8

Test 2: determine whether it is caused by doing compression in the threads
2.1:
	apply patch: test2.1
	command: ./makedumpfile -c /proc/vmcore vmcore --num-threads 1
2.2:
	apply patch: test2.2
	command: ./makedumpfile -c /proc/vmcore vmcore --num-threads 1

Thanks a lot.

BTW, could you show me the CPU name, zlib version, and glibc version?
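
One way to collect those (commands are distro-dependent; rpm is shown for
RPM-based systems):

# grep -m1 'model name' /proc/cpuinfo
# rpm -q zlib glibc
# ldd --version | head -1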

-- 
Thanks
Zhou Wenjian


[-- Attachment #2: test1 --]
[-- Type: text/plain, Size: 7560 bytes --]

--- makedumpfile.ori	2015-07-06 11:56:11.300000000 -0400
+++ makedumpfile.c	2015-07-06 11:56:28.127000000 -0400
@@ -26,6 +26,42 @@
 #include <limits.h>
 #include <assert.h>
 
+unsigned long write_kdump_pages_parallel_cyclic_time = 0;
+struct timeval write_kdump_pages_parallel_cyclic1, write_kdump_pages_parallel_cyclic2;
+unsigned long loop_time = 0;
+unsigned long consume_time = 0;
+struct timeval loop1, loop2;
+unsigned long check_time = 0;
+struct timeval check1, check2;
+struct timeval write1, write2;
+unsigned long write_time;
+struct timeval lock1, lock2;
+unsigned long lock_time;
+struct timeval hit1, hit2;
+unsigned long hit_time;
+struct timeval find1, find2;
+unsigned long find_time;
+struct timeval timea1, timea2;
+unsigned long timea;
+struct timeval timeb1, timeb2;
+unsigned long filter_time[20];
+struct timeval filter1[20], filter2[20];
+unsigned long cp_time[20];
+struct timeval cp1[20], cp2[20];
+unsigned long compress_time[20];
+struct timeval compress_time1[20], compress_time2[20];
+unsigned long timeb;
+unsigned long count1 = 0;
+unsigned long count2 = 0;
+unsigned long count3 = 0;
+unsigned long count4 = 0;
+unsigned long count5 = 0;
+unsigned long count6 = 0;
+unsigned long count7 = 0;
+unsigned long count8 = 0;
+unsigned long count9 = 0;
+
+
 struct symbol_table	symbol_table;
 struct size_table	size_table;
 struct offset_table	offset_table;
@@ -6919,6 +6955,17 @@
 	return TRUE;
 }
 
+int compress2015(unsigned char *buf_out, unsigned long *size_out, unsigned char *buf, int page_size, int a, int thread_num)
+{
+int ret;
+gettimeofday(&compress_time1[thread_num], NULL);
+ret = compress2(buf_out, size_out, buf, info->page_size, Z_BEST_SPEED);
+gettimeofday(&compress_time2[thread_num], NULL);
+compress_time[thread_num] += (compress_time2[thread_num].tv_sec - compress_time1[thread_num].tv_sec) * 1000000 + (compress_time2[thread_num].tv_usec - compress_time1[thread_num].tv_usec);
+
+return ret;
+}
+
 void *
 kdump_thread_function_cyclic(void *arg) {
 	void *retval = PTHREAD_FAIL;
@@ -6944,6 +6991,7 @@
 	unsigned long len_buf_out_snappy =
 				snappy_max_compressed_length(info->page_size);
 #endif
+int thread_num = kdump_thread_args->thread_num;
 
 	buf = BUF_PARALLEL(kdump_thread_args->thread_num);
 	buf_out = BUF_OUT_PARALLEL(kdump_thread_args->thread_num);
@@ -7030,9 +7078,13 @@
 					       mmap_cache))
 					goto fail;
 
+
+gettimeofday(&filter1[thread_num], NULL);
 			filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
 							info->page_size,
 							&info->filter_mutex);
+gettimeofday(&filter2[thread_num], NULL);
+filter_time[thread_num] += (filter2[thread_num].tv_sec - filter1[thread_num].tv_sec) * 1000000 + (filter2[thread_num].tv_usec - filter1[thread_num].tv_usec);
 
 			if ((info->dump_level & DL_EXCLUDE_ZERO)
 			    && is_zero_page(buf, info->page_size)) {
@@ -7048,14 +7100,17 @@
 			size_out = kdump_thread_args->len_buf_out;
 			if ((info->flag_compress & DUMP_DH_COMPRESSED_ZLIB)
 			    && ((size_out = kdump_thread_args->len_buf_out),
-				compress2(buf_out, &size_out, buf,
+				compress2015(buf_out, &size_out, buf,
 					  info->page_size,
-					  Z_BEST_SPEED) == Z_OK)
+					  Z_BEST_SPEED, thread_num) == Z_OK)
 			    && (size_out < info->page_size)) {
 				page_data_buf[index].flags =
 							DUMP_DH_COMPRESSED_ZLIB;
 				page_data_buf[index].size  = size_out;
+gettimeofday(&cp1[thread_num], NULL);
 				memcpy(page_data_buf[index].buf, buf_out, size_out);
+gettimeofday(&cp2[thread_num], NULL);
+cp_time[thread_num] += (cp2[thread_num].tv_sec - cp1[thread_num].tv_sec) * 1000000 + (cp2[thread_num].tv_usec - cp1[thread_num].tv_usec);
 #ifdef USELZO
 			} else if (info->flag_lzo_support
 				   && (info->flag_compress
@@ -7220,6 +7275,7 @@
 	gettimeofday(&last, NULL);
 
 	while (consuming_pfn < end_pfn) {
+gettimeofday(&loop1, NULL);
 		index = consuming_pfn % page_data_num;
 
 		gettimeofday(&new, NULL);
@@ -7232,17 +7288,26 @@
 		 * check pfn first without mutex locked to reduce the time
 		 * trying to lock the mutex
 		 */
-		if (page_data_buf[index].pfn != consuming_pfn)
+		if (page_data_buf[index].pfn != consuming_pfn){
+gettimeofday(&loop2, NULL);
+loop_time += (loop2.tv_sec - loop1.tv_sec) * 1000000 + (loop2.tv_usec - loop1.tv_usec);
 			continue;
+}
+gettimeofday(&find1, NULL);
+gettimeofday(&lock1, NULL);
 
 		pthread_mutex_lock(&page_data_buf[index].mutex);
 
+gettimeofday(&lock2, NULL);
+lock_time += (lock2.tv_sec - lock1.tv_sec) * 1000000 + (lock2.tv_usec - lock1.tv_usec);
+
 		/* check whether the found one is ready to be consumed */
 		if (page_data_buf[index].pfn != consuming_pfn ||
 		    page_data_buf[index].ready != 1) {
 			goto unlock;
 		}
 
+gettimeofday(&hit1, NULL);
 		if ((num_dumped % per) == 0)
 			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
 
@@ -7256,6 +7321,7 @@
 
 		num_dumped++;
 
+gettimeofday(&write1, NULL);
 		if (page_data_buf[index].zero == TRUE) {
 			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
 				goto out;
@@ -7278,8 +7344,17 @@
 				goto out;
 
 		}
+
+gettimeofday(&write2, NULL);
+write_time += (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
+
 unlock:
 		pthread_mutex_unlock(&page_data_buf[index].mutex);
+gettimeofday(&hit2, NULL);
+gettimeofday(&find2, NULL);
+hit_time += (hit2.tv_sec - hit1.tv_sec) * 1000000 + (hit2.tv_usec - hit1.tv_usec);
+find_time += (find2.tv_sec - find1.tv_sec) * 1000000 + (find2.tv_usec - find1.tv_usec);
+
 	}
 
 	ret = TRUE;
@@ -7438,8 +7513,8 @@
 		size_out = len_buf_out;
 		if ((info->flag_compress & DUMP_DH_COMPRESSED_ZLIB)
 		    && ((size_out = len_buf_out),
-			compress2(buf_out, &size_out, buf, info->page_size,
-				  Z_BEST_SPEED) == Z_OK)
+			compress2015(buf_out, &size_out, buf, info->page_size,
+				  Z_BEST_SPEED, 0) == Z_OK)
 		    && (size_out < info->page_size)) {
 			pd.flags = DUMP_DH_COMPRESSED_ZLIB;
 			pd.size  = size_out;
@@ -10314,6 +10389,11 @@
 int
 main(int argc, char *argv[])
 {
+        unsigned long total_time = 0;
+        struct timeval start_time, end_time;
+        gettimeofday(&start_time, NULL);
+
+
 	int i, opt, flag_debug = FALSE;
 
 	if ((info = calloc(1, sizeof(struct DumpInfo))) == NULL) {
@@ -10328,7 +10408,6 @@
 		goto out;
 	}
 	initialize_tables();
-
 	/*
 	 * By default, makedumpfile assumes that multi-cycle processing is
 	 * necessary to work in constant memory space.
@@ -10642,5 +10721,21 @@
 	}
 	free_elf_info();
 
+	gettimeofday(&end_time, NULL);
+	total_time = (end_time.tv_sec - start_time.tv_sec) * 1000000 + (end_time.tv_usec - start_time.tv_usec);
+	MSG("lock time: %lds%ldus\n", lock_time / 1000000, lock_time % 1000000);
+	MSG("write time: %lds%ldus\n", write_time / 1000000, write_time % 1000000);
+	MSG("hit time: %lds%ldus\n", hit_time / 1000000, hit_time % 1000000);
+	MSG("find time: %lds%ldus\n", find_time / 1000000, find_time % 1000000);
+	MSG("loop_time: %lds%ldus\n", loop_time / 1000000, loop_time % 1000000);
+	MSG("thread consume_time: %lds%ldus\n", consume_time / 1000000, consume_time % 1000000);
+	for (i = 0; i < info->num_threads; i++) {
+		MSG("filter_time[%d]: %lds%ldus\n", i, filter_time[i] / 1000000, filter_time[i] % 1000000);
+		MSG("cp_time[%d]: %lds%ldus\n", i, cp_time[i] / 1000000, cp_time[i] % 1000000);
+		MSG("compress_time[%d]: %lds%ldus\n", i, compress_time[i] / 1000000, compress_time[i] % 1000000);
+	}
+	MSG("exec time: %lds%ldus\n", total_time / 1000000, total_time % 1000000);
+
+
 	return retcd;
 }
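
For reference, every measurement site in the test patch above inlines the
same microsecond-delta computation. A small helper expressing that pattern
is sketched below; it is illustrative only, and the name elapsed_us is not
part of the posted patch:

	#include <sys/time.h>

	/*
	 * Microseconds elapsed between two gettimeofday() samples:
	 * seconds scaled to microseconds plus the microsecond part.
	 * This is the arithmetic repeated at each timing site.
	 */
	static unsigned long
	elapsed_us(const struct timeval *start, const struct timeval *end)
	{
		return (end->tv_sec - start->tv_sec) * 1000000UL
		       + (end->tv_usec - start->tv_usec);
	}

With it, the lock-time measurement would read:

	gettimeofday(&lock1, NULL);
	pthread_mutex_lock(&page_data_buf[index].mutex);
	gettimeofday(&lock2, NULL);
	lock_time += elapsed_us(&lock1, &lock2);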

[-- Attachment #3: test2.1 --]
[-- Type: application/x-troff-man, Size: 8159 bytes --]

[-- Attachment #4: test2.2 --]
[-- Type: application/x-troff-man, Size: 8501 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: [PATCH v2 00/10] makedumpfile: parallel processing
  2015-07-06 13:19             ` "Zhou, Wenjian/周文剑"
@ 2015-07-08  8:10               ` Atsushi Kumagai
  0 siblings, 0 replies; 19+ messages in thread
From: Atsushi Kumagai @ 2015-07-08  8:10 UTC (permalink / raw)
  To: zhouwj-fnst; +Cc: kexec

[-- Attachment #1: Type: text/plain, Size: 6868 bytes --]

>Hello Atsushi Kumagai,
>
>I have tried a lot, and I now think the big performance degradation
>occurs only on certain CPUs.
>I can think of two possible causes, and I need your help to confirm
>which is the real one.
>
>The following tests can also be run against a dumpfile instead of /proc/vmcore.
>
>Test 1: determine whether the degradation is caused by multi-threading.
>apply patch: test1
>command1: ./makedumpfile -c /proc/vmcore vmcore --num-threads 1
>command2: ./makedumpfile -c /proc/vmcore vmcore --num-threads 8
>
>It would be better to run some tests with -l, too.
>command1: ./makedumpfile -l /proc/vmcore vmcore
>command2: ./makedumpfile -l /proc/vmcore vmcore --num-threads 1
>command3: ./makedumpfile -l /proc/vmcore vmcore --num-threads 8
>
>Test 2: determine whether the degradation is caused by compressing in a thread.
>2.1:
>	apply patch: test2.1
>	command: ./makedumpfile -c /proc/vmcore vmcore --num-threads 1
>2.2:
>	apply patch: test2.2
>	command: ./makedumpfile -c /proc/vmcore vmcore --num-threads 1
>
>Thanks a lot.
>
>BTW, could you show me the CPU name, zlib version and glibc version?

Here is my environment; this time I tested in the first kernel on a
physical machine:

  CPU:   Intel(R) Xeon(R) CPU E7- 8870  @ 2.40GHz (80 cores)
  zlib:  1.2.3-29
  glibc: 2.12-1.149

Here is a summary of the results; the whole logs are attached:

Test1:
  | threads | compress time (s) | exec time (s) |
  |    1    |       82.75       |     95.21     |
  |    8    |     34.1~34.8     |     40.32     |

Test1 (lzo):
  | threads | compress time (s) | exec time (s) |
  |    -    |         -         |     30.33     |
  |    1    |         -         |     33.10     |
  |    8    |         -         |     28.77     |

Test2-1:
  | threads | compress time (s) | exec time (s) |
  |    1    |       76.12       |     82.13     |

Test2-2:
  | threads | compress time (s) | exec time (s) |
  |    1    |       41.97       |     51.46     |

The results are better than on the virtual machine, but still worse than
without creating threads.

Additionally, I collected performance profiles with perf for debugging, as shown below:

Test2-1:
 38.93%  makedumpfile-2-  libz.so.1.2.3                  [.] 0x0000000000006e30   // compress2(?)
  5.16%  makedumpfile-2-  [kernel.kallsyms]              [k] page_fault
  5.02%  makedumpfile-2-  [kernel.kallsyms]              [k] clear_page_c
  4.96%  makedumpfile-2-  libc-2.12.so                   [.] __memset_sse2
  4.64%  makedumpfile-2-  [kernel.kallsyms]              [k] tick_program_event
  3.77%  makedumpfile-2-  libz.so.1.2.3                  [.] adler32
  2.08%  makedumpfile-2-  libc-2.12.so                   [.] memcpy
  1.95%  makedumpfile-2-  [kernel.kallsyms]              [k] __alloc_pages_nodemask
  1.54%  makedumpfile-2-  [kernel.kallsyms]              [k] get_page_from_freelist
  1.53%  makedumpfile-2-  [kernel.kallsyms]              [k] tick_dev_program_event
  1.32%  makedumpfile-2-  [kernel.kallsyms]              [k] __mem_cgroup_commit_charge
  1.08%  makedumpfile-2-  [kernel.kallsyms]              [k] hrtimer_interrupt
  0.94%  makedumpfile-2-  [kernel.kallsyms]              [k] release_pages
  0.85%  makedumpfile-2-  [kernel.kallsyms]              [k] alloc_pages_vma
  ...

Test2-2:
 66.46%  makedumpfile-2-  libz.so.1.2.3                  [.] 0x000000000000564e   // compress2(?)
  6.71%  makedumpfile-2-  libz.so.1.2.3                  [.] adler32
  5.56%  makedumpfile-2-  libc-2.12.so                   [.] __memset_sse2
  3.44%  makedumpfile-2-  libc-2.12.so                   [.] memcpy
  2.85%  makedumpfile-2-  [kernel.kallsyms]              [k] tick_dev_program_event
  1.30%  makedumpfile-2-  libz.so.1.2.3                  [.] _tr_flush_block
  0.95%  makedumpfile-2-  [kernel.kallsyms]              [k] tick_program_event
  0.92%  makedumpfile-2-  libz.so.1.2.3                  [.] _tr_init
  0.69%  makedumpfile-2-  [kernel.kallsyms]              [k] hrtimer_interrupt
  0.63%  makedumpfile-2-  [kernel.kallsyms]              [k] page_fault
  0.34%  makedumpfile-2-  libpthread-2.12.so             [.] __lll_unlock_wake
  0.33%  makedumpfile-2-  libc-2.12.so                   [.] _int_malloc
  0.33%  makedumpfile-2-  libc-2.12.so                   [.] _int_free

In the case of Test2-1 (using pthread_create), more CPU time was spent in
some kernel functions such as page_fault() than in Test2-2 (calling
kdump_thread_function_cyclic() directly); this may be one cause of the
degradation. I haven't found the reason yet, so this is just for your
information.
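
To make the Test2-1 / Test2-2 distinction concrete: the two test patches
are assumed to differ only in whether the compression worker runs in a
newly created thread or is called inline. A minimal sketch of that
difference follows (worker_fn stands in for kdump_thread_function_cyclic();
this is an illustration, not the actual patches):

	#include <pthread.h>
	#include <stdio.h>

	/* Stand-in for kdump_thread_function_cyclic(); hypothetical. */
	static void *worker_fn(void *arg)
	{
		/* ... read, filter and compress pages ... */
		return NULL;
	}

	int main(void)
	{
	#ifdef TEST2_1
		/* Test 2.1: run the worker in a new thread. The new
		 * thread's stack pages are faulted in on first touch,
		 * which could contribute to the extra page_fault() time
		 * in the perf profile above (unconfirmed). */
		pthread_t tid;

		if (pthread_create(&tid, NULL, worker_fn, NULL) != 0) {
			perror("pthread_create");
			return 1;
		}
		pthread_join(tid, NULL);
	#else
		/* Test 2.2: call the same function directly on the main
		 * thread's already-mapped stack. */
		worker_fn(NULL);
	#endif
		return 0;
	}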


Thanks
Atsushi Kumagai

>--
>Thanks
>Zhou Wenjian
>
>On 06/30/2015 05:06 PM, Atsushi Kumagai wrote:
>>> On 06/26/2015 03:49 PM, Atsushi Kumagai wrote:
>>>> I attached 5 processors to the VM and confirmed with top(1) on the host
>>>> that all threads consumed full CPU time:
>>>>
>>>>       PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
>>>>     17614 qemu      20   0 5792m 4.9g 5652 R 435.1 72.8  29:02.17 qemu-kvm
>>>>
>>>> So I think the performance must be improved...
>>>
>>> Since I can't reproduce that result on any machine here, could you test it
>>> with the attached patch (time) and show me the output?
>>> Using "./makedumpfile -c --num-threads 4 /proc/vmcore dumpfile" is OK.
>>>
>>> The patch "time" is attached.
>>
>> Here is the result:
>>
>> / # makedumpfile -c --num-threads 4 /proc/vmcore /mnt/dumpfile
>> Copying data                       : [100.0 %] |
>> Copying data                       : [100.0 %] \
>>
>> The dumpfile is saved to /mnt/dumpfile.
>>
>> makedumpfile Completed.
>> lock time: 310s935500us
>> write time: 3s970037us
>> hit time: 6s316043us
>> find time: 317s926654us
>> loop_time: 37s321800us
>> thread consume_time: 0s0us
>> thread timea: 0s0us
>> thread timeb: 0s0us
>> read_time[0]: 8s637011us
>> lock_current_time[0]: 0s284428us
>> found_time[0]: 60s366795us
>> lock_consumed_time[0]: 2s782596us
>> compress_time[0]: 301s427073us
>> read_time[1]: 8s435914us
>> lock_current_time[1]: 0s271680us
>> found_time[1]: 60s329026us
>> lock_consumed_time[1]: 2s849061us
>> compress_time[1]: 302s98620us
>> read_time[2]: 8s380550us
>> lock_current_time[2]: 0s270388us
>> found_time[2]: 60s209376us
>> lock_consumed_time[2]: 3s297574us
>> compress_time[2]: 301s486768us
>> read_time[3]: 8s550662us
>> lock_current_time[3]: 0s278997us
>> found_time[3]: 60s476702us
>> lock_consumed_time[3]: 3s49184us
>> compress_time[3]: 301s718390us
>> count1: 172
>> count2: 70921401
>> count3: 0
>> count4: 0
>> count5: 0
>> count6: 0
>> count7: 0
>> exec time: 380s125494us
>>
>>
>> BTW, I fixed a small mistake before testing:
>>
>> - write_time = (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
>> + write_time += (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec);
>>

[-- Attachment #2: test1.log --]
[-- Type: application/octet-stream, Size: 8195 bytes --]

# ./makedumpfile -c --num-thread 1 vmcore-3.19 dumpfile
Checking for memory holes          : [100.0 %]
Copying data                       : [100.0 %]

The dumpfile is saved to dumpfile.

makedumpfile Completed.
lock time: 90s646416us
write time: 1s352335us
hit time: 3s624169us
find time: 94s712561us
loop_time: 0s70230us
thread consume_time: 0s0us
filter_time[0]: 0s154772us
cp_time[0]: 0s359576us
compress_time[0]: 82s759306us
exec time: 95s208148us
#

********

# ./makedumpfile -c --num-thread 8 vmcore-3.19 dumpfile
Checking for memory holes          : [100.0 %]
Copying data                       : [100.0 %]

The dumpfile is saved to dumpfile.

makedumpfile Completed.
lock time: 37s702803us
write time: 1s156990us
hit time: 2s3878us
find time: 39s988815us
loop_time: 0s37889us
thread consume_time: 0s0us
filter_time[0]: 0s57096us
cp_time[0]: 0s50824us
compress_time[0]: 34s102436us
filter_time[1]: 0s53229us
cp_time[1]: 0s43997us
compress_time[1]: 34s125652us
filter_time[2]: 0s56553us
cp_time[2]: 0s50431us
compress_time[2]: 34s54656us
filter_time[3]: 0s55513us
cp_time[3]: 0s51893us
compress_time[3]: 34s79953us
filter_time[4]: 0s58375us
cp_time[4]: 0s55276us
compress_time[4]: 34s74430us
filter_time[5]: 0s53602us
cp_time[5]: 0s55498us
compress_time[5]: 34s62992us
filter_time[6]: 0s57965us
cp_time[6]: 0s52278us
compress_time[6]: 34s124603us
filter_time[7]: 0s56307us
cp_time[7]: 0s52477us
compress_time[7]: 34s49827us
exec time: 40s316905us
# 

[-- Attachment #3: test1-lzo.log --]
[-- Type: application/octet-stream, Size: 6548 bytes --]

# ./makedumpfile -l vmcore-3.19 dumpfile
Checking for memory holes          : [100.0 %]
Copying data                       : [100.0 %]

The dumpfile is saved to dumpfile.

makedumpfile Completed.
lock time: 0s0us
write time: 0s0us
hit time: 0s0us
find time: 0s0us
loop_time: 0s0us
thread consume_time: 0s0us
exec time: 30s331895us
#

******

# ./makedumpfile -l --num-thread 1 vmcore-3.19 dumpfile
Checking for memory holes          : [100.0 %]
Copying data                       : [100.0 %]

The dumpfile is saved to dumpfile.

makedumpfile Completed.
lock time: 29s572487us
write time: 1s418602us
hit time: 2s736059us
find time: 32s664560us
loop_time: 0s77047us
thread consume_time: 0s0us
filter_time[0]: 0s100350us
cp_time[0]: 0s0us
compress_time[0]: 0s0us
exec time: 33s101991us
#

******


# echo 3 > /proc/sys/vm/drop_caches ; rm -rf dumpfile
# ./makedumpfile -l --num-thread 8 vmcore-3.19 dumpfile
Checking for memory holes          : [100.0 %]
Copying data                       : [100.0 %]

The dumpfile is saved to dumpfile.

makedumpfile Completed.
lock time: 26s564655us
write time: 1s153599us
hit time: 1s546357us
find time: 28s365307us
loop_time: 0s84746us
thread consume_time: 0s0us
filter_time[0]: 0s68056us
cp_time[0]: 0s0us
compress_time[0]: 0s0us
filter_time[1]: 0s69492us
cp_time[1]: 0s0us
compress_time[1]: 0s0us
filter_time[2]: 0s68802us
cp_time[2]: 0s0us
compress_time[2]: 0s0us
filter_time[3]: 0s69148us
cp_time[3]: 0s0us
compress_time[3]: 0s0us
filter_time[4]: 0s69572us
cp_time[4]: 0s0us
compress_time[4]: 0s0us
filter_time[5]: 0s68262us
cp_time[5]: 0s0us
compress_time[5]: 0s0us
filter_time[6]: 0s68632us
cp_time[6]: 0s0us
compress_time[6]: 0s0us
filter_time[7]: 0s70118us
cp_time[7]: 0s0us
compress_time[7]: 0s0us
exec time: 28s771737us
# 

[-- Attachment #4: test2-1.log --]
[-- Type: application/octet-stream, Size: 404 bytes --]

# ./makedumpfile -c --num-thread 1 vmcore-3.19 dumpfile
Checking for memory holes          : [  0.0 %] /
Checking for memory holes          : [100.0 %] |
makedumpfile Failed.
lock time: 0s0us
write time: 0s0us
hit time: 0s0us
find time: 0s0us
loop_time: 0s0us
thread consume_time: 0s0us
filter_time[0]: 0s136256us
cp_time[0]: 0s121550us
compress_time[0]: 76s118949us
exec time: 82s12723us
#

[-- Attachment #5: test2-2.log --]
[-- Type: application/octet-stream, Size: 404 bytes --]

# ./makedumpfile -c --num-thread 1 vmcore-3.19 dumpfile
Checking for memory holes          : [  0.0 %] /
Checking for memory holes          : [100.0 %] |
makedumpfile Failed.
lock time: 0s0us
write time: 0s0us
hit time: 0s0us
find time: 0s0us
loop_time: 0s0us
thread consume_time: 0s0us
filter_time[0]: 0s91555us
cp_time[0]: 0s74878us
compress_time[0]: 41s973805us
exec time: 51s460377us
# 


^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2015-07-08  8:17 UTC | newest]

Thread overview: 19+ messages
2015-06-19  8:56 [PATCH v2 00/10] makedumpfile: parallel processing Zhou Wenjian
2015-06-19  8:56 ` [PATCH v2 01/10] Add readpage_kdump_compressed_parallel Zhou Wenjian
2015-06-19  8:56 ` [PATCH v2 02/10] Add mappage_elf_parallel Zhou Wenjian
2015-06-19  8:56 ` [PATCH v2 03/10] Add readpage_elf_parallel Zhou Wenjian
2015-06-19  8:56 ` [PATCH v2 04/10] Add read_pfn_parallel Zhou Wenjian
2015-06-19  8:56 ` [PATCH v2 05/10] Add function to initial bitmap for parallel use Zhou Wenjian
2015-06-19  8:56 ` [PATCH v2 06/10] Add filter_data_buffer_parallel Zhou Wenjian
2015-06-19  8:56 ` [PATCH v2 07/10] Add write_kdump_pages_parallel to allow parallel process Zhou Wenjian
2015-06-19  8:56 ` [PATCH v2 08/10] Initial and free data used for " Zhou Wenjian
2015-06-19  8:56 ` [PATCH v2 09/10] Make makedumpfile available to read and compress pages parallelly Zhou Wenjian
2015-06-19  8:56 ` [PATCH v2 10/10] Add usage and manual about multiple threads process Zhou Wenjian
2015-06-25  2:25 ` [PATCH v2 00/10] makedumpfile: parallel processing "Zhou, Wenjian/周文剑"
2015-06-26  7:07   ` Atsushi Kumagai
2015-06-26  7:27     ` "Zhou, Wenjian/周文剑"
2015-06-26  7:49       ` Atsushi Kumagai
2015-06-29  6:19         ` "Zhou, Wenjian/周文剑"
2015-06-30  9:06           ` Atsushi Kumagai
2015-07-06 13:19             ` "Zhou, Wenjian/周文剑"
2015-07-08  8:10               ` Atsushi Kumagai
