All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] remoteproc: core: Add a memory efficient coredump function
@ 2020-03-27 23:56 Rishabh Bhatnagar
  2020-03-28  4:06 ` kbuild test robot
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Rishabh Bhatnagar @ 2020-03-27 23:56 UTC (permalink / raw)
  To: linux-kernel, linux-remoteproc, bjorn.andersson, ohad
  Cc: psodagud, tsoni, sidgup, Rishabh Bhatnagar

The current coredump implementation uses vmalloc area to copy
all the segments. But this might put a lot of strain on low memory
targets as the firmware size sometimes is in ten's of MBs.
The situation becomes worse if there are multiple remote processors
undergoing recovery at the same time.
This patch directly copies the device memory to userspace buffer
and avoids extra memory usage. This requires recovery to be halted
until data is read by userspace and free function is called.

Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
---
 drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
 include/linux/remoteproc.h           |   4 ++
 2 files changed, 94 insertions(+), 17 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 097f33e..2d881e5 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
 }
 EXPORT_SYMBOL(rproc_coredump_add_segment);
 
+
+void rproc_free_dump(void *data)
+{
+	struct rproc *rproc = data;
+
+	dev_info(&rproc->dev, "Userspace done reading rproc dump\n");
+	complete(&rproc->dump_done);
+}
+
+static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
+				unsigned long *data_left)
+{
+	struct rproc_dump_segment *segment;
+
+	list_for_each_entry(segment, segments, node) {
+		if (user_offset >= segment->size)
+			user_offset -= segment->size;
+		else
+			break;
+	}
+
+	if (&segment->node == segments) {
+		*data_left = 0;
+		return 0;
+	}
+
+	*data_left = segment->size - user_offset;
+
+	return segment->da + user_offset;
+}
+
+static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
+				void *data, size_t elfcorelen)
+{
+	void *device_mem = NULL;
+	unsigned long data_left = 0;
+	unsigned long bytes_left = count;
+	unsigned long addr = 0;
+	size_t copy_size = 0;
+	struct rproc *rproc = data;
+
+	if (offset < elfcorelen) {
+		copy_size = elfcorelen - offset;
+		copy_size = min(copy_size, bytes_left);
+
+		memcpy(buffer, rproc->elfcore + offset, copy_size);
+		offset += copy_size;
+		bytes_left -= copy_size;
+		buffer += copy_size;
+	}
+
+	while (bytes_left) {
+		addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
+				&data_left);
+	/* EOF check */
+		if (data_left == 0) {
+			pr_info("Ramdump complete. %lld bytes read.", offset);
+			return 0;
+		}
+
+		copy_size = min_t(size_t, bytes_left, data_left);
+
+		device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
+		if (!device_mem) {
+			pr_err("Unable to ioremap: addr %lx, size %zd\n",
+				 addr, copy_size);
+			return -ENOMEM;
+		}
+		memcpy(buffer, device_mem, copy_size);
+
+		offset += copy_size;
+		buffer += copy_size;
+		bytes_left -= copy_size;
+		dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
+			copy_size);
+	}
+
+	return count;
+}
+
 /**
  * rproc_coredump_add_custom_segment() - add custom coredump segment
  * @rproc:	handle of a remote processor
@@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
 	struct rproc_dump_segment *segment;
 	struct elf32_phdr *phdr;
 	struct elf32_hdr *ehdr;
-	size_t data_size;
+	size_t header_size;
 	size_t offset;
 	void *data;
-	void *ptr;
 	int phnum = 0;
 
 	if (list_empty(&rproc->dump_segments))
 		return;
 
-	data_size = sizeof(*ehdr);
+	header_size = sizeof(*ehdr);
 	list_for_each_entry(segment, &rproc->dump_segments, node) {
-		data_size += sizeof(*phdr) + segment->size;
+		header_size += sizeof(*phdr);
 
 		phnum++;
 	}
 
-	data = vmalloc(data_size);
+	data = vmalloc(header_size);
 	if (!data)
 		return;
 
 	ehdr = data;
+	rproc->elfcore = data;
 
 	memset(ehdr, 0, sizeof(*ehdr));
 	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
@@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
 
 		if (segment->dump) {
 			segment->dump(rproc, segment, data + offset);
-		} else {
-			ptr = rproc_da_to_va(rproc, segment->da, segment->size);
-			if (!ptr) {
-				dev_err(&rproc->dev,
-					"invalid coredump segment (%pad, %zu)\n",
-					&segment->da, segment->size);
-				memset(data + offset, 0xff, segment->size);
-			} else {
-				memcpy(data + offset, ptr, segment->size);
-			}
-		}
 
 		offset += phdr->p_filesz;
 		phdr++;
 	}
+	dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
+			rproc_read_dump, rproc_free_dump);
 
-	dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
+	wait_for_completion(&rproc->dump_done);
 }
 
 /**
@@ -1665,6 +1736,7 @@ int rproc_trigger_recovery(struct rproc *rproc)
 
 	/* generate coredump */
 	rproc_coredump(rproc);
+	reinit_completion(&rproc->dump_done);
 
 	/* load firmware */
 	ret = request_firmware(&firmware_p, rproc->firmware, dev);
@@ -2067,6 +2139,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
 	INIT_LIST_HEAD(&rproc->rvdevs);
 	INIT_LIST_HEAD(&rproc->subdevs);
 	INIT_LIST_HEAD(&rproc->dump_segments);
+	init_completion(&rproc->dump_done);
 
 	INIT_WORK(&rproc->crash_handler, rproc_crash_handler_work);
 
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 16ad666..461b235 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -481,6 +481,8 @@ struct rproc_dump_segment {
  * @auto_boot: flag to indicate if remote processor should be auto-started
  * @dump_segments: list of segments in the firmware
  * @nb_vdev: number of vdev currently handled by rproc
+ * @dump_done: completion variable when dump is complete
+ * @elfcore: pointer to elf header buffer
  */
 struct rproc {
 	struct list_head node;
@@ -514,6 +516,8 @@ struct rproc {
 	bool auto_boot;
 	struct list_head dump_segments;
 	int nb_vdev;
+	struct completion dump_done;
+	void *elfcore;
 };
 
 /**
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
  2020-03-27 23:56 [PATCH] remoteproc: core: Add a memory efficient coredump function Rishabh Bhatnagar
@ 2020-03-28  4:06 ` kbuild test robot
  2020-03-28  4:49 ` kbuild test robot
  2020-04-01 19:51   ` Bjorn Andersson
  2 siblings, 0 replies; 17+ messages in thread
From: kbuild test robot @ 2020-03-28  4:06 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 57412 bytes --]

Hi Rishabh,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on linux/master v5.6-rc7]
[cannot apply to remoteproc/for-next rpmsg/for-next next-20200326]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Rishabh-Bhatnagar/remoteproc-core-Add-a-memory-efficient-coredump-function/20200328-075826
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 69c5eea3128e775fd3c70ecf0098105d96dee330
config: s390-randconfig-a001-20200327 (attached as .config)
compiler: s390-linux-gcc (GCC) 9.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=9.2.0 make.cross ARCH=s390 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   In file included from include/linux/printk.h:331,
                    from include/linux/kernel.h:15,
                    from drivers/remoteproc/remoteproc_core.c:19:
   drivers/remoteproc/remoteproc_core.c: In function 'rproc_read_dump':
>> drivers/remoteproc/remoteproc_core.c:1592:24: warning: format '%d' expects argument of type 'int', but argument 4 has type 'size_t' {aka 'long unsigned int'} [-Wformat=]
    1592 |   dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
         |                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/dynamic_debug.h:125:15: note: in definition of macro '__dynamic_func_call'
     125 |   func(&id, ##__VA_ARGS__);  \
         |               ^~~~~~~~~~~
   include/linux/dynamic_debug.h:157:2: note: in expansion of macro '_dynamic_func_call'
     157 |  _dynamic_func_call(fmt,__dynamic_dev_dbg,   \
         |  ^~~~~~~~~~~~~~~~~~
   include/linux/dev_printk.h:114:2: note: in expansion of macro 'dynamic_dev_dbg'
     114 |  dynamic_dev_dbg(dev, dev_fmt(fmt), ##__VA_ARGS__)
         |  ^~~~~~~~~~~~~~~
   include/linux/dev_printk.h:114:23: note: in expansion of macro 'dev_fmt'
     114 |  dynamic_dev_dbg(dev, dev_fmt(fmt), ##__VA_ARGS__)
         |                       ^~~~~~~
>> drivers/remoteproc/remoteproc_core.c:1592:3: note: in expansion of macro 'dev_dbg'
    1592 |   dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
         |   ^~~~~~~
   drivers/remoteproc/remoteproc_core.c:1592:33: note: format string is defined here
    1592 |   dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
         |                                ~^
         |                                 |
         |                                 int
         |                                %ld
   drivers/remoteproc/remoteproc_core.c: In function 'rproc_coredump':
>> drivers/remoteproc/remoteproc_core.c:1721:1: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
    1721 | int rproc_trigger_recovery(struct rproc *rproc)
         | ^~~
>> drivers/remoteproc/remoteproc_core.c:1764:13: error: invalid storage class for function 'rproc_crash_handler_work'
    1764 | static void rproc_crash_handler_work(struct work_struct *work)
         |             ^~~~~~~~~~~~~~~~~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
>> drivers/remoteproc/remoteproc_core.c:1851:15: error: non-static declaration of 'rproc_boot' follows static declaration
    1851 | EXPORT_SYMBOL(rproc_boot);
         |               ^~~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
>> drivers/remoteproc/remoteproc_core.c:1851:1: note: in expansion of macro 'EXPORT_SYMBOL'
    1851 | EXPORT_SYMBOL(rproc_boot);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1800:5: note: previous definition of 'rproc_boot' was here
    1800 | int rproc_boot(struct rproc *rproc)
         |     ^~~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
>> include/linux/export.h:67:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
      67 |  static const struct kernel_symbol __ksymtab_##sym  \
         |  ^~~~~~
   include/linux/export.h:108:2: note: in expansion of macro '__KSYMTAB_ENTRY'
     108 |  __KSYMTAB_ENTRY(sym, sec)
         |  ^~~~~~~~~~~~~~~
   include/linux/export.h:147:39: note: in expansion of macro '___EXPORT_SYMBOL'
     147 | #define __EXPORT_SYMBOL(sym, sec, ns) ___EXPORT_SYMBOL(sym, sec, ns)
         |                                       ^~~~~~~~~~~~~~~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
>> drivers/remoteproc/remoteproc_core.c:1851:1: note: in expansion of macro 'EXPORT_SYMBOL'
    1851 | EXPORT_SYMBOL(rproc_boot);
         | ^~~~~~~~~~~~~
>> drivers/remoteproc/remoteproc_core.c:1905:15: error: non-static declaration of 'rproc_shutdown' follows static declaration
    1905 | EXPORT_SYMBOL(rproc_shutdown);
         |               ^~~~~~~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1905:1: note: in expansion of macro 'EXPORT_SYMBOL'
    1905 | EXPORT_SYMBOL(rproc_shutdown);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1872:6: note: previous definition of 'rproc_shutdown' was here
    1872 | void rproc_shutdown(struct rproc *rproc)
         |      ^~~~~~~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
>> include/linux/export.h:67:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
      67 |  static const struct kernel_symbol __ksymtab_##sym  \
         |  ^~~~~~
   include/linux/export.h:108:2: note: in expansion of macro '__KSYMTAB_ENTRY'
     108 |  __KSYMTAB_ENTRY(sym, sec)
         |  ^~~~~~~~~~~~~~~
   include/linux/export.h:147:39: note: in expansion of macro '___EXPORT_SYMBOL'
     147 | #define __EXPORT_SYMBOL(sym, sec, ns) ___EXPORT_SYMBOL(sym, sec, ns)
         |                                       ^~~~~~~~~~~~~~~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1905:1: note: in expansion of macro 'EXPORT_SYMBOL'
    1905 | EXPORT_SYMBOL(rproc_shutdown);
         | ^~~~~~~~~~~~~
>> drivers/remoteproc/remoteproc_core.c:1955:15: error: non-static declaration of 'rproc_get_by_phandle' follows static declaration
    1955 | EXPORT_SYMBOL(rproc_get_by_phandle);
         |               ^~~~~~~~~~~~~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1955:1: note: in expansion of macro 'EXPORT_SYMBOL'
    1955 | EXPORT_SYMBOL(rproc_get_by_phandle);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1950:15: note: previous definition of 'rproc_get_by_phandle' was here
    1950 | struct rproc *rproc_get_by_phandle(phandle phandle)
         |               ^~~~~~~~~~~~~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
>> include/linux/export.h:67:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
      67 |  static const struct kernel_symbol __ksymtab_##sym  \
         |  ^~~~~~
   include/linux/export.h:108:2: note: in expansion of macro '__KSYMTAB_ENTRY'
     108 |  __KSYMTAB_ENTRY(sym, sec)
         |  ^~~~~~~~~~~~~~~
   include/linux/export.h:147:39: note: in expansion of macro '___EXPORT_SYMBOL'
     147 | #define __EXPORT_SYMBOL(sym, sec, ns) ___EXPORT_SYMBOL(sym, sec, ns)
         |                                       ^~~~~~~~~~~~~~~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1955:1: note: in expansion of macro 'EXPORT_SYMBOL'
    1955 | EXPORT_SYMBOL(rproc_get_by_phandle);
         | ^~~~~~~~~~~~~
>> drivers/remoteproc/remoteproc_core.c:2005:15: error: non-static declaration of 'rproc_add' follows static declaration
    2005 | EXPORT_SYMBOL(rproc_add);
         |               ^~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2005:1: note: in expansion of macro 'EXPORT_SYMBOL'
    2005 | EXPORT_SYMBOL(rproc_add);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1977:5: note: previous definition of 'rproc_add' was here
    1977 | int rproc_add(struct rproc *rproc)
         |     ^~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
>> include/linux/export.h:67:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
      67 |  static const struct kernel_symbol __ksymtab_##sym  \
         |  ^~~~~~
   include/linux/export.h:108:2: note: in expansion of macro '__KSYMTAB_ENTRY'
     108 |  __KSYMTAB_ENTRY(sym, sec)
         |  ^~~~~~~~~~~~~~~
   include/linux/export.h:147:39: note: in expansion of macro '___EXPORT_SYMBOL'
     147 | #define __EXPORT_SYMBOL(sym, sec, ns) ___EXPORT_SYMBOL(sym, sec, ns)
         |                                       ^~~~~~~~~~~~~~~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2005:1: note: in expansion of macro 'EXPORT_SYMBOL'
    2005 | EXPORT_SYMBOL(rproc_add);
         | ^~~~~~~~~~~~~
>> drivers/remoteproc/remoteproc_core.c:2016:13: error: invalid storage class for function 'rproc_type_release'
    2016 | static void rproc_type_release(struct device *dev)
         |             ^~~~~~~~~~~~~~~~~~
>> drivers/remoteproc/remoteproc_core.c:2034:13: error: initializer element is not constant
    2034 |  .release = rproc_type_release,
         |             ^~~~~~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2034:13: note: (near initialization for 'rproc_type.release')
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
>> drivers/remoteproc/remoteproc_core.c:2150:15: error: non-static declaration of 'rproc_alloc' follows static declaration
    2150 | EXPORT_SYMBOL(rproc_alloc);
         |               ^~~~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2150:1: note: in expansion of macro 'EXPORT_SYMBOL'
    2150 | EXPORT_SYMBOL(rproc_alloc);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2060:15: note: previous definition of 'rproc_alloc' was here
    2060 | struct rproc *rproc_alloc(struct device *dev, const char *name,
         |               ^~~~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
>> include/linux/export.h:67:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
      67 |  static const struct kernel_symbol __ksymtab_##sym  \
         |  ^~~~~~
   include/linux/export.h:108:2: note: in expansion of macro '__KSYMTAB_ENTRY'
     108 |  __KSYMTAB_ENTRY(sym, sec)
         |  ^~~~~~~~~~~~~~~
   include/linux/export.h:147:39: note: in expansion of macro '___EXPORT_SYMBOL'
     147 | #define __EXPORT_SYMBOL(sym, sec, ns) ___EXPORT_SYMBOL(sym, sec, ns)
         |                                       ^~~~~~~~~~~~~~~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2150:1: note: in expansion of macro 'EXPORT_SYMBOL'
    2150 | EXPORT_SYMBOL(rproc_alloc);
         | ^~~~~~~~~~~~~
>> drivers/remoteproc/remoteproc_core.c:2165:15: error: non-static declaration of 'rproc_free' follows static declaration
    2165 | EXPORT_SYMBOL(rproc_free);
         |               ^~~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2165:1: note: in expansion of macro 'EXPORT_SYMBOL'
    2165 | EXPORT_SYMBOL(rproc_free);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2161:6: note: previous definition of 'rproc_free' was here
    2161 | void rproc_free(struct rproc *rproc)
         |      ^~~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
>> include/linux/export.h:67:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
      67 |  static const struct kernel_symbol __ksymtab_##sym  \
         |  ^~~~~~
   include/linux/export.h:108:2: note: in expansion of macro '__KSYMTAB_ENTRY'
     108 |  __KSYMTAB_ENTRY(sym, sec)
         |  ^~~~~~~~~~~~~~~
   include/linux/export.h:147:39: note: in expansion of macro '___EXPORT_SYMBOL'
     147 | #define __EXPORT_SYMBOL(sym, sec, ns) ___EXPORT_SYMBOL(sym, sec, ns)
         |                                       ^~~~~~~~~~~~~~~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2165:1: note: in expansion of macro 'EXPORT_SYMBOL'
    2165 | EXPORT_SYMBOL(rproc_free);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2181:15: error: non-static declaration of 'rproc_put' follows static declaration
    2181 | EXPORT_SYMBOL(rproc_put);
         |               ^~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2181:1: note: in expansion of macro 'EXPORT_SYMBOL'
    2181 | EXPORT_SYMBOL(rproc_put);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2176:6: note: previous definition of 'rproc_put' was here
    2176 | void rproc_put(struct rproc *rproc)
         |      ^~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
   include/linux/export.h:67:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
      67 |  static const struct kernel_symbol __ksymtab_##sym  \
         |  ^~~~~~
   include/linux/export.h:108:2: note: in expansion of macro '__KSYMTAB_ENTRY'
     108 |  __KSYMTAB_ENTRY(sym, sec)
         |  ^~~~~~~~~~~~~~~
   include/linux/export.h:147:39: note: in expansion of macro '___EXPORT_SYMBOL'
     147 | #define __EXPORT_SYMBOL(sym, sec, ns) ___EXPORT_SYMBOL(sym, sec, ns)
         |                                       ^~~~~~~~~~~~~~~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2181:1: note: in expansion of macro 'EXPORT_SYMBOL'
    2181 | EXPORT_SYMBOL(rproc_put);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2223:15: error: non-static declaration of 'rproc_del' follows static declaration
    2223 | EXPORT_SYMBOL(rproc_del);
         |               ^~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2223:1: note: in expansion of macro 'EXPORT_SYMBOL'
    2223 | EXPORT_SYMBOL(rproc_del);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2198:5: note: previous definition of 'rproc_del' was here
    2198 | int rproc_del(struct rproc *rproc)
         |     ^~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
   include/linux/export.h:67:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
      67 |  static const struct kernel_symbol __ksymtab_##sym  \
         |  ^~~~~~
   include/linux/export.h:108:2: note: in expansion of macro '__KSYMTAB_ENTRY'
     108 |  __KSYMTAB_ENTRY(sym, sec)
         |  ^~~~~~~~~~~~~~~
   include/linux/export.h:147:39: note: in expansion of macro '___EXPORT_SYMBOL'
     147 | #define __EXPORT_SYMBOL(sym, sec, ns) ___EXPORT_SYMBOL(sym, sec, ns)
         |                                       ^~~~~~~~~~~~~~~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2223:1: note: in expansion of macro 'EXPORT_SYMBOL'
    2223 | EXPORT_SYMBOL(rproc_del);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:2236:15: error: non-static declaration of 'rproc_add_subdev' follows static declaration
    2236 | EXPORT_SYMBOL(rproc_add_subdev);
         |               ^~~~~~~~~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \

vim +/rproc_crash_handler_work +1764 drivers/remoteproc/remoteproc_core.c

1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1549  
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1550  static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1551  				void *data, size_t elfcorelen)
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1552  {
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1553  	void *device_mem = NULL;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1554  	unsigned long data_left = 0;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1555  	unsigned long bytes_left = count;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1556  	unsigned long addr = 0;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1557  	size_t copy_size = 0;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1558  	struct rproc *rproc = data;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1559  
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1560  	if (offset < elfcorelen) {
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1561  		copy_size = elfcorelen - offset;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1562  		copy_size = min(copy_size, bytes_left);
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1563  
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1564  		memcpy(buffer, rproc->elfcore + offset, copy_size);
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1565  		offset += copy_size;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1566  		bytes_left -= copy_size;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1567  		buffer += copy_size;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1568  	}
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1569  
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1570  	while (bytes_left) {
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1571  		addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1572  				&data_left);
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1573  	/* EOF check */
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1574  		if (data_left == 0) {
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1575  			pr_info("Ramdump complete. %lld bytes read.", offset);
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1576  			return 0;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1577  		}
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1578  
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1579  		copy_size = min_t(size_t, bytes_left, data_left);
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1580  
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1581  		device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1582  		if (!device_mem) {
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1583  			pr_err("Unable to ioremap: addr %lx, size %zd\n",
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1584  				 addr, copy_size);
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1585  			return -ENOMEM;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1586  		}
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1587  		memcpy(buffer, device_mem, copy_size);
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1588  
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1589  		offset += copy_size;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1590  		buffer += copy_size;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1591  		bytes_left -= copy_size;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27 @1592  		dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1593  			copy_size);
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1594  	}
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1595  
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1596  	return count;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1597  }
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1598  
ab8f873bb90da7 Sibi Sankar          2018-10-17  1599  /**
ab8f873bb90da7 Sibi Sankar          2018-10-17  1600   * rproc_coredump_add_custom_segment() - add custom coredump segment
ab8f873bb90da7 Sibi Sankar          2018-10-17  1601   * @rproc:	handle of a remote processor
ab8f873bb90da7 Sibi Sankar          2018-10-17  1602   * @da:		device address
ab8f873bb90da7 Sibi Sankar          2018-10-17  1603   * @size:	size of segment
ab8f873bb90da7 Sibi Sankar          2018-10-17  1604   * @dumpfn:	custom dump function called for each segment during coredump
ab8f873bb90da7 Sibi Sankar          2018-10-17  1605   * @priv:	private data
ab8f873bb90da7 Sibi Sankar          2018-10-17  1606   *
ab8f873bb90da7 Sibi Sankar          2018-10-17  1607   * Add device memory to the list of segments to be included in the coredump
ab8f873bb90da7 Sibi Sankar          2018-10-17  1608   * and associate the segment with the given custom dump function and private
ab8f873bb90da7 Sibi Sankar          2018-10-17  1609   * data.
ab8f873bb90da7 Sibi Sankar          2018-10-17  1610   *
ab8f873bb90da7 Sibi Sankar          2018-10-17  1611   * Return: 0 on success, negative errno on error.
ab8f873bb90da7 Sibi Sankar          2018-10-17  1612   */
ab8f873bb90da7 Sibi Sankar          2018-10-17  1613  int rproc_coredump_add_custom_segment(struct rproc *rproc,
ab8f873bb90da7 Sibi Sankar          2018-10-17  1614  				      dma_addr_t da, size_t size,
ab8f873bb90da7 Sibi Sankar          2018-10-17  1615  				      void (*dumpfn)(struct rproc *rproc,
ab8f873bb90da7 Sibi Sankar          2018-10-17  1616  						     struct rproc_dump_segment *segment,
ab8f873bb90da7 Sibi Sankar          2018-10-17  1617  						     void *dest),
ab8f873bb90da7 Sibi Sankar          2018-10-17  1618  				      void *priv)
ab8f873bb90da7 Sibi Sankar          2018-10-17  1619  {
ab8f873bb90da7 Sibi Sankar          2018-10-17  1620  	struct rproc_dump_segment *segment;
ab8f873bb90da7 Sibi Sankar          2018-10-17  1621  
ab8f873bb90da7 Sibi Sankar          2018-10-17  1622  	segment = kzalloc(sizeof(*segment), GFP_KERNEL);
ab8f873bb90da7 Sibi Sankar          2018-10-17  1623  	if (!segment)
ab8f873bb90da7 Sibi Sankar          2018-10-17  1624  		return -ENOMEM;
ab8f873bb90da7 Sibi Sankar          2018-10-17  1625  
ab8f873bb90da7 Sibi Sankar          2018-10-17  1626  	segment->da = da;
ab8f873bb90da7 Sibi Sankar          2018-10-17  1627  	segment->size = size;
ab8f873bb90da7 Sibi Sankar          2018-10-17  1628  	segment->priv = priv;
ab8f873bb90da7 Sibi Sankar          2018-10-17  1629  	segment->dump = dumpfn;
ab8f873bb90da7 Sibi Sankar          2018-10-17  1630  
ab8f873bb90da7 Sibi Sankar          2018-10-17  1631  	list_add_tail(&segment->node, &rproc->dump_segments);
ab8f873bb90da7 Sibi Sankar          2018-10-17  1632  
ab8f873bb90da7 Sibi Sankar          2018-10-17  1633  	return 0;
ab8f873bb90da7 Sibi Sankar          2018-10-17  1634  }
ab8f873bb90da7 Sibi Sankar          2018-10-17  1635  EXPORT_SYMBOL(rproc_coredump_add_custom_segment);
ab8f873bb90da7 Sibi Sankar          2018-10-17  1636  
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1637  /**
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1638   * rproc_coredump() - perform coredump
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1639   * @rproc:	rproc handle
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1640   *
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1641   * This function will generate an ELF header for the registered segments
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1642   * and create a devcoredump device associated with rproc.
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1643   */
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1644  static void rproc_coredump(struct rproc *rproc)
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1645  {
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1646  	struct rproc_dump_segment *segment;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1647  	struct elf32_phdr *phdr;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1648  	struct elf32_hdr *ehdr;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1649  	size_t header_size;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1650  	size_t offset;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1651  	void *data;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1652  	int phnum = 0;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1653  
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1654  	if (list_empty(&rproc->dump_segments))
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1655  		return;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1656  
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1657  	header_size = sizeof(*ehdr);
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1658  	list_for_each_entry(segment, &rproc->dump_segments, node) {
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1659  		header_size += sizeof(*phdr);
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1660  
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1661  		phnum++;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1662  	}
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1663  
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1664  	data = vmalloc(header_size);
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1665  	if (!data)
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1666  		return;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1667  
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1668  	ehdr = data;
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1669  	rproc->elfcore = data;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1670  
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1671  	memset(ehdr, 0, sizeof(*ehdr));
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1672  	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1673  	ehdr->e_ident[EI_CLASS] = ELFCLASS32;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1674  	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1675  	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1676  	ehdr->e_ident[EI_OSABI] = ELFOSABI_NONE;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1677  	ehdr->e_type = ET_CORE;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1678  	ehdr->e_machine = EM_NONE;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1679  	ehdr->e_version = EV_CURRENT;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1680  	ehdr->e_entry = rproc->bootaddr;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1681  	ehdr->e_phoff = sizeof(*ehdr);
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1682  	ehdr->e_ehsize = sizeof(*ehdr);
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1683  	ehdr->e_phentsize = sizeof(*phdr);
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1684  	ehdr->e_phnum = phnum;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1685  
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1686  	phdr = data + ehdr->e_phoff;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1687  	offset = ehdr->e_phoff + sizeof(*phdr) * ehdr->e_phnum;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1688  	list_for_each_entry(segment, &rproc->dump_segments, node) {
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1689  		memset(phdr, 0, sizeof(*phdr));
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1690  		phdr->p_type = PT_LOAD;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1691  		phdr->p_offset = offset;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1692  		phdr->p_vaddr = segment->da;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1693  		phdr->p_paddr = segment->da;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1694  		phdr->p_filesz = segment->size;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1695  		phdr->p_memsz = segment->size;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1696  		phdr->p_flags = PF_R | PF_W | PF_X;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1697  		phdr->p_align = 0;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1698  
3952105df4723a Sibi Sankar          2018-10-17  1699  		if (segment->dump) {
3952105df4723a Sibi Sankar          2018-10-17  1700  			segment->dump(rproc, segment, data + offset);
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1701  
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1702  		offset += phdr->p_filesz;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1703  		phdr++;
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1704  	}
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1705  	dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1706  			rproc_read_dump, rproc_free_dump);
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1707  
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1708  	wait_for_completion(&rproc->dump_done);
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1709  }
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1710  
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1711  /**
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1712   * rproc_trigger_recovery() - recover a remoteproc
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1713   * @rproc: the remote processor
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1714   *
56324d7a229486 Anna, Suman          2016-08-12  1715   * The recovery is done by resetting all the virtio devices, that way all the
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1716   * rpmsg drivers will be reseted along with the remote processor making the
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1717   * remoteproc functional again.
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1718   *
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1719   * This function can sleep, so it cannot be called from atomic context.
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1720   */
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30 @1721  int rproc_trigger_recovery(struct rproc *rproc)
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1722  {
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1723  	const struct firmware *firmware_p;
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1724  	struct device *dev = &rproc->dev;
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1725  	int ret;
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1726  
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1727  	dev_err(dev, "recovering %s\n", rproc->name);
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1728  
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1729  	ret = mutex_lock_interruptible(&rproc->lock);
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1730  	if (ret)
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1731  		return ret;
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1732  
fcd58037f28bf7 Arnaud Pouliquen     2018-04-10  1733  	ret = rproc_stop(rproc, true);
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1734  	if (ret)
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1735  		goto unlock_mutex;
ddf711872c9d2b Bjorn Andersson      2016-08-11  1736  
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1737  	/* generate coredump */
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1738  	rproc_coredump(rproc);
1aef2742e1f04a Rishabh Bhatnagar    2020-03-27  1739  	reinit_completion(&rproc->dump_done);
2666ca9197e3d3 Sarangdhar Joshi     2018-01-05  1740  
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1741  	/* load firmware */
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1742  	ret = request_firmware(&firmware_p, rproc->firmware, dev);
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1743  	if (ret < 0) {
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1744  		dev_err(dev, "request_firmware failed: %d\n", ret);
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1745  		goto unlock_mutex;
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1746  	}
ddf711872c9d2b Bjorn Andersson      2016-08-11  1747  
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1748  	/* boot the remote processor up again */
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1749  	ret = rproc_start(rproc, firmware_p);
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1750  
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1751  	release_firmware(firmware_p);
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1752  
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1753  unlock_mutex:
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1754  	mutex_unlock(&rproc->lock);
7e83cab824a867 Sarangdhar Joshi     2017-05-26  1755  	return ret;
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1756  }
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1757  
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1758  /**
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1759   * rproc_crash_handler_work() - handle a crash
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1760   *
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1761   * This function needs to handle everything related to a crash, like cpu
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1762   * registers and stack dump, information to help to debug the fatal error, etc.
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1763   */
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30 @1764  static void rproc_crash_handler_work(struct work_struct *work)
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1765  {
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1766  	struct rproc *rproc = container_of(work, struct rproc, crash_handler);
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1767  	struct device *dev = &rproc->dev;
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1768  
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1769  	dev_dbg(dev, "enter %s\n", __func__);
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1770  
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1771  	mutex_lock(&rproc->lock);
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1772  
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1773  	if (rproc->state == RPROC_CRASHED || rproc->state == RPROC_OFFLINE) {
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1774  		/* handle only the first crash detected */
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1775  		mutex_unlock(&rproc->lock);
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1776  		return;
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1777  	}
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1778  
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1779  	rproc->state = RPROC_CRASHED;
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1780  	dev_err(dev, "handling crash #%u in %s\n", ++rproc->crash_cnt,
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1781  		rproc->name);
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1782  
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1783  	mutex_unlock(&rproc->lock);
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1784  
2e37abb89a2ef1 Fernando Guzman Lugo 2012-09-18  1785  	if (!rproc->recovery_disabled)
70b85ef83ce352 Fernando Guzman Lugo 2012-08-30  1786  		rproc_trigger_recovery(rproc);
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1787  }
8afd519c3470f6 Fernando Guzman Lugo 2012-08-30  1788  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1789  /**
1b0ef9068f053d Suman Anna           2017-07-20  1790   * rproc_boot() - boot a remote processor
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1791   * @rproc: handle of a remote processor
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1792   *
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1793   * Boot a remote processor (i.e. load its firmware, power it on, ...).
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1794   *
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1795   * If the remote processor is already powered on, this function immediately
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1796   * returns (successfully).
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1797   *
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1798   * Returns 0 on success, and an appropriate error value otherwise.
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1799   */
1b0ef9068f053d Suman Anna           2017-07-20  1800  int rproc_boot(struct rproc *rproc)
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1801  {
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1802  	const struct firmware *firmware_p;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1803  	struct device *dev;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1804  	int ret;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1805  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1806  	if (!rproc) {
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1807  		pr_err("invalid rproc handle\n");
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1808  		return -EINVAL;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1809  	}
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1810  
b5ab5e24e960b9 Ohad Ben-Cohen       2012-05-30  1811  	dev = &rproc->dev;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1812  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1813  	ret = mutex_lock_interruptible(&rproc->lock);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1814  	if (ret) {
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1815  		dev_err(dev, "can't lock rproc %s: %d\n", rproc->name, ret);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1816  		return ret;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1817  	}
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1818  
2099c77d4af7d1 Sarangdhar Joshi     2017-01-23  1819  	if (rproc->state == RPROC_DELETED) {
2099c77d4af7d1 Sarangdhar Joshi     2017-01-23  1820  		ret = -ENODEV;
2099c77d4af7d1 Sarangdhar Joshi     2017-01-23  1821  		dev_err(dev, "can't boot deleted rproc %s\n", rproc->name);
2099c77d4af7d1 Sarangdhar Joshi     2017-01-23  1822  		goto unlock_mutex;
2099c77d4af7d1 Sarangdhar Joshi     2017-01-23  1823  	}
2099c77d4af7d1 Sarangdhar Joshi     2017-01-23  1824  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1825  	/* skip the boot process if rproc is already powered up */
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1826  	if (atomic_inc_return(&rproc->power) > 1) {
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1827  		ret = 0;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1828  		goto unlock_mutex;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1829  	}
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1830  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1831  	dev_info(dev, "powering up %s\n", rproc->name);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1832  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1833  	/* load firmware */
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1834  	ret = request_firmware(&firmware_p, rproc->firmware, dev);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1835  	if (ret < 0) {
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1836  		dev_err(dev, "request_firmware failed: %d\n", ret);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1837  		goto downref_rproc;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1838  	}
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1839  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1840  	ret = rproc_fw_boot(rproc, firmware_p);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1841  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1842  	release_firmware(firmware_p);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1843  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1844  downref_rproc:
fbb6aacb078285 Bjorn Andersson      2016-10-02  1845  	if (ret)
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1846  		atomic_dec(&rproc->power);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1847  unlock_mutex:
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1848  	mutex_unlock(&rproc->lock);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1849  	return ret;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1850  }
400e64df6b237e Ohad Ben-Cohen       2011-10-20 @1851  EXPORT_SYMBOL(rproc_boot);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1852  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1853  /**
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1854   * rproc_shutdown() - power off the remote processor
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1855   * @rproc: the remote processor
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1856   *
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1857   * Power off a remote processor (previously booted with rproc_boot()).
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1858   *
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1859   * In case @rproc is still being used by an additional user(s), then
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1860   * this function will just decrement the power refcount and exit,
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1861   * without really powering off the device.
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1862   *
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1863   * Every call to rproc_boot() must (eventually) be accompanied by a call
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1864   * to rproc_shutdown(). Calling rproc_shutdown() redundantly is a bug.
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1865   *
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1866   * Notes:
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1867   * - we're not decrementing the rproc's refcount, only the power refcount.
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1868   *   which means that the @rproc handle stays valid even after rproc_shutdown()
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1869   *   returns, and users can still use it with a subsequent rproc_boot(), if
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1870   *   needed.
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1871   */
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1872  void rproc_shutdown(struct rproc *rproc)
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1873  {
b5ab5e24e960b9 Ohad Ben-Cohen       2012-05-30  1874  	struct device *dev = &rproc->dev;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1875  	int ret;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1876  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1877  	ret = mutex_lock_interruptible(&rproc->lock);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1878  	if (ret) {
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1879  		dev_err(dev, "can't lock rproc %s: %d\n", rproc->name, ret);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1880  		return;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1881  	}
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1882  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1883  	/* if the remote proc is still needed, bail out */
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1884  	if (!atomic_dec_and_test(&rproc->power))
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1885  		goto out;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1886  
fcd58037f28bf7 Arnaud Pouliquen     2018-04-10  1887  	ret = rproc_stop(rproc, false);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1888  	if (ret) {
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1889  		atomic_inc(&rproc->power);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1890  		goto out;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1891  	}
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1892  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1893  	/* clean up all acquired resources */
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1894  	rproc_resource_cleanup(rproc);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1895  
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1896  	rproc_disable_iommu(rproc);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1897  
988d204cdaf604 Bjorn Andersson      2016-08-11  1898  	/* Free the copy of the resource table */
a0c10687ec9506 Bjorn Andersson      2016-12-30  1899  	kfree(rproc->cached_table);
a0c10687ec9506 Bjorn Andersson      2016-12-30  1900  	rproc->cached_table = NULL;
988d204cdaf604 Bjorn Andersson      2016-08-11  1901  	rproc->table_ptr = NULL;
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1902  out:
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1903  	mutex_unlock(&rproc->lock);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1904  }
400e64df6b237e Ohad Ben-Cohen       2011-10-20 @1905  EXPORT_SYMBOL(rproc_shutdown);
400e64df6b237e Ohad Ben-Cohen       2011-10-20  1906  
fec47d863587c2 Dave Gerlach         2015-05-22  1907  /**
fec47d863587c2 Dave Gerlach         2015-05-22  1908   * rproc_get_by_phandle() - find a remote processor by phandle
fec47d863587c2 Dave Gerlach         2015-05-22  1909   * @phandle: phandle to the rproc
fec47d863587c2 Dave Gerlach         2015-05-22  1910   *
fec47d863587c2 Dave Gerlach         2015-05-22  1911   * Finds an rproc handle using the remote processor's phandle, and then
fec47d863587c2 Dave Gerlach         2015-05-22  1912   * return a handle to the rproc.
fec47d863587c2 Dave Gerlach         2015-05-22  1913   *
fec47d863587c2 Dave Gerlach         2015-05-22  1914   * This function increments the remote processor's refcount, so always
fec47d863587c2 Dave Gerlach         2015-05-22  1915   * use rproc_put() to decrement it back once rproc isn't needed anymore.
fec47d863587c2 Dave Gerlach         2015-05-22  1916   *
fec47d863587c2 Dave Gerlach         2015-05-22  1917   * Returns the rproc handle on success, and NULL on failure.
fec47d863587c2 Dave Gerlach         2015-05-22  1918   */
8de3dbd0895beb Ohad Ben-Cohen       2015-06-18  1919  #ifdef CONFIG_OF
fec47d863587c2 Dave Gerlach         2015-05-22  1920  struct rproc *rproc_get_by_phandle(phandle phandle)
fec47d863587c2 Dave Gerlach         2015-05-22  1921  {
fec47d863587c2 Dave Gerlach         2015-05-22  1922  	struct rproc *rproc = NULL, *r;
fec47d863587c2 Dave Gerlach         2015-05-22  1923  	struct device_node *np;
fec47d863587c2 Dave Gerlach         2015-05-22  1924  
fec47d863587c2 Dave Gerlach         2015-05-22  1925  	np = of_find_node_by_phandle(phandle);
fec47d863587c2 Dave Gerlach         2015-05-22  1926  	if (!np)
fec47d863587c2 Dave Gerlach         2015-05-22  1927  		return NULL;
fec47d863587c2 Dave Gerlach         2015-05-22  1928  
fec47d863587c2 Dave Gerlach         2015-05-22  1929  	mutex_lock(&rproc_list_mutex);
fec47d863587c2 Dave Gerlach         2015-05-22  1930  	list_for_each_entry(r, &rproc_list, node) {
fec47d863587c2 Dave Gerlach         2015-05-22  1931  		if (r->dev.parent && r->dev.parent->of_node == np) {
fbb6aacb078285 Bjorn Andersson      2016-10-02  1932  			/* prevent underlying implementation from being removed */
fbb6aacb078285 Bjorn Andersson      2016-10-02  1933  			if (!try_module_get(r->dev.parent->driver->owner)) {
fbb6aacb078285 Bjorn Andersson      2016-10-02  1934  				dev_err(&r->dev, "can't get owner\n");
fbb6aacb078285 Bjorn Andersson      2016-10-02  1935  				break;
fbb6aacb078285 Bjorn Andersson      2016-10-02  1936  			}
fbb6aacb078285 Bjorn Andersson      2016-10-02  1937  
fec47d863587c2 Dave Gerlach         2015-05-22  1938  			rproc = r;
fec47d863587c2 Dave Gerlach         2015-05-22  1939  			get_device(&rproc->dev);
fec47d863587c2 Dave Gerlach         2015-05-22  1940  			break;
fec47d863587c2 Dave Gerlach         2015-05-22  1941  		}
fec47d863587c2 Dave Gerlach         2015-05-22  1942  	}
fec47d863587c2 Dave Gerlach         2015-05-22  1943  	mutex_unlock(&rproc_list_mutex);
fec47d863587c2 Dave Gerlach         2015-05-22  1944  
fec47d863587c2 Dave Gerlach         2015-05-22  1945  	of_node_put(np);
fec47d863587c2 Dave Gerlach         2015-05-22  1946  
fec47d863587c2 Dave Gerlach         2015-05-22  1947  	return rproc;
fec47d863587c2 Dave Gerlach         2015-05-22  1948  }
8de3dbd0895beb Ohad Ben-Cohen       2015-06-18  1949  #else
8de3dbd0895beb Ohad Ben-Cohen       2015-06-18  1950  struct rproc *rproc_get_by_phandle(phandle phandle)
8de3dbd0895beb Ohad Ben-Cohen       2015-06-18  1951  {
8de3dbd0895beb Ohad Ben-Cohen       2015-06-18  1952  	return NULL;
8de3dbd0895beb Ohad Ben-Cohen       2015-06-18  1953  }
8de3dbd0895beb Ohad Ben-Cohen       2015-06-18  1954  #endif
fec47d863587c2 Dave Gerlach         2015-05-22 @1955  EXPORT_SYMBOL(rproc_get_by_phandle);
fec47d863587c2 Dave Gerlach         2015-05-22  1956  

:::::: The code at line 1764 was first introduced by commit
:::::: 8afd519c3470f685f964deebd61aa51d83cde90a remoteproc: add rproc_report_crash function to notify rproc crashes

:::::: TO: Fernando Guzman Lugo <fernando.lugo@ti.com>
:::::: CC: Ohad Ben-Cohen <ohad@wizery.com>

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 30345 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
  2020-03-27 23:56 [PATCH] remoteproc: core: Add a memory efficient coredump function Rishabh Bhatnagar
  2020-03-28  4:06 ` kbuild test robot
@ 2020-03-28  4:49 ` kbuild test robot
  2020-04-01 19:51   ` Bjorn Andersson
  2 siblings, 0 replies; 17+ messages in thread
From: kbuild test robot @ 2020-03-28  4:49 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 10327 bytes --]

Hi Rishabh,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on linux/master v5.6-rc7]
[cannot apply to remoteproc/for-next rpmsg/for-next next-20200327]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Rishabh-Bhatnagar/remoteproc-core-Add-a-memory-efficient-coredump-function/20200328-075826
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 69c5eea3128e775fd3c70ecf0098105d96dee330
config: openrisc-randconfig-a001-20200327 (attached as .config)
compiler: or1k-linux-gcc (GCC) 9.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=9.2.0 make.cross ARCH=openrisc 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from drivers/remoteproc/remoteproc_core.c:19:
   drivers/remoteproc/remoteproc_core.c: In function 'rproc_read_dump':
   include/linux/kernel.h:835:29: warning: comparison of distinct pointer types lacks a cast
     835 |   (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
         |                             ^~
   include/linux/kernel.h:849:4: note: in expansion of macro '__typecheck'
     849 |   (__typecheck(x, y) && __no_side_effects(x, y))
         |    ^~~~~~~~~~~
   include/linux/kernel.h:859:24: note: in expansion of macro '__safe_cmp'
     859 |  __builtin_choose_expr(__safe_cmp(x, y), \
         |                        ^~~~~~~~~~
   include/linux/kernel.h:868:19: note: in expansion of macro '__careful_cmp'
     868 | #define min(x, y) __careful_cmp(x, y, <)
         |                   ^~~~~~~~~~~~~
>> drivers/remoteproc/remoteproc_core.c:1562:15: note: in expansion of macro 'min'
    1562 |   copy_size = min(copy_size, bytes_left);
         |               ^~~
   drivers/remoteproc/remoteproc_core.c: In function 'rproc_coredump':
   drivers/remoteproc/remoteproc_core.c:1721:1: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
    1721 | int rproc_trigger_recovery(struct rproc *rproc)
         | ^~~
   drivers/remoteproc/remoteproc_core.c:1764:13: error: invalid storage class for function 'rproc_crash_handler_work'
    1764 | static void rproc_crash_handler_work(struct work_struct *work)
         |             ^~~~~~~~~~~~~~~~~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
   drivers/remoteproc/remoteproc_core.c:1851:15: error: non-static declaration of 'rproc_boot' follows static declaration
    1851 | EXPORT_SYMBOL(rproc_boot);
         |               ^~~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1851:1: note: in expansion of macro 'EXPORT_SYMBOL'
    1851 | EXPORT_SYMBOL(rproc_boot);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1800:5: note: previous definition of 'rproc_boot' was here
    1800 | int rproc_boot(struct rproc *rproc)
         |     ^~~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
   include/linux/export.h:67:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
      67 |  static const struct kernel_symbol __ksymtab_##sym  \
         |  ^~~~~~
   include/linux/export.h:108:2: note: in expansion of macro '__KSYMTAB_ENTRY'
     108 |  __KSYMTAB_ENTRY(sym, sec)
         |  ^~~~~~~~~~~~~~~
   include/linux/export.h:147:39: note: in expansion of macro '___EXPORT_SYMBOL'
     147 | #define __EXPORT_SYMBOL(sym, sec, ns) ___EXPORT_SYMBOL(sym, sec, ns)
         |                                       ^~~~~~~~~~~~~~~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1851:1: note: in expansion of macro 'EXPORT_SYMBOL'
    1851 | EXPORT_SYMBOL(rproc_boot);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1905:15: error: non-static declaration of 'rproc_shutdown' follows static declaration
    1905 | EXPORT_SYMBOL(rproc_shutdown);
         |               ^~~~~~~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1905:1: note: in expansion of macro 'EXPORT_SYMBOL'
    1905 | EXPORT_SYMBOL(rproc_shutdown);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1872:6: note: previous definition of 'rproc_shutdown' was here
    1872 | void rproc_shutdown(struct rproc *rproc)
         |      ^~~~~~~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from drivers/remoteproc/remoteproc_core.c:19:
   include/linux/export.h:67:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
      67 |  static const struct kernel_symbol __ksymtab_##sym  \
         |  ^~~~~~
   include/linux/export.h:108:2: note: in expansion of macro '__KSYMTAB_ENTRY'
     108 |  __KSYMTAB_ENTRY(sym, sec)
         |  ^~~~~~~~~~~~~~~
   include/linux/export.h:147:39: note: in expansion of macro '___EXPORT_SYMBOL'
     147 | #define __EXPORT_SYMBOL(sym, sec, ns) ___EXPORT_SYMBOL(sym, sec, ns)
         |                                       ^~~~~~~~~~~~~~~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1905:1: note: in expansion of macro 'EXPORT_SYMBOL'
    1905 | EXPORT_SYMBOL(rproc_shutdown);
         | ^~~~~~~~~~~~~
   drivers/remoteproc/remoteproc_core.c:1955:15: error: non-static declaration of 'rproc_get_by_phandle' follows static declaration
    1955 | EXPORT_SYMBOL(rproc_get_by_phandle);
         |               ^~~~~~~~~~~~~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'

vim +/min +1562 drivers/remoteproc/remoteproc_core.c

  1549	
  1550	static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
  1551					void *data, size_t elfcorelen)
  1552	{
  1553		void *device_mem = NULL;
  1554		unsigned long data_left = 0;
  1555		unsigned long bytes_left = count;
  1556		unsigned long addr = 0;
  1557		size_t copy_size = 0;
  1558		struct rproc *rproc = data;
  1559	
  1560		if (offset < elfcorelen) {
  1561			copy_size = elfcorelen - offset;
> 1562			copy_size = min(copy_size, bytes_left);
  1563	
  1564			memcpy(buffer, rproc->elfcore + offset, copy_size);
  1565			offset += copy_size;
  1566			bytes_left -= copy_size;
  1567			buffer += copy_size;
  1568		}
  1569	
  1570		while (bytes_left) {
  1571			addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
  1572					&data_left);
  1573		/* EOF check */
  1574			if (data_left == 0) {
  1575				pr_info("Ramdump complete. %lld bytes read.", offset);
  1576				return 0;
  1577			}
  1578	
  1579			copy_size = min_t(size_t, bytes_left, data_left);
  1580	
  1581			device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
  1582			if (!device_mem) {
  1583				pr_err("Unable to ioremap: addr %lx, size %zd\n",
  1584					 addr, copy_size);
  1585				return -ENOMEM;
  1586			}
  1587			memcpy(buffer, device_mem, copy_size);
  1588	
  1589			offset += copy_size;
  1590			buffer += copy_size;
  1591			bytes_left -= copy_size;
  1592			dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
  1593				copy_size);
  1594		}
  1595	
  1596		return count;
  1597	}
  1598	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 25713 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
  2020-03-27 23:56 [PATCH] remoteproc: core: Add a memory efficient coredump function Rishabh Bhatnagar
@ 2020-04-01 19:51   ` Bjorn Andersson
  2020-03-28  4:49 ` kbuild test robot
  2020-04-01 19:51   ` Bjorn Andersson
  2 siblings, 0 replies; 17+ messages in thread
From: Bjorn Andersson @ 2020-04-01 19:51 UTC (permalink / raw)
  To: Rishabh Bhatnagar
  Cc: linux-kernel, linux-remoteproc, ohad, psodagud, tsoni, sidgup

On Fri 27 Mar 16:56 PDT 2020, Rishabh Bhatnagar wrote:

> The current coredump implementation uses vmalloc area to copy
> all the segments. But this might put a lot of strain on low memory
> targets as the firmware size sometimes is in ten's of MBs.
> The situation becomes worse if there are multiple remote processors
> undergoing recovery at the same time.
> This patch directly copies the device memory to userspace buffer
> and avoids extra memory usage. This requires recovery to be halted
> until data is read by userspace and free function is called.
> 
> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> ---
>  drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
>  include/linux/remoteproc.h           |   4 ++
>  2 files changed, 94 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index 097f33e..2d881e5 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
>  }
>  EXPORT_SYMBOL(rproc_coredump_add_segment);
>  
> +
> +void rproc_free_dump(void *data)

static

> +{
> +	struct rproc *rproc = data;
> +
> +	dev_info(&rproc->dev, "Userspace done reading rproc dump\n");

Please drop the info prints throughout.

> +	complete(&rproc->dump_done);
> +}
> +
> +static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
> +				unsigned long *data_left)

Please rename this rproc_coredump_resolve_segment(), or something along
those lines.

> +{
> +	struct rproc_dump_segment *segment;
> +
> +	list_for_each_entry(segment, segments, node) {
> +		if (user_offset >= segment->size)
> +			user_offset -= segment->size;
> +		else
> +			break;
> +	}
> +
> +	if (&segment->node == segments) {
> +		*data_left = 0;
> +		return 0;
> +	}
> +
> +	*data_left = segment->size - user_offset;
> +
> +	return segment->da + user_offset;
> +}
> +
> +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
> +				void *data, size_t elfcorelen)
> +{
> +	void *device_mem = NULL;
> +	unsigned long data_left = 0;
> +	unsigned long bytes_left = count;
> +	unsigned long addr = 0;
> +	size_t copy_size = 0;
> +	struct rproc *rproc = data;
> +
> +	if (offset < elfcorelen) {
> +		copy_size = elfcorelen - offset;
> +		copy_size = min(copy_size, bytes_left);
> +
> +		memcpy(buffer, rproc->elfcore + offset, copy_size);
> +		offset += copy_size;
> +		bytes_left -= copy_size;
> +		buffer += copy_size;
> +	}
> +
> +	while (bytes_left) {
> +		addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
> +				&data_left);
> +	/* EOF check */

Indentation, and "if no data left" does indicate that this is the end of
the loop already.

> +		if (data_left == 0) {
> +			pr_info("Ramdump complete. %lld bytes read.", offset);
> +			return 0;

You might have copied data to the buffer, so returning 0 here doesn't
seem right. Presumably instead you should break and return offset -
original offset or something like that.

> +		}
> +
> +		copy_size = min_t(size_t, bytes_left, data_left);
> +
> +		device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
> +		if (!device_mem) {
> +			pr_err("Unable to ioremap: addr %lx, size %zd\n",
> +				 addr, copy_size);
> +			return -ENOMEM;
> +		}
> +		memcpy(buffer, device_mem, copy_size);
> +
> +		offset += copy_size;
> +		buffer += copy_size;
> +		bytes_left -= copy_size;
> +		dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
> +			copy_size);
> +	}
> +
> +	return count;

This should be the number of bytes actually returned, so if count is
larger than the sum of the segment sizes this will be wrong.

> +}
> +
>  /**
>   * rproc_coredump_add_custom_segment() - add custom coredump segment
>   * @rproc:	handle of a remote processor
> @@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
>  	struct rproc_dump_segment *segment;
>  	struct elf32_phdr *phdr;
>  	struct elf32_hdr *ehdr;
> -	size_t data_size;
> +	size_t header_size;
>  	size_t offset;
>  	void *data;
> -	void *ptr;
>  	int phnum = 0;
>  
>  	if (list_empty(&rproc->dump_segments))
>  		return;
>  
> -	data_size = sizeof(*ehdr);
> +	header_size = sizeof(*ehdr);
>  	list_for_each_entry(segment, &rproc->dump_segments, node) {
> -		data_size += sizeof(*phdr) + segment->size;
> +		header_size += sizeof(*phdr);
>  
>  		phnum++;
>  	}
>  
> -	data = vmalloc(data_size);
> +	data = vmalloc(header_size);
>  	if (!data)
>  		return;
>  
>  	ehdr = data;
> +	rproc->elfcore = data;

Rather than using a rproc-global variable I would prefer that you create
a new rproc_coredump_state struct that carries the header pointer and
the information needed by the read & free functions.

>  
>  	memset(ehdr, 0, sizeof(*ehdr));
>  	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
> @@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
>  
>  		if (segment->dump) {
>  			segment->dump(rproc, segment, data + offset);
> -		} else {
> -			ptr = rproc_da_to_va(rproc, segment->da, segment->size);
> -			if (!ptr) {
> -				dev_err(&rproc->dev,
> -					"invalid coredump segment (%pad, %zu)\n",
> -					&segment->da, segment->size);
> -				memset(data + offset, 0xff, segment->size);
> -			} else {
> -				memcpy(data + offset, ptr, segment->size);
> -			}
> -		}
>  
>  		offset += phdr->p_filesz;
>  		phdr++;
>  	}
> +	dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
> +			rproc_read_dump, rproc_free_dump);
>  
> -	dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
> +	wait_for_completion(&rproc->dump_done);

This will mean that recovery handling will break on installations that
doesn't have your ramdump collector - as it will just sit here forever
(5 minutes) waiting for userspace to do its job.

I think we need to device a new sysfs attribute, through which you can
enable the "inline" coredump mechanism. That way recovery would work for
all systems and in your specific case you could reconfigure it - perhaps
once the ramdump collector starts.

Regards,
Bjorn

>  }
>  
>  /**
> @@ -1665,6 +1736,7 @@ int rproc_trigger_recovery(struct rproc *rproc)
>  
>  	/* generate coredump */
>  	rproc_coredump(rproc);
> +	reinit_completion(&rproc->dump_done);
>  
>  	/* load firmware */
>  	ret = request_firmware(&firmware_p, rproc->firmware, dev);
> @@ -2067,6 +2139,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
>  	INIT_LIST_HEAD(&rproc->rvdevs);
>  	INIT_LIST_HEAD(&rproc->subdevs);
>  	INIT_LIST_HEAD(&rproc->dump_segments);
> +	init_completion(&rproc->dump_done);
>  
>  	INIT_WORK(&rproc->crash_handler, rproc_crash_handler_work);
>  
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index 16ad666..461b235 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -481,6 +481,8 @@ struct rproc_dump_segment {
>   * @auto_boot: flag to indicate if remote processor should be auto-started
>   * @dump_segments: list of segments in the firmware
>   * @nb_vdev: number of vdev currently handled by rproc
> + * @dump_done: completion variable when dump is complete
> + * @elfcore: pointer to elf header buffer
>   */
>  struct rproc {
>  	struct list_head node;
> @@ -514,6 +516,8 @@ struct rproc {
>  	bool auto_boot;
>  	struct list_head dump_segments;
>  	int nb_vdev;
> +	struct completion dump_done;
> +	void *elfcore;
>  };
>  
>  /**
> -- 
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
@ 2020-04-01 19:51   ` Bjorn Andersson
  0 siblings, 0 replies; 17+ messages in thread
From: Bjorn Andersson @ 2020-04-01 19:51 UTC (permalink / raw)
  To: Rishabh Bhatnagar
  Cc: linux-kernel, linux-remoteproc, ohad, psodagud, tsoni, sidgup

On Fri 27 Mar 16:56 PDT 2020, Rishabh Bhatnagar wrote:

> The current coredump implementation uses vmalloc area to copy
> all the segments. But this might put a lot of strain on low memory
> targets as the firmware size sometimes is in ten's of MBs.
> The situation becomes worse if there are multiple remote processors
> undergoing recovery at the same time.
> This patch directly copies the device memory to userspace buffer
> and avoids extra memory usage. This requires recovery to be halted
> until data is read by userspace and free function is called.
> 
> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> ---
>  drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
>  include/linux/remoteproc.h           |   4 ++
>  2 files changed, 94 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index 097f33e..2d881e5 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
>  }
>  EXPORT_SYMBOL(rproc_coredump_add_segment);
>  
> +
> +void rproc_free_dump(void *data)

static

> +{
> +	struct rproc *rproc = data;
> +
> +	dev_info(&rproc->dev, "Userspace done reading rproc dump\n");

Please drop the info prints throughout.

> +	complete(&rproc->dump_done);
> +}
> +
> +static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
> +				unsigned long *data_left)

Please rename this rproc_coredump_resolve_segment(), or something along
those lines.

> +{
> +	struct rproc_dump_segment *segment;
> +
> +	list_for_each_entry(segment, segments, node) {
> +		if (user_offset >= segment->size)
> +			user_offset -= segment->size;
> +		else
> +			break;
> +	}
> +
> +	if (&segment->node == segments) {
> +		*data_left = 0;
> +		return 0;
> +	}
> +
> +	*data_left = segment->size - user_offset;
> +
> +	return segment->da + user_offset;
> +}
> +
> +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
> +				void *data, size_t elfcorelen)
> +{
> +	void *device_mem = NULL;
> +	unsigned long data_left = 0;
> +	unsigned long bytes_left = count;
> +	unsigned long addr = 0;
> +	size_t copy_size = 0;
> +	struct rproc *rproc = data;
> +
> +	if (offset < elfcorelen) {
> +		copy_size = elfcorelen - offset;
> +		copy_size = min(copy_size, bytes_left);
> +
> +		memcpy(buffer, rproc->elfcore + offset, copy_size);
> +		offset += copy_size;
> +		bytes_left -= copy_size;
> +		buffer += copy_size;
> +	}
> +
> +	while (bytes_left) {
> +		addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
> +				&data_left);
> +	/* EOF check */

Indentation, and "if no data left" does indicate that this is the end of
the loop already.

> +		if (data_left == 0) {
> +			pr_info("Ramdump complete. %lld bytes read.", offset);
> +			return 0;

You might have copied data to the buffer, so returning 0 here doesn't
seem right. Presumably instead you should break and return offset -
original offset or something like that.

> +		}
> +
> +		copy_size = min_t(size_t, bytes_left, data_left);
> +
> +		device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
> +		if (!device_mem) {
> +			pr_err("Unable to ioremap: addr %lx, size %zd\n",
> +				 addr, copy_size);
> +			return -ENOMEM;
> +		}
> +		memcpy(buffer, device_mem, copy_size);
> +
> +		offset += copy_size;
> +		buffer += copy_size;
> +		bytes_left -= copy_size;
> +		dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
> +			copy_size);
> +	}
> +
> +	return count;

This should be the number of bytes actually returned, so if count is
larger than the sum of the segment sizes this will be wrong.

> +}
> +
>  /**
>   * rproc_coredump_add_custom_segment() - add custom coredump segment
>   * @rproc:	handle of a remote processor
> @@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
>  	struct rproc_dump_segment *segment;
>  	struct elf32_phdr *phdr;
>  	struct elf32_hdr *ehdr;
> -	size_t data_size;
> +	size_t header_size;
>  	size_t offset;
>  	void *data;
> -	void *ptr;
>  	int phnum = 0;
>  
>  	if (list_empty(&rproc->dump_segments))
>  		return;
>  
> -	data_size = sizeof(*ehdr);
> +	header_size = sizeof(*ehdr);
>  	list_for_each_entry(segment, &rproc->dump_segments, node) {
> -		data_size += sizeof(*phdr) + segment->size;
> +		header_size += sizeof(*phdr);
>  
>  		phnum++;
>  	}
>  
> -	data = vmalloc(data_size);
> +	data = vmalloc(header_size);
>  	if (!data)
>  		return;
>  
>  	ehdr = data;
> +	rproc->elfcore = data;

Rather than using a rproc-global variable I would prefer that you create
a new rproc_coredump_state struct that carries the header pointer and
the information needed by the read & free functions.

>  
>  	memset(ehdr, 0, sizeof(*ehdr));
>  	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
> @@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
>  
>  		if (segment->dump) {
>  			segment->dump(rproc, segment, data + offset);
> -		} else {
> -			ptr = rproc_da_to_va(rproc, segment->da, segment->size);
> -			if (!ptr) {
> -				dev_err(&rproc->dev,
> -					"invalid coredump segment (%pad, %zu)\n",
> -					&segment->da, segment->size);
> -				memset(data + offset, 0xff, segment->size);
> -			} else {
> -				memcpy(data + offset, ptr, segment->size);
> -			}
> -		}
>  
>  		offset += phdr->p_filesz;
>  		phdr++;
>  	}
> +	dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
> +			rproc_read_dump, rproc_free_dump);
>  
> -	dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
> +	wait_for_completion(&rproc->dump_done);

This will mean that recovery handling will break on installations that
doesn't have your ramdump collector - as it will just sit here forever
(5 minutes) waiting for userspace to do its job.

I think we need to device a new sysfs attribute, through which you can
enable the "inline" coredump mechanism. That way recovery would work for
all systems and in your specific case you could reconfigure it - perhaps
once the ramdump collector starts.

Regards,
Bjorn

>  }
>  
>  /**
> @@ -1665,6 +1736,7 @@ int rproc_trigger_recovery(struct rproc *rproc)
>  
>  	/* generate coredump */
>  	rproc_coredump(rproc);
> +	reinit_completion(&rproc->dump_done);
>  
>  	/* load firmware */
>  	ret = request_firmware(&firmware_p, rproc->firmware, dev);
> @@ -2067,6 +2139,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
>  	INIT_LIST_HEAD(&rproc->rvdevs);
>  	INIT_LIST_HEAD(&rproc->subdevs);
>  	INIT_LIST_HEAD(&rproc->dump_segments);
> +	init_completion(&rproc->dump_done);
>  
>  	INIT_WORK(&rproc->crash_handler, rproc_crash_handler_work);
>  
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index 16ad666..461b235 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -481,6 +481,8 @@ struct rproc_dump_segment {
>   * @auto_boot: flag to indicate if remote processor should be auto-started
>   * @dump_segments: list of segments in the firmware
>   * @nb_vdev: number of vdev currently handled by rproc
> + * @dump_done: completion variable when dump is complete
> + * @elfcore: pointer to elf header buffer
>   */
>  struct rproc {
>  	struct list_head node;
> @@ -514,6 +516,8 @@ struct rproc {
>  	bool auto_boot;
>  	struct list_head dump_segments;
>  	int nb_vdev;
> +	struct completion dump_done;
> +	void *elfcore;
>  };
>  
>  /**
> -- 
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
  2020-04-01 19:51   ` Bjorn Andersson
  (?)
@ 2020-04-02 17:24   ` Mathieu Poirier
  2020-04-03  5:16       ` Bjorn Andersson
  -1 siblings, 1 reply; 17+ messages in thread
From: Mathieu Poirier @ 2020-04-02 17:24 UTC (permalink / raw)
  To: Bjorn Andersson
  Cc: Rishabh Bhatnagar, linux-kernel, linux-remoteproc, ohad,
	psodagud, tsoni, sidgup

On Wed, Apr 01, 2020 at 12:51:14PM -0700, Bjorn Andersson wrote:
> On Fri 27 Mar 16:56 PDT 2020, Rishabh Bhatnagar wrote:
> 
> > The current coredump implementation uses vmalloc area to copy
> > all the segments. But this might put a lot of strain on low memory
> > targets as the firmware size sometimes is in ten's of MBs.
> > The situation becomes worse if there are multiple remote processors
> > undergoing recovery at the same time.
> > This patch directly copies the device memory to userspace buffer
> > and avoids extra memory usage. This requires recovery to be halted
> > until data is read by userspace and free function is called.
> > 
> > Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> > ---
> >  drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
> >  include/linux/remoteproc.h           |   4 ++
> >  2 files changed, 94 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> > index 097f33e..2d881e5 100644
> > --- a/drivers/remoteproc/remoteproc_core.c
> > +++ b/drivers/remoteproc/remoteproc_core.c
> > @@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
> >  }
> >  EXPORT_SYMBOL(rproc_coredump_add_segment);
> >  
> > +
> > +void rproc_free_dump(void *data)
> 
> static
> 
> > +{
> > +	struct rproc *rproc = data;
> > +
> > +	dev_info(&rproc->dev, "Userspace done reading rproc dump\n");
> 
> Please drop the info prints throughout.
> 
> > +	complete(&rproc->dump_done);
> > +}
> > +
> > +static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
> > +				unsigned long *data_left)
> 
> Please rename this rproc_coredump_resolve_segment(), or something along
> those lines.
> 
> > +{
> > +	struct rproc_dump_segment *segment;
> > +
> > +	list_for_each_entry(segment, segments, node) {
> > +		if (user_offset >= segment->size)
> > +			user_offset -= segment->size;
> > +		else
> > +			break;
> > +	}
> > +
> > +	if (&segment->node == segments) {
> > +		*data_left = 0;
> > +		return 0;
> > +	}
> > +
> > +	*data_left = segment->size - user_offset;
> > +
> > +	return segment->da + user_offset;
> > +}
> > +
> > +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
> > +				void *data, size_t elfcorelen)
> > +{
> > +	void *device_mem = NULL;
> > +	unsigned long data_left = 0;
> > +	unsigned long bytes_left = count;
> > +	unsigned long addr = 0;
> > +	size_t copy_size = 0;
> > +	struct rproc *rproc = data;
> > +
> > +	if (offset < elfcorelen) {
> > +		copy_size = elfcorelen - offset;
> > +		copy_size = min(copy_size, bytes_left);
> > +
> > +		memcpy(buffer, rproc->elfcore + offset, copy_size);
> > +		offset += copy_size;
> > +		bytes_left -= copy_size;
> > +		buffer += copy_size;
> > +	}
> > +
> > +	while (bytes_left) {
> > +		addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
> > +				&data_left);
> > +	/* EOF check */
> 
> Indentation, and "if no data left" does indicate that this is the end of
> the loop already.
> 
> > +		if (data_left == 0) {
> > +			pr_info("Ramdump complete. %lld bytes read.", offset);
> > +			return 0;
> 
> You might have copied data to the buffer, so returning 0 here doesn't
> seem right. Presumably instead you should break and return offset -
> original offset or something like that.
> 
> > +		}
> > +
> > +		copy_size = min_t(size_t, bytes_left, data_left);
> > +
> > +		device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
> > +		if (!device_mem) {
> > +			pr_err("Unable to ioremap: addr %lx, size %zd\n",
> > +				 addr, copy_size);
> > +			return -ENOMEM;
> > +		}
> > +		memcpy(buffer, device_mem, copy_size);
> > +
> > +		offset += copy_size;
> > +		buffer += copy_size;
> > +		bytes_left -= copy_size;
> > +		dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
> > +			copy_size);
> > +	}
> > +
> > +	return count;
> 
> This should be the number of bytes actually returned, so if count is
> larger than the sum of the segment sizes this will be wrong.
> 
> > +}
> > +
> >  /**
> >   * rproc_coredump_add_custom_segment() - add custom coredump segment
> >   * @rproc:	handle of a remote processor
> > @@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
> >  	struct rproc_dump_segment *segment;
> >  	struct elf32_phdr *phdr;
> >  	struct elf32_hdr *ehdr;
> > -	size_t data_size;
> > +	size_t header_size;
> >  	size_t offset;
> >  	void *data;
> > -	void *ptr;
> >  	int phnum = 0;
> >  
> >  	if (list_empty(&rproc->dump_segments))
> >  		return;
> >  
> > -	data_size = sizeof(*ehdr);
> > +	header_size = sizeof(*ehdr);
> >  	list_for_each_entry(segment, &rproc->dump_segments, node) {
> > -		data_size += sizeof(*phdr) + segment->size;
> > +		header_size += sizeof(*phdr);
> >  
> >  		phnum++;
> >  	}
> >  
> > -	data = vmalloc(data_size);
> > +	data = vmalloc(header_size);
> >  	if (!data)
> >  		return;
> >  
> >  	ehdr = data;
> > +	rproc->elfcore = data;
> 
> Rather than using a rproc-global variable I would prefer that you create
> a new rproc_coredump_state struct that carries the header pointer and
> the information needed by the read & free functions.
> 
> >  
> >  	memset(ehdr, 0, sizeof(*ehdr));
> >  	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
> > @@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
> >  
> >  		if (segment->dump) {
> >  			segment->dump(rproc, segment, data + offset);

I'm not exactly sure why custom segments can be copied to the elf image but not
generic ones... And as far as I can tell accessing "data + offset" will blow up
because only the memory for the program headers has been allocated, not for the
program segments. 


> > -		} else {
> > -			ptr = rproc_da_to_va(rproc, segment->da, segment->size);
> > -			if (!ptr) {
> > -				dev_err(&rproc->dev,
> > -					"invalid coredump segment (%pad, %zu)\n",
> > -					&segment->da, segment->size);
> > -				memset(data + offset, 0xff, segment->size);
> > -			} else {
> > -				memcpy(data + offset, ptr, segment->size);
> > -			}
> > -		}
> >  
> >  		offset += phdr->p_filesz;
> >  		phdr++;
> >  	}
> > +	dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
> > +			rproc_read_dump, rproc_free_dump);
> >  
> > -	dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
> > +	wait_for_completion(&rproc->dump_done);
> 
> This will mean that recovery handling will break on installations that
> doesn't have your ramdump collector - as it will just sit here forever
> (5 minutes) waiting for userspace to do its job.

Right, that problem also came to mind.

> 
> I think we need to device a new sysfs attribute, through which you can
> enable the "inline" coredump mechanism. That way recovery would work for
> all systems and in your specific case you could reconfigure it - perhaps
> once the ramdump collector starts.

Another option is to make rproc_coredump() customizable, as with all the other
functions in remoteproc_internal.h.  That way the current rproc_coredump() is
kept intact and we don't need a new sysfs entry.

Thanks,
Mathieu

> 
> Regards,
> Bjorn
> 
> >  }
> >  
> >  /**
> > @@ -1665,6 +1736,7 @@ int rproc_trigger_recovery(struct rproc *rproc)
> >  
> >  	/* generate coredump */
> >  	rproc_coredump(rproc);
> > +	reinit_completion(&rproc->dump_done);
> >  
> >  	/* load firmware */
> >  	ret = request_firmware(&firmware_p, rproc->firmware, dev);
> > @@ -2067,6 +2139,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
> >  	INIT_LIST_HEAD(&rproc->rvdevs);
> >  	INIT_LIST_HEAD(&rproc->subdevs);
> >  	INIT_LIST_HEAD(&rproc->dump_segments);
> > +	init_completion(&rproc->dump_done);
> >  
> >  	INIT_WORK(&rproc->crash_handler, rproc_crash_handler_work);
> >  
> > diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> > index 16ad666..461b235 100644
> > --- a/include/linux/remoteproc.h
> > +++ b/include/linux/remoteproc.h
> > @@ -481,6 +481,8 @@ struct rproc_dump_segment {
> >   * @auto_boot: flag to indicate if remote processor should be auto-started
> >   * @dump_segments: list of segments in the firmware
> >   * @nb_vdev: number of vdev currently handled by rproc
> > + * @dump_done: completion variable when dump is complete
> > + * @elfcore: pointer to elf header buffer
> >   */
> >  struct rproc {
> >  	struct list_head node;
> > @@ -514,6 +516,8 @@ struct rproc {
> >  	bool auto_boot;
> >  	struct list_head dump_segments;
> >  	int nb_vdev;
> > +	struct completion dump_done;
> > +	void *elfcore;
> >  };
> >  
> >  /**
> > -- 
> > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> > a Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
  2020-04-02 17:24   ` Mathieu Poirier
@ 2020-04-03  5:16       ` Bjorn Andersson
  0 siblings, 0 replies; 17+ messages in thread
From: Bjorn Andersson @ 2020-04-03  5:16 UTC (permalink / raw)
  To: Mathieu Poirier
  Cc: Rishabh Bhatnagar, linux-kernel, linux-remoteproc, ohad,
	psodagud, tsoni, sidgup

On Thu 02 Apr 10:24 PDT 2020, Mathieu Poirier wrote:

> On Wed, Apr 01, 2020 at 12:51:14PM -0700, Bjorn Andersson wrote:
> > On Fri 27 Mar 16:56 PDT 2020, Rishabh Bhatnagar wrote:
> > 
> > > The current coredump implementation uses vmalloc area to copy
> > > all the segments. But this might put a lot of strain on low memory
> > > targets as the firmware size sometimes is in ten's of MBs.
> > > The situation becomes worse if there are multiple remote processors
> > > undergoing recovery at the same time.
> > > This patch directly copies the device memory to userspace buffer
> > > and avoids extra memory usage. This requires recovery to be halted
> > > until data is read by userspace and free function is called.
> > > 
> > > Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> > > ---
> > >  drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
> > >  include/linux/remoteproc.h           |   4 ++
> > >  2 files changed, 94 insertions(+), 17 deletions(-)
> > > 
> > > diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> > > index 097f33e..2d881e5 100644
> > > --- a/drivers/remoteproc/remoteproc_core.c
> > > +++ b/drivers/remoteproc/remoteproc_core.c
> > > @@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
> > >  }
> > >  EXPORT_SYMBOL(rproc_coredump_add_segment);
> > >  
> > > +
> > > +void rproc_free_dump(void *data)
> > 
> > static
> > 
> > > +{
> > > +	struct rproc *rproc = data;
> > > +
> > > +	dev_info(&rproc->dev, "Userspace done reading rproc dump\n");
> > 
> > Please drop the info prints throughout.
> > 
> > > +	complete(&rproc->dump_done);
> > > +}
> > > +
> > > +static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
> > > +				unsigned long *data_left)
> > 
> > Please rename this rproc_coredump_resolve_segment(), or something along
> > those lines.
> > 
> > > +{
> > > +	struct rproc_dump_segment *segment;
> > > +
> > > +	list_for_each_entry(segment, segments, node) {
> > > +		if (user_offset >= segment->size)
> > > +			user_offset -= segment->size;
> > > +		else
> > > +			break;
> > > +	}
> > > +
> > > +	if (&segment->node == segments) {
> > > +		*data_left = 0;
> > > +		return 0;
> > > +	}
> > > +
> > > +	*data_left = segment->size - user_offset;
> > > +
> > > +	return segment->da + user_offset;
> > > +}
> > > +
> > > +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
> > > +				void *data, size_t elfcorelen)
> > > +{
> > > +	void *device_mem = NULL;
> > > +	unsigned long data_left = 0;
> > > +	unsigned long bytes_left = count;
> > > +	unsigned long addr = 0;
> > > +	size_t copy_size = 0;
> > > +	struct rproc *rproc = data;
> > > +
> > > +	if (offset < elfcorelen) {
> > > +		copy_size = elfcorelen - offset;
> > > +		copy_size = min(copy_size, bytes_left);
> > > +
> > > +		memcpy(buffer, rproc->elfcore + offset, copy_size);
> > > +		offset += copy_size;
> > > +		bytes_left -= copy_size;
> > > +		buffer += copy_size;
> > > +	}
> > > +
> > > +	while (bytes_left) {
> > > +		addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
> > > +				&data_left);
> > > +	/* EOF check */
> > 
> > Indentation, and "if no data left" does indicate that this is the end of
> > the loop already.
> > 
> > > +		if (data_left == 0) {
> > > +			pr_info("Ramdump complete. %lld bytes read.", offset);
> > > +			return 0;
> > 
> > You might have copied data to the buffer, so returning 0 here doesn't
> > seem right. Presumably instead you should break and return offset -
> > original offset or something like that.
> > 
> > > +		}
> > > +
> > > +		copy_size = min_t(size_t, bytes_left, data_left);
> > > +
> > > +		device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
> > > +		if (!device_mem) {
> > > +			pr_err("Unable to ioremap: addr %lx, size %zd\n",
> > > +				 addr, copy_size);
> > > +			return -ENOMEM;
> > > +		}
> > > +		memcpy(buffer, device_mem, copy_size);
> > > +
> > > +		offset += copy_size;
> > > +		buffer += copy_size;
> > > +		bytes_left -= copy_size;
> > > +		dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
> > > +			copy_size);
> > > +	}
> > > +
> > > +	return count;
> > 
> > This should be the number of bytes actually returned, so if count is
> > larger than the sum of the segment sizes this will be wrong.
> > 
> > > +}
> > > +
> > >  /**
> > >   * rproc_coredump_add_custom_segment() - add custom coredump segment
> > >   * @rproc:	handle of a remote processor
> > > @@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
> > >  	struct rproc_dump_segment *segment;
> > >  	struct elf32_phdr *phdr;
> > >  	struct elf32_hdr *ehdr;
> > > -	size_t data_size;
> > > +	size_t header_size;
> > >  	size_t offset;
> > >  	void *data;
> > > -	void *ptr;
> > >  	int phnum = 0;
> > >  
> > >  	if (list_empty(&rproc->dump_segments))
> > >  		return;
> > >  
> > > -	data_size = sizeof(*ehdr);
> > > +	header_size = sizeof(*ehdr);
> > >  	list_for_each_entry(segment, &rproc->dump_segments, node) {
> > > -		data_size += sizeof(*phdr) + segment->size;
> > > +		header_size += sizeof(*phdr);
> > >  
> > >  		phnum++;
> > >  	}
> > >  
> > > -	data = vmalloc(data_size);
> > > +	data = vmalloc(header_size);
> > >  	if (!data)
> > >  		return;
> > >  
> > >  	ehdr = data;
> > > +	rproc->elfcore = data;
> > 
> > Rather than using a rproc-global variable I would prefer that you create
> > a new rproc_coredump_state struct that carries the header pointer and
> > the information needed by the read & free functions.
> > 
> > >  
> > >  	memset(ehdr, 0, sizeof(*ehdr));
> > >  	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
> > > @@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
> > >  
> > >  		if (segment->dump) {
> > >  			segment->dump(rproc, segment, data + offset);
> 
> I'm not exactly sure why custom segments can be copied to the elf image but not
> generic ones... And as far as I can tell accessing "data + offset" will blow up
> because only the memory for the program headers has been allocated, not for the
> program segments. 
> 

Thanks, I missed that, but you're correct.

> 
> > > -		} else {
> > > -			ptr = rproc_da_to_va(rproc, segment->da, segment->size);
> > > -			if (!ptr) {
> > > -				dev_err(&rproc->dev,
> > > -					"invalid coredump segment (%pad, %zu)\n",
> > > -					&segment->da, segment->size);
> > > -				memset(data + offset, 0xff, segment->size);
> > > -			} else {
> > > -				memcpy(data + offset, ptr, segment->size);
> > > -			}
> > > -		}
> > >  
> > >  		offset += phdr->p_filesz;
> > >  		phdr++;
> > >  	}
> > > +	dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
> > > +			rproc_read_dump, rproc_free_dump);
> > >  
> > > -	dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
> > > +	wait_for_completion(&rproc->dump_done);
> > 
> > This will mean that recovery handling will break on installations that
> > doesn't have your ramdump collector - as it will just sit here forever
> > (5 minutes) waiting for userspace to do its job.
> 
> Right, that problem also came to mind.
> 
> > 
> > I think we need to device a new sysfs attribute, through which you can
> > enable the "inline" coredump mechanism. That way recovery would work for
> > all systems and in your specific case you could reconfigure it - perhaps
> > once the ramdump collector starts.
> 
> Another option is to make rproc_coredump() customizable, as with all the other
> functions in remoteproc_internal.h.  That way the current rproc_coredump() is
> kept intact and we don't need a new sysfs entry.
> 

Rishabh suggested this in a discussion we had earlier this week as well,
but we still have the problem that the same platform driver will need to
support both modes, depending on which user space is running. So even if
we push this out to the platform driver we still need some mechanism
for userspace to enable the "inline" mode.

Regards,
Bjorn

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
@ 2020-04-03  5:16       ` Bjorn Andersson
  0 siblings, 0 replies; 17+ messages in thread
From: Bjorn Andersson @ 2020-04-03  5:16 UTC (permalink / raw)
  To: Mathieu Poirier
  Cc: Rishabh Bhatnagar, linux-kernel, linux-remoteproc, ohad,
	psodagud, tsoni, sidgup

On Thu 02 Apr 10:24 PDT 2020, Mathieu Poirier wrote:

> On Wed, Apr 01, 2020 at 12:51:14PM -0700, Bjorn Andersson wrote:
> > On Fri 27 Mar 16:56 PDT 2020, Rishabh Bhatnagar wrote:
> > 
> > > The current coredump implementation uses vmalloc area to copy
> > > all the segments. But this might put a lot of strain on low memory
> > > targets as the firmware size sometimes is in ten's of MBs.
> > > The situation becomes worse if there are multiple remote processors
> > > undergoing recovery at the same time.
> > > This patch directly copies the device memory to userspace buffer
> > > and avoids extra memory usage. This requires recovery to be halted
> > > until data is read by userspace and free function is called.
> > > 
> > > Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> > > ---
> > >  drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
> > >  include/linux/remoteproc.h           |   4 ++
> > >  2 files changed, 94 insertions(+), 17 deletions(-)
> > > 
> > > diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> > > index 097f33e..2d881e5 100644
> > > --- a/drivers/remoteproc/remoteproc_core.c
> > > +++ b/drivers/remoteproc/remoteproc_core.c
> > > @@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
> > >  }
> > >  EXPORT_SYMBOL(rproc_coredump_add_segment);
> > >  
> > > +
> > > +void rproc_free_dump(void *data)
> > 
> > static
> > 
> > > +{
> > > +	struct rproc *rproc = data;
> > > +
> > > +	dev_info(&rproc->dev, "Userspace done reading rproc dump\n");
> > 
> > Please drop the info prints throughout.
> > 
> > > +	complete(&rproc->dump_done);
> > > +}
> > > +
> > > +static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
> > > +				unsigned long *data_left)
> > 
> > Please rename this rproc_coredump_resolve_segment(), or something along
> > those lines.
> > 
> > > +{
> > > +	struct rproc_dump_segment *segment;
> > > +
> > > +	list_for_each_entry(segment, segments, node) {
> > > +		if (user_offset >= segment->size)
> > > +			user_offset -= segment->size;
> > > +		else
> > > +			break;
> > > +	}
> > > +
> > > +	if (&segment->node == segments) {
> > > +		*data_left = 0;
> > > +		return 0;
> > > +	}
> > > +
> > > +	*data_left = segment->size - user_offset;
> > > +
> > > +	return segment->da + user_offset;
> > > +}
> > > +
> > > +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
> > > +				void *data, size_t elfcorelen)
> > > +{
> > > +	void *device_mem = NULL;
> > > +	unsigned long data_left = 0;
> > > +	unsigned long bytes_left = count;
> > > +	unsigned long addr = 0;
> > > +	size_t copy_size = 0;
> > > +	struct rproc *rproc = data;
> > > +
> > > +	if (offset < elfcorelen) {
> > > +		copy_size = elfcorelen - offset;
> > > +		copy_size = min(copy_size, bytes_left);
> > > +
> > > +		memcpy(buffer, rproc->elfcore + offset, copy_size);
> > > +		offset += copy_size;
> > > +		bytes_left -= copy_size;
> > > +		buffer += copy_size;
> > > +	}
> > > +
> > > +	while (bytes_left) {
> > > +		addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
> > > +				&data_left);
> > > +	/* EOF check */
> > 
> > Indentation, and "if no data left" does indicate that this is the end of
> > the loop already.
> > 
> > > +		if (data_left == 0) {
> > > +			pr_info("Ramdump complete. %lld bytes read.", offset);
> > > +			return 0;
> > 
> > You might have copied data to the buffer, so returning 0 here doesn't
> > seem right. Presumably instead you should break and return offset -
> > original offset or something like that.
> > 
> > > +		}
> > > +
> > > +		copy_size = min_t(size_t, bytes_left, data_left);
> > > +
> > > +		device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
> > > +		if (!device_mem) {
> > > +			pr_err("Unable to ioremap: addr %lx, size %zd\n",
> > > +				 addr, copy_size);
> > > +			return -ENOMEM;
> > > +		}
> > > +		memcpy(buffer, device_mem, copy_size);
> > > +
> > > +		offset += copy_size;
> > > +		buffer += copy_size;
> > > +		bytes_left -= copy_size;
> > > +		dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
> > > +			copy_size);
> > > +	}
> > > +
> > > +	return count;
> > 
> > This should be the number of bytes actually returned, so if count is
> > larger than the sum of the segment sizes this will be wrong.
> > 
> > > +}
> > > +
> > >  /**
> > >   * rproc_coredump_add_custom_segment() - add custom coredump segment
> > >   * @rproc:	handle of a remote processor
> > > @@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
> > >  	struct rproc_dump_segment *segment;
> > >  	struct elf32_phdr *phdr;
> > >  	struct elf32_hdr *ehdr;
> > > -	size_t data_size;
> > > +	size_t header_size;
> > >  	size_t offset;
> > >  	void *data;
> > > -	void *ptr;
> > >  	int phnum = 0;
> > >  
> > >  	if (list_empty(&rproc->dump_segments))
> > >  		return;
> > >  
> > > -	data_size = sizeof(*ehdr);
> > > +	header_size = sizeof(*ehdr);
> > >  	list_for_each_entry(segment, &rproc->dump_segments, node) {
> > > -		data_size += sizeof(*phdr) + segment->size;
> > > +		header_size += sizeof(*phdr);
> > >  
> > >  		phnum++;
> > >  	}
> > >  
> > > -	data = vmalloc(data_size);
> > > +	data = vmalloc(header_size);
> > >  	if (!data)
> > >  		return;
> > >  
> > >  	ehdr = data;
> > > +	rproc->elfcore = data;
> > 
> > Rather than using a rproc-global variable I would prefer that you create
> > a new rproc_coredump_state struct that carries the header pointer and
> > the information needed by the read & free functions.
> > 
> > >  
> > >  	memset(ehdr, 0, sizeof(*ehdr));
> > >  	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
> > > @@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
> > >  
> > >  		if (segment->dump) {
> > >  			segment->dump(rproc, segment, data + offset);
> 
> I'm not exactly sure why custom segments can be copied to the elf image but not
> generic ones... And as far as I can tell accessing "data + offset" will blow up
> because only the memory for the program headers has been allocated, not for the
> program segments. 
> 

Thanks, I missed that, but you're correct.

> 
> > > -		} else {
> > > -			ptr = rproc_da_to_va(rproc, segment->da, segment->size);
> > > -			if (!ptr) {
> > > -				dev_err(&rproc->dev,
> > > -					"invalid coredump segment (%pad, %zu)\n",
> > > -					&segment->da, segment->size);
> > > -				memset(data + offset, 0xff, segment->size);
> > > -			} else {
> > > -				memcpy(data + offset, ptr, segment->size);
> > > -			}
> > > -		}
> > >  
> > >  		offset += phdr->p_filesz;
> > >  		phdr++;
> > >  	}
> > > +	dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
> > > +			rproc_read_dump, rproc_free_dump);
> > >  
> > > -	dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
> > > +	wait_for_completion(&rproc->dump_done);
> > 
> > This will mean that recovery handling will break on installations that
> > doesn't have your ramdump collector - as it will just sit here forever
> > (5 minutes) waiting for userspace to do its job.
> 
> Right, that problem also came to mind.
> 
> > 
> > I think we need to device a new sysfs attribute, through which you can
> > enable the "inline" coredump mechanism. That way recovery would work for
> > all systems and in your specific case you could reconfigure it - perhaps
> > once the ramdump collector starts.
> 
> Another option is to make rproc_coredump() customizable, as with all the other
> functions in remoteproc_internal.h.  That way the current rproc_coredump() is
> kept intact and we don't need a new sysfs entry.
> 

Rishabh suggested this in a discussion we had earlier this week as well,
but we still have the problem that the same platform driver will need to
support both modes, depending on which user space is running. So even if
we push this out to the platform driver we still need some mechanism
for userspace to enable the "inline" mode.

Regards,
Bjorn

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
  2020-04-03  5:16       ` Bjorn Andersson
  (?)
@ 2020-04-03 18:46       ` rishabhb
  -1 siblings, 0 replies; 17+ messages in thread
From: rishabhb @ 2020-04-03 18:46 UTC (permalink / raw)
  To: Bjorn Andersson
  Cc: Mathieu Poirier, linux-kernel, linux-remoteproc, ohad, psodagud,
	tsoni, sidgup

On 2020-04-02 22:16, Bjorn Andersson wrote:
> On Thu 02 Apr 10:24 PDT 2020, Mathieu Poirier wrote:
> 
>> On Wed, Apr 01, 2020 at 12:51:14PM -0700, Bjorn Andersson wrote:
>> > On Fri 27 Mar 16:56 PDT 2020, Rishabh Bhatnagar wrote:
>> >
>> > > The current coredump implementation uses vmalloc area to copy
>> > > all the segments. But this might put a lot of strain on low memory
>> > > targets as the firmware size sometimes is in ten's of MBs.
>> > > The situation becomes worse if there are multiple remote processors
>> > > undergoing recovery at the same time.
>> > > This patch directly copies the device memory to userspace buffer
>> > > and avoids extra memory usage. This requires recovery to be halted
>> > > until data is read by userspace and free function is called.
>> > >
>> > > Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
>> > > ---
>> > >  drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
>> > >  include/linux/remoteproc.h           |   4 ++
>> > >  2 files changed, 94 insertions(+), 17 deletions(-)
>> > >
>> > > diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
>> > > index 097f33e..2d881e5 100644
>> > > --- a/drivers/remoteproc/remoteproc_core.c
>> > > +++ b/drivers/remoteproc/remoteproc_core.c
>> > > @@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
>> > >  }
>> > >  EXPORT_SYMBOL(rproc_coredump_add_segment);
>> > >
>> > > +
>> > > +void rproc_free_dump(void *data)
>> >
>> > static
>> >
>> > > +{
>> > > +	struct rproc *rproc = data;
>> > > +
>> > > +	dev_info(&rproc->dev, "Userspace done reading rproc dump\n");
>> >
>> > Please drop the info prints throughout.
>> >
>> > > +	complete(&rproc->dump_done);
>> > > +}
>> > > +
>> > > +static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
>> > > +				unsigned long *data_left)
>> >
>> > Please rename this rproc_coredump_resolve_segment(), or something along
>> > those lines.
>> >
>> > > +{
>> > > +	struct rproc_dump_segment *segment;
>> > > +
>> > > +	list_for_each_entry(segment, segments, node) {
>> > > +		if (user_offset >= segment->size)
>> > > +			user_offset -= segment->size;
>> > > +		else
>> > > +			break;
>> > > +	}
>> > > +
>> > > +	if (&segment->node == segments) {
>> > > +		*data_left = 0;
>> > > +		return 0;
>> > > +	}
>> > > +
>> > > +	*data_left = segment->size - user_offset;
>> > > +
>> > > +	return segment->da + user_offset;
>> > > +}
>> > > +
>> > > +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
>> > > +				void *data, size_t elfcorelen)
>> > > +{
>> > > +	void *device_mem = NULL;
>> > > +	unsigned long data_left = 0;
>> > > +	unsigned long bytes_left = count;
>> > > +	unsigned long addr = 0;
>> > > +	size_t copy_size = 0;
>> > > +	struct rproc *rproc = data;
>> > > +
>> > > +	if (offset < elfcorelen) {
>> > > +		copy_size = elfcorelen - offset;
>> > > +		copy_size = min(copy_size, bytes_left);
>> > > +
>> > > +		memcpy(buffer, rproc->elfcore + offset, copy_size);
>> > > +		offset += copy_size;
>> > > +		bytes_left -= copy_size;
>> > > +		buffer += copy_size;
>> > > +	}
>> > > +
>> > > +	while (bytes_left) {
>> > > +		addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
>> > > +				&data_left);
>> > > +	/* EOF check */
>> >
>> > Indentation, and "if no data left" does indicate that this is the end of
>> > the loop already.
>> >
>> > > +		if (data_left == 0) {
>> > > +			pr_info("Ramdump complete. %lld bytes read.", offset);
>> > > +			return 0;
>> >
>> > You might have copied data to the buffer, so returning 0 here doesn't
>> > seem right. Presumably instead you should break and return offset -
>> > original offset or something like that.
>> >
>> > > +		}
>> > > +
>> > > +		copy_size = min_t(size_t, bytes_left, data_left);
>> > > +
>> > > +		device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
>> > > +		if (!device_mem) {
>> > > +			pr_err("Unable to ioremap: addr %lx, size %zd\n",
>> > > +				 addr, copy_size);
>> > > +			return -ENOMEM;
>> > > +		}
>> > > +		memcpy(buffer, device_mem, copy_size);
>> > > +
>> > > +		offset += copy_size;
>> > > +		buffer += copy_size;
>> > > +		bytes_left -= copy_size;
>> > > +		dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
>> > > +			copy_size);
>> > > +	}
>> > > +
>> > > +	return count;
>> >
>> > This should be the number of bytes actually returned, so if count is
>> > larger than the sum of the segment sizes this will be wrong.
>> >
>> > > +}
>> > > +
>> > >  /**
>> > >   * rproc_coredump_add_custom_segment() - add custom coredump segment
>> > >   * @rproc:	handle of a remote processor
>> > > @@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
>> > >  	struct rproc_dump_segment *segment;
>> > >  	struct elf32_phdr *phdr;
>> > >  	struct elf32_hdr *ehdr;
>> > > -	size_t data_size;
>> > > +	size_t header_size;
>> > >  	size_t offset;
>> > >  	void *data;
>> > > -	void *ptr;
>> > >  	int phnum = 0;
>> > >
>> > >  	if (list_empty(&rproc->dump_segments))
>> > >  		return;
>> > >
>> > > -	data_size = sizeof(*ehdr);
>> > > +	header_size = sizeof(*ehdr);
>> > >  	list_for_each_entry(segment, &rproc->dump_segments, node) {
>> > > -		data_size += sizeof(*phdr) + segment->size;
>> > > +		header_size += sizeof(*phdr);
>> > >
>> > >  		phnum++;
>> > >  	}
>> > >
>> > > -	data = vmalloc(data_size);
>> > > +	data = vmalloc(header_size);
>> > >  	if (!data)
>> > >  		return;
>> > >
>> > >  	ehdr = data;
>> > > +	rproc->elfcore = data;
>> >
>> > Rather than using a rproc-global variable I would prefer that you create
>> > a new rproc_coredump_state struct that carries the header pointer and
>> > the information needed by the read & free functions.
>> >
>> > >
>> > >  	memset(ehdr, 0, sizeof(*ehdr));
>> > >  	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
>> > > @@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
>> > >
>> > >  		if (segment->dump) {
>> > >  			segment->dump(rproc, segment, data + offset);
>> 
>> I'm not exactly sure why custom segments can be copied to the elf 
>> image but not
>> generic ones... And as far as I can tell accessing "data + offset" 
>> will blow up
>> because only the memory for the program headers has been allocated, 
>> not for the
>> program segments.
>> 
> 
> Thanks, I missed that, but you're correct.
> 
>> 
>> > > -		} else {
>> > > -			ptr = rproc_da_to_va(rproc, segment->da, segment->size);
>> > > -			if (!ptr) {
>> > > -				dev_err(&rproc->dev,
>> > > -					"invalid coredump segment (%pad, %zu)\n",
>> > > -					&segment->da, segment->size);
>> > > -				memset(data + offset, 0xff, segment->size);
>> > > -			} else {
>> > > -				memcpy(data + offset, ptr, segment->size);
>> > > -			}
>> > > -		}
>> > >
>> > >  		offset += phdr->p_filesz;
>> > >  		phdr++;
>> > >  	}
>> > > +	dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
>> > > +			rproc_read_dump, rproc_free_dump);
>> > >
>> > > -	dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
>> > > +	wait_for_completion(&rproc->dump_done);
>> >
>> > This will mean that recovery handling will break on installations that
>> > doesn't have your ramdump collector - as it will just sit here forever
>> > (5 minutes) waiting for userspace to do its job.
>> 
>> Right, that problem also came to mind.
>> 
>> >
>> > I think we need to device a new sysfs attribute, through which you can
>> > enable the "inline" coredump mechanism. That way recovery would work for
>> > all systems and in your specific case you could reconfigure it - perhaps
>> > once the ramdump collector starts.
>> 
>> Another option is to make rproc_coredump() customizable, as with all 
>> the other
>> functions in remoteproc_internal.h.  That way the current 
>> rproc_coredump() is
>> kept intact and we don't need a new sysfs entry.
>> 
> 
> Rishabh suggested this in a discussion we had earlier this week as 
> well,
> but we still have the problem that the same platform driver will need 
> to
> support both modes, depending on which user space is running. So even 
> if
> we push this out to the platform driver we still need some mechanism
> for userspace to enable the "inline" mode.
> 
> Regards,
> Bjorn
I think doing both makes sense. Making it customizable will keep the 
original
function intact and enable platform driver to implement their own 
functionality. It
will default to rproc_coredump. Also adding a sysfs entry that would 
skip the
wait_for_completion if not set so that it doesn't block recovery.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
  2020-04-03  5:16       ` Bjorn Andersson
  (?)
  (?)
@ 2020-04-03 20:53       ` Mathieu Poirier
  2020-04-08 18:03         ` rishabhb
  2020-04-09 20:27             ` Bjorn Andersson
  -1 siblings, 2 replies; 17+ messages in thread
From: Mathieu Poirier @ 2020-04-03 20:53 UTC (permalink / raw)
  To: Bjorn Andersson
  Cc: Rishabh Bhatnagar, Linux Kernel Mailing List, linux-remoteproc,
	Ohad Ben-Cohen, psodagud, tsoni, Siddharth Gupta

On Thu, 2 Apr 2020 at 23:16, Bjorn Andersson <bjorn.andersson@linaro.org> wrote:
>
> On Thu 02 Apr 10:24 PDT 2020, Mathieu Poirier wrote:
>
> > On Wed, Apr 01, 2020 at 12:51:14PM -0700, Bjorn Andersson wrote:
> > > On Fri 27 Mar 16:56 PDT 2020, Rishabh Bhatnagar wrote:
> > >
> > > > The current coredump implementation uses vmalloc area to copy
> > > > all the segments. But this might put a lot of strain on low memory
> > > > targets as the firmware size sometimes is in ten's of MBs.
> > > > The situation becomes worse if there are multiple remote processors
> > > > undergoing recovery at the same time.
> > > > This patch directly copies the device memory to userspace buffer
> > > > and avoids extra memory usage. This requires recovery to be halted
> > > > until data is read by userspace and free function is called.
> > > >
> > > > Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> > > > ---
> > > >  drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
> > > >  include/linux/remoteproc.h           |   4 ++
> > > >  2 files changed, 94 insertions(+), 17 deletions(-)
> > > >
> > > > diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> > > > index 097f33e..2d881e5 100644
> > > > --- a/drivers/remoteproc/remoteproc_core.c
> > > > +++ b/drivers/remoteproc/remoteproc_core.c
> > > > @@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
> > > >  }
> > > >  EXPORT_SYMBOL(rproc_coredump_add_segment);
> > > >
> > > > +
> > > > +void rproc_free_dump(void *data)
> > >
> > > static
> > >
> > > > +{
> > > > + struct rproc *rproc = data;
> > > > +
> > > > + dev_info(&rproc->dev, "Userspace done reading rproc dump\n");
> > >
> > > Please drop the info prints throughout.
> > >
> > > > + complete(&rproc->dump_done);
> > > > +}
> > > > +
> > > > +static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
> > > > +                         unsigned long *data_left)
> > >
> > > Please rename this rproc_coredump_resolve_segment(), or something along
> > > those lines.
> > >
> > > > +{
> > > > + struct rproc_dump_segment *segment;
> > > > +
> > > > + list_for_each_entry(segment, segments, node) {
> > > > +         if (user_offset >= segment->size)
> > > > +                 user_offset -= segment->size;
> > > > +         else
> > > > +                 break;
> > > > + }
> > > > +
> > > > + if (&segment->node == segments) {
> > > > +         *data_left = 0;
> > > > +         return 0;
> > > > + }
> > > > +
> > > > + *data_left = segment->size - user_offset;
> > > > +
> > > > + return segment->da + user_offset;
> > > > +}
> > > > +
> > > > +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
> > > > +                         void *data, size_t elfcorelen)
> > > > +{
> > > > + void *device_mem = NULL;
> > > > + unsigned long data_left = 0;
> > > > + unsigned long bytes_left = count;
> > > > + unsigned long addr = 0;
> > > > + size_t copy_size = 0;
> > > > + struct rproc *rproc = data;
> > > > +
> > > > + if (offset < elfcorelen) {
> > > > +         copy_size = elfcorelen - offset;
> > > > +         copy_size = min(copy_size, bytes_left);
> > > > +
> > > > +         memcpy(buffer, rproc->elfcore + offset, copy_size);
> > > > +         offset += copy_size;
> > > > +         bytes_left -= copy_size;
> > > > +         buffer += copy_size;
> > > > + }
> > > > +
> > > > + while (bytes_left) {
> > > > +         addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
> > > > +                         &data_left);
> > > > + /* EOF check */
> > >
> > > Indentation, and "if no data left" does indicate that this is the end of
> > > the loop already.
> > >
> > > > +         if (data_left == 0) {
> > > > +                 pr_info("Ramdump complete. %lld bytes read.", offset);
> > > > +                 return 0;
> > >
> > > You might have copied data to the buffer, so returning 0 here doesn't
> > > seem right. Presumably instead you should break and return offset -
> > > original offset or something like that.
> > >
> > > > +         }
> > > > +
> > > > +         copy_size = min_t(size_t, bytes_left, data_left);
> > > > +
> > > > +         device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
> > > > +         if (!device_mem) {
> > > > +                 pr_err("Unable to ioremap: addr %lx, size %zd\n",
> > > > +                          addr, copy_size);
> > > > +                 return -ENOMEM;
> > > > +         }
> > > > +         memcpy(buffer, device_mem, copy_size);
> > > > +
> > > > +         offset += copy_size;
> > > > +         buffer += copy_size;
> > > > +         bytes_left -= copy_size;
> > > > +         dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
> > > > +                 copy_size);
> > > > + }
> > > > +
> > > > + return count;
> > >
> > > This should be the number of bytes actually returned, so if count is
> > > larger than the sum of the segment sizes this will be wrong.
> > >
> > > > +}
> > > > +
> > > >  /**
> > > >   * rproc_coredump_add_custom_segment() - add custom coredump segment
> > > >   * @rproc:       handle of a remote processor
> > > > @@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
> > > >   struct rproc_dump_segment *segment;
> > > >   struct elf32_phdr *phdr;
> > > >   struct elf32_hdr *ehdr;
> > > > - size_t data_size;
> > > > + size_t header_size;
> > > >   size_t offset;
> > > >   void *data;
> > > > - void *ptr;
> > > >   int phnum = 0;
> > > >
> > > >   if (list_empty(&rproc->dump_segments))
> > > >           return;
> > > >
> > > > - data_size = sizeof(*ehdr);
> > > > + header_size = sizeof(*ehdr);
> > > >   list_for_each_entry(segment, &rproc->dump_segments, node) {
> > > > -         data_size += sizeof(*phdr) + segment->size;
> > > > +         header_size += sizeof(*phdr);
> > > >
> > > >           phnum++;
> > > >   }
> > > >
> > > > - data = vmalloc(data_size);
> > > > + data = vmalloc(header_size);
> > > >   if (!data)
> > > >           return;
> > > >
> > > >   ehdr = data;
> > > > + rproc->elfcore = data;
> > >
> > > Rather than using a rproc-global variable I would prefer that you create
> > > a new rproc_coredump_state struct that carries the header pointer and
> > > the information needed by the read & free functions.
> > >
> > > >
> > > >   memset(ehdr, 0, sizeof(*ehdr));
> > > >   memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
> > > > @@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
> > > >
> > > >           if (segment->dump) {
> > > >                   segment->dump(rproc, segment, data + offset);
> >
> > I'm not exactly sure why custom segments can be copied to the elf image but not
> > generic ones... And as far as I can tell accessing "data + offset" will blow up
> > because only the memory for the program headers has been allocated, not for the
> > program segments.
> >
>
> Thanks, I missed that, but you're correct.
>
> >
> > > > -         } else {
> > > > -                 ptr = rproc_da_to_va(rproc, segment->da, segment->size);
> > > > -                 if (!ptr) {
> > > > -                         dev_err(&rproc->dev,
> > > > -                                 "invalid coredump segment (%pad, %zu)\n",
> > > > -                                 &segment->da, segment->size);
> > > > -                         memset(data + offset, 0xff, segment->size);
> > > > -                 } else {
> > > > -                         memcpy(data + offset, ptr, segment->size);
> > > > -                 }
> > > > -         }
> > > >
> > > >           offset += phdr->p_filesz;
> > > >           phdr++;
> > > >   }
> > > > + dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
> > > > +                 rproc_read_dump, rproc_free_dump);
> > > >
> > > > - dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
> > > > + wait_for_completion(&rproc->dump_done);
> > >
> > > This will mean that recovery handling will break on installations that
> > > doesn't have your ramdump collector - as it will just sit here forever
> > > (5 minutes) waiting for userspace to do its job.
> >
> > Right, that problem also came to mind.
> >
> > >
> > > I think we need to device a new sysfs attribute, through which you can
> > > enable the "inline" coredump mechanism. That way recovery would work for
> > > all systems and in your specific case you could reconfigure it - perhaps
> > > once the ramdump collector starts.
> >
> > Another option is to make rproc_coredump() customizable, as with all the other
> > functions in remoteproc_internal.h.  That way the current rproc_coredump() is
> > kept intact and we don't need a new sysfs entry.
> >
>
> Rishabh suggested this in a discussion we had earlier this week as well,
> but we still have the problem that the same platform driver will need to
> support both modes, depending on which user space is running. So even if
> we push this out to the platform driver we still need some mechanism
> for userspace to enable the "inline" mode.

So is this something that needs to be done on the fly in response to
some system event?  Any possibility to use the DT?

We are currently discussing the addition of a character driver [1]...
The file_operations could be platform specific so any scenario can be
implemented, whether it is switching on/off a remote processor in the
open/release() callback or setting the behavior of the coredump
functionality in an ioctl().  I think there is value in exploring
different opportunities so that we keep the core as clean and simple
as possible.

Thanks,
Mathieu

[1]. https://patchwork.kernel.org/project/linux-remoteproc/list/?series=264603

>
> Regards,
> Bjorn

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
  2020-04-03 20:53       ` Mathieu Poirier
@ 2020-04-08 18:03         ` rishabhb
  2020-04-09 20:27             ` Bjorn Andersson
  1 sibling, 0 replies; 17+ messages in thread
From: rishabhb @ 2020-04-08 18:03 UTC (permalink / raw)
  To: Mathieu Poirier
  Cc: Bjorn Andersson, Linux Kernel Mailing List, linux-remoteproc,
	Ohad Ben-Cohen, psodagud, tsoni, Siddharth Gupta,
	linux-remoteproc-owner

On 2020-04-03 13:53, Mathieu Poirier wrote:
> On Thu, 2 Apr 2020 at 23:16, Bjorn Andersson 
> <bjorn.andersson@linaro.org> wrote:
>> 
>> On Thu 02 Apr 10:24 PDT 2020, Mathieu Poirier wrote:
>> 
>> > On Wed, Apr 01, 2020 at 12:51:14PM -0700, Bjorn Andersson wrote:
>> > > On Fri 27 Mar 16:56 PDT 2020, Rishabh Bhatnagar wrote:
>> > >
>> > > > The current coredump implementation uses vmalloc area to copy
>> > > > all the segments. But this might put a lot of strain on low memory
>> > > > targets as the firmware size sometimes is in ten's of MBs.
>> > > > The situation becomes worse if there are multiple remote processors
>> > > > undergoing recovery at the same time.
>> > > > This patch directly copies the device memory to userspace buffer
>> > > > and avoids extra memory usage. This requires recovery to be halted
>> > > > until data is read by userspace and free function is called.
>> > > >
>> > > > Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
>> > > > ---
>> > > >  drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
>> > > >  include/linux/remoteproc.h           |   4 ++
>> > > >  2 files changed, 94 insertions(+), 17 deletions(-)
>> > > >
>> > > > diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
>> > > > index 097f33e..2d881e5 100644
>> > > > --- a/drivers/remoteproc/remoteproc_core.c
>> > > > +++ b/drivers/remoteproc/remoteproc_core.c
>> > > > @@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
>> > > >  }
>> > > >  EXPORT_SYMBOL(rproc_coredump_add_segment);
>> > > >
>> > > > +
>> > > > +void rproc_free_dump(void *data)
>> > >
>> > > static
>> > >
>> > > > +{
>> > > > + struct rproc *rproc = data;
>> > > > +
>> > > > + dev_info(&rproc->dev, "Userspace done reading rproc dump\n");
>> > >
>> > > Please drop the info prints throughout.
>> > >
>> > > > + complete(&rproc->dump_done);
>> > > > +}
>> > > > +
>> > > > +static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
>> > > > +                         unsigned long *data_left)
>> > >
>> > > Please rename this rproc_coredump_resolve_segment(), or something along
>> > > those lines.
>> > >
>> > > > +{
>> > > > + struct rproc_dump_segment *segment;
>> > > > +
>> > > > + list_for_each_entry(segment, segments, node) {
>> > > > +         if (user_offset >= segment->size)
>> > > > +                 user_offset -= segment->size;
>> > > > +         else
>> > > > +                 break;
>> > > > + }
>> > > > +
>> > > > + if (&segment->node == segments) {
>> > > > +         *data_left = 0;
>> > > > +         return 0;
>> > > > + }
>> > > > +
>> > > > + *data_left = segment->size - user_offset;
>> > > > +
>> > > > + return segment->da + user_offset;
>> > > > +}
>> > > > +
>> > > > +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
>> > > > +                         void *data, size_t elfcorelen)
>> > > > +{
>> > > > + void *device_mem = NULL;
>> > > > + unsigned long data_left = 0;
>> > > > + unsigned long bytes_left = count;
>> > > > + unsigned long addr = 0;
>> > > > + size_t copy_size = 0;
>> > > > + struct rproc *rproc = data;
>> > > > +
>> > > > + if (offset < elfcorelen) {
>> > > > +         copy_size = elfcorelen - offset;
>> > > > +         copy_size = min(copy_size, bytes_left);
>> > > > +
>> > > > +         memcpy(buffer, rproc->elfcore + offset, copy_size);
>> > > > +         offset += copy_size;
>> > > > +         bytes_left -= copy_size;
>> > > > +         buffer += copy_size;
>> > > > + }
>> > > > +
>> > > > + while (bytes_left) {
>> > > > +         addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
>> > > > +                         &data_left);
>> > > > + /* EOF check */
>> > >
>> > > Indentation, and "if no data left" does indicate that this is the end of
>> > > the loop already.
>> > >
>> > > > +         if (data_left == 0) {
>> > > > +                 pr_info("Ramdump complete. %lld bytes read.", offset);
>> > > > +                 return 0;
>> > >
>> > > You might have copied data to the buffer, so returning 0 here doesn't
>> > > seem right. Presumably instead you should break and return offset -
>> > > original offset or something like that.
>> > >
>> > > > +         }
>> > > > +
>> > > > +         copy_size = min_t(size_t, bytes_left, data_left);
>> > > > +
>> > > > +         device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
>> > > > +         if (!device_mem) {
>> > > > +                 pr_err("Unable to ioremap: addr %lx, size %zd\n",
>> > > > +                          addr, copy_size);
>> > > > +                 return -ENOMEM;
>> > > > +         }
>> > > > +         memcpy(buffer, device_mem, copy_size);
>> > > > +
>> > > > +         offset += copy_size;
>> > > > +         buffer += copy_size;
>> > > > +         bytes_left -= copy_size;
>> > > > +         dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
>> > > > +                 copy_size);
>> > > > + }
>> > > > +
>> > > > + return count;
>> > >
>> > > This should be the number of bytes actually returned, so if count is
>> > > larger than the sum of the segment sizes this will be wrong.
>> > >
>> > > > +}
>> > > > +
>> > > >  /**
>> > > >   * rproc_coredump_add_custom_segment() - add custom coredump segment
>> > > >   * @rproc:       handle of a remote processor
>> > > > @@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
>> > > >   struct rproc_dump_segment *segment;
>> > > >   struct elf32_phdr *phdr;
>> > > >   struct elf32_hdr *ehdr;
>> > > > - size_t data_size;
>> > > > + size_t header_size;
>> > > >   size_t offset;
>> > > >   void *data;
>> > > > - void *ptr;
>> > > >   int phnum = 0;
>> > > >
>> > > >   if (list_empty(&rproc->dump_segments))
>> > > >           return;
>> > > >
>> > > > - data_size = sizeof(*ehdr);
>> > > > + header_size = sizeof(*ehdr);
>> > > >   list_for_each_entry(segment, &rproc->dump_segments, node) {
>> > > > -         data_size += sizeof(*phdr) + segment->size;
>> > > > +         header_size += sizeof(*phdr);
>> > > >
>> > > >           phnum++;
>> > > >   }
>> > > >
>> > > > - data = vmalloc(data_size);
>> > > > + data = vmalloc(header_size);
>> > > >   if (!data)
>> > > >           return;
>> > > >
>> > > >   ehdr = data;
>> > > > + rproc->elfcore = data;
>> > >
>> > > Rather than using a rproc-global variable I would prefer that you create
>> > > a new rproc_coredump_state struct that carries the header pointer and
>> > > the information needed by the read & free functions.
>> > >
>> > > >
>> > > >   memset(ehdr, 0, sizeof(*ehdr));
>> > > >   memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
>> > > > @@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
>> > > >
>> > > >           if (segment->dump) {
>> > > >                   segment->dump(rproc, segment, data + offset);
>> >
>> > I'm not exactly sure why custom segments can be copied to the elf image but not
>> > generic ones... And as far as I can tell accessing "data + offset" will blow up
>> > because only the memory for the program headers has been allocated, not for the
>> > program segments.
>> >
>> 
>> Thanks, I missed that, but you're correct.
>> 
>> >
>> > > > -         } else {
>> > > > -                 ptr = rproc_da_to_va(rproc, segment->da, segment->size);
>> > > > -                 if (!ptr) {
>> > > > -                         dev_err(&rproc->dev,
>> > > > -                                 "invalid coredump segment (%pad, %zu)\n",
>> > > > -                                 &segment->da, segment->size);
>> > > > -                         memset(data + offset, 0xff, segment->size);
>> > > > -                 } else {
>> > > > -                         memcpy(data + offset, ptr, segment->size);
>> > > > -                 }
>> > > > -         }
>> > > >
>> > > >           offset += phdr->p_filesz;
>> > > >           phdr++;
>> > > >   }
>> > > > + dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
>> > > > +                 rproc_read_dump, rproc_free_dump);
>> > > >
>> > > > - dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
>> > > > + wait_for_completion(&rproc->dump_done);
>> > >
>> > > This will mean that recovery handling will break on installations that
>> > > doesn't have your ramdump collector - as it will just sit here forever
>> > > (5 minutes) waiting for userspace to do its job.
>> >
>> > Right, that problem also came to mind.
>> >
>> > >
>> > > I think we need to device a new sysfs attribute, through which you can
>> > > enable the "inline" coredump mechanism. That way recovery would work for
>> > > all systems and in your specific case you could reconfigure it - perhaps
>> > > once the ramdump collector starts.
>> >
>> > Another option is to make rproc_coredump() customizable, as with all the other
>> > functions in remoteproc_internal.h.  That way the current rproc_coredump() is
>> > kept intact and we don't need a new sysfs entry.
>> >
>> 
>> Rishabh suggested this in a discussion we had earlier this week as 
>> well,
>> but we still have the problem that the same platform driver will need 
>> to
>> support both modes, depending on which user space is running. So even 
>> if
>> we push this out to the platform driver we still need some mechanism
>> for userspace to enable the "inline" mode.
> 
> So is this something that needs to be done on the fly in response to
> some system event?  Any possibility to use the DT?
Yes we want to make it dynamically configurable so that even if we put
any other userspace with our kernel image it should work.
> 
> We are currently discussing the addition of a character driver [1]...
> The file_operations could be platform specific so any scenario can be
> implemented, whether it is switching on/off a remote processor in the
> open/release() callback or setting the behavior of the coredump
> functionality in an ioctl().  I think there is value in exploring
> different opportunities so that we keep the core as clean and simple
> as possible.
> 
The problem with that is there can be only one userspace entity that
can open the dev node. And this application need not be the one 
responsible
for collecting ramdumps. We have a separate ramdump collector that is 
responsible
for collecting dumps for all remoteprocs.
> Thanks,
> Mathieu
> 
> [1]. 
> https://patchwork.kernel.org/project/linux-remoteproc/list/?series=264603
> 
>> 
>> Regards,
>> Bjorn

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
  2020-04-03 20:53       ` Mathieu Poirier
@ 2020-04-09 20:27             ` Bjorn Andersson
  2020-04-09 20:27             ` Bjorn Andersson
  1 sibling, 0 replies; 17+ messages in thread
From: Bjorn Andersson @ 2020-04-09 20:27 UTC (permalink / raw)
  To: Mathieu Poirier
  Cc: Rishabh Bhatnagar, Linux Kernel Mailing List, linux-remoteproc,
	Ohad Ben-Cohen, psodagud, tsoni, Siddharth Gupta

On Fri 03 Apr 13:53 PDT 2020, Mathieu Poirier wrote:

> On Thu, 2 Apr 2020 at 23:16, Bjorn Andersson <bjorn.andersson@linaro.org> wrote:
> >
> > On Thu 02 Apr 10:24 PDT 2020, Mathieu Poirier wrote:
> >
> > > On Wed, Apr 01, 2020 at 12:51:14PM -0700, Bjorn Andersson wrote:
> > > > On Fri 27 Mar 16:56 PDT 2020, Rishabh Bhatnagar wrote:
> > > >
> > > > > The current coredump implementation uses vmalloc area to copy
> > > > > all the segments. But this might put a lot of strain on low memory
> > > > > targets as the firmware size sometimes is in ten's of MBs.
> > > > > The situation becomes worse if there are multiple remote processors
> > > > > undergoing recovery at the same time.
> > > > > This patch directly copies the device memory to userspace buffer
> > > > > and avoids extra memory usage. This requires recovery to be halted
> > > > > until data is read by userspace and free function is called.
> > > > >
> > > > > Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> > > > > ---
> > > > >  drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
> > > > >  include/linux/remoteproc.h           |   4 ++
> > > > >  2 files changed, 94 insertions(+), 17 deletions(-)
> > > > >
> > > > > diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> > > > > index 097f33e..2d881e5 100644
> > > > > --- a/drivers/remoteproc/remoteproc_core.c
> > > > > +++ b/drivers/remoteproc/remoteproc_core.c
> > > > > @@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
> > > > >  }
> > > > >  EXPORT_SYMBOL(rproc_coredump_add_segment);
> > > > >
> > > > > +
> > > > > +void rproc_free_dump(void *data)
> > > >
> > > > static
> > > >
> > > > > +{
> > > > > + struct rproc *rproc = data;
> > > > > +
> > > > > + dev_info(&rproc->dev, "Userspace done reading rproc dump\n");
> > > >
> > > > Please drop the info prints throughout.
> > > >
> > > > > + complete(&rproc->dump_done);
> > > > > +}
> > > > > +
> > > > > +static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
> > > > > +                         unsigned long *data_left)
> > > >
> > > > Please rename this rproc_coredump_resolve_segment(), or something along
> > > > those lines.
> > > >
> > > > > +{
> > > > > + struct rproc_dump_segment *segment;
> > > > > +
> > > > > + list_for_each_entry(segment, segments, node) {
> > > > > +         if (user_offset >= segment->size)
> > > > > +                 user_offset -= segment->size;
> > > > > +         else
> > > > > +                 break;
> > > > > + }
> > > > > +
> > > > > + if (&segment->node == segments) {
> > > > > +         *data_left = 0;
> > > > > +         return 0;
> > > > > + }
> > > > > +
> > > > > + *data_left = segment->size - user_offset;
> > > > > +
> > > > > + return segment->da + user_offset;
> > > > > +}
> > > > > +
> > > > > +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
> > > > > +                         void *data, size_t elfcorelen)
> > > > > +{
> > > > > + void *device_mem = NULL;
> > > > > + unsigned long data_left = 0;
> > > > > + unsigned long bytes_left = count;
> > > > > + unsigned long addr = 0;
> > > > > + size_t copy_size = 0;
> > > > > + struct rproc *rproc = data;
> > > > > +
> > > > > + if (offset < elfcorelen) {
> > > > > +         copy_size = elfcorelen - offset;
> > > > > +         copy_size = min(copy_size, bytes_left);
> > > > > +
> > > > > +         memcpy(buffer, rproc->elfcore + offset, copy_size);
> > > > > +         offset += copy_size;
> > > > > +         bytes_left -= copy_size;
> > > > > +         buffer += copy_size;
> > > > > + }
> > > > > +
> > > > > + while (bytes_left) {
> > > > > +         addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
> > > > > +                         &data_left);
> > > > > + /* EOF check */
> > > >
> > > > Indentation, and "if no data left" does indicate that this is the end of
> > > > the loop already.
> > > >
> > > > > +         if (data_left == 0) {
> > > > > +                 pr_info("Ramdump complete. %lld bytes read.", offset);
> > > > > +                 return 0;
> > > >
> > > > You might have copied data to the buffer, so returning 0 here doesn't
> > > > seem right. Presumably instead you should break and return offset -
> > > > original offset or something like that.
> > > >
> > > > > +         }
> > > > > +
> > > > > +         copy_size = min_t(size_t, bytes_left, data_left);
> > > > > +
> > > > > +         device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
> > > > > +         if (!device_mem) {
> > > > > +                 pr_err("Unable to ioremap: addr %lx, size %zd\n",
> > > > > +                          addr, copy_size);
> > > > > +                 return -ENOMEM;
> > > > > +         }
> > > > > +         memcpy(buffer, device_mem, copy_size);
> > > > > +
> > > > > +         offset += copy_size;
> > > > > +         buffer += copy_size;
> > > > > +         bytes_left -= copy_size;
> > > > > +         dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
> > > > > +                 copy_size);
> > > > > + }
> > > > > +
> > > > > + return count;
> > > >
> > > > This should be the number of bytes actually returned, so if count is
> > > > larger than the sum of the segment sizes this will be wrong.
> > > >
> > > > > +}
> > > > > +
> > > > >  /**
> > > > >   * rproc_coredump_add_custom_segment() - add custom coredump segment
> > > > >   * @rproc:       handle of a remote processor
> > > > > @@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
> > > > >   struct rproc_dump_segment *segment;
> > > > >   struct elf32_phdr *phdr;
> > > > >   struct elf32_hdr *ehdr;
> > > > > - size_t data_size;
> > > > > + size_t header_size;
> > > > >   size_t offset;
> > > > >   void *data;
> > > > > - void *ptr;
> > > > >   int phnum = 0;
> > > > >
> > > > >   if (list_empty(&rproc->dump_segments))
> > > > >           return;
> > > > >
> > > > > - data_size = sizeof(*ehdr);
> > > > > + header_size = sizeof(*ehdr);
> > > > >   list_for_each_entry(segment, &rproc->dump_segments, node) {
> > > > > -         data_size += sizeof(*phdr) + segment->size;
> > > > > +         header_size += sizeof(*phdr);
> > > > >
> > > > >           phnum++;
> > > > >   }
> > > > >
> > > > > - data = vmalloc(data_size);
> > > > > + data = vmalloc(header_size);
> > > > >   if (!data)
> > > > >           return;
> > > > >
> > > > >   ehdr = data;
> > > > > + rproc->elfcore = data;
> > > >
> > > > Rather than using a rproc-global variable I would prefer that you create
> > > > a new rproc_coredump_state struct that carries the header pointer and
> > > > the information needed by the read & free functions.
> > > >
> > > > >
> > > > >   memset(ehdr, 0, sizeof(*ehdr));
> > > > >   memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
> > > > > @@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
> > > > >
> > > > >           if (segment->dump) {
> > > > >                   segment->dump(rproc, segment, data + offset);
> > >
> > > I'm not exactly sure why custom segments can be copied to the elf image but not
> > > generic ones... And as far as I can tell accessing "data + offset" will blow up
> > > because only the memory for the program headers has been allocated, not for the
> > > program segments.
> > >
> >
> > Thanks, I missed that, but you're correct.
> >
> > >
> > > > > -         } else {
> > > > > -                 ptr = rproc_da_to_va(rproc, segment->da, segment->size);
> > > > > -                 if (!ptr) {
> > > > > -                         dev_err(&rproc->dev,
> > > > > -                                 "invalid coredump segment (%pad, %zu)\n",
> > > > > -                                 &segment->da, segment->size);
> > > > > -                         memset(data + offset, 0xff, segment->size);
> > > > > -                 } else {
> > > > > -                         memcpy(data + offset, ptr, segment->size);
> > > > > -                 }
> > > > > -         }
> > > > >
> > > > >           offset += phdr->p_filesz;
> > > > >           phdr++;
> > > > >   }
> > > > > + dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
> > > > > +                 rproc_read_dump, rproc_free_dump);
> > > > >
> > > > > - dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
> > > > > + wait_for_completion(&rproc->dump_done);
> > > >
> > > > This will mean that recovery handling will break on installations that
> > > > doesn't have your ramdump collector - as it will just sit here forever
> > > > (5 minutes) waiting for userspace to do its job.
> > >
> > > Right, that problem also came to mind.
> > >
> > > >
> > > > I think we need to device a new sysfs attribute, through which you can
> > > > enable the "inline" coredump mechanism. That way recovery would work for
> > > > all systems and in your specific case you could reconfigure it - perhaps
> > > > once the ramdump collector starts.
> > >
> > > Another option is to make rproc_coredump() customizable, as with all the other
> > > functions in remoteproc_internal.h.  That way the current rproc_coredump() is
> > > kept intact and we don't need a new sysfs entry.
> > >
> >
> > Rishabh suggested this in a discussion we had earlier this week as well,
> > but we still have the problem that the same platform driver will need to
> > support both modes, depending on which user space is running. So even if
> > we push this out to the platform driver we still need some mechanism
> > for userspace to enable the "inline" mode.
> 
> So is this something that needs to be done on the fly in response to
> some system event?  Any possibility to use the DT?
> 

Designing this as a dynamic property would mean that the kernel doesn't
have to be recompiled between different variants of the same software
solution for a piece of hardware.

Putting a flag in DT would mean that you need to flash different DT
depending on what apps your running in userspace.

> We are currently discussing the addition of a character driver [1]...
> The file_operations could be platform specific so any scenario can be
> implemented, whether it is switching on/off a remote processor in the
> open/release() callback or setting the behavior of the coredump
> functionality in an ioctl().

The main benefit of tying this to the character device would be that the
behavior could be reverted on release(). But this would imply that the
application starting and stopping the remoteproc is also the one
collecting ramdumps and it would also imply that there exists such an
application (e.g. this functionality is still desirable for auto_booted
remoteprocs).

Finally I think it's likely that the existing tools for collecting
devcoredump artifacts are expected to just continue to work after this
change - in both modes.



On the functionality Rishabh proposes, it would be very interesting to
hear from others on their usage and need for coredumps.

E.g. are Qualcomm really the only ones that has issues with
vmalloc(sizeof(firmware)) failing and preventing post mortem debugging
in low-memory scenarios? Or does others simply not care to debug
remoteproc firmware in these cases? Is debugging only done using JTAG?

> I think there is value in exploring different opportunities so that we
> keep the core as clean and simple as possible.
> 

I agree, but hadn't considered this fully. In particular with the
changes I'm asking Rishabh to make we have a few screen fulls of code
involved in the coredump handling. So I think it would be beneficial to
move this into a remoteproc_coredump.c.

Thanks,
Bjorn

> Thanks,
> Mathieu
> 
> [1]. https://patchwork.kernel.org/project/linux-remoteproc/list/?series=264603
> 
> >
> > Regards,
> > Bjorn

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
@ 2020-04-09 20:27             ` Bjorn Andersson
  0 siblings, 0 replies; 17+ messages in thread
From: Bjorn Andersson @ 2020-04-09 20:27 UTC (permalink / raw)
  To: Mathieu Poirier
  Cc: Rishabh Bhatnagar, Linux Kernel Mailing List, linux-remoteproc,
	Ohad Ben-Cohen, psodagud, tsoni, Siddharth Gupta

On Fri 03 Apr 13:53 PDT 2020, Mathieu Poirier wrote:

> On Thu, 2 Apr 2020 at 23:16, Bjorn Andersson <bjorn.andersson@linaro.org> wrote:
> >
> > On Thu 02 Apr 10:24 PDT 2020, Mathieu Poirier wrote:
> >
> > > On Wed, Apr 01, 2020 at 12:51:14PM -0700, Bjorn Andersson wrote:
> > > > On Fri 27 Mar 16:56 PDT 2020, Rishabh Bhatnagar wrote:
> > > >
> > > > > The current coredump implementation uses vmalloc area to copy
> > > > > all the segments. But this might put a lot of strain on low memory
> > > > > targets as the firmware size sometimes is in ten's of MBs.
> > > > > The situation becomes worse if there are multiple remote processors
> > > > > undergoing recovery at the same time.
> > > > > This patch directly copies the device memory to userspace buffer
> > > > > and avoids extra memory usage. This requires recovery to be halted
> > > > > until data is read by userspace and free function is called.
> > > > >
> > > > > Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> > > > > ---
> > > > >  drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
> > > > >  include/linux/remoteproc.h           |   4 ++
> > > > >  2 files changed, 94 insertions(+), 17 deletions(-)
> > > > >
> > > > > diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> > > > > index 097f33e..2d881e5 100644
> > > > > --- a/drivers/remoteproc/remoteproc_core.c
> > > > > +++ b/drivers/remoteproc/remoteproc_core.c
> > > > > @@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
> > > > >  }
> > > > >  EXPORT_SYMBOL(rproc_coredump_add_segment);
> > > > >
> > > > > +
> > > > > +void rproc_free_dump(void *data)
> > > >
> > > > static
> > > >
> > > > > +{
> > > > > + struct rproc *rproc = data;
> > > > > +
> > > > > + dev_info(&rproc->dev, "Userspace done reading rproc dump\n");
> > > >
> > > > Please drop the info prints throughout.
> > > >
> > > > > + complete(&rproc->dump_done);
> > > > > +}
> > > > > +
> > > > > +static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
> > > > > +                         unsigned long *data_left)
> > > >
> > > > Please rename this rproc_coredump_resolve_segment(), or something along
> > > > those lines.
> > > >
> > > > > +{
> > > > > + struct rproc_dump_segment *segment;
> > > > > +
> > > > > + list_for_each_entry(segment, segments, node) {
> > > > > +         if (user_offset >= segment->size)
> > > > > +                 user_offset -= segment->size;
> > > > > +         else
> > > > > +                 break;
> > > > > + }
> > > > > +
> > > > > + if (&segment->node == segments) {
> > > > > +         *data_left = 0;
> > > > > +         return 0;
> > > > > + }
> > > > > +
> > > > > + *data_left = segment->size - user_offset;
> > > > > +
> > > > > + return segment->da + user_offset;
> > > > > +}
> > > > > +
> > > > > +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
> > > > > +                         void *data, size_t elfcorelen)
> > > > > +{
> > > > > + void *device_mem = NULL;
> > > > > + unsigned long data_left = 0;
> > > > > + unsigned long bytes_left = count;
> > > > > + unsigned long addr = 0;
> > > > > + size_t copy_size = 0;
> > > > > + struct rproc *rproc = data;
> > > > > +
> > > > > + if (offset < elfcorelen) {
> > > > > +         copy_size = elfcorelen - offset;
> > > > > +         copy_size = min(copy_size, bytes_left);
> > > > > +
> > > > > +         memcpy(buffer, rproc->elfcore + offset, copy_size);
> > > > > +         offset += copy_size;
> > > > > +         bytes_left -= copy_size;
> > > > > +         buffer += copy_size;
> > > > > + }
> > > > > +
> > > > > + while (bytes_left) {
> > > > > +         addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
> > > > > +                         &data_left);
> > > > > + /* EOF check */
> > > >
> > > > Indentation, and "if no data left" does indicate that this is the end of
> > > > the loop already.
> > > >
> > > > > +         if (data_left == 0) {
> > > > > +                 pr_info("Ramdump complete. %lld bytes read.", offset);
> > > > > +                 return 0;
> > > >
> > > > You might have copied data to the buffer, so returning 0 here doesn't
> > > > seem right. Presumably instead you should break and return offset -
> > > > original offset or something like that.
> > > >
> > > > > +         }
> > > > > +
> > > > > +         copy_size = min_t(size_t, bytes_left, data_left);
> > > > > +
> > > > > +         device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
> > > > > +         if (!device_mem) {
> > > > > +                 pr_err("Unable to ioremap: addr %lx, size %zd\n",
> > > > > +                          addr, copy_size);
> > > > > +                 return -ENOMEM;
> > > > > +         }
> > > > > +         memcpy(buffer, device_mem, copy_size);
> > > > > +
> > > > > +         offset += copy_size;
> > > > > +         buffer += copy_size;
> > > > > +         bytes_left -= copy_size;
> > > > > +         dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
> > > > > +                 copy_size);
> > > > > + }
> > > > > +
> > > > > + return count;
> > > >
> > > > This should be the number of bytes actually returned, so if count is
> > > > larger than the sum of the segment sizes this will be wrong.
> > > >
> > > > > +}
> > > > > +
> > > > >  /**
> > > > >   * rproc_coredump_add_custom_segment() - add custom coredump segment
> > > > >   * @rproc:       handle of a remote processor
> > > > > @@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
> > > > >   struct rproc_dump_segment *segment;
> > > > >   struct elf32_phdr *phdr;
> > > > >   struct elf32_hdr *ehdr;
> > > > > - size_t data_size;
> > > > > + size_t header_size;
> > > > >   size_t offset;
> > > > >   void *data;
> > > > > - void *ptr;
> > > > >   int phnum = 0;
> > > > >
> > > > >   if (list_empty(&rproc->dump_segments))
> > > > >           return;
> > > > >
> > > > > - data_size = sizeof(*ehdr);
> > > > > + header_size = sizeof(*ehdr);
> > > > >   list_for_each_entry(segment, &rproc->dump_segments, node) {
> > > > > -         data_size += sizeof(*phdr) + segment->size;
> > > > > +         header_size += sizeof(*phdr);
> > > > >
> > > > >           phnum++;
> > > > >   }
> > > > >
> > > > > - data = vmalloc(data_size);
> > > > > + data = vmalloc(header_size);
> > > > >   if (!data)
> > > > >           return;
> > > > >
> > > > >   ehdr = data;
> > > > > + rproc->elfcore = data;
> > > >
> > > > Rather than using a rproc-global variable I would prefer that you create
> > > > a new rproc_coredump_state struct that carries the header pointer and
> > > > the information needed by the read & free functions.
> > > >
> > > > >
> > > > >   memset(ehdr, 0, sizeof(*ehdr));
> > > > >   memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
> > > > > @@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
> > > > >
> > > > >           if (segment->dump) {
> > > > >                   segment->dump(rproc, segment, data + offset);
> > >
> > > I'm not exactly sure why custom segments can be copied to the elf image but not
> > > generic ones... And as far as I can tell accessing "data + offset" will blow up
> > > because only the memory for the program headers has been allocated, not for the
> > > program segments.
> > >
> >
> > Thanks, I missed that, but you're correct.
> >
> > >
> > > > > -         } else {
> > > > > -                 ptr = rproc_da_to_va(rproc, segment->da, segment->size);
> > > > > -                 if (!ptr) {
> > > > > -                         dev_err(&rproc->dev,
> > > > > -                                 "invalid coredump segment (%pad, %zu)\n",
> > > > > -                                 &segment->da, segment->size);
> > > > > -                         memset(data + offset, 0xff, segment->size);
> > > > > -                 } else {
> > > > > -                         memcpy(data + offset, ptr, segment->size);
> > > > > -                 }
> > > > > -         }
> > > > >
> > > > >           offset += phdr->p_filesz;
> > > > >           phdr++;
> > > > >   }
> > > > > + dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
> > > > > +                 rproc_read_dump, rproc_free_dump);
> > > > >
> > > > > - dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
> > > > > + wait_for_completion(&rproc->dump_done);
> > > >
> > > > This will mean that recovery handling will break on installations that
> > > > doesn't have your ramdump collector - as it will just sit here forever
> > > > (5 minutes) waiting for userspace to do its job.
> > >
> > > Right, that problem also came to mind.
> > >
> > > >
> > > > I think we need to device a new sysfs attribute, through which you can
> > > > enable the "inline" coredump mechanism. That way recovery would work for
> > > > all systems and in your specific case you could reconfigure it - perhaps
> > > > once the ramdump collector starts.
> > >
> > > Another option is to make rproc_coredump() customizable, as with all the other
> > > functions in remoteproc_internal.h.  That way the current rproc_coredump() is
> > > kept intact and we don't need a new sysfs entry.
> > >
> >
> > Rishabh suggested this in a discussion we had earlier this week as well,
> > but we still have the problem that the same platform driver will need to
> > support both modes, depending on which user space is running. So even if
> > we push this out to the platform driver we still need some mechanism
> > for userspace to enable the "inline" mode.
> 
> So is this something that needs to be done on the fly in response to
> some system event?  Any possibility to use the DT?
> 

Designing this as a dynamic property would mean that the kernel doesn't
have to be recompiled between different variants of the same software
solution for a piece of hardware.

Putting a flag in DT would mean that you need to flash different DT
depending on what apps your running in userspace.

> We are currently discussing the addition of a character driver [1]...
> The file_operations could be platform specific so any scenario can be
> implemented, whether it is switching on/off a remote processor in the
> open/release() callback or setting the behavior of the coredump
> functionality in an ioctl().

The main benefit of tying this to the character device would be that the
behavior could be reverted on release(). But this would imply that the
application starting and stopping the remoteproc is also the one
collecting ramdumps and it would also imply that there exists such an
application (e.g. this functionality is still desirable for auto_booted
remoteprocs).

Finally I think it's likely that the existing tools for collecting
devcoredump artifacts are expected to just continue to work after this
change - in both modes.



On the functionality Rishabh proposes, it would be very interesting to
hear from others on their usage and need for coredumps.

E.g. are Qualcomm really the only ones that has issues with
vmalloc(sizeof(firmware)) failing and preventing post mortem debugging
in low-memory scenarios? Or does others simply not care to debug
remoteproc firmware in these cases? Is debugging only done using JTAG?

> I think there is value in exploring different opportunities so that we
> keep the core as clean and simple as possible.
> 

I agree, but hadn't considered this fully. In particular with the
changes I'm asking Rishabh to make we have a few screen fulls of code
involved in the coredump handling. So I think it would be beneficial to
move this into a remoteproc_coredump.c.

Thanks,
Bjorn

> Thanks,
> Mathieu
> 
> [1]. https://patchwork.kernel.org/project/linux-remoteproc/list/?series=264603
> 
> >
> > Regards,
> > Bjorn

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
@ 2020-04-10 10:31               ` Arnaud POULIQUEN
  0 siblings, 0 replies; 17+ messages in thread
From: Arnaud POULIQUEN @ 2020-04-10 10:31 UTC (permalink / raw)
  To: Bjorn Andersson, Mathieu Poirier
  Cc: Rishabh Bhatnagar, Linux Kernel Mailing List, linux-remoteproc,
	Ohad Ben-Cohen, psodagud, tsoni, Siddharth Gupta



On 4/9/20 10:27 PM, Bjorn Andersson wrote:
> On Fri 03 Apr 13:53 PDT 2020, Mathieu Poirier wrote:
> 
>> On Thu, 2 Apr 2020 at 23:16, Bjorn Andersson <bjorn.andersson@linaro.org> wrote:
>>>
>>> On Thu 02 Apr 10:24 PDT 2020, Mathieu Poirier wrote:
>>>
>>>> On Wed, Apr 01, 2020 at 12:51:14PM -0700, Bjorn Andersson wrote:
>>>>> On Fri 27 Mar 16:56 PDT 2020, Rishabh Bhatnagar wrote:
>>>>>
>>>>>> The current coredump implementation uses vmalloc area to copy
>>>>>> all the segments. But this might put a lot of strain on low memory
>>>>>> targets as the firmware size sometimes is in ten's of MBs.
>>>>>> The situation becomes worse if there are multiple remote processors
>>>>>> undergoing recovery at the same time.
>>>>>> This patch directly copies the device memory to userspace buffer
>>>>>> and avoids extra memory usage. This requires recovery to be halted
>>>>>> until data is read by userspace and free function is called.
>>>>>>
>>>>>> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
>>>>>> ---
>>>>>>  drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
>>>>>>  include/linux/remoteproc.h           |   4 ++
>>>>>>  2 files changed, 94 insertions(+), 17 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
>>>>>> index 097f33e..2d881e5 100644
>>>>>> --- a/drivers/remoteproc/remoteproc_core.c
>>>>>> +++ b/drivers/remoteproc/remoteproc_core.c
>>>>>> @@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
>>>>>>  }
>>>>>>  EXPORT_SYMBOL(rproc_coredump_add_segment);
>>>>>>
>>>>>> +
>>>>>> +void rproc_free_dump(void *data)
>>>>>
>>>>> static
>>>>>
>>>>>> +{
>>>>>> + struct rproc *rproc = data;
>>>>>> +
>>>>>> + dev_info(&rproc->dev, "Userspace done reading rproc dump\n");
>>>>>
>>>>> Please drop the info prints throughout.
>>>>>
>>>>>> + complete(&rproc->dump_done);
>>>>>> +}
>>>>>> +
>>>>>> +static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
>>>>>> +                         unsigned long *data_left)
>>>>>
>>>>> Please rename this rproc_coredump_resolve_segment(), or something along
>>>>> those lines.
>>>>>
>>>>>> +{
>>>>>> + struct rproc_dump_segment *segment;
>>>>>> +
>>>>>> + list_for_each_entry(segment, segments, node) {
>>>>>> +         if (user_offset >= segment->size)
>>>>>> +                 user_offset -= segment->size;
>>>>>> +         else
>>>>>> +                 break;
>>>>>> + }
>>>>>> +
>>>>>> + if (&segment->node == segments) {
>>>>>> +         *data_left = 0;
>>>>>> +         return 0;
>>>>>> + }
>>>>>> +
>>>>>> + *data_left = segment->size - user_offset;
>>>>>> +
>>>>>> + return segment->da + user_offset;
>>>>>> +}
>>>>>> +
>>>>>> +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
>>>>>> +                         void *data, size_t elfcorelen)
>>>>>> +{
>>>>>> + void *device_mem = NULL;
>>>>>> + unsigned long data_left = 0;
>>>>>> + unsigned long bytes_left = count;
>>>>>> + unsigned long addr = 0;
>>>>>> + size_t copy_size = 0;
>>>>>> + struct rproc *rproc = data;
>>>>>> +
>>>>>> + if (offset < elfcorelen) {
>>>>>> +         copy_size = elfcorelen - offset;
>>>>>> +         copy_size = min(copy_size, bytes_left);
>>>>>> +
>>>>>> +         memcpy(buffer, rproc->elfcore + offset, copy_size);
>>>>>> +         offset += copy_size;
>>>>>> +         bytes_left -= copy_size;
>>>>>> +         buffer += copy_size;
>>>>>> + }
>>>>>> +
>>>>>> + while (bytes_left) {
>>>>>> +         addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
>>>>>> +                         &data_left);
>>>>>> + /* EOF check */
>>>>>
>>>>> Indentation, and "if no data left" does indicate that this is the end of
>>>>> the loop already.
>>>>>
>>>>>> +         if (data_left == 0) {
>>>>>> +                 pr_info("Ramdump complete. %lld bytes read.", offset);
>>>>>> +                 return 0;
>>>>>
>>>>> You might have copied data to the buffer, so returning 0 here doesn't
>>>>> seem right. Presumably instead you should break and return offset -
>>>>> original offset or something like that.
>>>>>
>>>>>> +         }
>>>>>> +
>>>>>> +         copy_size = min_t(size_t, bytes_left, data_left);
>>>>>> +
>>>>>> +         device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
>>>>>> +         if (!device_mem) {
>>>>>> +                 pr_err("Unable to ioremap: addr %lx, size %zd\n",
>>>>>> +                          addr, copy_size);
>>>>>> +                 return -ENOMEM;
>>>>>> +         }
>>>>>> +         memcpy(buffer, device_mem, copy_size);
>>>>>> +
>>>>>> +         offset += copy_size;
>>>>>> +         buffer += copy_size;
>>>>>> +         bytes_left -= copy_size;
>>>>>> +         dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
>>>>>> +                 copy_size);
>>>>>> + }
>>>>>> +
>>>>>> + return count;
>>>>>
>>>>> This should be the number of bytes actually returned, so if count is
>>>>> larger than the sum of the segment sizes this will be wrong.
>>>>>
>>>>>> +}
>>>>>> +
>>>>>>  /**
>>>>>>   * rproc_coredump_add_custom_segment() - add custom coredump segment
>>>>>>   * @rproc:       handle of a remote processor
>>>>>> @@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
>>>>>>   struct rproc_dump_segment *segment;
>>>>>>   struct elf32_phdr *phdr;
>>>>>>   struct elf32_hdr *ehdr;
>>>>>> - size_t data_size;
>>>>>> + size_t header_size;
>>>>>>   size_t offset;
>>>>>>   void *data;
>>>>>> - void *ptr;
>>>>>>   int phnum = 0;
>>>>>>
>>>>>>   if (list_empty(&rproc->dump_segments))
>>>>>>           return;
>>>>>>
>>>>>> - data_size = sizeof(*ehdr);
>>>>>> + header_size = sizeof(*ehdr);
>>>>>>   list_for_each_entry(segment, &rproc->dump_segments, node) {
>>>>>> -         data_size += sizeof(*phdr) + segment->size;
>>>>>> +         header_size += sizeof(*phdr);
>>>>>>
>>>>>>           phnum++;
>>>>>>   }
>>>>>>
>>>>>> - data = vmalloc(data_size);
>>>>>> + data = vmalloc(header_size);
>>>>>>   if (!data)
>>>>>>           return;
>>>>>>
>>>>>>   ehdr = data;
>>>>>> + rproc->elfcore = data;
>>>>>
>>>>> Rather than using a rproc-global variable I would prefer that you create
>>>>> a new rproc_coredump_state struct that carries the header pointer and
>>>>> the information needed by the read & free functions.
>>>>>
>>>>>>
>>>>>>   memset(ehdr, 0, sizeof(*ehdr));
>>>>>>   memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
>>>>>> @@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
>>>>>>
>>>>>>           if (segment->dump) {
>>>>>>                   segment->dump(rproc, segment, data + offset);
>>>>
>>>> I'm not exactly sure why custom segments can be copied to the elf image but not
>>>> generic ones... And as far as I can tell accessing "data + offset" will blow up
>>>> because only the memory for the program headers has been allocated, not for the
>>>> program segments.
>>>>
>>>
>>> Thanks, I missed that, but you're correct.
>>>
>>>>
>>>>>> -         } else {
>>>>>> -                 ptr = rproc_da_to_va(rproc, segment->da, segment->size);
>>>>>> -                 if (!ptr) {
>>>>>> -                         dev_err(&rproc->dev,
>>>>>> -                                 "invalid coredump segment (%pad, %zu)\n",
>>>>>> -                                 &segment->da, segment->size);
>>>>>> -                         memset(data + offset, 0xff, segment->size);
>>>>>> -                 } else {
>>>>>> -                         memcpy(data + offset, ptr, segment->size);
>>>>>> -                 }
>>>>>> -         }
>>>>>>
>>>>>>           offset += phdr->p_filesz;
>>>>>>           phdr++;
>>>>>>   }
>>>>>> + dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
>>>>>> +                 rproc_read_dump, rproc_free_dump);
>>>>>>
>>>>>> - dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
>>>>>> + wait_for_completion(&rproc->dump_done);
>>>>>
>>>>> This will mean that recovery handling will break on installations that
>>>>> doesn't have your ramdump collector - as it will just sit here forever
>>>>> (5 minutes) waiting for userspace to do its job.
>>>>
>>>> Right, that problem also came to mind.
>>>>
>>>>>
>>>>> I think we need to device a new sysfs attribute, through which you can
>>>>> enable the "inline" coredump mechanism. That way recovery would work for
>>>>> all systems and in your specific case you could reconfigure it - perhaps
>>>>> once the ramdump collector starts.
>>>>
>>>> Another option is to make rproc_coredump() customizable, as with all the other
>>>> functions in remoteproc_internal.h.  That way the current rproc_coredump() is
>>>> kept intact and we don't need a new sysfs entry.
>>>>
>>>
>>> Rishabh suggested this in a discussion we had earlier this week as well,
>>> but we still have the problem that the same platform driver will need to
>>> support both modes, depending on which user space is running. So even if
>>> we push this out to the platform driver we still need some mechanism
>>> for userspace to enable the "inline" mode.
>>
>> So is this something that needs to be done on the fly in response to
>> some system event?  Any possibility to use the DT?
>>
> 
> Designing this as a dynamic property would mean that the kernel doesn't
> have to be recompiled between different variants of the same software
> solution for a piece of hardware.
> 
> Putting a flag in DT would mean that you need to flash different DT
> depending on what apps your running in userspace.
> 
>> We are currently discussing the addition of a character driver [1]...
>> The file_operations could be platform specific so any scenario can be
>> implemented, whether it is switching on/off a remote processor in the
>> open/release() callback or setting the behavior of the coredump
>> functionality in an ioctl().
> 
> The main benefit of tying this to the character device would be that the
> behavior could be reverted on release(). But this would imply that the
> application starting and stopping the remoteproc is also the one
> collecting ramdumps and it would also imply that there exists such an
> application (e.g. this functionality is still desirable for auto_booted
> remoteprocs).

What about to have a dedicated sub device for the coredump, with its own
interface (sysfs or char dev)?
Then kernel configs could enable/disable the driver and set the mode.
This could help to decorrelate the coredump and the recovery and manage
different behaviors for debug, production and final product.

> 
> Finally I think it's likely that the existing tools for collecting
> devcoredump artifacts are expected to just continue to work after this
> change - in both modes.
agree
> 
> 
> 
> On the functionality Rishabh proposes, it would be very interesting to
> hear from others on their usage and need for coredumps.
Concerning the stm32 platform the usage will depend on customer choice,
as they implement their own remote firmware. So i suppose that the needs would be:
- to enable /disable the coredump depending onproject phases(dev, production,
  final product)
- to perform a post-mortem analysis:
   - remote proc trace
   - code and associated data section analysis
 
> 
> E.g. are Qualcomm really the only ones that has issues with
> vmalloc(sizeof(firmware)) failing and preventing post mortem debugging
> in low-memory scenarios? Or does others simply not care to debug
> remoteproc firmware in these cases? Is debugging only done using JTAG?
> 
>> I think there is value in exploring different opportunities so that we
>> keep the core as clean and simple as possible.
>>
> 
> I agree, but hadn't considered this fully. In particular with the
> changes I'm asking Rishabh to make we have a few screen fulls of code
> involved in the coredump handling. So I think it would be beneficial to
> move this into a remoteproc_coredump.c.

Agree, i would also consider a separate device (but perhaps in a second step). 

One challenge of having a separate device is that we need to ensure that
everything is ready(probed) before starting the firmware especially for the
autoboot mode.
Component bind/unbind mechanism could help to synchronize sub device with
rproc device on start and stop.
FYI, I plan to sent a RFC soon (internal review on going) about the 
de-correlation of the rproc virtio that also introduces the component
bind/unbind mechanism in rproc core.

Regards,
Arnaud

> 
> Thanks,
> Bjorn
> 
>> Thanks,
>> Mathieu
>>
>> [1]. https://patchwork.kernel.org/project/linux-remoteproc/list/?series=264603
>>
>>>
>>> Regards,
>>> Bjorn

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
@ 2020-04-10 10:31               ` Arnaud POULIQUEN
  0 siblings, 0 replies; 17+ messages in thread
From: Arnaud POULIQUEN @ 2020-04-10 10:31 UTC (permalink / raw)
  To: Bjorn Andersson, Mathieu Poirier
  Cc: Rishabh Bhatnagar, Linux Kernel Mailing List, linux-remoteproc,
	Ohad Ben-Cohen, psodagud, tsoni, Siddharth Gupta



On 4/9/20 10:27 PM, Bjorn Andersson wrote:
> On Fri 03 Apr 13:53 PDT 2020, Mathieu Poirier wrote:
> 
>> On Thu, 2 Apr 2020 at 23:16, Bjorn Andersson <bjorn.andersson@linaro.org> wrote:
>>>
>>> On Thu 02 Apr 10:24 PDT 2020, Mathieu Poirier wrote:
>>>
>>>> On Wed, Apr 01, 2020 at 12:51:14PM -0700, Bjorn Andersson wrote:
>>>>> On Fri 27 Mar 16:56 PDT 2020, Rishabh Bhatnagar wrote:
>>>>>
>>>>>> The current coredump implementation uses vmalloc area to copy
>>>>>> all the segments. But this might put a lot of strain on low memory
>>>>>> targets as the firmware size sometimes is in ten's of MBs.
>>>>>> The situation becomes worse if there are multiple remote processors
>>>>>> undergoing recovery at the same time.
>>>>>> This patch directly copies the device memory to userspace buffer
>>>>>> and avoids extra memory usage. This requires recovery to be halted
>>>>>> until data is read by userspace and free function is called.
>>>>>>
>>>>>> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
>>>>>> ---
>>>>>>  drivers/remoteproc/remoteproc_core.c | 107 +++++++++++++++++++++++++++++------
>>>>>>  include/linux/remoteproc.h           |   4 ++
>>>>>>  2 files changed, 94 insertions(+), 17 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
>>>>>> index 097f33e..2d881e5 100644
>>>>>> --- a/drivers/remoteproc/remoteproc_core.c
>>>>>> +++ b/drivers/remoteproc/remoteproc_core.c
>>>>>> @@ -1516,6 +1516,86 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
>>>>>>  }
>>>>>>  EXPORT_SYMBOL(rproc_coredump_add_segment);
>>>>>>
>>>>>> +
>>>>>> +void rproc_free_dump(void *data)
>>>>>
>>>>> static
>>>>>
>>>>>> +{
>>>>>> + struct rproc *rproc = data;
>>>>>> +
>>>>>> + dev_info(&rproc->dev, "Userspace done reading rproc dump\n");
>>>>>
>>>>> Please drop the info prints throughout.
>>>>>
>>>>>> + complete(&rproc->dump_done);
>>>>>> +}
>>>>>> +
>>>>>> +static unsigned long get_offset(loff_t user_offset, struct list_head *segments,
>>>>>> +                         unsigned long *data_left)
>>>>>
>>>>> Please rename this rproc_coredump_resolve_segment(), or something along
>>>>> those lines.
>>>>>
>>>>>> +{
>>>>>> + struct rproc_dump_segment *segment;
>>>>>> +
>>>>>> + list_for_each_entry(segment, segments, node) {
>>>>>> +         if (user_offset >= segment->size)
>>>>>> +                 user_offset -= segment->size;
>>>>>> +         else
>>>>>> +                 break;
>>>>>> + }
>>>>>> +
>>>>>> + if (&segment->node == segments) {
>>>>>> +         *data_left = 0;
>>>>>> +         return 0;
>>>>>> + }
>>>>>> +
>>>>>> + *data_left = segment->size - user_offset;
>>>>>> +
>>>>>> + return segment->da + user_offset;
>>>>>> +}
>>>>>> +
>>>>>> +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
>>>>>> +                         void *data, size_t elfcorelen)
>>>>>> +{
>>>>>> + void *device_mem = NULL;
>>>>>> + unsigned long data_left = 0;
>>>>>> + unsigned long bytes_left = count;
>>>>>> + unsigned long addr = 0;
>>>>>> + size_t copy_size = 0;
>>>>>> + struct rproc *rproc = data;
>>>>>> +
>>>>>> + if (offset < elfcorelen) {
>>>>>> +         copy_size = elfcorelen - offset;
>>>>>> +         copy_size = min(copy_size, bytes_left);
>>>>>> +
>>>>>> +         memcpy(buffer, rproc->elfcore + offset, copy_size);
>>>>>> +         offset += copy_size;
>>>>>> +         bytes_left -= copy_size;
>>>>>> +         buffer += copy_size;
>>>>>> + }
>>>>>> +
>>>>>> + while (bytes_left) {
>>>>>> +         addr = get_offset(offset - elfcorelen, &rproc->dump_segments,
>>>>>> +                         &data_left);
>>>>>> + /* EOF check */
>>>>>
>>>>> Indentation, and "if no data left" does indicate that this is the end of
>>>>> the loop already.
>>>>>
>>>>>> +         if (data_left == 0) {
>>>>>> +                 pr_info("Ramdump complete. %lld bytes read.", offset);
>>>>>> +                 return 0;
>>>>>
>>>>> You might have copied data to the buffer, so returning 0 here doesn't
>>>>> seem right. Presumably instead you should break and return offset -
>>>>> original offset or something like that.
>>>>>
>>>>>> +         }
>>>>>> +
>>>>>> +         copy_size = min_t(size_t, bytes_left, data_left);
>>>>>> +
>>>>>> +         device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);
>>>>>> +         if (!device_mem) {
>>>>>> +                 pr_err("Unable to ioremap: addr %lx, size %zd\n",
>>>>>> +                          addr, copy_size);
>>>>>> +                 return -ENOMEM;
>>>>>> +         }
>>>>>> +         memcpy(buffer, device_mem, copy_size);
>>>>>> +
>>>>>> +         offset += copy_size;
>>>>>> +         buffer += copy_size;
>>>>>> +         bytes_left -= copy_size;
>>>>>> +         dev_dbg(&rproc->dev, "Copied %d bytes to userspace\n",
>>>>>> +                 copy_size);
>>>>>> + }
>>>>>> +
>>>>>> + return count;
>>>>>
>>>>> This should be the number of bytes actually returned, so if count is
>>>>> larger than the sum of the segment sizes this will be wrong.
>>>>>
>>>>>> +}
>>>>>> +
>>>>>>  /**
>>>>>>   * rproc_coredump_add_custom_segment() - add custom coredump segment
>>>>>>   * @rproc:       handle of a remote processor
>>>>>> @@ -1566,27 +1646,27 @@ static void rproc_coredump(struct rproc *rproc)
>>>>>>   struct rproc_dump_segment *segment;
>>>>>>   struct elf32_phdr *phdr;
>>>>>>   struct elf32_hdr *ehdr;
>>>>>> - size_t data_size;
>>>>>> + size_t header_size;
>>>>>>   size_t offset;
>>>>>>   void *data;
>>>>>> - void *ptr;
>>>>>>   int phnum = 0;
>>>>>>
>>>>>>   if (list_empty(&rproc->dump_segments))
>>>>>>           return;
>>>>>>
>>>>>> - data_size = sizeof(*ehdr);
>>>>>> + header_size = sizeof(*ehdr);
>>>>>>   list_for_each_entry(segment, &rproc->dump_segments, node) {
>>>>>> -         data_size += sizeof(*phdr) + segment->size;
>>>>>> +         header_size += sizeof(*phdr);
>>>>>>
>>>>>>           phnum++;
>>>>>>   }
>>>>>>
>>>>>> - data = vmalloc(data_size);
>>>>>> + data = vmalloc(header_size);
>>>>>>   if (!data)
>>>>>>           return;
>>>>>>
>>>>>>   ehdr = data;
>>>>>> + rproc->elfcore = data;
>>>>>
>>>>> Rather than using a rproc-global variable I would prefer that you create
>>>>> a new rproc_coredump_state struct that carries the header pointer and
>>>>> the information needed by the read & free functions.
>>>>>
>>>>>>
>>>>>>   memset(ehdr, 0, sizeof(*ehdr));
>>>>>>   memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
>>>>>> @@ -1618,23 +1698,14 @@ static void rproc_coredump(struct rproc *rproc)
>>>>>>
>>>>>>           if (segment->dump) {
>>>>>>                   segment->dump(rproc, segment, data + offset);
>>>>
>>>> I'm not exactly sure why custom segments can be copied to the elf image but not
>>>> generic ones... And as far as I can tell accessing "data + offset" will blow up
>>>> because only the memory for the program headers has been allocated, not for the
>>>> program segments.
>>>>
>>>
>>> Thanks, I missed that, but you're correct.
>>>
>>>>
>>>>>> -         } else {
>>>>>> -                 ptr = rproc_da_to_va(rproc, segment->da, segment->size);
>>>>>> -                 if (!ptr) {
>>>>>> -                         dev_err(&rproc->dev,
>>>>>> -                                 "invalid coredump segment (%pad, %zu)\n",
>>>>>> -                                 &segment->da, segment->size);
>>>>>> -                         memset(data + offset, 0xff, segment->size);
>>>>>> -                 } else {
>>>>>> -                         memcpy(data + offset, ptr, segment->size);
>>>>>> -                 }
>>>>>> -         }
>>>>>>
>>>>>>           offset += phdr->p_filesz;
>>>>>>           phdr++;
>>>>>>   }
>>>>>> + dev_coredumpm(&rproc->dev, NULL, rproc, header_size, GFP_KERNEL,
>>>>>> +                 rproc_read_dump, rproc_free_dump);
>>>>>>
>>>>>> - dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
>>>>>> + wait_for_completion(&rproc->dump_done);
>>>>>
>>>>> This will mean that recovery handling will break on installations that
>>>>> doesn't have your ramdump collector - as it will just sit here forever
>>>>> (5 minutes) waiting for userspace to do its job.
>>>>
>>>> Right, that problem also came to mind.
>>>>
>>>>>
>>>>> I think we need to device a new sysfs attribute, through which you can
>>>>> enable the "inline" coredump mechanism. That way recovery would work for
>>>>> all systems and in your specific case you could reconfigure it - perhaps
>>>>> once the ramdump collector starts.
>>>>
>>>> Another option is to make rproc_coredump() customizable, as with all the other
>>>> functions in remoteproc_internal.h.  That way the current rproc_coredump() is
>>>> kept intact and we don't need a new sysfs entry.
>>>>
>>>
>>> Rishabh suggested this in a discussion we had earlier this week as well,
>>> but we still have the problem that the same platform driver will need to
>>> support both modes, depending on which user space is running. So even if
>>> we push this out to the platform driver we still need some mechanism
>>> for userspace to enable the "inline" mode.
>>
>> So is this something that needs to be done on the fly in response to
>> some system event?  Any possibility to use the DT?
>>
> 
> Designing this as a dynamic property would mean that the kernel doesn't
> have to be recompiled between different variants of the same software
> solution for a piece of hardware.
> 
> Putting a flag in DT would mean that you need to flash different DT
> depending on what apps your running in userspace.
> 
>> We are currently discussing the addition of a character driver [1]...
>> The file_operations could be platform specific so any scenario can be
>> implemented, whether it is switching on/off a remote processor in the
>> open/release() callback or setting the behavior of the coredump
>> functionality in an ioctl().
> 
> The main benefit of tying this to the character device would be that the
> behavior could be reverted on release(). But this would imply that the
> application starting and stopping the remoteproc is also the one
> collecting ramdumps and it would also imply that there exists such an
> application (e.g. this functionality is still desirable for auto_booted
> remoteprocs).

What about to have a dedicated sub device for the coredump, with its own
interface (sysfs or char dev)?
Then kernel configs could enable/disable the driver and set the mode.
This could help to decorrelate the coredump and the recovery and manage
different behaviors for debug, production and final product.

> 
> Finally I think it's likely that the existing tools for collecting
> devcoredump artifacts are expected to just continue to work after this
> change - in both modes.
agree
> 
> 
> 
> On the functionality Rishabh proposes, it would be very interesting to
> hear from others on their usage and need for coredumps.
Concerning the stm32 platform the usage will depend on customer choice,
as they implement their own remote firmware. So i suppose that the needs would be:
- to enable /disable the coredump depending onproject phases(dev, production,
  final product)
- to perform a post-mortem analysis:
   - remote proc trace
   - code and associated data section analysis
 
> 
> E.g. are Qualcomm really the only ones that has issues with
> vmalloc(sizeof(firmware)) failing and preventing post mortem debugging
> in low-memory scenarios? Or does others simply not care to debug
> remoteproc firmware in these cases? Is debugging only done using JTAG?
> 
>> I think there is value in exploring different opportunities so that we
>> keep the core as clean and simple as possible.
>>
> 
> I agree, but hadn't considered this fully. In particular with the
> changes I'm asking Rishabh to make we have a few screen fulls of code
> involved in the coredump handling. So I think it would be beneficial to
> move this into a remoteproc_coredump.c.

Agree, i would also consider a separate device (but perhaps in a second step). 

One challenge of having a separate device is that we need to ensure that
everything is ready(probed) before starting the firmware especially for the
autoboot mode.
Component bind/unbind mechanism could help to synchronize sub device with
rproc device on start and stop.
FYI, I plan to sent a RFC soon (internal review on going) about the 
de-correlation of the rproc virtio that also introduces the component
bind/unbind mechanism in rproc core.

Regards,
Arnaud

> 
> Thanks,
> Bjorn
> 
>> Thanks,
>> Mathieu
>>
>> [1]. https://patchwork.kernel.org/project/linux-remoteproc/list/?series=264603
>>
>>>
>>> Regards,
>>> Bjorn

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
  2020-04-10 10:31               ` Arnaud POULIQUEN
@ 2020-04-11  1:16                   ` Bjorn Andersson
  -1 siblings, 0 replies; 17+ messages in thread
From: Bjorn Andersson @ 2020-04-11  1:16 UTC (permalink / raw)
  To: Arnaud POULIQUEN
  Cc: Mathieu Poirier, Rishabh Bhatnagar, Linux Kernel Mailing List,
	linux-remoteproc, Ohad Ben-Cohen, psodagud, tsoni,
	Siddharth Gupta

On Fri 10 Apr 03:31 PDT 2020, Arnaud POULIQUEN wrote:

> 
> 
> On 4/9/20 10:27 PM, Bjorn Andersson wrote:
> > On Fri 03 Apr 13:53 PDT 2020, Mathieu Poirier wrote:
> > 
> >> On Thu, 2 Apr 2020 at 23:16, Bjorn Andersson <bjorn.andersson@linaro.org> wrote:
> >>>
> >>> On Thu 02 Apr 10:24 PDT 2020, Mathieu Poirier wrote:
[..]
> >> We are currently discussing the addition of a character driver [1]...
> >> The file_operations could be platform specific so any scenario can be
> >> implemented, whether it is switching on/off a remote processor in the
> >> open/release() callback or setting the behavior of the coredump
> >> functionality in an ioctl().
> > 
> > The main benefit of tying this to the character device would be that the
> > behavior could be reverted on release(). But this would imply that the
> > application starting and stopping the remoteproc is also the one
> > collecting ramdumps and it would also imply that there exists such an
> > application (e.g. this functionality is still desirable for auto_booted
> > remoteprocs).
> 
> What about to have a dedicated sub device for the coredump, with its own
> interface (sysfs or char dev)?

I did consider this when reviewing the support we have now, but I didn't
see a way to make it reliably run inbetween the stop and start during a
recovery.
Now we have the rproc_unprepare_subdevices(), so that might have worked
out, although you probably only want this when crashed = true.

That said, I don't think you should see subdevice == device here, we can
create a new device (to host the sysfs attribute) regardless of it being
a "subdevice" or not. I do however not see that the two* booleans
warrants the complexity a new device comes with.

[*] enable/disable and inline/offline coredump mode (which is actually 3
states)

> Then kernel configs could enable/disable the driver and set the mode.

It's not adequate to depend on compilation options, we really want these
features to be functional in a multiplatform kernel (e.g. upstream
arm64 defconfig)

But for certain resource constraint devices, where alternative means of
debugging the remoteprocs exists it does make sense to be able to
disable the coredumps altogether, so I think we should move the coredump
functionality out of core.c and include it conditionally using Kconfig.

> This could help to decorrelate the coredump and the recovery and manage
> different behaviors for debug, production and final product.
> 
> > 
> > Finally I think it's likely that the existing tools for collecting
> > devcoredump artifacts are expected to just continue to work after this
> > change - in both modes.
> agree
> > 
> > 
> > 
> > On the functionality Rishabh proposes, it would be very interesting to
> > hear from others on their usage and need for coredumps.
> Concerning the stm32 platform the usage will depend on customer choice,
> as they implement their own remote firmware. So i suppose that the needs would be:
> - to enable /disable the coredump depending onproject phases(dev, production,
>   final product)
> - to perform a post-mortem analysis:
>    - remote proc trace
>    - code and associated data section analysis
>  

Sounds good, then from where we are now we at least need to allow
disabling of the coredump collection.

> > 
> > E.g. are Qualcomm really the only ones that has issues with
> > vmalloc(sizeof(firmware)) failing and preventing post mortem debugging
> > in low-memory scenarios? Or does others simply not care to debug
> > remoteproc firmware in these cases? Is debugging only done using JTAG?
> > 
> >> I think there is value in exploring different opportunities so that we
> >> keep the core as clean and simple as possible.
> >>
> > 
> > I agree, but hadn't considered this fully. In particular with the
> > changes I'm asking Rishabh to make we have a few screen fulls of code
> > involved in the coredump handling. So I think it would be beneficial to
> > move this into a remoteproc_coredump.c.
> 
> Agree, i would also consider a separate device (but perhaps in a second step). 
> 
> One challenge of having a separate device is that we need to ensure that
> everything is ready(probed) before starting the firmware especially for the
> autoboot mode.
> Component bind/unbind mechanism could help to synchronize sub device with
> rproc device on start and stop.
> FYI, I plan to sent a RFC soon (internal review on going) about the 
> de-correlation of the rproc virtio that also introduces the component
> bind/unbind mechanism in rproc core.
> 

This sounds quite interesting. I'm not sure about turning the coredump
handling into its own device and use this for decoupling it from the
core, but I'm looking forward to see your patches.

Regards,
Bjorn

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] remoteproc: core: Add a memory efficient coredump function
@ 2020-04-11  1:16                   ` Bjorn Andersson
  0 siblings, 0 replies; 17+ messages in thread
From: Bjorn Andersson @ 2020-04-11  1:16 UTC (permalink / raw)
  To: Arnaud POULIQUEN
  Cc: Mathieu Poirier, Rishabh Bhatnagar, Linux Kernel Mailing List,
	linux-remoteproc, Ohad Ben-Cohen, psodagud, tsoni,
	Siddharth Gupta

On Fri 10 Apr 03:31 PDT 2020, Arnaud POULIQUEN wrote:

> 
> 
> On 4/9/20 10:27 PM, Bjorn Andersson wrote:
> > On Fri 03 Apr 13:53 PDT 2020, Mathieu Poirier wrote:
> > 
> >> On Thu, 2 Apr 2020 at 23:16, Bjorn Andersson <bjorn.andersson@linaro.org> wrote:
> >>>
> >>> On Thu 02 Apr 10:24 PDT 2020, Mathieu Poirier wrote:
[..]
> >> We are currently discussing the addition of a character driver [1]...
> >> The file_operations could be platform specific so any scenario can be
> >> implemented, whether it is switching on/off a remote processor in the
> >> open/release() callback or setting the behavior of the coredump
> >> functionality in an ioctl().
> > 
> > The main benefit of tying this to the character device would be that the
> > behavior could be reverted on release(). But this would imply that the
> > application starting and stopping the remoteproc is also the one
> > collecting ramdumps and it would also imply that there exists such an
> > application (e.g. this functionality is still desirable for auto_booted
> > remoteprocs).
> 
> What about to have a dedicated sub device for the coredump, with its own
> interface (sysfs or char dev)?

I did consider this when reviewing the support we have now, but I didn't
see a way to make it reliably run inbetween the stop and start during a
recovery.
Now we have the rproc_unprepare_subdevices(), so that might have worked
out, although you probably only want this when crashed = true.

That said, I don't think you should see subdevice == device here, we can
create a new device (to host the sysfs attribute) regardless of it being
a "subdevice" or not. I do however not see that the two* booleans
warrants the complexity a new device comes with.

[*] enable/disable and inline/offline coredump mode (which is actually 3
states)

> Then kernel configs could enable/disable the driver and set the mode.

It's not adequate to depend on compilation options, we really want these
features to be functional in a multiplatform kernel (e.g. upstream
arm64 defconfig)

But for certain resource constraint devices, where alternative means of
debugging the remoteprocs exists it does make sense to be able to
disable the coredumps altogether, so I think we should move the coredump
functionality out of core.c and include it conditionally using Kconfig.

> This could help to decorrelate the coredump and the recovery and manage
> different behaviors for debug, production and final product.
> 
> > 
> > Finally I think it's likely that the existing tools for collecting
> > devcoredump artifacts are expected to just continue to work after this
> > change - in both modes.
> agree
> > 
> > 
> > 
> > On the functionality Rishabh proposes, it would be very interesting to
> > hear from others on their usage and need for coredumps.
> Concerning the stm32 platform the usage will depend on customer choice,
> as they implement their own remote firmware. So i suppose that the needs would be:
> - to enable /disable the coredump depending onproject phases(dev, production,
>   final product)
> - to perform a post-mortem analysis:
>    - remote proc trace
>    - code and associated data section analysis
>  

Sounds good, then from where we are now we at least need to allow
disabling of the coredump collection.

> > 
> > E.g. are Qualcomm really the only ones that has issues with
> > vmalloc(sizeof(firmware)) failing and preventing post mortem debugging
> > in low-memory scenarios? Or does others simply not care to debug
> > remoteproc firmware in these cases? Is debugging only done using JTAG?
> > 
> >> I think there is value in exploring different opportunities so that we
> >> keep the core as clean and simple as possible.
> >>
> > 
> > I agree, but hadn't considered this fully. In particular with the
> > changes I'm asking Rishabh to make we have a few screen fulls of code
> > involved in the coredump handling. So I think it would be beneficial to
> > move this into a remoteproc_coredump.c.
> 
> Agree, i would also consider a separate device (but perhaps in a second step). 
> 
> One challenge of having a separate device is that we need to ensure that
> everything is ready(probed) before starting the firmware especially for the
> autoboot mode.
> Component bind/unbind mechanism could help to synchronize sub device with
> rproc device on start and stop.
> FYI, I plan to sent a RFC soon (internal review on going) about the 
> de-correlation of the rproc virtio that also introduces the component
> bind/unbind mechanism in rproc core.
> 

This sounds quite interesting. I'm not sure about turning the coredump
handling into its own device and use this for decoupling it from the
core, but I'm looking forward to see your patches.

Regards,
Bjorn

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2020-04-11  1:16 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-27 23:56 [PATCH] remoteproc: core: Add a memory efficient coredump function Rishabh Bhatnagar
2020-03-28  4:06 ` kbuild test robot
2020-03-28  4:49 ` kbuild test robot
2020-04-01 19:51 ` Bjorn Andersson
2020-04-01 19:51   ` Bjorn Andersson
2020-04-02 17:24   ` Mathieu Poirier
2020-04-03  5:16     ` Bjorn Andersson
2020-04-03  5:16       ` Bjorn Andersson
2020-04-03 18:46       ` rishabhb
2020-04-03 20:53       ` Mathieu Poirier
2020-04-08 18:03         ` rishabhb
2020-04-09 20:27         ` Bjorn Andersson
2020-04-09 20:27           ` Bjorn Andersson
2020-04-09 20:27             ` Bjorn Andersson
2020-04-10 10:31             ` Arnaud POULIQUEN
2020-04-10 10:31               ` Arnaud POULIQUEN
2020-04-11  1:16               ` Bjorn Andersson
2020-04-11  1:16                 ` Bjorn Andersson
2020-04-11  1:16                   ` Bjorn Andersson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.