All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-04 18:01 ` greearb
  0 siblings, 0 replies; 46+ messages in thread
From: greearb @ 2014-06-04 18:01 UTC (permalink / raw)
  To: ath10k; +Cc: linux-wireless, Ben Greear

From: Ben Greear <greearb@candelatech.com>

Store the firmware crash registers and last 128 or so
firmware debug-log ids and present them to user-space
via debugfs.

Should help with figuring out why the firmware crashed.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---

Changes from RFC:  Fixed spacing and naming and some other
requests.  Did not change everything Michal asked for.

Also, added a bunch of info to the file header to have better
chance of understanding what hardware/firmware/kernel and such
is being reported.  Version field will let us update the file
format as needed...whatever decodes it can take different action
based on the version, etc.

 drivers/net/wireless/ath/ath10k/core.h  |  73 ++++++++++++++++
 drivers/net/wireless/ath/ath10k/debug.c | 144 ++++++++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath10k/debug.h |  25 ++++++
 drivers/net/wireless/ath/ath10k/pci.c   |  82 +++++++++++++++++-
 4 files changed, 321 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 68ceef6..a8866d3 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -41,6 +41,11 @@
 #define ATH10K_FLUSH_TIMEOUT_HZ (5*HZ)
 #define ATH10K_NUM_CHANS 38
 
+/* Duplicates define in pci.h, if there is ever mis-match we should
+ * get a compile error.  Cannot just include pci.h here.
+ */
+#define REG_DUMP_COUNT_QCA988X 60
+
 /* Antenna noise floor */
 #define ATH10K_DEFAULT_NOISE_FLOOR -95
 
@@ -338,6 +343,68 @@ enum ath10k_dev_flags {
 	ATH10K_FLAG_CORE_REGISTERED,
 };
 
+/**
+ * enum ath10k_fw_error_dump_type - types of data in the dump file
+ * @ATH10K_FW_ERROR_DUMP_DBGLOG:  Recent firmware debug log entries
+ * @ATH10K_FW_ERROR_DUMP_CRASH:   Crash dump in binary format
+ */
+enum ath10k_fw_error_dump_type {
+	ATH10K_FW_ERROR_DUMP_DBGLOG = 0,
+	ATH10K_FW_ERROR_DUMP_REGDUMP = 1,
+
+	ATH10K_FW_ERROR_DUMP_MAX,
+};
+
+
+struct ath10k_tlv_dump_data {
+	u32 type; /* see ath10k_fw_error_dump_type above */
+	u32 tlv_len; /* in bytes */
+	u8 tlv_data[]; /* Pad to 32-bit boundaries as needed. */
+} __packed;
+
+struct ath10k_dump_file_data {
+	/* Dump file information */
+	u32 len;
+	u32 magic; /* 0x01020304, tells us byte-order of host if we care */
+	u32 version; /* File dump version, 1 for now. */
+
+	/* Some info we can get from ath10k struct that might help. */
+	u32 chip_id;
+	u32 target_version;
+	u8 fw_version_major;
+	u8 unused8; /* pad fw_version_major */
+	u16 unused16; /* pad fw_version_major */
+	u32 fw_version_minor;
+	u16 fw_version_release;
+	u16 fw_version_build;
+	u32 phy_capability;
+	u32 hw_min_tx_power;
+	u32 hw_max_tx_power;
+	u32 ht_cap_info;
+	u32 vht_cap_info;
+	u32 num_rf_chains;
+	char fw_ver[64]; /* Firmware version string */
+
+	/* Kernel related information */
+	u32 kernel_ver_code; /* LINUX_VERSION_CODE */
+	char kernel_ver[64]; /* VERMAGIC_STRING */
+
+	u8 unused[64]; /* Room for growth w/out changing binary format */
+
+	struct ath10k_tlv_dump_data data; /* more may follow */
+} __packed;
+
+/* This will store at least the last 128 entries.  Each dbglog message
+ * is a max of 7 32-bit integers in length, but the length can be less
+ * than that as well.
+ */
+#define ATH10K_DBGLOG_DATA_LEN (128 * 7 * 4)
+struct ath10k_dbglog_entry_storage {
+	u32 next_idx; /* Where to write next chunk of data */
+	u8 data[ATH10K_DBGLOG_DATA_LEN];
+};
+
+
 struct ath10k {
 	struct ath_common ath_common;
 	struct ieee80211_hw *hw;
@@ -488,6 +555,12 @@ struct ath10k {
 
 	struct dfs_pattern_detector *dfs_detector;
 
+	/* Used for crash-dump storage */
+	/* Don't over-write dump info until someone reads the data. */
+	bool crashed_since_read;
+	struct ath10k_dbglog_entry_storage dbglog_entry_data;
+	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
+
 #ifdef CONFIG_ATH10K_DEBUGFS
 	struct ath10k_debug debug;
 #endif
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 1b7ff4b..a7d7877 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -17,6 +17,8 @@
 
 #include <linux/module.h>
 #include <linux/debugfs.h>
+#include <linux/version.h>
+#include <linux/vermagic.h>
 
 #include "core.h"
 #include "debug.h"
@@ -577,6 +579,145 @@ static const struct file_operations fops_chip_id = {
 	.llseek = default_llseek,
 };
 
+void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len)
+{
+	int i;
+	int z = ar->dbglog_entry_data.next_idx;
+
+	/* Don't save any new logs until user-space reads this. */
+	if (ar->crashed_since_read)
+		return;
+
+	for (i = 0; i < len; i++) {
+		ar->dbglog_entry_data.data[z] = buffer[i];
+		z++;
+		if (z >= ATH10K_DBGLOG_DATA_LEN)
+			z = 0;
+	}
+	ar->dbglog_entry_data.next_idx = z;
+}
+EXPORT_SYMBOL(ath10k_dbg_save_fw_dbg_buffer);
+
+static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
+{
+	unsigned int len = (sizeof(ar->dbglog_entry_data)
+			    + sizeof(ar->reg_dump_values));
+	unsigned int sofar = 0;
+	char *buf;
+	struct ath10k_tlv_dump_data *dump_tlv;
+	struct ath10k_dump_file_data *dump_data;
+	int hdr_len = sizeof(*dump_data) - sizeof(dump_data->data);
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	len += hdr_len;
+	sofar += hdr_len;
+
+	/* So we can add headers to the data dump */
+	len += 2 * sizeof(*dump_tlv);
+
+	/* This is going to get big when we start dumping FW RAM and such,
+	 * so go ahead and use vmalloc.
+	 */
+	buf = vmalloc(len);
+	if (!buf)
+		return NULL;
+
+	memset(buf, 0, len);
+	dump_data = (struct ath10k_dump_file_data *)(buf);
+	dump_data->len = len;
+	dump_data->magic = 0x01020304;
+	dump_data->version = 1;
+	dump_data->chip_id = ar->chip_id;
+	dump_data->target_version = ar->target_version;
+	dump_data->fw_version_major = ar->fw_version_major;
+	dump_data->fw_version_minor = ar->fw_version_minor;
+	dump_data->fw_version_release = ar->fw_version_release;
+	dump_data->fw_version_build = ar->fw_version_build;
+	dump_data->phy_capability = ar->phy_capability;
+	dump_data->hw_min_tx_power = ar->hw_min_tx_power;
+	dump_data->hw_max_tx_power = ar->hw_max_tx_power;
+	dump_data->ht_cap_info = ar->ht_cap_info;
+	dump_data->vht_cap_info = ar->vht_cap_info;
+	dump_data->num_rf_chains = ar->num_rf_chains;
+
+	strncpy(dump_data->fw_ver, ar->hw->wiphy->fw_version,
+		sizeof(dump_data->fw_ver) - 1);
+
+	dump_data->kernel_ver_code = LINUX_VERSION_CODE;
+	strncpy(dump_data->kernel_ver, VERMAGIC_STRING,
+		sizeof(dump_data->kernel_ver) - 1);
+
+	/* Gather dbg-log */
+	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+	dump_tlv->type = ATH10K_FW_ERROR_DUMP_DBGLOG;
+	dump_tlv->tlv_len = sizeof(ar->dbglog_entry_data);
+	memcpy(dump_tlv->tlv_data, &ar->dbglog_entry_data, dump_tlv->tlv_len);
+	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
+	/* Gather crash-dump */
+	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+	dump_tlv->type = ATH10K_FW_ERROR_DUMP_REGDUMP;
+	dump_tlv->tlv_len = sizeof(ar->reg_dump_values);
+	memcpy(dump_tlv->tlv_data, &ar->reg_dump_values, dump_tlv->tlv_len);
+	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
+	return dump_data;
+}
+
+static int ath10k_fw_error_dump_open(struct inode *inode, struct file *file)
+{
+	struct ath10k *ar = inode->i_private;
+	int ret;
+	struct ath10k_dump_file_data *dump;
+
+	if (!ar)
+		return -EINVAL;
+
+	mutex_lock(&ar->conf_mutex);
+
+	dump = ath10k_build_dump_file(ar);
+	if (!dump) {
+		ret = -ENODATA;
+		goto out;
+	}
+
+	file->private_data = dump;
+	ar->crashed_since_read = false;
+	ret = 0;
+
+out:
+	mutex_unlock(&ar->conf_mutex);
+	return ret;
+}
+
+static ssize_t ath10k_fw_error_dump_read(struct file *file,
+					 char __user *user_buf,
+					 size_t count, loff_t *ppos)
+{
+	struct ath10k_dump_file_data *dump_file = file->private_data;
+
+	return simple_read_from_buffer(user_buf, count, ppos,
+				       dump_file,
+				       dump_file->len);
+}
+
+static int ath10k_fw_error_dump_release(struct inode *inode,
+					struct file *file)
+{
+	vfree(file->private_data);
+
+	return 0;
+}
+
+static const struct file_operations fops_fw_error_dump = {
+	.open = ath10k_fw_error_dump_open,
+	.read = ath10k_fw_error_dump_read,
+	.release = ath10k_fw_error_dump_release,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
 static int ath10k_debug_htt_stats_req(struct ath10k *ar)
 {
 	u64 cookie;
@@ -861,6 +1002,9 @@ int ath10k_debug_create(struct ath10k *ar)
 	debugfs_create_file("simulate_fw_crash", S_IRUSR, ar->debug.debugfs_phy,
 			    ar, &fops_simulate_fw_crash);
 
+	debugfs_create_file("fw_error_dump", S_IRUSR, ar->debug.debugfs_phy,
+			    ar, &fops_fw_error_dump);
+
 	debugfs_create_file("chip_id", S_IRUSR, ar->debug.debugfs_phy,
 			    ar, &fops_chip_id);
 
diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index a582499..72ed34c 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -19,6 +19,7 @@
 #define _DEBUG_H_
 
 #include <linux/types.h>
+#include "pci.h"
 #include "trace.h"
 
 enum ath10k_debug_mask {
@@ -110,4 +111,28 @@ static inline void ath10k_dbg_dump(enum ath10k_debug_mask mask,
 {
 }
 #endif /* CONFIG_ATH10K_DEBUG */
+
+
+/* Target debug log related defines and structs */
+
+/* Target is 32-bit CPU, so we just use u32 for
+ * the pointers.  The memory space is relative to the
+ * target, not the host.
+ */
+struct ath10k_fw_dbglog_buf {
+	u32 next; /* pointer to dblog_buf_s. */
+	u32 buffer; /* pointer to u8 buffer */
+	u32 bufsize;
+	u32 length;
+	u32 count;
+	u32 free;
+} __packed;
+
+struct ath10k_fw_dbglog_hdr {
+	u32 dbuf; /* pointer to dbglog_buf_s */
+	u32 dropped;
+} __packed;
+
+void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len);
+
 #endif /* _DEBUG_H_ */
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index d0004d5..f2cfe69 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -19,7 +19,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
-#include <linux/bitops.h>
+#include <linux/ctype.h>
 
 #include "core.h"
 #include "debug.h"
@@ -840,6 +840,8 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 	u32 host_addr;
 	int ret;
 	u32 i;
+	struct ath10k_fw_dbglog_hdr dbg_hdr;
+	u32 dbufp; /* pointer in target memory space */
 
 	ath10k_err("firmware crashed!\n");
 	ath10k_err("hardware name %s version 0x%x\n",
@@ -851,7 +853,7 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 				       &reg_dump_area, sizeof(u32));
 	if (ret) {
 		ath10k_err("failed to read FW dump area address: %d\n", ret);
-		return;
+		goto do_restart;
 	}
 
 	ath10k_err("target register Dump Location: 0x%08X\n", reg_dump_area);
@@ -861,7 +863,7 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 				       REG_DUMP_COUNT_QCA988X * sizeof(u32));
 	if (ret != 0) {
 		ath10k_err("failed to read FW dump area: %d\n", ret);
-		return;
+		goto do_restart;
 	}
 
 	BUILD_BUG_ON(REG_DUMP_COUNT_QCA988X % 4);
@@ -875,6 +877,80 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 			   reg_dump_values[i + 2],
 			   reg_dump_values[i + 3]);
 
+	/* Dump the debug logs on the target */
+	host_addr = host_interest_item_address(HI_ITEM(hi_dbglog_hdr));
+	if (ath10k_pci_diag_read_mem(ar, host_addr,
+				     &reg_dump_area, sizeof(u32)) != 0) {
+		ath10k_warn("failed to read hi_dbglog_hdr\n");
+		goto save_regs_and_restart;
+	}
+
+	ath10k_err("target register Debug Log Location: 0x%08X\n",
+		   reg_dump_area);
+
+	ret = ath10k_pci_diag_read_mem(ar, reg_dump_area,
+				       &dbg_hdr, sizeof(dbg_hdr));
+	if (ret != 0) {
+		ath10k_err("failed to dump Debug Log Area\n");
+		goto save_regs_and_restart;
+	}
+
+	ath10k_err("Debug Log Header, dbuf: 0x%x  dropped: %i\n",
+		   dbg_hdr.dbuf, dbg_hdr.dropped);
+	dbufp = dbg_hdr.dbuf;
+	i = 0;
+	while (dbufp) {
+		struct ath10k_fw_dbglog_buf dbuf;
+
+		ret = ath10k_pci_diag_read_mem(ar, dbufp,
+					       &dbuf, sizeof(dbuf));
+		if (ret != 0) {
+			ath10k_err("failed to read Debug Log Area: 0x%x\n",
+				   dbufp);
+			goto save_regs_and_restart;
+		}
+
+		/* We have a buffer of data */
+		ath10k_err("[%i] next: 0x%x buf: 0x%x sz: %i len: %i count: %i free: %i\n",
+			   i, dbuf.next, dbuf.buffer, dbuf.bufsize, dbuf.length,
+			   dbuf.count, dbuf.free);
+		if (dbuf.buffer && dbuf.length) {
+			u8 *buffer = kmalloc(dbuf.length, GFP_ATOMIC);
+
+			if (buffer) {
+				ret = ath10k_pci_diag_read_mem(ar, dbuf.buffer,
+							       buffer,
+							       dbuf.length);
+				if (ret != 0) {
+					ath10k_err("failed to read Debug Log buffer: 0x%x\n",
+						   dbuf.buffer);
+					kfree(buffer);
+					goto save_regs_and_restart;
+				}
+
+				ath10k_dbg_save_fw_dbg_buffer(ar, buffer,
+							      dbuf.length);
+				kfree(buffer);
+			}
+		}
+		dbufp = dbuf.next;
+		if (dbufp == dbg_hdr.dbuf) {
+			/* It is a circular buffer it seems, bail if next
+			 * is head
+			 */
+			break;
+		}
+		i++;
+	} /* While we have a debug buffer to read */
+
+save_regs_and_restart:
+	if (!ar->crashed_since_read) {
+		ar->crashed_since_read = true;
+		memcpy(ar->reg_dump_values, reg_dump_values,
+		       sizeof(ar->reg_dump_values));
+	}
+
+do_restart:
 	queue_work(ar->workqueue, &ar->restart_work);
 }
 
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-04 18:01 ` greearb
  0 siblings, 0 replies; 46+ messages in thread
From: greearb @ 2014-06-04 18:01 UTC (permalink / raw)
  To: ath10k; +Cc: Ben Greear, linux-wireless

From: Ben Greear <greearb@candelatech.com>

Store the firmware crash registers and last 128 or so
firmware debug-log ids and present them to user-space
via debugfs.

Should help with figuring out why the firmware crashed.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---

Changes from RFC:  Fixed spacing and naming and some other
requests.  Did not change everything Michal asked for.

Also, added a bunch of info to the file header to have better
chance of understanding what hardware/firmware/kernel and such
is being reported.  Version field will let us update the file
format as needed...whatever decodes it can take different action
based on the version, etc.

 drivers/net/wireless/ath/ath10k/core.h  |  73 ++++++++++++++++
 drivers/net/wireless/ath/ath10k/debug.c | 144 ++++++++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath10k/debug.h |  25 ++++++
 drivers/net/wireless/ath/ath10k/pci.c   |  82 +++++++++++++++++-
 4 files changed, 321 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 68ceef6..a8866d3 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -41,6 +41,11 @@
 #define ATH10K_FLUSH_TIMEOUT_HZ (5*HZ)
 #define ATH10K_NUM_CHANS 38
 
+/* Duplicates define in pci.h, if there is ever mis-match we should
+ * get a compile error.  Cannot just include pci.h here.
+ */
+#define REG_DUMP_COUNT_QCA988X 60
+
 /* Antenna noise floor */
 #define ATH10K_DEFAULT_NOISE_FLOOR -95
 
@@ -338,6 +343,68 @@ enum ath10k_dev_flags {
 	ATH10K_FLAG_CORE_REGISTERED,
 };
 
+/**
+ * enum ath10k_fw_error_dump_type - types of data in the dump file
+ * @ATH10K_FW_ERROR_DUMP_DBGLOG:  Recent firmware debug log entries
+ * @ATH10K_FW_ERROR_DUMP_CRASH:   Crash dump in binary format
+ */
+enum ath10k_fw_error_dump_type {
+	ATH10K_FW_ERROR_DUMP_DBGLOG = 0,
+	ATH10K_FW_ERROR_DUMP_REGDUMP = 1,
+
+	ATH10K_FW_ERROR_DUMP_MAX,
+};
+
+
+struct ath10k_tlv_dump_data {
+	u32 type; /* see ath10k_fw_error_dump_type above */
+	u32 tlv_len; /* in bytes */
+	u8 tlv_data[]; /* Pad to 32-bit boundaries as needed. */
+} __packed;
+
+struct ath10k_dump_file_data {
+	/* Dump file information */
+	u32 len;
+	u32 magic; /* 0x01020304, tells us byte-order of host if we care */
+	u32 version; /* File dump version, 1 for now. */
+
+	/* Some info we can get from ath10k struct that might help. */
+	u32 chip_id;
+	u32 target_version;
+	u8 fw_version_major;
+	u8 unused8; /* pad fw_version_major */
+	u16 unused16; /* pad fw_version_major */
+	u32 fw_version_minor;
+	u16 fw_version_release;
+	u16 fw_version_build;
+	u32 phy_capability;
+	u32 hw_min_tx_power;
+	u32 hw_max_tx_power;
+	u32 ht_cap_info;
+	u32 vht_cap_info;
+	u32 num_rf_chains;
+	char fw_ver[64]; /* Firmware version string */
+
+	/* Kernel related information */
+	u32 kernel_ver_code; /* LINUX_VERSION_CODE */
+	char kernel_ver[64]; /* VERMAGIC_STRING */
+
+	u8 unused[64]; /* Room for growth w/out changing binary format */
+
+	struct ath10k_tlv_dump_data data; /* more may follow */
+} __packed;
+
+/* This will store at least the last 128 entries.  Each dbglog message
+ * is a max of 7 32-bit integers in length, but the length can be less
+ * than that as well.
+ */
+#define ATH10K_DBGLOG_DATA_LEN (128 * 7 * 4)
+struct ath10k_dbglog_entry_storage {
+	u32 next_idx; /* Where to write next chunk of data */
+	u8 data[ATH10K_DBGLOG_DATA_LEN];
+};
+
+
 struct ath10k {
 	struct ath_common ath_common;
 	struct ieee80211_hw *hw;
@@ -488,6 +555,12 @@ struct ath10k {
 
 	struct dfs_pattern_detector *dfs_detector;
 
+	/* Used for crash-dump storage */
+	/* Don't over-write dump info until someone reads the data. */
+	bool crashed_since_read;
+	struct ath10k_dbglog_entry_storage dbglog_entry_data;
+	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
+
 #ifdef CONFIG_ATH10K_DEBUGFS
 	struct ath10k_debug debug;
 #endif
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 1b7ff4b..a7d7877 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -17,6 +17,8 @@
 
 #include <linux/module.h>
 #include <linux/debugfs.h>
+#include <linux/version.h>
+#include <linux/vermagic.h>
 
 #include "core.h"
 #include "debug.h"
@@ -577,6 +579,145 @@ static const struct file_operations fops_chip_id = {
 	.llseek = default_llseek,
 };
 
+void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len)
+{
+	int i;
+	int z = ar->dbglog_entry_data.next_idx;
+
+	/* Don't save any new logs until user-space reads this. */
+	if (ar->crashed_since_read)
+		return;
+
+	for (i = 0; i < len; i++) {
+		ar->dbglog_entry_data.data[z] = buffer[i];
+		z++;
+		if (z >= ATH10K_DBGLOG_DATA_LEN)
+			z = 0;
+	}
+	ar->dbglog_entry_data.next_idx = z;
+}
+EXPORT_SYMBOL(ath10k_dbg_save_fw_dbg_buffer);
+
+static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
+{
+	unsigned int len = (sizeof(ar->dbglog_entry_data)
+			    + sizeof(ar->reg_dump_values));
+	unsigned int sofar = 0;
+	char *buf;
+	struct ath10k_tlv_dump_data *dump_tlv;
+	struct ath10k_dump_file_data *dump_data;
+	int hdr_len = sizeof(*dump_data) - sizeof(dump_data->data);
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	len += hdr_len;
+	sofar += hdr_len;
+
+	/* So we can add headers to the data dump */
+	len += 2 * sizeof(*dump_tlv);
+
+	/* This is going to get big when we start dumping FW RAM and such,
+	 * so go ahead and use vmalloc.
+	 */
+	buf = vmalloc(len);
+	if (!buf)
+		return NULL;
+
+	memset(buf, 0, len);
+	dump_data = (struct ath10k_dump_file_data *)(buf);
+	dump_data->len = len;
+	dump_data->magic = 0x01020304;
+	dump_data->version = 1;
+	dump_data->chip_id = ar->chip_id;
+	dump_data->target_version = ar->target_version;
+	dump_data->fw_version_major = ar->fw_version_major;
+	dump_data->fw_version_minor = ar->fw_version_minor;
+	dump_data->fw_version_release = ar->fw_version_release;
+	dump_data->fw_version_build = ar->fw_version_build;
+	dump_data->phy_capability = ar->phy_capability;
+	dump_data->hw_min_tx_power = ar->hw_min_tx_power;
+	dump_data->hw_max_tx_power = ar->hw_max_tx_power;
+	dump_data->ht_cap_info = ar->ht_cap_info;
+	dump_data->vht_cap_info = ar->vht_cap_info;
+	dump_data->num_rf_chains = ar->num_rf_chains;
+
+	strncpy(dump_data->fw_ver, ar->hw->wiphy->fw_version,
+		sizeof(dump_data->fw_ver) - 1);
+
+	dump_data->kernel_ver_code = LINUX_VERSION_CODE;
+	strncpy(dump_data->kernel_ver, VERMAGIC_STRING,
+		sizeof(dump_data->kernel_ver) - 1);
+
+	/* Gather dbg-log */
+	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+	dump_tlv->type = ATH10K_FW_ERROR_DUMP_DBGLOG;
+	dump_tlv->tlv_len = sizeof(ar->dbglog_entry_data);
+	memcpy(dump_tlv->tlv_data, &ar->dbglog_entry_data, dump_tlv->tlv_len);
+	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
+	/* Gather crash-dump */
+	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+	dump_tlv->type = ATH10K_FW_ERROR_DUMP_REGDUMP;
+	dump_tlv->tlv_len = sizeof(ar->reg_dump_values);
+	memcpy(dump_tlv->tlv_data, &ar->reg_dump_values, dump_tlv->tlv_len);
+	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
+	return dump_data;
+}
+
+static int ath10k_fw_error_dump_open(struct inode *inode, struct file *file)
+{
+	struct ath10k *ar = inode->i_private;
+	int ret;
+	struct ath10k_dump_file_data *dump;
+
+	if (!ar)
+		return -EINVAL;
+
+	mutex_lock(&ar->conf_mutex);
+
+	dump = ath10k_build_dump_file(ar);
+	if (!dump) {
+		ret = -ENODATA;
+		goto out;
+	}
+
+	file->private_data = dump;
+	ar->crashed_since_read = false;
+	ret = 0;
+
+out:
+	mutex_unlock(&ar->conf_mutex);
+	return ret;
+}
+
+static ssize_t ath10k_fw_error_dump_read(struct file *file,
+					 char __user *user_buf,
+					 size_t count, loff_t *ppos)
+{
+	struct ath10k_dump_file_data *dump_file = file->private_data;
+
+	return simple_read_from_buffer(user_buf, count, ppos,
+				       dump_file,
+				       dump_file->len);
+}
+
+static int ath10k_fw_error_dump_release(struct inode *inode,
+					struct file *file)
+{
+	vfree(file->private_data);
+
+	return 0;
+}
+
+static const struct file_operations fops_fw_error_dump = {
+	.open = ath10k_fw_error_dump_open,
+	.read = ath10k_fw_error_dump_read,
+	.release = ath10k_fw_error_dump_release,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
 static int ath10k_debug_htt_stats_req(struct ath10k *ar)
 {
 	u64 cookie;
@@ -861,6 +1002,9 @@ int ath10k_debug_create(struct ath10k *ar)
 	debugfs_create_file("simulate_fw_crash", S_IRUSR, ar->debug.debugfs_phy,
 			    ar, &fops_simulate_fw_crash);
 
+	debugfs_create_file("fw_error_dump", S_IRUSR, ar->debug.debugfs_phy,
+			    ar, &fops_fw_error_dump);
+
 	debugfs_create_file("chip_id", S_IRUSR, ar->debug.debugfs_phy,
 			    ar, &fops_chip_id);
 
diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index a582499..72ed34c 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -19,6 +19,7 @@
 #define _DEBUG_H_
 
 #include <linux/types.h>
+#include "pci.h"
 #include "trace.h"
 
 enum ath10k_debug_mask {
@@ -110,4 +111,28 @@ static inline void ath10k_dbg_dump(enum ath10k_debug_mask mask,
 {
 }
 #endif /* CONFIG_ATH10K_DEBUG */
+
+
+/* Target debug log related defines and structs */
+
+/* Target is 32-bit CPU, so we just use u32 for
+ * the pointers.  The memory space is relative to the
+ * target, not the host.
+ */
+struct ath10k_fw_dbglog_buf {
+	u32 next; /* pointer to dblog_buf_s. */
+	u32 buffer; /* pointer to u8 buffer */
+	u32 bufsize;
+	u32 length;
+	u32 count;
+	u32 free;
+} __packed;
+
+struct ath10k_fw_dbglog_hdr {
+	u32 dbuf; /* pointer to dbglog_buf_s */
+	u32 dropped;
+} __packed;
+
+void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len);
+
 #endif /* _DEBUG_H_ */
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index d0004d5..f2cfe69 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -19,7 +19,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
-#include <linux/bitops.h>
+#include <linux/ctype.h>
 
 #include "core.h"
 #include "debug.h"
@@ -840,6 +840,8 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 	u32 host_addr;
 	int ret;
 	u32 i;
+	struct ath10k_fw_dbglog_hdr dbg_hdr;
+	u32 dbufp; /* pointer in target memory space */
 
 	ath10k_err("firmware crashed!\n");
 	ath10k_err("hardware name %s version 0x%x\n",
@@ -851,7 +853,7 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 				       &reg_dump_area, sizeof(u32));
 	if (ret) {
 		ath10k_err("failed to read FW dump area address: %d\n", ret);
-		return;
+		goto do_restart;
 	}
 
 	ath10k_err("target register Dump Location: 0x%08X\n", reg_dump_area);
@@ -861,7 +863,7 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 				       REG_DUMP_COUNT_QCA988X * sizeof(u32));
 	if (ret != 0) {
 		ath10k_err("failed to read FW dump area: %d\n", ret);
-		return;
+		goto do_restart;
 	}
 
 	BUILD_BUG_ON(REG_DUMP_COUNT_QCA988X % 4);
@@ -875,6 +877,80 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 			   reg_dump_values[i + 2],
 			   reg_dump_values[i + 3]);
 
+	/* Dump the debug logs on the target */
+	host_addr = host_interest_item_address(HI_ITEM(hi_dbglog_hdr));
+	if (ath10k_pci_diag_read_mem(ar, host_addr,
+				     &reg_dump_area, sizeof(u32)) != 0) {
+		ath10k_warn("failed to read hi_dbglog_hdr\n");
+		goto save_regs_and_restart;
+	}
+
+	ath10k_err("target register Debug Log Location: 0x%08X\n",
+		   reg_dump_area);
+
+	ret = ath10k_pci_diag_read_mem(ar, reg_dump_area,
+				       &dbg_hdr, sizeof(dbg_hdr));
+	if (ret != 0) {
+		ath10k_err("failed to dump Debug Log Area\n");
+		goto save_regs_and_restart;
+	}
+
+	ath10k_err("Debug Log Header, dbuf: 0x%x  dropped: %i\n",
+		   dbg_hdr.dbuf, dbg_hdr.dropped);
+	dbufp = dbg_hdr.dbuf;
+	i = 0;
+	while (dbufp) {
+		struct ath10k_fw_dbglog_buf dbuf;
+
+		ret = ath10k_pci_diag_read_mem(ar, dbufp,
+					       &dbuf, sizeof(dbuf));
+		if (ret != 0) {
+			ath10k_err("failed to read Debug Log Area: 0x%x\n",
+				   dbufp);
+			goto save_regs_and_restart;
+		}
+
+		/* We have a buffer of data */
+		ath10k_err("[%i] next: 0x%x buf: 0x%x sz: %i len: %i count: %i free: %i\n",
+			   i, dbuf.next, dbuf.buffer, dbuf.bufsize, dbuf.length,
+			   dbuf.count, dbuf.free);
+		if (dbuf.buffer && dbuf.length) {
+			u8 *buffer = kmalloc(dbuf.length, GFP_ATOMIC);
+
+			if (buffer) {
+				ret = ath10k_pci_diag_read_mem(ar, dbuf.buffer,
+							       buffer,
+							       dbuf.length);
+				if (ret != 0) {
+					ath10k_err("failed to read Debug Log buffer: 0x%x\n",
+						   dbuf.buffer);
+					kfree(buffer);
+					goto save_regs_and_restart;
+				}
+
+				ath10k_dbg_save_fw_dbg_buffer(ar, buffer,
+							      dbuf.length);
+				kfree(buffer);
+			}
+		}
+		dbufp = dbuf.next;
+		if (dbufp == dbg_hdr.dbuf) {
+			/* It is a circular buffer it seems, bail if next
+			 * is head
+			 */
+			break;
+		}
+		i++;
+	} /* While we have a debug buffer to read */
+
+save_regs_and_restart:
+	if (!ar->crashed_since_read) {
+		ar->crashed_since_read = true;
+		memcpy(ar->reg_dump_values, reg_dump_values,
+		       sizeof(ar->reg_dump_values));
+	}
+
+do_restart:
 	queue_work(ar->workqueue, &ar->restart_work);
 }
 
-- 
1.7.11.7


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 2/4] ath10k:  save firmware debug log messages.
  2014-06-04 18:01 ` greearb
@ 2014-06-04 18:01   ` greearb
  -1 siblings, 0 replies; 46+ messages in thread
From: greearb @ 2014-06-04 18:01 UTC (permalink / raw)
  To: ath10k; +Cc: linux-wireless, Ben Greear

From: Ben Greear <greearb@candelatech.com>

They may be dumped through the firmware dump debugfs
file.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
 drivers/net/wireless/ath/ath10k/wmi.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 4b7782a..40a1d8d 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -1089,6 +1089,12 @@ static int ath10k_wmi_event_debug_mesg(struct ath10k *ar, struct sk_buff *skb)
 
 	trace_ath10k_wmi_dbglog(skb->data, skb->len);
 
+	/* First 4 bytes are a messages-dropped-due-to-overflow counter,
+	 * and should not be recorded in the dbglog buffer, so we skip
+	 * them.
+	 */
+	ath10k_dbg_save_fw_dbg_buffer(ar, skb->data + 4, skb->len - 4);
+
 	return 0;
 }
 
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 2/4] ath10k:  save firmware debug log messages.
@ 2014-06-04 18:01   ` greearb
  0 siblings, 0 replies; 46+ messages in thread
From: greearb @ 2014-06-04 18:01 UTC (permalink / raw)
  To: ath10k; +Cc: Ben Greear, linux-wireless

From: Ben Greear <greearb@candelatech.com>

They may be dumped through the firmware dump debugfs
file.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
 drivers/net/wireless/ath/ath10k/wmi.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 4b7782a..40a1d8d 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -1089,6 +1089,12 @@ static int ath10k_wmi_event_debug_mesg(struct ath10k *ar, struct sk_buff *skb)
 
 	trace_ath10k_wmi_dbglog(skb->data, skb->len);
 
+	/* First 4 bytes are a messages-dropped-due-to-overflow counter,
+	 * and should not be recorded in the dbglog buffer, so we skip
+	 * them.
+	 */
+	ath10k_dbg_save_fw_dbg_buffer(ar, skb->data + 4, skb->len - 4);
+
 	return 0;
 }
 
-- 
1.7.11.7


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 3/4] ath10k:  save firmware stack upon firmware crash.
  2014-06-04 18:01 ` greearb
@ 2014-06-04 18:01   ` greearb
  -1 siblings, 0 replies; 46+ messages in thread
From: greearb @ 2014-06-04 18:01 UTC (permalink / raw)
  To: ath10k; +Cc: linux-wireless, Ben Greear

From: Ben Greear <greearb@candelatech.com>

Should help debug firmware crashes, and give users a way
to provide some useful debug reports to firmware developers.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
 drivers/net/wireless/ath/ath10k/core.h  |  3 +++
 drivers/net/wireless/ath/ath10k/debug.c | 11 ++++++++++-
 drivers/net/wireless/ath/ath10k/pci.c   | 35 +++++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index a8866d3..f2c256b 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -45,6 +45,7 @@
  * get a compile error.  Cannot just include pci.h here.
  */
 #define REG_DUMP_COUNT_QCA988X 60
+#define ATH10K_FW_STACK_SIZE 4096
 
 /* Antenna noise floor */
 #define ATH10K_DEFAULT_NOISE_FLOOR -95
@@ -351,6 +352,7 @@ enum ath10k_dev_flags {
 enum ath10k_fw_error_dump_type {
 	ATH10K_FW_ERROR_DUMP_DBGLOG = 0,
 	ATH10K_FW_ERROR_DUMP_REGDUMP = 1,
+	ATH10K_FW_ERROR_DUMP_STACK = 2,
 
 	ATH10K_FW_ERROR_DUMP_MAX,
 };
@@ -560,6 +562,7 @@ struct ath10k {
 	bool crashed_since_read;
 	struct ath10k_dbglog_entry_storage dbglog_entry_data;
 	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
+	unsigned char stack_buf[ATH10K_FW_STACK_SIZE];
 
 #ifdef CONFIG_ATH10K_DEBUGFS
 	struct ath10k_debug debug;
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index a7d7877..9c9fc10 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -601,6 +601,7 @@ EXPORT_SYMBOL(ath10k_dbg_save_fw_dbg_buffer);
 static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
 {
 	unsigned int len = (sizeof(ar->dbglog_entry_data)
+			    + sizeof(ar->stack_buf)
 			    + sizeof(ar->reg_dump_values));
 	unsigned int sofar = 0;
 	char *buf;
@@ -614,7 +615,7 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
 	sofar += hdr_len;
 
 	/* So we can add headers to the data dump */
-	len += 2 * sizeof(*dump_tlv);
+	len += 3 * sizeof(*dump_tlv);
 
 	/* This is going to get big when we start dumping FW RAM and such,
 	 * so go ahead and use vmalloc.
@@ -662,6 +663,14 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
 	memcpy(dump_tlv->tlv_data, &ar->reg_dump_values, dump_tlv->tlv_len);
 	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
 
+	/* Gather firmware stack dump */
+	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+	dump_tlv->type = ATH10K_FW_ERROR_DUMP_STACK;
+	dump_tlv->tlv_len = sizeof(ar->stack_buf);
+	memcpy(dump_tlv->tlv_data, &ar->stack_buf, dump_tlv->tlv_len);
+	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
+	WARN_ON(sofar != len);
 	return dump_data;
 }
 
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index f2cfe69..e709c02 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -833,6 +833,38 @@ static u16 ath10k_pci_hif_get_free_queue_number(struct ath10k *ar, u8 pipe)
 	return ath10k_ce_num_free_src_entries(ar_pci->pipe_info[pipe].ce_hdl);
 }
 
+static void ath10k_save_firmware_mem_helper(struct ath10k *ar, u32 sz,
+					    unsigned char *to, u32 addr)
+{
+	u32 host_addr;
+	u32 reg_dump_area;
+	int ret;
+
+	host_addr = host_interest_item_address(addr);
+
+	ret = ath10k_pci_diag_read_mem(ar, host_addr, &reg_dump_area,
+				       sizeof(u32));
+	if (ret != 0) {
+		ath10k_warn("failed to read FW addr: %d (addr %d)\n",
+			    ret, addr);
+		return;
+	}
+
+	ret = ath10k_pci_diag_read_mem(ar, reg_dump_area, to, sz);
+	if (ret != 0)
+		ath10k_err("failed to read FW memory: %d (addr %d sz %d)\n",
+			   ret, reg_dump_area, sz);
+}
+
+/* Save the main firmware stack */
+static void ath10k_save_firmware_stack(struct ath10k *ar)
+{
+	BUILD_BUG_ON(ATH10K_FW_STACK_SIZE % 8);
+	ath10k_save_firmware_mem_helper(ar, ATH10K_FW_STACK_SIZE,
+					ar->stack_buf, HI_ITEM(hi_stack));
+}
+
+
 static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 {
 	u32 reg_dump_area = 0;
@@ -877,6 +909,9 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 			   reg_dump_values[i + 2],
 			   reg_dump_values[i + 3]);
 
+	if (!ar->crashed_since_read)
+		ath10k_save_firmware_stack(ar);
+
 	/* Dump the debug logs on the target */
 	host_addr = host_interest_item_address(HI_ITEM(hi_dbglog_hdr));
 	if (ath10k_pci_diag_read_mem(ar, host_addr,
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 3/4] ath10k:  save firmware stack upon firmware crash.
@ 2014-06-04 18:01   ` greearb
  0 siblings, 0 replies; 46+ messages in thread
From: greearb @ 2014-06-04 18:01 UTC (permalink / raw)
  To: ath10k; +Cc: Ben Greear, linux-wireless

From: Ben Greear <greearb@candelatech.com>

Should help debug firmware crashes, and give users a way
to provide some useful debug reports to firmware developers.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
 drivers/net/wireless/ath/ath10k/core.h  |  3 +++
 drivers/net/wireless/ath/ath10k/debug.c | 11 ++++++++++-
 drivers/net/wireless/ath/ath10k/pci.c   | 35 +++++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index a8866d3..f2c256b 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -45,6 +45,7 @@
  * get a compile error.  Cannot just include pci.h here.
  */
 #define REG_DUMP_COUNT_QCA988X 60
+#define ATH10K_FW_STACK_SIZE 4096
 
 /* Antenna noise floor */
 #define ATH10K_DEFAULT_NOISE_FLOOR -95
@@ -351,6 +352,7 @@ enum ath10k_dev_flags {
 enum ath10k_fw_error_dump_type {
 	ATH10K_FW_ERROR_DUMP_DBGLOG = 0,
 	ATH10K_FW_ERROR_DUMP_REGDUMP = 1,
+	ATH10K_FW_ERROR_DUMP_STACK = 2,
 
 	ATH10K_FW_ERROR_DUMP_MAX,
 };
@@ -560,6 +562,7 @@ struct ath10k {
 	bool crashed_since_read;
 	struct ath10k_dbglog_entry_storage dbglog_entry_data;
 	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
+	unsigned char stack_buf[ATH10K_FW_STACK_SIZE];
 
 #ifdef CONFIG_ATH10K_DEBUGFS
 	struct ath10k_debug debug;
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index a7d7877..9c9fc10 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -601,6 +601,7 @@ EXPORT_SYMBOL(ath10k_dbg_save_fw_dbg_buffer);
 static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
 {
 	unsigned int len = (sizeof(ar->dbglog_entry_data)
+			    + sizeof(ar->stack_buf)
 			    + sizeof(ar->reg_dump_values));
 	unsigned int sofar = 0;
 	char *buf;
@@ -614,7 +615,7 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
 	sofar += hdr_len;
 
 	/* So we can add headers to the data dump */
-	len += 2 * sizeof(*dump_tlv);
+	len += 3 * sizeof(*dump_tlv);
 
 	/* This is going to get big when we start dumping FW RAM and such,
 	 * so go ahead and use vmalloc.
@@ -662,6 +663,14 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
 	memcpy(dump_tlv->tlv_data, &ar->reg_dump_values, dump_tlv->tlv_len);
 	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
 
+	/* Gather firmware stack dump */
+	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+	dump_tlv->type = ATH10K_FW_ERROR_DUMP_STACK;
+	dump_tlv->tlv_len = sizeof(ar->stack_buf);
+	memcpy(dump_tlv->tlv_data, &ar->stack_buf, dump_tlv->tlv_len);
+	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
+	WARN_ON(sofar != len);
 	return dump_data;
 }
 
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index f2cfe69..e709c02 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -833,6 +833,38 @@ static u16 ath10k_pci_hif_get_free_queue_number(struct ath10k *ar, u8 pipe)
 	return ath10k_ce_num_free_src_entries(ar_pci->pipe_info[pipe].ce_hdl);
 }
 
+static void ath10k_save_firmware_mem_helper(struct ath10k *ar, u32 sz,
+					    unsigned char *to, u32 addr)
+{
+	u32 host_addr;
+	u32 reg_dump_area;
+	int ret;
+
+	host_addr = host_interest_item_address(addr);
+
+	ret = ath10k_pci_diag_read_mem(ar, host_addr, &reg_dump_area,
+				       sizeof(u32));
+	if (ret != 0) {
+		ath10k_warn("failed to read FW addr: %d (addr %d)\n",
+			    ret, addr);
+		return;
+	}
+
+	ret = ath10k_pci_diag_read_mem(ar, reg_dump_area, to, sz);
+	if (ret != 0)
+		ath10k_err("failed to read FW memory: %d (addr %d sz %d)\n",
+			   ret, reg_dump_area, sz);
+}
+
+/* Save the main firmware stack */
+static void ath10k_save_firmware_stack(struct ath10k *ar)
+{
+	BUILD_BUG_ON(ATH10K_FW_STACK_SIZE % 8);
+	ath10k_save_firmware_mem_helper(ar, ATH10K_FW_STACK_SIZE,
+					ar->stack_buf, HI_ITEM(hi_stack));
+}
+
+
 static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 {
 	u32 reg_dump_area = 0;
@@ -877,6 +909,9 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 			   reg_dump_values[i + 2],
 			   reg_dump_values[i + 3]);
 
+	if (!ar->crashed_since_read)
+		ath10k_save_firmware_stack(ar);
+
 	/* Dump the debug logs on the target */
 	host_addr = host_interest_item_address(HI_ITEM(hi_dbglog_hdr));
 	if (ath10k_pci_diag_read_mem(ar, host_addr,
-- 
1.7.11.7


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 4/4] ath10k:  Dump exception stack contents on firmware crash.
  2014-06-04 18:01 ` greearb
@ 2014-06-04 18:01   ` greearb
  -1 siblings, 0 replies; 46+ messages in thread
From: greearb @ 2014-06-04 18:01 UTC (permalink / raw)
  To: ath10k; +Cc: linux-wireless, Ben Greear

From: Ben Greear <greearb@candelatech.com>

Firmware developers can decode this and maybe figure out
why the firmware crashed.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
 drivers/net/wireless/ath/ath10k/core.h  |  2 ++
 drivers/net/wireless/ath/ath10k/debug.c | 10 +++++++++-
 drivers/net/wireless/ath/ath10k/pci.c   | 11 ++++++++++-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index f2c256b..f2e8690 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -353,6 +353,7 @@ enum ath10k_fw_error_dump_type {
 	ATH10K_FW_ERROR_DUMP_DBGLOG = 0,
 	ATH10K_FW_ERROR_DUMP_REGDUMP = 1,
 	ATH10K_FW_ERROR_DUMP_STACK = 2,
+	ATH10K_FW_ERROR_DUMP_EXC_STACK = 3,
 
 	ATH10K_FW_ERROR_DUMP_MAX,
 };
@@ -563,6 +564,7 @@ struct ath10k {
 	struct ath10k_dbglog_entry_storage dbglog_entry_data;
 	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
 	unsigned char stack_buf[ATH10K_FW_STACK_SIZE];
+	unsigned char exc_stack_buf[ATH10K_FW_STACK_SIZE];
 
 #ifdef CONFIG_ATH10K_DEBUGFS
 	struct ath10k_debug debug;
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 9c9fc10..a8d3978 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -602,6 +602,7 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
 {
 	unsigned int len = (sizeof(ar->dbglog_entry_data)
 			    + sizeof(ar->stack_buf)
+			    + sizeof(ar->exc_stack_buf)
 			    + sizeof(ar->reg_dump_values));
 	unsigned int sofar = 0;
 	char *buf;
@@ -615,7 +616,7 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
 	sofar += hdr_len;
 
 	/* So we can add headers to the data dump */
-	len += 3 * sizeof(*dump_tlv);
+	len += 4 * sizeof(*dump_tlv);
 
 	/* This is going to get big when we start dumping FW RAM and such,
 	 * so go ahead and use vmalloc.
@@ -670,6 +671,13 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
 	memcpy(dump_tlv->tlv_data, &ar->stack_buf, dump_tlv->tlv_len);
 	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
 
+	/* Gather firmware exception (irq) stack dump */
+	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+	dump_tlv->type = ATH10K_FW_ERROR_DUMP_EXC_STACK;
+	dump_tlv->tlv_len = sizeof(ar->exc_stack_buf);
+	memcpy(dump_tlv->tlv_data, &ar->exc_stack_buf, dump_tlv->tlv_len);
+	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
 	WARN_ON(sofar != len);
 	return dump_data;
 }
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index e709c02..cf1cc41 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -864,6 +864,13 @@ static void ath10k_save_firmware_stack(struct ath10k *ar)
 					ar->stack_buf, HI_ITEM(hi_stack));
 }
 
+/* Save the firmware exception stack */
+static void ath10k_save_firmware_exc_stack(struct ath10k *ar)
+{
+	ath10k_save_firmware_mem_helper(ar, ATH10K_FW_STACK_SIZE,
+					ar->exc_stack_buf,
+					HI_ITEM(hi_err_stack));
+}
 
 static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 {
@@ -909,8 +916,10 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 			   reg_dump_values[i + 2],
 			   reg_dump_values[i + 3]);
 
-	if (!ar->crashed_since_read)
+	if (!ar->crashed_since_read) {
 		ath10k_save_firmware_stack(ar);
+		ath10k_save_firmware_exc_stack(ar);
+	}
 
 	/* Dump the debug logs on the target */
 	host_addr = host_interest_item_address(HI_ITEM(hi_dbglog_hdr));
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 4/4] ath10k: Dump exception stack contents on firmware crash.
@ 2014-06-04 18:01   ` greearb
  0 siblings, 0 replies; 46+ messages in thread
From: greearb @ 2014-06-04 18:01 UTC (permalink / raw)
  To: ath10k; +Cc: Ben Greear, linux-wireless

From: Ben Greear <greearb@candelatech.com>

Firmware developers can decode this and maybe figure out
why the firmware crashed.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
 drivers/net/wireless/ath/ath10k/core.h  |  2 ++
 drivers/net/wireless/ath/ath10k/debug.c | 10 +++++++++-
 drivers/net/wireless/ath/ath10k/pci.c   | 11 ++++++++++-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index f2c256b..f2e8690 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -353,6 +353,7 @@ enum ath10k_fw_error_dump_type {
 	ATH10K_FW_ERROR_DUMP_DBGLOG = 0,
 	ATH10K_FW_ERROR_DUMP_REGDUMP = 1,
 	ATH10K_FW_ERROR_DUMP_STACK = 2,
+	ATH10K_FW_ERROR_DUMP_EXC_STACK = 3,
 
 	ATH10K_FW_ERROR_DUMP_MAX,
 };
@@ -563,6 +564,7 @@ struct ath10k {
 	struct ath10k_dbglog_entry_storage dbglog_entry_data;
 	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
 	unsigned char stack_buf[ATH10K_FW_STACK_SIZE];
+	unsigned char exc_stack_buf[ATH10K_FW_STACK_SIZE];
 
 #ifdef CONFIG_ATH10K_DEBUGFS
 	struct ath10k_debug debug;
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 9c9fc10..a8d3978 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -602,6 +602,7 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
 {
 	unsigned int len = (sizeof(ar->dbglog_entry_data)
 			    + sizeof(ar->stack_buf)
+			    + sizeof(ar->exc_stack_buf)
 			    + sizeof(ar->reg_dump_values));
 	unsigned int sofar = 0;
 	char *buf;
@@ -615,7 +616,7 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
 	sofar += hdr_len;
 
 	/* So we can add headers to the data dump */
-	len += 3 * sizeof(*dump_tlv);
+	len += 4 * sizeof(*dump_tlv);
 
 	/* This is going to get big when we start dumping FW RAM and such,
 	 * so go ahead and use vmalloc.
@@ -670,6 +671,13 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
 	memcpy(dump_tlv->tlv_data, &ar->stack_buf, dump_tlv->tlv_len);
 	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
 
+	/* Gather firmware exception (irq) stack dump */
+	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
+	dump_tlv->type = ATH10K_FW_ERROR_DUMP_EXC_STACK;
+	dump_tlv->tlv_len = sizeof(ar->exc_stack_buf);
+	memcpy(dump_tlv->tlv_data, &ar->exc_stack_buf, dump_tlv->tlv_len);
+	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
+
 	WARN_ON(sofar != len);
 	return dump_data;
 }
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index e709c02..cf1cc41 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -864,6 +864,13 @@ static void ath10k_save_firmware_stack(struct ath10k *ar)
 					ar->stack_buf, HI_ITEM(hi_stack));
 }
 
+/* Save the firmware exception stack */
+static void ath10k_save_firmware_exc_stack(struct ath10k *ar)
+{
+	ath10k_save_firmware_mem_helper(ar, ATH10K_FW_STACK_SIZE,
+					ar->exc_stack_buf,
+					HI_ITEM(hi_err_stack));
+}
 
 static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 {
@@ -909,8 +916,10 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
 			   reg_dump_values[i + 2],
 			   reg_dump_values[i + 3]);
 
-	if (!ar->crashed_since_read)
+	if (!ar->crashed_since_read) {
 		ath10k_save_firmware_stack(ar);
+		ath10k_save_firmware_exc_stack(ar);
+	}
 
 	/* Dump the debug logs on the target */
 	host_addr = host_interest_item_address(HI_ITEM(hi_dbglog_hdr));
-- 
1.7.11.7


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-04 18:01 ` greearb
@ 2014-06-05 16:18   ` Kalle Valo
  -1 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-05 16:18 UTC (permalink / raw)
  To: greearb; +Cc: ath10k, linux-wireless

greearb@candelatech.com writes:

> From: Ben Greear <greearb@candelatech.com>
>
> Store the firmware crash registers and last 128 or so
> firmware debug-log ids and present them to user-space
> via debugfs.
>
> Should help with figuring out why the firmware crashed.
>
> Signed-off-by: Ben Greear <greearb@candelatech.com>

My first comments, more later. But my first impression is that I like
this, we just need to sort out some implementation issues.

kbuild found some warnings:

drivers/net/wireless/ath/ath10k/debug.c:725:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]
drivers/net/wireless/ath/ath10k/debug.c:624:2: error: implicit declaration of function 'vmalloc' [-Werror=implicit-function-declaration]
drivers/net/wireless/ath/ath10k/debug.c:624:6: warning: assignment makes pointer from integer without a cast [enabled by default]

The naming of structures and functions are a bit too long, but I'll
comment on that later. That's easy to change as the last thing.

> +/**
> + * enum ath10k_fw_error_dump_type - types of data in the dump file
> + * @ATH10K_FW_ERROR_DUMP_DBGLOG:  Recent firmware debug log entries
> + * @ATH10K_FW_ERROR_DUMP_CRASH:   Crash dump in binary format
> + */
> +enum ath10k_fw_error_dump_type {
> +	ATH10K_FW_ERROR_DUMP_DBGLOG = 0,
> +	ATH10K_FW_ERROR_DUMP_REGDUMP = 1,
> +
> +	ATH10K_FW_ERROR_DUMP_MAX,
> +};
> +
> +

Extra newline.

> +struct ath10k_tlv_dump_data {
> +	u32 type; /* see ath10k_fw_error_dump_type above */
> +	u32 tlv_len; /* in bytes */
> +	u8 tlv_data[]; /* Pad to 32-bit boundaries as needed. */
> +} __packed;
> +
> +struct ath10k_dump_file_data {
> +	/* Dump file information */
> +	u32 len;
> +	u32 magic; /* 0x01020304, tells us byte-order of host if we care */
> +	u32 version; /* File dump version, 1 for now. */

Actually I would prefer to have magic first and have ASCII string as
string, for example "ATH10K-FW-DUMP".

> +	/* Some info we can get from ath10k struct that might help. */
> +	u32 chip_id;
> +	u32 target_version;

bus_type or something like that would be good to add already now.

> +	u8 fw_version_major;
> +	u8 unused8; /* pad fw_version_major */
> +	u16 unused16; /* pad fw_version_major */

I don't see any point of using u8 or u16. Would it be simpler just to
use u32 for everything?

> +	u32 fw_version_minor;
> +	u16 fw_version_release;
> +	u16 fw_version_build;
> +	u32 phy_capability;
> +	u32 hw_min_tx_power;
> +	u32 hw_max_tx_power;
> +	u32 ht_cap_info;
> +	u32 vht_cap_info;
> +	u32 num_rf_chains;
> +	char fw_ver[64]; /* Firmware version string */

Can we reuse ETHTOOL_FWVERS_LEN? cfg80211 already uses that.

#define ETHTOOL_FWVERS_LEN	32

> +	/* Kernel related information */
> +	u32 kernel_ver_code; /* LINUX_VERSION_CODE */
> +	char kernel_ver[64]; /* VERMAGIC_STRING */
> +
> +	u8 unused[64]; /* Room for growth w/out changing binary format */

Maybe 128 bytes, just so that we don't need to change it for some time?

> +	struct ath10k_tlv_dump_data data; /* more may follow */

I would prefer:

u8 data[0];

So that this struct is only about the header.

> +} __packed;
> +
> +/* This will store at least the last 128 entries.  Each dbglog message
> + * is a max of 7 32-bit integers in length, but the length can be less
> + * than that as well.
> + */
> +#define ATH10K_DBGLOG_DATA_LEN (128 * 7 * 4)

Empty line after the define.

> @@ -488,6 +555,12 @@ struct ath10k {
>  
>  	struct dfs_pattern_detector *dfs_detector;
>  
> +	/* Used for crash-dump storage */
> +	/* Don't over-write dump info until someone reads the data. */
> +	bool crashed_since_read;
> +	struct ath10k_dbglog_entry_storage dbglog_entry_data;
> +	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];

I think these should be in struct ath10k_debug.

> diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
> index 1b7ff4b..a7d7877 100644
> --- a/drivers/net/wireless/ath/ath10k/debug.c
> +++ b/drivers/net/wireless/ath/ath10k/debug.c
> @@ -17,6 +17,8 @@
>  
>  #include <linux/module.h>
>  #include <linux/debugfs.h>
> +#include <linux/version.h>
> +#include <linux/vermagic.h>
>  
>  #include "core.h"
>  #include "debug.h"
> @@ -577,6 +579,145 @@ static const struct file_operations fops_chip_id = {
>  	.llseek = default_llseek,
>  };
>  
> +void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len)
> +{
> +	int i;
> +	int z = ar->dbglog_entry_data.next_idx;
> +
> +	/* Don't save any new logs until user-space reads this. */
> +	if (ar->crashed_since_read)
> +		return;

Locking? If this functions depends on something, please document that
with lockdep_assert_held().

> +	for (i = 0; i < len; i++) {
> +		ar->dbglog_entry_data.data[z] = buffer[i];
> +		z++;
> +		if (z >= ATH10K_DBGLOG_DATA_LEN)
> +			z = 0;
> +	}

Empty line here.

> +	ar->dbglog_entry_data.next_idx = z;
> +}
> +EXPORT_SYMBOL(ath10k_dbg_save_fw_dbg_buffer);
> +
> +static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
> +{
> +	unsigned int len = (sizeof(ar->dbglog_entry_data)

Unneeded parenthesis.

> +			    + sizeof(ar->reg_dump_values));
> +	unsigned int sofar = 0;
> +	char *buf;
> +	struct ath10k_tlv_dump_data *dump_tlv;
> +	struct ath10k_dump_file_data *dump_data;
> +	int hdr_len = sizeof(*dump_data) - sizeof(dump_data->data);

I prefer separating variable declarations and definitions.

> +
> +	lockdep_assert_held(&ar->conf_mutex);
> +
> +	len += hdr_len;
> +	sofar += hdr_len;
> +
> +	/* So we can add headers to the data dump */
> +	len += 2 * sizeof(*dump_tlv);

This is a bit awkward way of defining the length. Something like this
would be easier to read:

len = sizeof (*dump_tlv) + sizeof(ar->dbglog_entry_data);
len +=	sizeof (*dump_tlv) + sizeof(ar->reg_dump_values));

> +
> +	/* This is going to get big when we start dumping FW RAM and such,
> +	 * so go ahead and use vmalloc.
> +	 */
> +	buf = vmalloc(len);
> +	if (!buf)
> +		return NULL;
> +
> +	memset(buf, 0, len);
> +	dump_data = (struct ath10k_dump_file_data *)(buf);
> +	dump_data->len = len;
> +	dump_data->magic = 0x01020304;
> +	dump_data->version = 1;
> +	dump_data->chip_id = ar->chip_id;
> +	dump_data->target_version = ar->target_version;
> +	dump_data->fw_version_major = ar->fw_version_major;
> +	dump_data->fw_version_minor = ar->fw_version_minor;
> +	dump_data->fw_version_release = ar->fw_version_release;
> +	dump_data->fw_version_build = ar->fw_version_build;
> +	dump_data->phy_capability = ar->phy_capability;
> +	dump_data->hw_min_tx_power = ar->hw_min_tx_power;
> +	dump_data->hw_max_tx_power = ar->hw_max_tx_power;
> +	dump_data->ht_cap_info = ar->ht_cap_info;
> +	dump_data->vht_cap_info = ar->vht_cap_info;
> +	dump_data->num_rf_chains = ar->num_rf_chains;
> +
> +	strncpy(dump_data->fw_ver, ar->hw->wiphy->fw_version,
> +		sizeof(dump_data->fw_ver) - 1);

BUILD_BUG_ON(sizeof(dump_data->fw_ver) != sizeof(ar->hw->wiphy->fw_version))?

And wouldn't strlcpy() be safer?

> +	dump_data->kernel_ver_code = LINUX_VERSION_CODE;

checkpatch actually complains about this:

drivers/net/wireless/ath/ath10k/debug.c:649: WARNING: LINUX_VERSION_CODE should be avoided, code should be for the version to which it is merged

But I guess there's nothing we can do for that.

> +	strncpy(dump_data->kernel_ver, VERMAGIC_STRING,
> +		sizeof(dump_data->kernel_ver) - 1);

strlcpy()?

> +	/* Gather dbg-log */
> +	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
> +	dump_tlv->type = ATH10K_FW_ERROR_DUMP_DBGLOG;
> +	dump_tlv->tlv_len = sizeof(ar->dbglog_entry_data);
> +	memcpy(dump_tlv->tlv_data, &ar->dbglog_entry_data, dump_tlv->tlv_len);
> +	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
> +
> +	/* Gather crash-dump */
> +	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
> +	dump_tlv->type = ATH10K_FW_ERROR_DUMP_REGDUMP;
> +	dump_tlv->tlv_len = sizeof(ar->reg_dump_values);
> +	memcpy(dump_tlv->tlv_data, &ar->reg_dump_values, dump_tlv->tlv_len);
> +	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
> +
> +	return dump_data;
> +}
> +
> +static int ath10k_fw_error_dump_open(struct inode *inode, struct file *file)
> +{
> +	struct ath10k *ar = inode->i_private;
> +	int ret;
> +	struct ath10k_dump_file_data *dump;
> +
> +	if (!ar)
> +		return -EINVAL;

Is this cheak necessary? In what cases is i_private NULL?

> --- a/drivers/net/wireless/ath/ath10k/debug.h
> +++ b/drivers/net/wireless/ath/ath10k/debug.h
> @@ -19,6 +19,7 @@
>  #define _DEBUG_H_
>  
>  #include <linux/types.h>
> +#include "pci.h"

debug.h shouldn't access pci.h directly. All access should happen
through hif.

>  
>  enum ath10k_debug_mask {
> @@ -110,4 +111,28 @@ static inline void ath10k_dbg_dump(enum ath10k_debug_mask mask,
>  {
>  }
>  #endif /* CONFIG_ATH10K_DEBUG */
> +
> +
> +/* Target debug log related defines and structs */
> +
> +/* Target is 32-bit CPU, so we just use u32 for
> + * the pointers.  The memory space is relative to the
> + * target, not the host.
> + */
> +struct ath10k_fw_dbglog_buf {
> +	u32 next; /* pointer to dblog_buf_s. */
> +	u32 buffer; /* pointer to u8 buffer */
> +	u32 bufsize;
> +	u32 length;
> +	u32 count;
> +	u32 free;
> +} __packed;
> +
> +struct ath10k_fw_dbglog_hdr {
> +	u32 dbuf; /* pointer to dbglog_buf_s */
> +	u32 dropped;
> +} __packed;

Should these be in hw.h?

> --- a/drivers/net/wireless/ath/ath10k/pci.c
> +++ b/drivers/net/wireless/ath/ath10k/pci.c
> @@ -19,7 +19,7 @@
>  #include <linux/module.h>
>  #include <linux/interrupt.h>
>  #include <linux/spinlock.h>
> -#include <linux/bitops.h>

Why remove bitops.h?

Have to stop here, more later.

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-05 16:18   ` Kalle Valo
  0 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-05 16:18 UTC (permalink / raw)
  To: greearb; +Cc: linux-wireless, ath10k

greearb@candelatech.com writes:

> From: Ben Greear <greearb@candelatech.com>
>
> Store the firmware crash registers and last 128 or so
> firmware debug-log ids and present them to user-space
> via debugfs.
>
> Should help with figuring out why the firmware crashed.
>
> Signed-off-by: Ben Greear <greearb@candelatech.com>

My first comments, more later. But my first impression is that I like
this, we just need to sort out some implementation issues.

kbuild found some warnings:

drivers/net/wireless/ath/ath10k/debug.c:725:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]
drivers/net/wireless/ath/ath10k/debug.c:624:2: error: implicit declaration of function 'vmalloc' [-Werror=implicit-function-declaration]
drivers/net/wireless/ath/ath10k/debug.c:624:6: warning: assignment makes pointer from integer without a cast [enabled by default]

The naming of structures and functions are a bit too long, but I'll
comment on that later. That's easy to change as the last thing.

> +/**
> + * enum ath10k_fw_error_dump_type - types of data in the dump file
> + * @ATH10K_FW_ERROR_DUMP_DBGLOG:  Recent firmware debug log entries
> + * @ATH10K_FW_ERROR_DUMP_CRASH:   Crash dump in binary format
> + */
> +enum ath10k_fw_error_dump_type {
> +	ATH10K_FW_ERROR_DUMP_DBGLOG = 0,
> +	ATH10K_FW_ERROR_DUMP_REGDUMP = 1,
> +
> +	ATH10K_FW_ERROR_DUMP_MAX,
> +};
> +
> +

Extra newline.

> +struct ath10k_tlv_dump_data {
> +	u32 type; /* see ath10k_fw_error_dump_type above */
> +	u32 tlv_len; /* in bytes */
> +	u8 tlv_data[]; /* Pad to 32-bit boundaries as needed. */
> +} __packed;
> +
> +struct ath10k_dump_file_data {
> +	/* Dump file information */
> +	u32 len;
> +	u32 magic; /* 0x01020304, tells us byte-order of host if we care */
> +	u32 version; /* File dump version, 1 for now. */

Actually I would prefer to have magic first and have ASCII string as
string, for example "ATH10K-FW-DUMP".

> +	/* Some info we can get from ath10k struct that might help. */
> +	u32 chip_id;
> +	u32 target_version;

bus_type or something like that would be good to add already now.

> +	u8 fw_version_major;
> +	u8 unused8; /* pad fw_version_major */
> +	u16 unused16; /* pad fw_version_major */

I don't see any point of using u8 or u16. Would it be simpler just to
use u32 for everything?

> +	u32 fw_version_minor;
> +	u16 fw_version_release;
> +	u16 fw_version_build;
> +	u32 phy_capability;
> +	u32 hw_min_tx_power;
> +	u32 hw_max_tx_power;
> +	u32 ht_cap_info;
> +	u32 vht_cap_info;
> +	u32 num_rf_chains;
> +	char fw_ver[64]; /* Firmware version string */

Can we reuse ETHTOOL_FWVERS_LEN? cfg80211 already uses that.

#define ETHTOOL_FWVERS_LEN	32

> +	/* Kernel related information */
> +	u32 kernel_ver_code; /* LINUX_VERSION_CODE */
> +	char kernel_ver[64]; /* VERMAGIC_STRING */
> +
> +	u8 unused[64]; /* Room for growth w/out changing binary format */

Maybe 128 bytes, just so that we don't need to change it for some time?

> +	struct ath10k_tlv_dump_data data; /* more may follow */

I would prefer:

u8 data[0];

So that this struct is only about the header.

> +} __packed;
> +
> +/* This will store at least the last 128 entries.  Each dbglog message
> + * is a max of 7 32-bit integers in length, but the length can be less
> + * than that as well.
> + */
> +#define ATH10K_DBGLOG_DATA_LEN (128 * 7 * 4)

Empty line after the define.

> @@ -488,6 +555,12 @@ struct ath10k {
>  
>  	struct dfs_pattern_detector *dfs_detector;
>  
> +	/* Used for crash-dump storage */
> +	/* Don't over-write dump info until someone reads the data. */
> +	bool crashed_since_read;
> +	struct ath10k_dbglog_entry_storage dbglog_entry_data;
> +	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];

I think these should be in struct ath10k_debug.

> diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
> index 1b7ff4b..a7d7877 100644
> --- a/drivers/net/wireless/ath/ath10k/debug.c
> +++ b/drivers/net/wireless/ath/ath10k/debug.c
> @@ -17,6 +17,8 @@
>  
>  #include <linux/module.h>
>  #include <linux/debugfs.h>
> +#include <linux/version.h>
> +#include <linux/vermagic.h>
>  
>  #include "core.h"
>  #include "debug.h"
> @@ -577,6 +579,145 @@ static const struct file_operations fops_chip_id = {
>  	.llseek = default_llseek,
>  };
>  
> +void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len)
> +{
> +	int i;
> +	int z = ar->dbglog_entry_data.next_idx;
> +
> +	/* Don't save any new logs until user-space reads this. */
> +	if (ar->crashed_since_read)
> +		return;

Locking? If this functions depends on something, please document that
with lockdep_assert_held().

> +	for (i = 0; i < len; i++) {
> +		ar->dbglog_entry_data.data[z] = buffer[i];
> +		z++;
> +		if (z >= ATH10K_DBGLOG_DATA_LEN)
> +			z = 0;
> +	}

Empty line here.

> +	ar->dbglog_entry_data.next_idx = z;
> +}
> +EXPORT_SYMBOL(ath10k_dbg_save_fw_dbg_buffer);
> +
> +static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
> +{
> +	unsigned int len = (sizeof(ar->dbglog_entry_data)

Unneeded parenthesis.

> +			    + sizeof(ar->reg_dump_values));
> +	unsigned int sofar = 0;
> +	char *buf;
> +	struct ath10k_tlv_dump_data *dump_tlv;
> +	struct ath10k_dump_file_data *dump_data;
> +	int hdr_len = sizeof(*dump_data) - sizeof(dump_data->data);

I prefer separating variable declarations and definitions.

> +
> +	lockdep_assert_held(&ar->conf_mutex);
> +
> +	len += hdr_len;
> +	sofar += hdr_len;
> +
> +	/* So we can add headers to the data dump */
> +	len += 2 * sizeof(*dump_tlv);

This is a bit awkward way of defining the length. Something like this
would be easier to read:

len = sizeof (*dump_tlv) + sizeof(ar->dbglog_entry_data);
len +=	sizeof (*dump_tlv) + sizeof(ar->reg_dump_values));

> +
> +	/* This is going to get big when we start dumping FW RAM and such,
> +	 * so go ahead and use vmalloc.
> +	 */
> +	buf = vmalloc(len);
> +	if (!buf)
> +		return NULL;
> +
> +	memset(buf, 0, len);
> +	dump_data = (struct ath10k_dump_file_data *)(buf);
> +	dump_data->len = len;
> +	dump_data->magic = 0x01020304;
> +	dump_data->version = 1;
> +	dump_data->chip_id = ar->chip_id;
> +	dump_data->target_version = ar->target_version;
> +	dump_data->fw_version_major = ar->fw_version_major;
> +	dump_data->fw_version_minor = ar->fw_version_minor;
> +	dump_data->fw_version_release = ar->fw_version_release;
> +	dump_data->fw_version_build = ar->fw_version_build;
> +	dump_data->phy_capability = ar->phy_capability;
> +	dump_data->hw_min_tx_power = ar->hw_min_tx_power;
> +	dump_data->hw_max_tx_power = ar->hw_max_tx_power;
> +	dump_data->ht_cap_info = ar->ht_cap_info;
> +	dump_data->vht_cap_info = ar->vht_cap_info;
> +	dump_data->num_rf_chains = ar->num_rf_chains;
> +
> +	strncpy(dump_data->fw_ver, ar->hw->wiphy->fw_version,
> +		sizeof(dump_data->fw_ver) - 1);

BUILD_BUG_ON(sizeof(dump_data->fw_ver) != sizeof(ar->hw->wiphy->fw_version))?

And wouldn't strlcpy() be safer?

> +	dump_data->kernel_ver_code = LINUX_VERSION_CODE;

checkpatch actually complains about this:

drivers/net/wireless/ath/ath10k/debug.c:649: WARNING: LINUX_VERSION_CODE should be avoided, code should be for the version to which it is merged

But I guess there's nothing we can do for that.

> +	strncpy(dump_data->kernel_ver, VERMAGIC_STRING,
> +		sizeof(dump_data->kernel_ver) - 1);

strlcpy()?

> +	/* Gather dbg-log */
> +	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
> +	dump_tlv->type = ATH10K_FW_ERROR_DUMP_DBGLOG;
> +	dump_tlv->tlv_len = sizeof(ar->dbglog_entry_data);
> +	memcpy(dump_tlv->tlv_data, &ar->dbglog_entry_data, dump_tlv->tlv_len);
> +	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
> +
> +	/* Gather crash-dump */
> +	dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar);
> +	dump_tlv->type = ATH10K_FW_ERROR_DUMP_REGDUMP;
> +	dump_tlv->tlv_len = sizeof(ar->reg_dump_values);
> +	memcpy(dump_tlv->tlv_data, &ar->reg_dump_values, dump_tlv->tlv_len);
> +	sofar += sizeof(*dump_tlv) + dump_tlv->tlv_len;
> +
> +	return dump_data;
> +}
> +
> +static int ath10k_fw_error_dump_open(struct inode *inode, struct file *file)
> +{
> +	struct ath10k *ar = inode->i_private;
> +	int ret;
> +	struct ath10k_dump_file_data *dump;
> +
> +	if (!ar)
> +		return -EINVAL;

Is this cheak necessary? In what cases is i_private NULL?

> --- a/drivers/net/wireless/ath/ath10k/debug.h
> +++ b/drivers/net/wireless/ath/ath10k/debug.h
> @@ -19,6 +19,7 @@
>  #define _DEBUG_H_
>  
>  #include <linux/types.h>
> +#include "pci.h"

debug.h shouldn't access pci.h directly. All access should happen
through hif.

>  
>  enum ath10k_debug_mask {
> @@ -110,4 +111,28 @@ static inline void ath10k_dbg_dump(enum ath10k_debug_mask mask,
>  {
>  }
>  #endif /* CONFIG_ATH10K_DEBUG */
> +
> +
> +/* Target debug log related defines and structs */
> +
> +/* Target is 32-bit CPU, so we just use u32 for
> + * the pointers.  The memory space is relative to the
> + * target, not the host.
> + */
> +struct ath10k_fw_dbglog_buf {
> +	u32 next; /* pointer to dblog_buf_s. */
> +	u32 buffer; /* pointer to u8 buffer */
> +	u32 bufsize;
> +	u32 length;
> +	u32 count;
> +	u32 free;
> +} __packed;
> +
> +struct ath10k_fw_dbglog_hdr {
> +	u32 dbuf; /* pointer to dbglog_buf_s */
> +	u32 dropped;
> +} __packed;

Should these be in hw.h?

> --- a/drivers/net/wireless/ath/ath10k/pci.c
> +++ b/drivers/net/wireless/ath/ath10k/pci.c
> @@ -19,7 +19,7 @@
>  #include <linux/module.h>
>  #include <linux/interrupt.h>
>  #include <linux/spinlock.h>
> -#include <linux/bitops.h>

Why remove bitops.h?

Have to stop here, more later.

-- 
Kalle Valo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-05 16:18   ` Kalle Valo
@ 2014-06-05 18:25     ` Ben Greear
  -1 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-05 18:25 UTC (permalink / raw)
  To: Kalle Valo; +Cc: ath10k, linux-wireless

On 06/05/2014 09:18 AM, Kalle Valo wrote:

>> +struct ath10k_tlv_dump_data {
>> +	u32 type; /* see ath10k_fw_error_dump_type above */
>> +	u32 tlv_len; /* in bytes */
>> +	u8 tlv_data[]; /* Pad to 32-bit boundaries as needed. */
>> +} __packed;
>> +
>> +struct ath10k_dump_file_data {
>> +	/* Dump file information */
>> +	u32 len;
>> +	u32 magic; /* 0x01020304, tells us byte-order of host if we care */
>> +	u32 version; /* File dump version, 1 for now. */
> 
> Actually I would prefer to have magic first and have ASCII string as
> string, for example "ATH10K-FW-DUMP".

I'd like magic number to stay, was planning to use it to detect byte
ordering (ie, dumps might come from various different platforms, to
be decoded on some different platform).

I will add the string, however.

>> +	/* Some info we can get from ath10k struct that might help. */
>> +	u32 chip_id;
>> +	u32 target_version;
> 
> bus_type or something like that would be good to add already now.

Can you be more specific on what info you want here?  I don't see
any mention of bus_type in the ath10k dir.

>> +	u32 fw_version_minor;
>> +	u16 fw_version_release;
>> +	u16 fw_version_build;
>> +	u32 phy_capability;
>> +	u32 hw_min_tx_power;
>> +	u32 hw_max_tx_power;
>> +	u32 ht_cap_info;
>> +	u32 vht_cap_info;
>> +	u32 num_rf_chains;
>> +	char fw_ver[64]; /* Firmware version string */
> 
> Can we reuse ETHTOOL_FWVERS_LEN? cfg80211 already uses that.
> 
> #define ETHTOOL_FWVERS_LEN	32

I prefer not to..that way, firmware format will remain the same even if
the kernel changes the fwvers-len some day.


>> +	/* Kernel related information */
>> +	u32 kernel_ver_code; /* LINUX_VERSION_CODE */
>> +	char kernel_ver[64]; /* VERMAGIC_STRING */
>> +
>> +	u8 unused[64]; /* Room for growth w/out changing binary format */
> 
> Maybe 128 bytes, just so that we don't need to change it for some time?

certainly.

>> @@ -488,6 +555,12 @@ struct ath10k {
>>  
>>  	struct dfs_pattern_detector *dfs_detector;
>>  
>> +	/* Used for crash-dump storage */
>> +	/* Don't over-write dump info until someone reads the data. */
>> +	bool crashed_since_read;
>> +	struct ath10k_dbglog_entry_storage dbglog_entry_data;
>> +	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
> 
> I think these should be in struct ath10k_debug.

I did not do this because I figure we will want ethtool support w/out
forcing debugfs to be enabled someday soon.

>> +void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len)
>> +{
>> +	int i;
>> +	int z = ar->dbglog_entry_data.next_idx;
>> +
>> +	/* Don't save any new logs until user-space reads this. */
>> +	if (ar->crashed_since_read)
>> +		return;
> 
> Locking? If this functions depends on something, please document that
> with lockdep_assert_held().

To be honest, I was going to ignore locking and assume that firmware
will not crash that often.  Worst case would be a garbled crash dump
as there is no memory allocation involved in gathering the crash,
and the length of the crash dump will not change based on anything in
the crash logic.

I'm a bit leery of adding spin-locks in the dump routine just for
this, but I can add and use a new spin-lock if you prefer.  If so,
any idea if we can do the reads of the target's memory while holding
a spin-lock, or would I need some temporary buffers and only lock
while copying that in to the storage in the 'ar'?


>> +	ar->dbglog_entry_data.next_idx = z;
>> +}
>> +EXPORT_SYMBOL(ath10k_dbg_save_fw_dbg_buffer);
>> +
>> +static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
>> +{
>> +	unsigned int len = (sizeof(ar->dbglog_entry_data)
> 
> Unneeded parenthesis.

Makes things line up nicely in xemacs auto-indent, but will fix.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-05 18:25     ` Ben Greear
  0 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-05 18:25 UTC (permalink / raw)
  To: Kalle Valo; +Cc: linux-wireless, ath10k

On 06/05/2014 09:18 AM, Kalle Valo wrote:

>> +struct ath10k_tlv_dump_data {
>> +	u32 type; /* see ath10k_fw_error_dump_type above */
>> +	u32 tlv_len; /* in bytes */
>> +	u8 tlv_data[]; /* Pad to 32-bit boundaries as needed. */
>> +} __packed;
>> +
>> +struct ath10k_dump_file_data {
>> +	/* Dump file information */
>> +	u32 len;
>> +	u32 magic; /* 0x01020304, tells us byte-order of host if we care */
>> +	u32 version; /* File dump version, 1 for now. */
> 
> Actually I would prefer to have magic first and have ASCII string as
> string, for example "ATH10K-FW-DUMP".

I'd like magic number to stay, was planning to use it to detect byte
ordering (ie, dumps might come from various different platforms, to
be decoded on some different platform).

I will add the string, however.

>> +	/* Some info we can get from ath10k struct that might help. */
>> +	u32 chip_id;
>> +	u32 target_version;
> 
> bus_type or something like that would be good to add already now.

Can you be more specific on what info you want here?  I don't see
any mention of bus_type in the ath10k dir.

>> +	u32 fw_version_minor;
>> +	u16 fw_version_release;
>> +	u16 fw_version_build;
>> +	u32 phy_capability;
>> +	u32 hw_min_tx_power;
>> +	u32 hw_max_tx_power;
>> +	u32 ht_cap_info;
>> +	u32 vht_cap_info;
>> +	u32 num_rf_chains;
>> +	char fw_ver[64]; /* Firmware version string */
> 
> Can we reuse ETHTOOL_FWVERS_LEN? cfg80211 already uses that.
> 
> #define ETHTOOL_FWVERS_LEN	32

I prefer not to..that way, firmware format will remain the same even if
the kernel changes the fwvers-len some day.


>> +	/* Kernel related information */
>> +	u32 kernel_ver_code; /* LINUX_VERSION_CODE */
>> +	char kernel_ver[64]; /* VERMAGIC_STRING */
>> +
>> +	u8 unused[64]; /* Room for growth w/out changing binary format */
> 
> Maybe 128 bytes, just so that we don't need to change it for some time?

certainly.

>> @@ -488,6 +555,12 @@ struct ath10k {
>>  
>>  	struct dfs_pattern_detector *dfs_detector;
>>  
>> +	/* Used for crash-dump storage */
>> +	/* Don't over-write dump info until someone reads the data. */
>> +	bool crashed_since_read;
>> +	struct ath10k_dbglog_entry_storage dbglog_entry_data;
>> +	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
> 
> I think these should be in struct ath10k_debug.

I did not do this because I figure we will want ethtool support w/out
forcing debugfs to be enabled someday soon.

>> +void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len)
>> +{
>> +	int i;
>> +	int z = ar->dbglog_entry_data.next_idx;
>> +
>> +	/* Don't save any new logs until user-space reads this. */
>> +	if (ar->crashed_since_read)
>> +		return;
> 
> Locking? If this functions depends on something, please document that
> with lockdep_assert_held().

To be honest, I was going to ignore locking and assume that firmware
will not crash that often.  Worst case would be a garbled crash dump
as there is no memory allocation involved in gathering the crash,
and the length of the crash dump will not change based on anything in
the crash logic.

I'm a bit leery of adding spin-locks in the dump routine just for
this, but I can add and use a new spin-lock if you prefer.  If so,
any idea if we can do the reads of the target's memory while holding
a spin-lock, or would I need some temporary buffers and only lock
while copying that in to the storage in the 'ar'?


>> +	ar->dbglog_entry_data.next_idx = z;
>> +}
>> +EXPORT_SYMBOL(ath10k_dbg_save_fw_dbg_buffer);
>> +
>> +static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar)
>> +{
>> +	unsigned int len = (sizeof(ar->dbglog_entry_data)
> 
> Unneeded parenthesis.

Makes things line up nicely in xemacs auto-indent, but will fix.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-05 18:25     ` Ben Greear
@ 2014-06-06  6:10       ` Kalle Valo
  -1 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-06  6:10 UTC (permalink / raw)
  To: Ben Greear; +Cc: ath10k, linux-wireless

Ben Greear <greearb@candelatech.com> writes:

> On 06/05/2014 09:18 AM, Kalle Valo wrote:
>
>>> +struct ath10k_tlv_dump_data {
>>> +	u32 type; /* see ath10k_fw_error_dump_type above */
>>> +	u32 tlv_len; /* in bytes */
>>> +	u8 tlv_data[]; /* Pad to 32-bit boundaries as needed. */
>>> +} __packed;
>>> +
>>> +struct ath10k_dump_file_data {
>>> +	/* Dump file information */
>>> +	u32 len;
>>> +	u32 magic; /* 0x01020304, tells us byte-order of host if we care */
>>> +	u32 version; /* File dump version, 1 for now. */
>> 
>> Actually I would prefer to have magic first and have ASCII string as
>> string, for example "ATH10K-FW-DUMP".
>
> I'd like magic number to stay, was planning to use it to detect byte
> ordering (ie, dumps might come from various different platforms, to
> be decoded on some different platform).

If you want to know the endianess, I would prefer to do it the proper
way, for example something like this:

#ifdef __BIG_ENDIAN
        dump_data->big_endian = 1;
#else
        dump_data->big_endian = 0;
#endif

>>> +	/* Some info we can get from ath10k struct that might help. */
>>> +	u32 chip_id;
>>> +	u32 target_version;
>> 
>> bus_type or something like that would be good to add already now.
>
> Can you be more specific on what info you want here?  I don't see
> any mention of bus_type in the ath10k dir.

I was thinking of adding a new field named "bus_type" which will contain
just zero for now. This is so that we don't need to make any changes to
the format if/when we add a new bus along with pci.

>>> +	u32 fw_version_minor;
>>> +	u16 fw_version_release;
>>> +	u16 fw_version_build;
>>> +	u32 phy_capability;
>>> +	u32 hw_min_tx_power;
>>> +	u32 hw_max_tx_power;
>>> +	u32 ht_cap_info;
>>> +	u32 vht_cap_info;
>>> +	u32 num_rf_chains;
>>> +	char fw_ver[64]; /* Firmware version string */
>> 
>> Can we reuse ETHTOOL_FWVERS_LEN? cfg80211 already uses that.
>> 
>> #define ETHTOOL_FWVERS_LEN	32
>
> I prefer not to..that way, firmware format will remain the same even if
> the kernel changes the fwvers-len some day.

That is defined in the ethtool user space interface so changing that
would break a lot of things. And also used by cfg80211 and other
drivers. I would prefer to be consistent here and use
ETHTOOL_FWVERS_LEN.

>>> @@ -488,6 +555,12 @@ struct ath10k {
>>>  
>>>  	struct dfs_pattern_detector *dfs_detector;
>>>  
>>> +	/* Used for crash-dump storage */
>>> +	/* Don't over-write dump info until someone reads the data. */
>>> +	bool crashed_since_read;
>>> +	struct ath10k_dbglog_entry_storage dbglog_entry_data;
>>> +	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
>> 
>> I think these should be in struct ath10k_debug.
>
> I did not do this because I figure we will want ethtool support w/out
> forcing debugfs to be enabled someday soon.

When we add ethtool support, it's easy to move these back. And then we
need to move the code out from debug.c anyway.

>>> +void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len)
>>> +{
>>> +	int i;
>>> +	int z = ar->dbglog_entry_data.next_idx;
>>> +
>>> +	/* Don't save any new logs until user-space reads this. */
>>> +	if (ar->crashed_since_read)
>>> +		return;
>> 
>> Locking? If this functions depends on something, please document that
>> with lockdep_assert_held().
>
> To be honest, I was going to ignore locking and assume that firmware
> will not crash that often.  Worst case would be a garbled crash dump
> as there is no memory allocation involved in gathering the crash,
> and the length of the crash dump will not change based on anything in
> the crash logic.

Ignoring locking means that in few years we have a big mess in our
hands. I prefer to do the locking right from day one.

> I'm a bit leery of adding spin-locks in the dump routine just for
> this, but I can add and use a new spin-lock if you prefer.

Why a new spinlock? I didn't review the locking requirements, but I
would first check ar->data_lock can be used.

> If so, any idea if we can do the reads of the target's memory while
> holding a spin-lock, or would I need some temporary buffers and only
> lock while copying that in to the storage in the 'ar'?

I don't see why you would need special locks for reading target's
memory. If there is something needed, pci.c should handle that. Michal?

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-06  6:10       ` Kalle Valo
  0 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-06  6:10 UTC (permalink / raw)
  To: Ben Greear; +Cc: linux-wireless, ath10k

Ben Greear <greearb@candelatech.com> writes:

> On 06/05/2014 09:18 AM, Kalle Valo wrote:
>
>>> +struct ath10k_tlv_dump_data {
>>> +	u32 type; /* see ath10k_fw_error_dump_type above */
>>> +	u32 tlv_len; /* in bytes */
>>> +	u8 tlv_data[]; /* Pad to 32-bit boundaries as needed. */
>>> +} __packed;
>>> +
>>> +struct ath10k_dump_file_data {
>>> +	/* Dump file information */
>>> +	u32 len;
>>> +	u32 magic; /* 0x01020304, tells us byte-order of host if we care */
>>> +	u32 version; /* File dump version, 1 for now. */
>> 
>> Actually I would prefer to have magic first and have ASCII string as
>> string, for example "ATH10K-FW-DUMP".
>
> I'd like magic number to stay, was planning to use it to detect byte
> ordering (ie, dumps might come from various different platforms, to
> be decoded on some different platform).

If you want to know the endianess, I would prefer to do it the proper
way, for example something like this:

#ifdef __BIG_ENDIAN
        dump_data->big_endian = 1;
#else
        dump_data->big_endian = 0;
#endif

>>> +	/* Some info we can get from ath10k struct that might help. */
>>> +	u32 chip_id;
>>> +	u32 target_version;
>> 
>> bus_type or something like that would be good to add already now.
>
> Can you be more specific on what info you want here?  I don't see
> any mention of bus_type in the ath10k dir.

I was thinking of adding a new field named "bus_type" which will contain
just zero for now. This is so that we don't need to make any changes to
the format if/when we add a new bus along with pci.

>>> +	u32 fw_version_minor;
>>> +	u16 fw_version_release;
>>> +	u16 fw_version_build;
>>> +	u32 phy_capability;
>>> +	u32 hw_min_tx_power;
>>> +	u32 hw_max_tx_power;
>>> +	u32 ht_cap_info;
>>> +	u32 vht_cap_info;
>>> +	u32 num_rf_chains;
>>> +	char fw_ver[64]; /* Firmware version string */
>> 
>> Can we reuse ETHTOOL_FWVERS_LEN? cfg80211 already uses that.
>> 
>> #define ETHTOOL_FWVERS_LEN	32
>
> I prefer not to..that way, firmware format will remain the same even if
> the kernel changes the fwvers-len some day.

That is defined in the ethtool user space interface so changing that
would break a lot of things. And also used by cfg80211 and other
drivers. I would prefer to be consistent here and use
ETHTOOL_FWVERS_LEN.

>>> @@ -488,6 +555,12 @@ struct ath10k {
>>>  
>>>  	struct dfs_pattern_detector *dfs_detector;
>>>  
>>> +	/* Used for crash-dump storage */
>>> +	/* Don't over-write dump info until someone reads the data. */
>>> +	bool crashed_since_read;
>>> +	struct ath10k_dbglog_entry_storage dbglog_entry_data;
>>> +	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
>> 
>> I think these should be in struct ath10k_debug.
>
> I did not do this because I figure we will want ethtool support w/out
> forcing debugfs to be enabled someday soon.

When we add ethtool support, it's easy to move these back. And then we
need to move the code out from debug.c anyway.

>>> +void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len)
>>> +{
>>> +	int i;
>>> +	int z = ar->dbglog_entry_data.next_idx;
>>> +
>>> +	/* Don't save any new logs until user-space reads this. */
>>> +	if (ar->crashed_since_read)
>>> +		return;
>> 
>> Locking? If this functions depends on something, please document that
>> with lockdep_assert_held().
>
> To be honest, I was going to ignore locking and assume that firmware
> will not crash that often.  Worst case would be a garbled crash dump
> as there is no memory allocation involved in gathering the crash,
> and the length of the crash dump will not change based on anything in
> the crash logic.

Ignoring locking means that in few years we have a big mess in our
hands. I prefer to do the locking right from day one.

> I'm a bit leery of adding spin-locks in the dump routine just for
> this, but I can add and use a new spin-lock if you prefer.

Why a new spinlock? I didn't review the locking requirements, but I
would first check ar->data_lock can be used.

> If so, any idea if we can do the reads of the target's memory while
> holding a spin-lock, or would I need some temporary buffers and only
> lock while copying that in to the storage in the 'ar'?

I don't see why you would need special locks for reading target's
memory. If there is something needed, pci.c should handle that. Michal?

-- 
Kalle Valo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k: provide firmware crash info via debugfs.
  2014-06-06  6:10       ` Kalle Valo
@ 2014-06-06  6:30         ` Michal Kazior
  -1 siblings, 0 replies; 46+ messages in thread
From: Michal Kazior @ 2014-06-06  6:30 UTC (permalink / raw)
  To: Kalle Valo; +Cc: Ben Greear, ath10k, linux-wireless

On 6 June 2014 08:10, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
> Ben Greear <greearb@candelatech.com> writes:
[...]
>> I'm a bit leery of adding spin-locks in the dump routine just for
>> this, but I can add and use a new spin-lock if you prefer.
>
> Why a new spinlock? I didn't review the locking requirements, but I
> would first check ar->data_lock can be used.
>
>> If so, any idea if we can do the reads of the target's memory while
>> holding a spin-lock, or would I need some temporary buffers and only
>> lock while copying that in to the storage in the 'ar'?
>
> I don't see why you would need special locks for reading target's
> memory. If there is something needed, pci.c should handle that. Michal?

By definition the diagnostic window access must be serialized. We
don't do this with locks now but rely ON driver states/sequences. We
might have some problems lurking there already but I'd need to analyze
it to tell for sure.

Calling pci_diag_* functions while holding a spinlock should otherwise
be fine because these functions use mdelay() and poll the diagnostic
window copy engine pipe.

Using ar->data_lock for a pci transport specific requirement (the
diagnostic window) seems wrong though.


Michał

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k: provide firmware crash info via debugfs.
@ 2014-06-06  6:30         ` Michal Kazior
  0 siblings, 0 replies; 46+ messages in thread
From: Michal Kazior @ 2014-06-06  6:30 UTC (permalink / raw)
  To: Kalle Valo; +Cc: Ben Greear, linux-wireless, ath10k

On 6 June 2014 08:10, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
> Ben Greear <greearb@candelatech.com> writes:
[...]
>> I'm a bit leery of adding spin-locks in the dump routine just for
>> this, but I can add and use a new spin-lock if you prefer.
>
> Why a new spinlock? I didn't review the locking requirements, but I
> would first check ar->data_lock can be used.
>
>> If so, any idea if we can do the reads of the target's memory while
>> holding a spin-lock, or would I need some temporary buffers and only
>> lock while copying that in to the storage in the 'ar'?
>
> I don't see why you would need special locks for reading target's
> memory. If there is something needed, pci.c should handle that. Michal?

By definition the diagnostic window access must be serialized. We
don't do this with locks now but rely ON driver states/sequences. We
might have some problems lurking there already but I'd need to analyze
it to tell for sure.

Calling pci_diag_* functions while holding a spinlock should otherwise
be fine because these functions use mdelay() and poll the diagnostic
window copy engine pipe.

Using ar->data_lock for a pci transport specific requirement (the
diagnostic window) seems wrong though.


Michał

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-04 18:01 ` greearb
@ 2014-06-06  6:55   ` Kalle Valo
  -1 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-06  6:55 UTC (permalink / raw)
  To: greearb; +Cc: ath10k, linux-wireless

greearb@candelatech.com writes:

> From: Ben Greear <greearb@candelatech.com>
>
> Store the firmware crash registers and last 128 or so
> firmware debug-log ids and present them to user-space
> via debugfs.
>
> Should help with figuring out why the firmware crashed.
>
> Signed-off-by: Ben Greear <greearb@candelatech.com>

[...]

> +	dump_data->kernel_ver_code = LINUX_VERSION_CODE;
> +	strncpy(dump_data->kernel_ver, VERMAGIC_STRING,
> +		sizeof(dump_data->kernel_ver) - 1);

Other thing nice to have here is the kernel timestamp in the kernel log
"[123456.4321]". How difficult would it be to add that to the dump?

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-06  6:55   ` Kalle Valo
  0 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-06  6:55 UTC (permalink / raw)
  To: greearb; +Cc: linux-wireless, ath10k

greearb@candelatech.com writes:

> From: Ben Greear <greearb@candelatech.com>
>
> Store the firmware crash registers and last 128 or so
> firmware debug-log ids and present them to user-space
> via debugfs.
>
> Should help with figuring out why the firmware crashed.
>
> Signed-off-by: Ben Greear <greearb@candelatech.com>

[...]

> +	dump_data->kernel_ver_code = LINUX_VERSION_CODE;
> +	strncpy(dump_data->kernel_ver, VERMAGIC_STRING,
> +		sizeof(dump_data->kernel_ver) - 1);

Other thing nice to have here is the kernel timestamp in the kernel log
"[123456.4321]". How difficult would it be to add that to the dump?

-- 
Kalle Valo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k: provide firmware crash info via debugfs.
  2014-06-06  6:30         ` Michal Kazior
@ 2014-06-06  8:55           ` Kalle Valo
  -1 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-06  8:55 UTC (permalink / raw)
  To: Michal Kazior; +Cc: Ben Greear, ath10k, linux-wireless

Michal Kazior <michal.kazior@tieto.com> writes:

> On 6 June 2014 08:10, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
>> Ben Greear <greearb@candelatech.com> writes:
> [...]
>>> I'm a bit leery of adding spin-locks in the dump routine just for
>>> this, but I can add and use a new spin-lock if you prefer.
>>
>> Why a new spinlock? I didn't review the locking requirements, but I
>> would first check ar->data_lock can be used.
>>
>>> If so, any idea if we can do the reads of the target's memory while
>>> holding a spin-lock, or would I need some temporary buffers and only
>>> lock while copying that in to the storage in the 'ar'?
>>
>> I don't see why you would need special locks for reading target's
>> memory. If there is something needed, pci.c should handle that. Michal?
>
> By definition the diagnostic window access must be serialized. We
> don't do this with locks now but rely ON driver states/sequences. We
> might have some problems lurking there already but I'd need to analyze
> it to tell for sure.

Should that serialisation happen within pci.c?

> Calling pci_diag_* functions while holding a spinlock should otherwise
> be fine because these functions use mdelay() and poll the diagnostic
> window copy engine pipe.
>
> Using ar->data_lock for a pci transport specific requirement (the
> diagnostic window) seems wrong though.

I agree. I did not mean using data_lock to serialise the pci code.

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k: provide firmware crash info via debugfs.
@ 2014-06-06  8:55           ` Kalle Valo
  0 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-06  8:55 UTC (permalink / raw)
  To: Michal Kazior; +Cc: Ben Greear, linux-wireless, ath10k

Michal Kazior <michal.kazior@tieto.com> writes:

> On 6 June 2014 08:10, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
>> Ben Greear <greearb@candelatech.com> writes:
> [...]
>>> I'm a bit leery of adding spin-locks in the dump routine just for
>>> this, but I can add and use a new spin-lock if you prefer.
>>
>> Why a new spinlock? I didn't review the locking requirements, but I
>> would first check ar->data_lock can be used.
>>
>>> If so, any idea if we can do the reads of the target's memory while
>>> holding a spin-lock, or would I need some temporary buffers and only
>>> lock while copying that in to the storage in the 'ar'?
>>
>> I don't see why you would need special locks for reading target's
>> memory. If there is something needed, pci.c should handle that. Michal?
>
> By definition the diagnostic window access must be serialized. We
> don't do this with locks now but rely ON driver states/sequences. We
> might have some problems lurking there already but I'd need to analyze
> it to tell for sure.

Should that serialisation happen within pci.c?

> Calling pci_diag_* functions while holding a spinlock should otherwise
> be fine because these functions use mdelay() and poll the diagnostic
> window copy engine pipe.
>
> Using ar->data_lock for a pci transport specific requirement (the
> diagnostic window) seems wrong though.

I agree. I did not mean using data_lock to serialise the pci code.

-- 
Kalle Valo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-04 18:01 ` greearb
@ 2014-06-06  9:33   ` Kalle Valo
  -1 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-06  9:33 UTC (permalink / raw)
  To: greearb; +Cc: ath10k, linux-wireless

greearb@candelatech.com writes:

> From: Ben Greear <greearb@candelatech.com>
>
> Store the firmware crash registers and last 128 or so
> firmware debug-log ids and present them to user-space
> via debugfs.
>
> Should help with figuring out why the firmware crashed.
>
> Signed-off-by: Ben Greear <greearb@candelatech.com>

Review part 2.

> @@ -875,6 +877,80 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
>  			   reg_dump_values[i + 2],
>  			   reg_dump_values[i + 3]);
>  
> +	/* Dump the debug logs on the target */
> +	host_addr = host_interest_item_address(HI_ITEM(hi_dbglog_hdr));
> +	if (ath10k_pci_diag_read_mem(ar, host_addr,
> +				     &reg_dump_area, sizeof(u32)) != 0) {
> +		ath10k_warn("failed to read hi_dbglog_hdr\n");

Please print the error code.

> +		goto save_regs_and_restart;
> +	}
> +
> +	ath10k_err("target register Debug Log Location: 0x%08X\n",
> +		   reg_dump_area);

Is this needed? I think this more like debug log material than something
we should report to the user.

> +	ret = ath10k_pci_diag_read_mem(ar, reg_dump_area,
> +				       &dbg_hdr, sizeof(dbg_hdr));
> +	if (ret != 0) {
> +		ath10k_err("failed to dump Debug Log Area\n");

ath10k_warn("failed to dump debug log area: %d\n", ret);

> +		goto save_regs_and_restart;
> +	}
> +
> +	ath10k_err("Debug Log Header, dbuf: 0x%x  dropped: %i\n",
> +		   dbg_hdr.dbuf, dbg_hdr.dropped);

ath10k_dbg()?

> +	dbufp = dbg_hdr.dbuf;
> +	i = 0;
> +	while (dbufp) {

for (i = 0; dbufp > 0; i++)?

> +		struct ath10k_fw_dbglog_buf dbuf;

Please move to the beginning of the function.

> +		ret = ath10k_pci_diag_read_mem(ar, dbufp,
> +					       &dbuf, sizeof(dbuf));
> +		if (ret != 0) {
> +			ath10k_err("failed to read Debug Log Area: 0x%x\n",
> +				   dbufp);

ath10k_warn("failed to read debug log area: %d\n");

> +			goto save_regs_and_restart;
> +		}
> +
> +		/* We have a buffer of data */
> +		ath10k_err("[%i] next: 0x%x buf: 0x%x sz: %i len: %i count: %i free: %i\n",
> +			   i, dbuf.next, dbuf.buffer, dbuf.bufsize, dbuf.length,
> +			   dbuf.count, dbuf.free);

ath10k_dbg()?

> +		if (dbuf.buffer && dbuf.length) {

I would prefer inverse here to keep the indentation clean, something
like:

if (dbuf.buffer == 0 || dbug.length == 0)
        goto next;

> +			u8 *buffer = kmalloc(dbuf.length, GFP_ATOMIC);

Should we have a maximum for the length so that we don't allocate insane
amounts of memory by accident?

> +			if (buffer) {

Again I would prefer the inverse:

if (!buf)
   break;

I assume there's no sense continuing here if kmalloc fails and better
just bail out.

> +				ret = ath10k_pci_diag_read_mem(ar, dbuf.buffer,
> +							       buffer,
> +							       dbuf.length);
> +				if (ret != 0) {
> +					ath10k_err("failed to read Debug Log buffer: 0x%x\n",
> +						   dbuf.buffer);

ath10k_warn("failed to read debug log buffer: %d\n", ret);

> +					kfree(buffer);
> +					goto save_regs_and_restart;
> +				}
> +
> +				ath10k_dbg_save_fw_dbg_buffer(ar, buffer,
> +							      dbuf.length);
> +				kfree(buffer);

Instead of doing atomic allocations multiple times in a loop, would it
be better to allocate just one buffer before the loop and free it
afterwards?

> +			}
> +		}
> +		dbufp = dbuf.next;
> +		if (dbufp == dbg_hdr.dbuf) {
> +			/* It is a circular buffer it seems, bail if next
> +			 * is head
> +			 */
> +			break;
> +		}
> +		i++;
> +	} /* While we have a debug buffer to read */
> +
> +save_regs_and_restart:

exit_save_regs:

> +	if (!ar->crashed_since_read) {
> +		ar->crashed_since_read = true;
> +		memcpy(ar->reg_dump_values, reg_dump_values,
> +		       sizeof(ar->reg_dump_values));
> +	}

How is the locking handled in this function?

> +do_restart:

exit:

>  	queue_work(ar->workqueue, &ar->restart_work);
>  }

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-06  9:33   ` Kalle Valo
  0 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-06  9:33 UTC (permalink / raw)
  To: greearb; +Cc: linux-wireless, ath10k

greearb@candelatech.com writes:

> From: Ben Greear <greearb@candelatech.com>
>
> Store the firmware crash registers and last 128 or so
> firmware debug-log ids and present them to user-space
> via debugfs.
>
> Should help with figuring out why the firmware crashed.
>
> Signed-off-by: Ben Greear <greearb@candelatech.com>

Review part 2.

> @@ -875,6 +877,80 @@ static void ath10k_pci_hif_dump_area(struct ath10k *ar)
>  			   reg_dump_values[i + 2],
>  			   reg_dump_values[i + 3]);
>  
> +	/* Dump the debug logs on the target */
> +	host_addr = host_interest_item_address(HI_ITEM(hi_dbglog_hdr));
> +	if (ath10k_pci_diag_read_mem(ar, host_addr,
> +				     &reg_dump_area, sizeof(u32)) != 0) {
> +		ath10k_warn("failed to read hi_dbglog_hdr\n");

Please print the error code.

> +		goto save_regs_and_restart;
> +	}
> +
> +	ath10k_err("target register Debug Log Location: 0x%08X\n",
> +		   reg_dump_area);

Is this needed? I think this more like debug log material than something
we should report to the user.

> +	ret = ath10k_pci_diag_read_mem(ar, reg_dump_area,
> +				       &dbg_hdr, sizeof(dbg_hdr));
> +	if (ret != 0) {
> +		ath10k_err("failed to dump Debug Log Area\n");

ath10k_warn("failed to dump debug log area: %d\n", ret);

> +		goto save_regs_and_restart;
> +	}
> +
> +	ath10k_err("Debug Log Header, dbuf: 0x%x  dropped: %i\n",
> +		   dbg_hdr.dbuf, dbg_hdr.dropped);

ath10k_dbg()?

> +	dbufp = dbg_hdr.dbuf;
> +	i = 0;
> +	while (dbufp) {

for (i = 0; dbufp > 0; i++)?

> +		struct ath10k_fw_dbglog_buf dbuf;

Please move to the beginning of the function.

> +		ret = ath10k_pci_diag_read_mem(ar, dbufp,
> +					       &dbuf, sizeof(dbuf));
> +		if (ret != 0) {
> +			ath10k_err("failed to read Debug Log Area: 0x%x\n",
> +				   dbufp);

ath10k_warn("failed to read debug log area: %d\n");

> +			goto save_regs_and_restart;
> +		}
> +
> +		/* We have a buffer of data */
> +		ath10k_err("[%i] next: 0x%x buf: 0x%x sz: %i len: %i count: %i free: %i\n",
> +			   i, dbuf.next, dbuf.buffer, dbuf.bufsize, dbuf.length,
> +			   dbuf.count, dbuf.free);

ath10k_dbg()?

> +		if (dbuf.buffer && dbuf.length) {

I would prefer inverse here to keep the indentation clean, something
like:

if (dbuf.buffer == 0 || dbug.length == 0)
        goto next;

> +			u8 *buffer = kmalloc(dbuf.length, GFP_ATOMIC);

Should we have a maximum for the length so that we don't allocate insane
amounts of memory by accident?

> +			if (buffer) {

Again I would prefer the inverse:

if (!buf)
   break;

I assume there's no sense continuing here if kmalloc fails and better
just bail out.

> +				ret = ath10k_pci_diag_read_mem(ar, dbuf.buffer,
> +							       buffer,
> +							       dbuf.length);
> +				if (ret != 0) {
> +					ath10k_err("failed to read Debug Log buffer: 0x%x\n",
> +						   dbuf.buffer);

ath10k_warn("failed to read debug log buffer: %d\n", ret);

> +					kfree(buffer);
> +					goto save_regs_and_restart;
> +				}
> +
> +				ath10k_dbg_save_fw_dbg_buffer(ar, buffer,
> +							      dbuf.length);
> +				kfree(buffer);

Instead of doing atomic allocations multiple times in a loop, would it
be better to allocate just one buffer before the loop and free it
afterwards?

> +			}
> +		}
> +		dbufp = dbuf.next;
> +		if (dbufp == dbg_hdr.dbuf) {
> +			/* It is a circular buffer it seems, bail if next
> +			 * is head
> +			 */
> +			break;
> +		}
> +		i++;
> +	} /* While we have a debug buffer to read */
> +
> +save_regs_and_restart:

exit_save_regs:

> +	if (!ar->crashed_since_read) {
> +		ar->crashed_since_read = true;
> +		memcpy(ar->reg_dump_values, reg_dump_values,
> +		       sizeof(ar->reg_dump_values));
> +	}

How is the locking handled in this function?

> +do_restart:

exit:

>  	queue_work(ar->workqueue, &ar->restart_work);
>  }

-- 
Kalle Valo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k: provide firmware crash info via debugfs.
  2014-06-06  8:55           ` Kalle Valo
@ 2014-06-06  9:45             ` Michal Kazior
  -1 siblings, 0 replies; 46+ messages in thread
From: Michal Kazior @ 2014-06-06  9:45 UTC (permalink / raw)
  To: Kalle Valo; +Cc: Ben Greear, ath10k, linux-wireless

On 6 June 2014 10:55, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
> Michal Kazior <michal.kazior@tieto.com> writes:
>
>> On 6 June 2014 08:10, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
>>> Ben Greear <greearb@candelatech.com> writes:
>> [...]
>>>> I'm a bit leery of adding spin-locks in the dump routine just for
>>>> this, but I can add and use a new spin-lock if you prefer.
>>>
>>> Why a new spinlock? I didn't review the locking requirements, but I
>>> would first check ar->data_lock can be used.
>>>
>>>> If so, any idea if we can do the reads of the target's memory while
>>>> holding a spin-lock, or would I need some temporary buffers and only
>>>> lock while copying that in to the storage in the 'ar'?
>>>
>>> I don't see why you would need special locks for reading target's
>>> memory. If there is something needed, pci.c should handle that. Michal?
>>
>> By definition the diagnostic window access must be serialized. We
>> don't do this with locks now but rely ON driver states/sequences. We
>> might have some problems lurking there already but I'd need to analyze
>> it to tell for sure.
>
> Should that serialisation happen within pci.c?

You can but that's not a requirement I guess. E.g. exchange_bmi_msg
hif callback requires the caller to serialize calls.


Michał

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k: provide firmware crash info via debugfs.
@ 2014-06-06  9:45             ` Michal Kazior
  0 siblings, 0 replies; 46+ messages in thread
From: Michal Kazior @ 2014-06-06  9:45 UTC (permalink / raw)
  To: Kalle Valo; +Cc: Ben Greear, linux-wireless, ath10k

On 6 June 2014 10:55, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
> Michal Kazior <michal.kazior@tieto.com> writes:
>
>> On 6 June 2014 08:10, Kalle Valo <kvalo@qca.qualcomm.com> wrote:
>>> Ben Greear <greearb@candelatech.com> writes:
>> [...]
>>>> I'm a bit leery of adding spin-locks in the dump routine just for
>>>> this, but I can add and use a new spin-lock if you prefer.
>>>
>>> Why a new spinlock? I didn't review the locking requirements, but I
>>> would first check ar->data_lock can be used.
>>>
>>>> If so, any idea if we can do the reads of the target's memory while
>>>> holding a spin-lock, or would I need some temporary buffers and only
>>>> lock while copying that in to the storage in the 'ar'?
>>>
>>> I don't see why you would need special locks for reading target's
>>> memory. If there is something needed, pci.c should handle that. Michal?
>>
>> By definition the diagnostic window access must be serialized. We
>> don't do this with locks now but rely ON driver states/sequences. We
>> might have some problems lurking there already but I'd need to analyze
>> it to tell for sure.
>
> Should that serialisation happen within pci.c?

You can but that's not a requirement I guess. E.g. exchange_bmi_msg
hif callback requires the caller to serialize calls.


Michał

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-06  6:55   ` Kalle Valo
@ 2014-06-06 16:01     ` Ben Greear
  -1 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-06 16:01 UTC (permalink / raw)
  To: Kalle Valo; +Cc: linux-wireless, ath10k

On 06/05/2014 11:55 PM, Kalle Valo wrote:
> greearb@candelatech.com writes:
> 
>> From: Ben Greear <greearb@candelatech.com>
>>
>> Store the firmware crash registers and last 128 or so
>> firmware debug-log ids and present them to user-space
>> via debugfs.
>>
>> Should help with figuring out why the firmware crashed.
>>
>> Signed-off-by: Ben Greear <greearb@candelatech.com>
> 
> [...]
> 
>> +	dump_data->kernel_ver_code = LINUX_VERSION_CODE;
>> +	strncpy(dump_data->kernel_ver, VERMAGIC_STRING,
>> +		sizeof(dump_data->kernel_ver) - 1);
> 
> Other thing nice to have here is the kernel timestamp in the kernel log
> "[123456.4321]". How difficult would it be to add that to the dump?

It's easy to get the time-of-day, if that is what you mean.

I'll add that...

Thanks,
Ben


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-06 16:01     ` Ben Greear
  0 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-06 16:01 UTC (permalink / raw)
  To: Kalle Valo; +Cc: linux-wireless, ath10k

On 06/05/2014 11:55 PM, Kalle Valo wrote:
> greearb@candelatech.com writes:
> 
>> From: Ben Greear <greearb@candelatech.com>
>>
>> Store the firmware crash registers and last 128 or so
>> firmware debug-log ids and present them to user-space
>> via debugfs.
>>
>> Should help with figuring out why the firmware crashed.
>>
>> Signed-off-by: Ben Greear <greearb@candelatech.com>
> 
> [...]
> 
>> +	dump_data->kernel_ver_code = LINUX_VERSION_CODE;
>> +	strncpy(dump_data->kernel_ver, VERMAGIC_STRING,
>> +		sizeof(dump_data->kernel_ver) - 1);
> 
> Other thing nice to have here is the kernel timestamp in the kernel log
> "[123456.4321]". How difficult would it be to add that to the dump?

It's easy to get the time-of-day, if that is what you mean.

I'll add that...

Thanks,
Ben


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-06  6:10       ` Kalle Valo
@ 2014-06-06 16:11         ` Ben Greear
  -1 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-06 16:11 UTC (permalink / raw)
  To: Kalle Valo; +Cc: ath10k, linux-wireless

On 06/05/2014 11:10 PM, Kalle Valo wrote:
> Ben Greear <greearb@candelatech.com> writes:
> 
>> On 06/05/2014 09:18 AM, Kalle Valo wrote:
>>
>>>> +struct ath10k_tlv_dump_data {
>>>> +	u32 type; /* see ath10k_fw_error_dump_type above */
>>>> +	u32 tlv_len; /* in bytes */
>>>> +	u8 tlv_data[]; /* Pad to 32-bit boundaries as needed. */
>>>> +} __packed;
>>>> +
>>>> +struct ath10k_dump_file_data {
>>>> +	/* Dump file information */
>>>> +	u32 len;
>>>> +	u32 magic; /* 0x01020304, tells us byte-order of host if we care */
>>>> +	u32 version; /* File dump version, 1 for now. */
>>>
>>> Actually I would prefer to have magic first and have ASCII string as
>>> string, for example "ATH10K-FW-DUMP".
>>
>> I'd like magic number to stay, was planning to use it to detect byte
>> ordering (ie, dumps might come from various different platforms, to
>> be decoded on some different platform).
> 
> If you want to know the endianess, I would prefer to do it the proper
> way, for example something like this:
> 
> #ifdef __BIG_ENDIAN
>         dump_data->big_endian = 1;
> #else
>         dump_data->big_endian = 0;
> #endif

Ok.

>>>> +	/* Some info we can get from ath10k struct that might help. */
>>>> +	u32 chip_id;
>>>> +	u32 target_version;
>>>
>>> bus_type or something like that would be good to add already now.
>>
>> Can you be more specific on what info you want here?  I don't see
>> any mention of bus_type in the ath10k dir.
> 
> I was thinking of adding a new field named "bus_type" which will contain
> just zero for now. This is so that we don't need to make any changes to
> the format if/when we add a new bus along with pci.

Sure, will do.

>>>> +	u32 fw_version_minor;
>>>> +	u16 fw_version_release;
>>>> +	u16 fw_version_build;
>>>> +	u32 phy_capability;
>>>> +	u32 hw_min_tx_power;
>>>> +	u32 hw_max_tx_power;
>>>> +	u32 ht_cap_info;
>>>> +	u32 vht_cap_info;
>>>> +	u32 num_rf_chains;
>>>> +	char fw_ver[64]; /* Firmware version string */
>>>
>>> Can we reuse ETHTOOL_FWVERS_LEN? cfg80211 already uses that.
>>>
>>> #define ETHTOOL_FWVERS_LEN	32
>>
>> I prefer not to..that way, firmware format will remain the same even if
>> the kernel changes the fwvers-len some day.
> 
> That is defined in the ethtool user space interface so changing that
> would break a lot of things. And also used by cfg80211 and other
> drivers. I would prefer to be consistent here and use
> ETHTOOL_FWVERS_LEN.

Ok.

>>>> @@ -488,6 +555,12 @@ struct ath10k {
>>>>  
>>>>  	struct dfs_pattern_detector *dfs_detector;
>>>>  
>>>> +	/* Used for crash-dump storage */
>>>> +	/* Don't over-write dump info until someone reads the data. */
>>>> +	bool crashed_since_read;
>>>> +	struct ath10k_dbglog_entry_storage dbglog_entry_data;
>>>> +	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
>>>
>>> I think these should be in struct ath10k_debug.
>>
>> I did not do this because I figure we will want ethtool support w/out
>> forcing debugfs to be enabled someday soon.
> 
> When we add ethtool support, it's easy to move these back. And then we
> need to move the code out from debug.c anyway.

Well, it seems like needless churn and makes it harder to follow
'git blame' when you move big chunks around.

We would not have to move the code out of debug.c, but if you do want
it moved, lets pick that place now and just put it there to begin with.

>>>> +void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len)
>>>> +{
>>>> +	int i;
>>>> +	int z = ar->dbglog_entry_data.next_idx;
>>>> +
>>>> +	/* Don't save any new logs until user-space reads this. */
>>>> +	if (ar->crashed_since_read)
>>>> +		return;
>>>
>>> Locking? If this functions depends on something, please document that
>>> with lockdep_assert_held().
>>
>> To be honest, I was going to ignore locking and assume that firmware
>> will not crash that often.  Worst case would be a garbled crash dump
>> as there is no memory allocation involved in gathering the crash,
>> and the length of the crash dump will not change based on anything in
>> the crash logic.
> 
> Ignoring locking means that in few years we have a big mess in our
> hands. I prefer to do the locking right from day one.
> 
>> I'm a bit leery of adding spin-locks in the dump routine just for
>> this, but I can add and use a new spin-lock if you prefer.
> 
> Why a new spinlock? I didn't review the locking requirements, but I
> would first check ar->data_lock can be used.

I think it has to be a spin-lock because the crash dump is gathered
in the irq handler, so I can't use a mutex as far as I know...

I'll work on adding such a lock today.

>> If so, any idea if we can do the reads of the target's memory while
>> holding a spin-lock, or would I need some temporary buffers and only
>> lock while copying that in to the storage in the 'ar'?
> 
> I don't see why you would need special locks for reading target's
> memory. If there is something needed, pci.c should handle that. Michal?

I was concerned something might sleep, but I can do some testing with lockdep
enabled and hopefully it will find any issues of that nature.

Thanks,
Ben


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-06 16:11         ` Ben Greear
  0 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-06 16:11 UTC (permalink / raw)
  To: Kalle Valo; +Cc: linux-wireless, ath10k

On 06/05/2014 11:10 PM, Kalle Valo wrote:
> Ben Greear <greearb@candelatech.com> writes:
> 
>> On 06/05/2014 09:18 AM, Kalle Valo wrote:
>>
>>>> +struct ath10k_tlv_dump_data {
>>>> +	u32 type; /* see ath10k_fw_error_dump_type above */
>>>> +	u32 tlv_len; /* in bytes */
>>>> +	u8 tlv_data[]; /* Pad to 32-bit boundaries as needed. */
>>>> +} __packed;
>>>> +
>>>> +struct ath10k_dump_file_data {
>>>> +	/* Dump file information */
>>>> +	u32 len;
>>>> +	u32 magic; /* 0x01020304, tells us byte-order of host if we care */
>>>> +	u32 version; /* File dump version, 1 for now. */
>>>
>>> Actually I would prefer to have magic first and have ASCII string as
>>> string, for example "ATH10K-FW-DUMP".
>>
>> I'd like magic number to stay, was planning to use it to detect byte
>> ordering (ie, dumps might come from various different platforms, to
>> be decoded on some different platform).
> 
> If you want to know the endianess, I would prefer to do it the proper
> way, for example something like this:
> 
> #ifdef __BIG_ENDIAN
>         dump_data->big_endian = 1;
> #else
>         dump_data->big_endian = 0;
> #endif

Ok.

>>>> +	/* Some info we can get from ath10k struct that might help. */
>>>> +	u32 chip_id;
>>>> +	u32 target_version;
>>>
>>> bus_type or something like that would be good to add already now.
>>
>> Can you be more specific on what info you want here?  I don't see
>> any mention of bus_type in the ath10k dir.
> 
> I was thinking of adding a new field named "bus_type" which will contain
> just zero for now. This is so that we don't need to make any changes to
> the format if/when we add a new bus along with pci.

Sure, will do.

>>>> +	u32 fw_version_minor;
>>>> +	u16 fw_version_release;
>>>> +	u16 fw_version_build;
>>>> +	u32 phy_capability;
>>>> +	u32 hw_min_tx_power;
>>>> +	u32 hw_max_tx_power;
>>>> +	u32 ht_cap_info;
>>>> +	u32 vht_cap_info;
>>>> +	u32 num_rf_chains;
>>>> +	char fw_ver[64]; /* Firmware version string */
>>>
>>> Can we reuse ETHTOOL_FWVERS_LEN? cfg80211 already uses that.
>>>
>>> #define ETHTOOL_FWVERS_LEN	32
>>
>> I prefer not to..that way, firmware format will remain the same even if
>> the kernel changes the fwvers-len some day.
> 
> That is defined in the ethtool user space interface so changing that
> would break a lot of things. And also used by cfg80211 and other
> drivers. I would prefer to be consistent here and use
> ETHTOOL_FWVERS_LEN.

Ok.

>>>> @@ -488,6 +555,12 @@ struct ath10k {
>>>>  
>>>>  	struct dfs_pattern_detector *dfs_detector;
>>>>  
>>>> +	/* Used for crash-dump storage */
>>>> +	/* Don't over-write dump info until someone reads the data. */
>>>> +	bool crashed_since_read;
>>>> +	struct ath10k_dbglog_entry_storage dbglog_entry_data;
>>>> +	u32 reg_dump_values[REG_DUMP_COUNT_QCA988X];
>>>
>>> I think these should be in struct ath10k_debug.
>>
>> I did not do this because I figure we will want ethtool support w/out
>> forcing debugfs to be enabled someday soon.
> 
> When we add ethtool support, it's easy to move these back. And then we
> need to move the code out from debug.c anyway.

Well, it seems like needless churn and makes it harder to follow
'git blame' when you move big chunks around.

We would not have to move the code out of debug.c, but if you do want
it moved, lets pick that place now and just put it there to begin with.

>>>> +void ath10k_dbg_save_fw_dbg_buffer(struct ath10k *ar, u8 *buffer, int len)
>>>> +{
>>>> +	int i;
>>>> +	int z = ar->dbglog_entry_data.next_idx;
>>>> +
>>>> +	/* Don't save any new logs until user-space reads this. */
>>>> +	if (ar->crashed_since_read)
>>>> +		return;
>>>
>>> Locking? If this functions depends on something, please document that
>>> with lockdep_assert_held().
>>
>> To be honest, I was going to ignore locking and assume that firmware
>> will not crash that often.  Worst case would be a garbled crash dump
>> as there is no memory allocation involved in gathering the crash,
>> and the length of the crash dump will not change based on anything in
>> the crash logic.
> 
> Ignoring locking means that in few years we have a big mess in our
> hands. I prefer to do the locking right from day one.
> 
>> I'm a bit leery of adding spin-locks in the dump routine just for
>> this, but I can add and use a new spin-lock if you prefer.
> 
> Why a new spinlock? I didn't review the locking requirements, but I
> would first check ar->data_lock can be used.

I think it has to be a spin-lock because the crash dump is gathered
in the irq handler, so I can't use a mutex as far as I know...

I'll work on adding such a lock today.

>> If so, any idea if we can do the reads of the target's memory while
>> holding a spin-lock, or would I need some temporary buffers and only
>> lock while copying that in to the storage in the 'ar'?
> 
> I don't see why you would need special locks for reading target's
> memory. If there is something needed, pci.c should handle that. Michal?

I was concerned something might sleep, but I can do some testing with lockdep
enabled and hopefully it will find any issues of that nature.

Thanks,
Ben


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-06  9:33   ` Kalle Valo
@ 2014-06-06 17:06     ` Ben Greear
  -1 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-06 17:06 UTC (permalink / raw)
  To: Kalle Valo; +Cc: ath10k, linux-wireless

On 06/06/2014 02:33 AM, Kalle Valo wrote:

>> +					kfree(buffer);
>> +					goto save_regs_and_restart;
>> +				}
>> +
>> +				ath10k_dbg_save_fw_dbg_buffer(ar, buffer,
>> +							      dbuf.length);
>> +				kfree(buffer);
> 
> Instead of doing atomic allocations multiple times in a loop, would it
> be better to allocate just one buffer before the loop and free it
> afterwards?

There is no hard guarantee that the buffer lengths are the same,
so I think it needs to remain as is.  Would rather not crap out
because firmware suddenly got more clever...

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-06 17:06     ` Ben Greear
  0 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-06 17:06 UTC (permalink / raw)
  To: Kalle Valo; +Cc: linux-wireless, ath10k

On 06/06/2014 02:33 AM, Kalle Valo wrote:

>> +					kfree(buffer);
>> +					goto save_regs_and_restart;
>> +				}
>> +
>> +				ath10k_dbg_save_fw_dbg_buffer(ar, buffer,
>> +							      dbuf.length);
>> +				kfree(buffer);
> 
> Instead of doing atomic allocations multiple times in a loop, would it
> be better to allocate just one buffer before the loop and free it
> afterwards?

There is no hard guarantee that the buffer lengths are the same,
so I think it needs to remain as is.  Would rather not crap out
because firmware suddenly got more clever...

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-06 16:01     ` Ben Greear
@ 2014-06-07 12:50       ` Kalle Valo
  -1 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-07 12:50 UTC (permalink / raw)
  To: Ben Greear; +Cc: linux-wireless, ath10k

Ben Greear <greearb@candelatech.com> writes:

> On 06/05/2014 11:55 PM, Kalle Valo wrote:
>> greearb@candelatech.com writes:
>> 
>>> From: Ben Greear <greearb@candelatech.com>
>>>
>>> Store the firmware crash registers and last 128 or so
>>> firmware debug-log ids and present them to user-space
>>> via debugfs.
>>>
>>> Should help with figuring out why the firmware crashed.
>>>
>>> Signed-off-by: Ben Greear <greearb@candelatech.com>
>> 
>> [...]
>> 
>>> +	dump_data->kernel_ver_code = LINUX_VERSION_CODE;
>>> +	strncpy(dump_data->kernel_ver, VERMAGIC_STRING,
>>> +		sizeof(dump_data->kernel_ver) - 1);
>> 
>> Other thing nice to have here is the kernel timestamp in the kernel log
>> "[123456.4321]". How difficult would it be to add that to the dump?
>
> It's easy to get the time-of-day, if that is what you mean.

That's actually seconds from boot, not wall time:

[    0.000000] tsc: Fast TSC calibration using PIT
[    0.000000] tsc: Detected 2793.393 MHz processor
[    0.000004] Calibrating delay loop (skipped), value calculated using timer frequency.. 5586.78 BogoMIP
S (lpj=27933930)
[    0.000009] pid_max: default: 32768 minimum: 301
[    0.000016] ACPI: Core revision 20140214
[    0.023399] ACPI: All ACPI Tables successfully acquired

There's function print_time() which prints the time and the timestamp
apparently comes from local_clock().

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-07 12:50       ` Kalle Valo
  0 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-07 12:50 UTC (permalink / raw)
  To: Ben Greear; +Cc: linux-wireless, ath10k

Ben Greear <greearb@candelatech.com> writes:

> On 06/05/2014 11:55 PM, Kalle Valo wrote:
>> greearb@candelatech.com writes:
>> 
>>> From: Ben Greear <greearb@candelatech.com>
>>>
>>> Store the firmware crash registers and last 128 or so
>>> firmware debug-log ids and present them to user-space
>>> via debugfs.
>>>
>>> Should help with figuring out why the firmware crashed.
>>>
>>> Signed-off-by: Ben Greear <greearb@candelatech.com>
>> 
>> [...]
>> 
>>> +	dump_data->kernel_ver_code = LINUX_VERSION_CODE;
>>> +	strncpy(dump_data->kernel_ver, VERMAGIC_STRING,
>>> +		sizeof(dump_data->kernel_ver) - 1);
>> 
>> Other thing nice to have here is the kernel timestamp in the kernel log
>> "[123456.4321]". How difficult would it be to add that to the dump?
>
> It's easy to get the time-of-day, if that is what you mean.

That's actually seconds from boot, not wall time:

[    0.000000] tsc: Fast TSC calibration using PIT
[    0.000000] tsc: Detected 2793.393 MHz processor
[    0.000004] Calibrating delay loop (skipped), value calculated using timer frequency.. 5586.78 BogoMIP
S (lpj=27933930)
[    0.000009] pid_max: default: 32768 minimum: 301
[    0.000016] ACPI: Core revision 20140214
[    0.023399] ACPI: All ACPI Tables successfully acquired

There's function print_time() which prints the time and the timestamp
apparently comes from local_clock().

-- 
Kalle Valo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-06 16:11         ` Ben Greear
@ 2014-06-07 12:55           ` Kalle Valo
  -1 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-07 12:55 UTC (permalink / raw)
  To: Ben Greear; +Cc: ath10k, linux-wireless

Ben Greear <greearb@candelatech.com> writes:

> On 06/05/2014 11:10 PM, Kalle Valo wrote:
>> Ben Greear <greearb@candelatech.com> writes:
>> 
>>> I did not do this because I figure we will want ethtool support w/out
>>> forcing debugfs to be enabled someday soon.
>> 
>> When we add ethtool support, it's easy to move these back. And then we
>> need to move the code out from debug.c anyway.
>
> Well, it seems like needless churn and makes it harder to follow
> 'git blame' when you move big chunks around.

I don't care about 'git blame', the actual code is far more important to
me. If git blame is so important, someone else can improve 'git blame'
to detect moving code chunks.

> We would not have to move the code out of debug.c, but if you do want
> it moved, lets pick that place now and just put it there to begin
> with.

Code in debug.c should have the data it owns in struct ath10k_debug.

>>> I'm a bit leery of adding spin-locks in the dump routine just for
>>> this, but I can add and use a new spin-lock if you prefer.
>> 
>> Why a new spinlock? I didn't review the locking requirements, but I
>> would first check ar->data_lock can be used.
>
> I think it has to be a spin-lock because the crash dump is gathered
> in the irq handler, so I can't use a mutex as far as I know...
>
> I'll work on adding such a lock today.

I asked why add a _new_ spinlock as ar->data_lock is already a spinlock.

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-07 12:55           ` Kalle Valo
  0 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-07 12:55 UTC (permalink / raw)
  To: Ben Greear; +Cc: linux-wireless, ath10k

Ben Greear <greearb@candelatech.com> writes:

> On 06/05/2014 11:10 PM, Kalle Valo wrote:
>> Ben Greear <greearb@candelatech.com> writes:
>> 
>>> I did not do this because I figure we will want ethtool support w/out
>>> forcing debugfs to be enabled someday soon.
>> 
>> When we add ethtool support, it's easy to move these back. And then we
>> need to move the code out from debug.c anyway.
>
> Well, it seems like needless churn and makes it harder to follow
> 'git blame' when you move big chunks around.

I don't care about 'git blame', the actual code is far more important to
me. If git blame is so important, someone else can improve 'git blame'
to detect moving code chunks.

> We would not have to move the code out of debug.c, but if you do want
> it moved, lets pick that place now and just put it there to begin
> with.

Code in debug.c should have the data it owns in struct ath10k_debug.

>>> I'm a bit leery of adding spin-locks in the dump routine just for
>>> this, but I can add and use a new spin-lock if you prefer.
>> 
>> Why a new spinlock? I didn't review the locking requirements, but I
>> would first check ar->data_lock can be used.
>
> I think it has to be a spin-lock because the crash dump is gathered
> in the irq handler, so I can't use a mutex as far as I know...
>
> I'll work on adding such a lock today.

I asked why add a _new_ spinlock as ar->data_lock is already a spinlock.

-- 
Kalle Valo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-06 17:06     ` Ben Greear
@ 2014-06-07 12:57       ` Kalle Valo
  -1 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-07 12:57 UTC (permalink / raw)
  To: Ben Greear; +Cc: ath10k, linux-wireless

Ben Greear <greearb@candelatech.com> writes:

> On 06/06/2014 02:33 AM, Kalle Valo wrote:
>
>>> +					kfree(buffer);
>>> +					goto save_regs_and_restart;
>>> +				}
>>> +
>>> +				ath10k_dbg_save_fw_dbg_buffer(ar, buffer,
>>> +							      dbuf.length);
>>> +				kfree(buffer);
>> 
>> Instead of doing atomic allocations multiple times in a loop, would it
>> be better to allocate just one buffer before the loop and free it
>> afterwards?
>
> There is no hard guarantee that the buffer lengths are the same,
> so I think it needs to remain as is.  Would rather not crap out
> because firmware suddenly got more clever...

This is related to my earlier comment about having a max len for the
buffers. So why not come up with a sane max length, allocate once a
temporary buffer of that length and use the same buffer in the loop?

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-07 12:57       ` Kalle Valo
  0 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-07 12:57 UTC (permalink / raw)
  To: Ben Greear; +Cc: linux-wireless, ath10k

Ben Greear <greearb@candelatech.com> writes:

> On 06/06/2014 02:33 AM, Kalle Valo wrote:
>
>>> +					kfree(buffer);
>>> +					goto save_regs_and_restart;
>>> +				}
>>> +
>>> +				ath10k_dbg_save_fw_dbg_buffer(ar, buffer,
>>> +							      dbuf.length);
>>> +				kfree(buffer);
>> 
>> Instead of doing atomic allocations multiple times in a loop, would it
>> be better to allocate just one buffer before the loop and free it
>> afterwards?
>
> There is no hard guarantee that the buffer lengths are the same,
> so I think it needs to remain as is.  Would rather not crap out
> because firmware suddenly got more clever...

This is related to my earlier comment about having a max len for the
buffers. So why not come up with a sane max length, allocate once a
temporary buffer of that length and use the same buffer in the loop?

-- 
Kalle Valo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-07 12:57       ` Kalle Valo
@ 2014-06-07 15:29         ` Ben Greear
  -1 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-07 15:29 UTC (permalink / raw)
  To: Kalle Valo; +Cc: ath10k, linux-wireless



On 06/07/2014 05:57 AM, Kalle Valo wrote:
> Ben Greear <greearb@candelatech.com> writes:
>
>> On 06/06/2014 02:33 AM, Kalle Valo wrote:
>>
>>>> +					kfree(buffer);
>>>> +					goto save_regs_and_restart;
>>>> +				}
>>>> +
>>>> +				ath10k_dbg_save_fw_dbg_buffer(ar, buffer,
>>>> +							      dbuf.length);
>>>> +				kfree(buffer);
>>>
>>> Instead of doing atomic allocations multiple times in a loop, would it
>>> be better to allocate just one buffer before the loop and free it
>>> afterwards?
>>
>> There is no hard guarantee that the buffer lengths are the same,
>> so I think it needs to remain as is.  Would rather not crap out
>> because firmware suddenly got more clever...
>
> This is related to my earlier comment about having a max len for the
> buffers. So why not come up with a sane max length, allocate once a
> temporary buffer of that length and use the same buffer in the loop?

I can fix it at a 4k chunk if you want.  Current firmware uses around 2k chunk
I believe, and only two buffers, so either way it's not a lot of work for CPU.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-07 15:29         ` Ben Greear
  0 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-07 15:29 UTC (permalink / raw)
  To: Kalle Valo; +Cc: linux-wireless, ath10k



On 06/07/2014 05:57 AM, Kalle Valo wrote:
> Ben Greear <greearb@candelatech.com> writes:
>
>> On 06/06/2014 02:33 AM, Kalle Valo wrote:
>>
>>>> +					kfree(buffer);
>>>> +					goto save_regs_and_restart;
>>>> +				}
>>>> +
>>>> +				ath10k_dbg_save_fw_dbg_buffer(ar, buffer,
>>>> +							      dbuf.length);
>>>> +				kfree(buffer);
>>>
>>> Instead of doing atomic allocations multiple times in a loop, would it
>>> be better to allocate just one buffer before the loop and free it
>>> afterwards?
>>
>> There is no hard guarantee that the buffer lengths are the same,
>> so I think it needs to remain as is.  Would rather not crap out
>> because firmware suddenly got more clever...
>
> This is related to my earlier comment about having a max len for the
> buffers. So why not come up with a sane max length, allocate once a
> temporary buffer of that length and use the same buffer in the loop?

I can fix it at a 4k chunk if you want.  Current firmware uses around 2k chunk
I believe, and only two buffers, so either way it's not a lot of work for CPU.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-07 12:55           ` Kalle Valo
@ 2014-06-07 15:32             ` Ben Greear
  -1 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-07 15:32 UTC (permalink / raw)
  To: Kalle Valo; +Cc: ath10k, linux-wireless



On 06/07/2014 05:55 AM, Kalle Valo wrote:
> Ben Greear <greearb@candelatech.com> writes:
>
>> On 06/05/2014 11:10 PM, Kalle Valo wrote:
>>> Ben Greear <greearb@candelatech.com> writes:
>>>
>>>> I did not do this because I figure we will want ethtool support w/out
>>>> forcing debugfs to be enabled someday soon.
>>>
>>> When we add ethtool support, it's easy to move these back. And then we
>>> need to move the code out from debug.c anyway.
>>
>> Well, it seems like needless churn and makes it harder to follow
>> 'git blame' when you move big chunks around.
>
> I don't care about 'git blame', the actual code is far more important to
> me. If git blame is so important, someone else can improve 'git blame'
> to detect moving code chunks.
>
>> We would not have to move the code out of debug.c, but if you do want
>> it moved, lets pick that place now and just put it there to begin
>> with.
>
> Code in debug.c should have the data it owns in struct ath10k_debug.

Ok, I'll move the data structs to the debug logic for next version of the patch.


>>>> I'm a bit leery of adding spin-locks in the dump routine just for
>>>> this, but I can add and use a new spin-lock if you prefer.
>>>
>>> Why a new spinlock? I didn't review the locking requirements, but I
>>> would first check ar->data_lock can be used.
>>
>> I think it has to be a spin-lock because the crash dump is gathered
>> in the irq handler, so I can't use a mutex as far as I know...
>>
>> I'll work on adding such a lock today.
>
> I asked why add a _new_ spinlock as ar->data_lock is already a spinlock.

Surely we do not want to impede traffic flow just to dump debug info?

And, it is easier to review a specific spinlock rather than use one big
global-ish lock and have to review every use of that lock for issues.

My -v2 had a new spinlock for this new debug data.  I'll move it into the
debug struct with the other new data fields for next version.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-07 15:32             ` Ben Greear
  0 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-07 15:32 UTC (permalink / raw)
  To: Kalle Valo; +Cc: linux-wireless, ath10k



On 06/07/2014 05:55 AM, Kalle Valo wrote:
> Ben Greear <greearb@candelatech.com> writes:
>
>> On 06/05/2014 11:10 PM, Kalle Valo wrote:
>>> Ben Greear <greearb@candelatech.com> writes:
>>>
>>>> I did not do this because I figure we will want ethtool support w/out
>>>> forcing debugfs to be enabled someday soon.
>>>
>>> When we add ethtool support, it's easy to move these back. And then we
>>> need to move the code out from debug.c anyway.
>>
>> Well, it seems like needless churn and makes it harder to follow
>> 'git blame' when you move big chunks around.
>
> I don't care about 'git blame', the actual code is far more important to
> me. If git blame is so important, someone else can improve 'git blame'
> to detect moving code chunks.
>
>> We would not have to move the code out of debug.c, but if you do want
>> it moved, lets pick that place now and just put it there to begin
>> with.
>
> Code in debug.c should have the data it owns in struct ath10k_debug.

Ok, I'll move the data structs to the debug logic for next version of the patch.


>>>> I'm a bit leery of adding spin-locks in the dump routine just for
>>>> this, but I can add and use a new spin-lock if you prefer.
>>>
>>> Why a new spinlock? I didn't review the locking requirements, but I
>>> would first check ar->data_lock can be used.
>>
>> I think it has to be a spin-lock because the crash dump is gathered
>> in the irq handler, so I can't use a mutex as far as I know...
>>
>> I'll work on adding such a lock today.
>
> I asked why add a _new_ spinlock as ar->data_lock is already a spinlock.

Surely we do not want to impede traffic flow just to dump debug info?

And, it is easier to review a specific spinlock rather than use one big
global-ish lock and have to review every use of that lock for issues.

My -v2 had a new spinlock for this new debug data.  I'll move it into the
debug struct with the other new data fields for next version.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-07 15:29         ` Ben Greear
@ 2014-06-08  8:12           ` Kalle Valo
  -1 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-08  8:12 UTC (permalink / raw)
  To: Ben Greear; +Cc: ath10k, linux-wireless

Ben Greear <greearb@candelatech.com> writes:

>>>> Instead of doing atomic allocations multiple times in a loop, would it
>>>> be better to allocate just one buffer before the loop and free it
>>>> afterwards?
>>>
>>> There is no hard guarantee that the buffer lengths are the same,
>>> so I think it needs to remain as is.  Would rather not crap out
>>> because firmware suddenly got more clever...
>>
>> This is related to my earlier comment about having a max len for the
>> buffers. So why not come up with a sane max length, allocate once a
>> temporary buffer of that length and use the same buffer in the loop?
>
> I can fix it at a 4k chunk if you want.  Current firmware uses around 2k chunk
> I believe, and only two buffers, so either way it's not a lot of work for CPU.

I think we should have a max chunk size in ath10k anyway and using one
buffer makes sense in that case.

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-08  8:12           ` Kalle Valo
  0 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-08  8:12 UTC (permalink / raw)
  To: Ben Greear; +Cc: linux-wireless, ath10k

Ben Greear <greearb@candelatech.com> writes:

>>>> Instead of doing atomic allocations multiple times in a loop, would it
>>>> be better to allocate just one buffer before the loop and free it
>>>> afterwards?
>>>
>>> There is no hard guarantee that the buffer lengths are the same,
>>> so I think it needs to remain as is.  Would rather not crap out
>>> because firmware suddenly got more clever...
>>
>> This is related to my earlier comment about having a max len for the
>> buffers. So why not come up with a sane max length, allocate once a
>> temporary buffer of that length and use the same buffer in the loop?
>
> I can fix it at a 4k chunk if you want.  Current firmware uses around 2k chunk
> I believe, and only two buffers, so either way it's not a lot of work for CPU.

I think we should have a max chunk size in ath10k anyway and using one
buffer makes sense in that case.

-- 
Kalle Valo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-07 15:32             ` Ben Greear
@ 2014-06-08  8:28               ` Kalle Valo
  -1 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-08  8:28 UTC (permalink / raw)
  To: Ben Greear; +Cc: ath10k, linux-wireless

Ben Greear <greearb@candelatech.com> writes:

> On 06/07/2014 05:55 AM, Kalle Valo wrote:
>> Ben Greear <greearb@candelatech.com> writes:
>>
>>>>> I'm a bit leery of adding spin-locks in the dump routine just for
>>>>> this, but I can add and use a new spin-lock if you prefer.
>>>>
>>>> Why a new spinlock? I didn't review the locking requirements, but I
>>>> would first check ar->data_lock can be used.
>>>
>>> I think it has to be a spin-lock because the crash dump is gathered
>>> in the irq handler, so I can't use a mutex as far as I know...
>>>
>>> I'll work on adding such a lock today.
>>
>> I asked why add a _new_ spinlock as ar->data_lock is already a spinlock.
>
> Surely we do not want to impede traffic flow just to dump debug info?

I don't see data_lock being used anywhere in hotpath, but of course I
might have missed something. Anyway, if for optimisation reasons we need
to introduce a new lock that should happen for a specific case in hot
path. Handling firmware debug log events is not in that category.

> And, it is easier to review a specific spinlock rather than use one big
> global-ish lock and have to review every use of that lock for issues.

Sure, it's easier for you to just add a new lock and then forget :) But
not for me who would have to maintain 20 different locks for years to
come. My view is that a new lock should be added on very exceptional
cases and with good justifications.

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-08  8:28               ` Kalle Valo
  0 siblings, 0 replies; 46+ messages in thread
From: Kalle Valo @ 2014-06-08  8:28 UTC (permalink / raw)
  To: Ben Greear; +Cc: linux-wireless, ath10k

Ben Greear <greearb@candelatech.com> writes:

> On 06/07/2014 05:55 AM, Kalle Valo wrote:
>> Ben Greear <greearb@candelatech.com> writes:
>>
>>>>> I'm a bit leery of adding spin-locks in the dump routine just for
>>>>> this, but I can add and use a new spin-lock if you prefer.
>>>>
>>>> Why a new spinlock? I didn't review the locking requirements, but I
>>>> would first check ar->data_lock can be used.
>>>
>>> I think it has to be a spin-lock because the crash dump is gathered
>>> in the irq handler, so I can't use a mutex as far as I know...
>>>
>>> I'll work on adding such a lock today.
>>
>> I asked why add a _new_ spinlock as ar->data_lock is already a spinlock.
>
> Surely we do not want to impede traffic flow just to dump debug info?

I don't see data_lock being used anywhere in hotpath, but of course I
might have missed something. Anyway, if for optimisation reasons we need
to introduce a new lock that should happen for a specific case in hot
path. Handling firmware debug log events is not in that category.

> And, it is easier to review a specific spinlock rather than use one big
> global-ish lock and have to review every use of that lock for issues.

Sure, it's easier for you to just add a new lock and then forget :) But
not for me who would have to maintain 20 different locks for years to
come. My view is that a new lock should be added on very exceptional
cases and with good justifications.

-- 
Kalle Valo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
  2014-06-08  8:28               ` Kalle Valo
@ 2014-06-08 15:40                 ` Ben Greear
  -1 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-08 15:40 UTC (permalink / raw)
  To: Kalle Valo; +Cc: linux-wireless, ath10k



On 06/08/2014 01:28 AM, Kalle Valo wrote:
> Ben Greear <greearb@candelatech.com> writes:
>
>> On 06/07/2014 05:55 AM, Kalle Valo wrote:
>>> Ben Greear <greearb@candelatech.com> writes:
>>>
>>>>>> I'm a bit leery of adding spin-locks in the dump routine just for
>>>>>> this, but I can add and use a new spin-lock if you prefer.
>>>>>
>>>>> Why a new spinlock? I didn't review the locking requirements, but I
>>>>> would first check ar->data_lock can be used.
>>>>
>>>> I think it has to be a spin-lock because the crash dump is gathered
>>>> in the irq handler, so I can't use a mutex as far as I know...
>>>>
>>>> I'll work on adding such a lock today.
>>>
>>> I asked why add a _new_ spinlock as ar->data_lock is already a spinlock.
>>
>> Surely we do not want to impede traffic flow just to dump debug info?
>
> I don't see data_lock being used anywhere in hotpath, but of course I
> might have missed something. Anyway, if for optimisation reasons we need
> to introduce a new lock that should happen for a specific case in hot
> path. Handling firmware debug log events is not in that category.
>
>> And, it is easier to review a specific spinlock rather than use one big
>> global-ish lock and have to review every use of that lock for issues.
>
> Sure, it's easier for you to just add a new lock and then forget :) But
> not for me who would have to maintain 20 different locks for years to
> come. My view is that a new lock should be added on very exceptional
> cases and with good justifications.

I have the opposite opinion on what makes it easy to maintain
locks, but I'll switch to the data-spinlock as you suggest.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/4] ath10k:  provide firmware crash info via debugfs.
@ 2014-06-08 15:40                 ` Ben Greear
  0 siblings, 0 replies; 46+ messages in thread
From: Ben Greear @ 2014-06-08 15:40 UTC (permalink / raw)
  To: Kalle Valo; +Cc: linux-wireless, ath10k



On 06/08/2014 01:28 AM, Kalle Valo wrote:
> Ben Greear <greearb@candelatech.com> writes:
>
>> On 06/07/2014 05:55 AM, Kalle Valo wrote:
>>> Ben Greear <greearb@candelatech.com> writes:
>>>
>>>>>> I'm a bit leery of adding spin-locks in the dump routine just for
>>>>>> this, but I can add and use a new spin-lock if you prefer.
>>>>>
>>>>> Why a new spinlock? I didn't review the locking requirements, but I
>>>>> would first check ar->data_lock can be used.
>>>>
>>>> I think it has to be a spin-lock because the crash dump is gathered
>>>> in the irq handler, so I can't use a mutex as far as I know...
>>>>
>>>> I'll work on adding such a lock today.
>>>
>>> I asked why add a _new_ spinlock as ar->data_lock is already a spinlock.
>>
>> Surely we do not want to impede traffic flow just to dump debug info?
>
> I don't see data_lock being used anywhere in hotpath, but of course I
> might have missed something. Anyway, if for optimisation reasons we need
> to introduce a new lock that should happen for a specific case in hot
> path. Handling firmware debug log events is not in that category.
>
>> And, it is easier to review a specific spinlock rather than use one big
>> global-ish lock and have to review every use of that lock for issues.
>
> Sure, it's easier for you to just add a new lock and then forget :) But
> not for me who would have to maintain 20 different locks for years to
> come. My view is that a new lock should be added on very exceptional
> cases and with good justifications.

I have the opposite opinion on what makes it easy to maintain
locks, but I'll switch to the data-spinlock as you suggest.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 46+ messages in thread

end of thread, other threads:[~2014-06-08 15:41 UTC | newest]

Thread overview: 46+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-04 18:01 [PATCH 1/4] ath10k: provide firmware crash info via debugfs greearb
2014-06-04 18:01 ` greearb
2014-06-04 18:01 ` [PATCH 2/4] ath10k: save firmware debug log messages greearb
2014-06-04 18:01   ` greearb
2014-06-04 18:01 ` [PATCH 3/4] ath10k: save firmware stack upon firmware crash greearb
2014-06-04 18:01   ` greearb
2014-06-04 18:01 ` [PATCH 4/4] ath10k: Dump exception stack contents on " greearb
2014-06-04 18:01   ` greearb
2014-06-05 16:18 ` [PATCH 1/4] ath10k: provide firmware crash info via debugfs Kalle Valo
2014-06-05 16:18   ` Kalle Valo
2014-06-05 18:25   ` Ben Greear
2014-06-05 18:25     ` Ben Greear
2014-06-06  6:10     ` Kalle Valo
2014-06-06  6:10       ` Kalle Valo
2014-06-06  6:30       ` Michal Kazior
2014-06-06  6:30         ` Michal Kazior
2014-06-06  8:55         ` Kalle Valo
2014-06-06  8:55           ` Kalle Valo
2014-06-06  9:45           ` Michal Kazior
2014-06-06  9:45             ` Michal Kazior
2014-06-06 16:11       ` Ben Greear
2014-06-06 16:11         ` Ben Greear
2014-06-07 12:55         ` Kalle Valo
2014-06-07 12:55           ` Kalle Valo
2014-06-07 15:32           ` Ben Greear
2014-06-07 15:32             ` Ben Greear
2014-06-08  8:28             ` Kalle Valo
2014-06-08  8:28               ` Kalle Valo
2014-06-08 15:40               ` Ben Greear
2014-06-08 15:40                 ` Ben Greear
2014-06-06  6:55 ` Kalle Valo
2014-06-06  6:55   ` Kalle Valo
2014-06-06 16:01   ` Ben Greear
2014-06-06 16:01     ` Ben Greear
2014-06-07 12:50     ` Kalle Valo
2014-06-07 12:50       ` Kalle Valo
2014-06-06  9:33 ` Kalle Valo
2014-06-06  9:33   ` Kalle Valo
2014-06-06 17:06   ` Ben Greear
2014-06-06 17:06     ` Ben Greear
2014-06-07 12:57     ` Kalle Valo
2014-06-07 12:57       ` Kalle Valo
2014-06-07 15:29       ` Ben Greear
2014-06-07 15:29         ` Ben Greear
2014-06-08  8:12         ` Kalle Valo
2014-06-08  8:12           ` Kalle Valo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.