Linux-EDAC Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/2] rasdaemon: add support for memory Corrected Error predictive failure analysis
@ 2020-05-29  8:24 lvying6
  2020-05-29  8:24 ` [PATCH 1/2] rasdaemon: add rbtree support for page record lvying6
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: lvying6 @ 2020-05-29  8:24 UTC (permalink / raw)
  To: mchehab, linux-edac; +Cc: guanyalong, wuyun.wu, tanxiaofei

rasdaemon: add support for memory Corrected Error predictive failure analysis

wuyun (2):
  rasdaemon: add rbtree support for page record
  rasdaemon: add support for memory Corrected Error predictive failure
    analysis

 Makefile.am               |   4 +-
 misc/rasdaemon.env        |  29 ++++
 misc/rasdaemon.service.in |   1 +
 ras-events.c              |   3 +
 ras-mc-handler.c          |   5 +
 ras-page-isolation.c      | 308 +++++++++++++++++++++++++++++++++++++
 ras-page-isolation.h      |  68 ++++++++
 rbtree.c                  | 384 ++++++++++++++++++++++++++++++++++++++++++++++
 rbtree.h                  | 165 ++++++++++++++++++++
 9 files changed, 965 insertions(+), 2 deletions(-)
 create mode 100644 misc/rasdaemon.env
 create mode 100644 ras-page-isolation.c
 create mode 100644 ras-page-isolation.h
 create mode 100644 rbtree.c
 create mode 100644 rbtree.h

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/2] rasdaemon: add rbtree support for page record
  2020-05-29  8:24 [PATCH 0/2] rasdaemon: add support for memory Corrected Error predictive failure analysis lvying6
@ 2020-05-29  8:24 ` lvying6
  2020-05-29 12:24   ` Mauro Carvalho Chehab
  2020-05-29  8:24 ` [PATCH 2/2] rasdaemon: add support for memory Corrected Error predictive failure analysis lvying6
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: lvying6 @ 2020-05-29  8:24 UTC (permalink / raw)
  To: mchehab, linux-edac; +Cc: guanyalong, wuyun.wu, tanxiaofei

From: wuyun <wuyun.wu@huawei.com>

The rbtree is very efficient for recording and querying fault page info.

Signed-off-by: lvying <lvying6@huawei.com>
---
 Makefile.am |   4 +-
 rbtree.c    | 384 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 rbtree.h    | 165 ++++++++++++++++++++++++++
 3 files changed, 551 insertions(+), 2 deletions(-)
 create mode 100644 rbtree.c
 create mode 100644 rbtree.h

diff --git a/Makefile.am b/Makefile.am
index 843b538..2ff742d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -17,7 +17,7 @@ all-local: $(SYSTEMD_SERVICES)
 
 sbin_PROGRAMS = rasdaemon
 rasdaemon_SOURCES = rasdaemon.c ras-events.c ras-mc-handler.c \
-		    bitfield.c
+		    bitfield.c rbtree.c
 if WITH_SQLITE3
    rasdaemon_SOURCES += ras-record.c
 endif
@@ -59,7 +59,7 @@ rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
 include_HEADERS = config.h  ras-events.h  ras-logger.h  ras-mc-handler.h \
 		  ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \
 		  ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \
-		  ras-devlink-handler.h ras-diskerror-handler.h
+		  ras-devlink-handler.h ras-diskerror-handler.h rbtree.h
 
 # This rule can't be called with more than one Makefile job (like make -j8)
 # I can't figure out a way to fix that
diff --git a/rbtree.c b/rbtree.c
new file mode 100644
index 0000000..d9b1bd4
--- /dev/null
+++ b/rbtree.c
@@ -0,0 +1,384 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  Taken from the Linux 2.6.30 source with some minor modificatons.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/lib/rbtree.c
+*/
+
+#include "rbtree.h"
+
+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *right = node->rb_right;
+	struct rb_node *parent = rb_parent(node);
+
+	if ((node->rb_right = right->rb_left))
+		rb_set_parent(right->rb_left, node);
+	right->rb_left = node;
+
+	rb_set_parent(right, parent);
+
+	if (parent)
+	{
+		if (node == parent->rb_left)
+			parent->rb_left = right;
+		else
+			parent->rb_right = right;
+	}
+	else
+		root->rb_node = right;
+	rb_set_parent(node, right);
+}
+
+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *left = node->rb_left;
+	struct rb_node *parent = rb_parent(node);
+
+	if ((node->rb_left = left->rb_right))
+		rb_set_parent(left->rb_right, node);
+	left->rb_right = node;
+
+	rb_set_parent(left, parent);
+
+	if (parent)
+	{
+		if (node == parent->rb_right)
+			parent->rb_right = left;
+		else
+			parent->rb_left = left;
+	}
+	else
+		root->rb_node = left;
+	rb_set_parent(node, left);
+}
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *parent, *gparent;
+
+	while ((parent = rb_parent(node)) && rb_is_red(parent))
+	{
+		gparent = rb_parent(parent);
+
+		if (parent == gparent->rb_left)
+		{
+			{
+				register struct rb_node *uncle = gparent->rb_right;
+				if (uncle && rb_is_red(uncle))
+				{
+					rb_set_black(uncle);
+					rb_set_black(parent);
+					rb_set_red(gparent);
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_right == node)
+			{
+				struct rb_node *tmp;
+				__rb_rotate_left(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			rb_set_black(parent);
+			rb_set_red(gparent);
+			__rb_rotate_right(gparent, root);
+		} else {
+			{
+				struct rb_node *uncle = gparent->rb_left;
+				if (uncle && rb_is_red(uncle))
+				{
+					rb_set_black(uncle);
+					rb_set_black(parent);
+					rb_set_red(gparent);
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_left == node)
+			{
+				struct rb_node *tmp;
+				__rb_rotate_right(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			rb_set_black(parent);
+			rb_set_red(gparent);
+			__rb_rotate_left(gparent, root);
+		}
+	}
+
+	rb_set_black(root->rb_node);
+}
+
+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
+			     struct rb_root *root)
+{
+	struct rb_node *other;
+
+	while ((!node || rb_is_black(node)) && node != root->rb_node)
+	{
+		if (parent->rb_left == node)
+		{
+			other = parent->rb_right;
+			if (rb_is_red(other))
+			{
+				rb_set_black(other);
+				rb_set_red(parent);
+				__rb_rotate_left(parent, root);
+				other = parent->rb_right;
+			}
+			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+			    (!other->rb_right || rb_is_black(other->rb_right)))
+			{
+				rb_set_red(other);
+				node = parent;
+				parent = rb_parent(node);
+			}
+			else
+			{
+				if (!other->rb_right || rb_is_black(other->rb_right))
+				{
+					rb_set_black(other->rb_left);
+					rb_set_red(other);
+					__rb_rotate_right(other, root);
+					other = parent->rb_right;
+				}
+				rb_set_color(other, rb_color(parent));
+				rb_set_black(parent);
+				rb_set_black(other->rb_right);
+				__rb_rotate_left(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+		else
+		{
+			other = parent->rb_left;
+			if (rb_is_red(other))
+			{
+				rb_set_black(other);
+				rb_set_red(parent);
+				__rb_rotate_right(parent, root);
+				other = parent->rb_left;
+			}
+			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+			    (!other->rb_right || rb_is_black(other->rb_right)))
+			{
+				rb_set_red(other);
+				node = parent;
+				parent = rb_parent(node);
+			}
+			else
+			{
+				if (!other->rb_left || rb_is_black(other->rb_left))
+				{
+					rb_set_black(other->rb_right);
+					rb_set_red(other);
+					__rb_rotate_left(other, root);
+					other = parent->rb_left;
+				}
+				rb_set_color(other, rb_color(parent));
+				rb_set_black(parent);
+				rb_set_black(other->rb_left);
+				__rb_rotate_right(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+	}
+	if (node)
+		rb_set_black(node);
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *child, *parent;
+	int color;
+
+	if (!node->rb_left)
+		child = node->rb_right;
+	else if (!node->rb_right)
+		child = node->rb_left;
+	else
+	{
+		struct rb_node *old = node, *left;
+
+		node = node->rb_right;
+		while ((left = node->rb_left) != NULL)
+			node = left;
+		child = node->rb_right;
+		parent = rb_parent(node);
+		color = rb_color(node);
+
+		if (child)
+			rb_set_parent(child, parent);
+		if (parent == old) {
+			parent->rb_right = child;
+			parent = node;
+		} else
+			parent->rb_left = child;
+
+		node->rb_parent_color = old->rb_parent_color;
+		node->rb_right = old->rb_right;
+		node->rb_left = old->rb_left;
+
+		if (rb_parent(old))
+		{
+			if (rb_parent(old)->rb_left == old)
+				rb_parent(old)->rb_left = node;
+			else
+				rb_parent(old)->rb_right = node;
+		} else
+			root->rb_node = node;
+
+		rb_set_parent(old->rb_left, node);
+		if (old->rb_right)
+			rb_set_parent(old->rb_right, node);
+		goto color;
+	}
+
+	parent = rb_parent(node);
+	color = rb_color(node);
+
+	if (child)
+		rb_set_parent(child, parent);
+	if (parent)
+	{
+		if (parent->rb_left == node)
+			parent->rb_left = child;
+		else
+			parent->rb_right = child;
+	}
+	else
+		root->rb_node = child;
+
+ color:
+	if (color == RB_BLACK)
+		__rb_erase_color(child, parent, root);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_left)
+		n = n->rb_left;
+	return n;
+}
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_right)
+		n = n->rb_right;
+	return n;
+}
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (rb_parent(node) == node)
+		return NULL;
+
+	/* If we have a right-hand child, go down and then left as far
+	   as we can. */
+	if (node->rb_right) {
+		node = node->rb_right;
+		while (node->rb_left)
+			node=node->rb_left;
+		return (struct rb_node *)node;
+	}
+
+	/* No right-hand children.  Everything down and left is
+	   smaller than us, so any 'next' node must be in the general
+	   direction of our parent. Go up the tree; any time the
+	   ancestor is a right-hand child of its parent, keep going
+	   up. First time it's a left-hand child of its parent, said
+	   parent is our 'next' node. */
+	while ((parent = rb_parent(node)) && node == parent->rb_right)
+		node = parent;
+
+	return parent;
+}
+
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (rb_parent(node) == node)
+		return NULL;
+
+	/* If we have a left-hand child, go down and then right as far
+	   as we can. */
+	if (node->rb_left) {
+		node = node->rb_left;
+		while (node->rb_right)
+			node=node->rb_right;
+		return (struct rb_node *)node;
+	}
+
+	/* No left-hand children. Go up till we find an ancestor which
+	   is a right-hand child of its parent */
+	while ((parent = rb_parent(node)) && node == parent->rb_left)
+		node = parent;
+
+	return parent;
+}
+
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+		     struct rb_root *root)
+{
+	struct rb_node *parent = rb_parent(victim);
+
+	/* Set the surrounding nodes to point to the replacement */
+	if (parent) {
+		if (victim == parent->rb_left)
+			parent->rb_left = new;
+		else
+			parent->rb_right = new;
+	} else {
+		root->rb_node = new;
+	}
+	if (victim->rb_left)
+		rb_set_parent(victim->rb_left, new);
+	if (victim->rb_right)
+		rb_set_parent(victim->rb_right, new);
+
+	/* Copy the pointers/colour from the victim to the replacement */
+	*new = *victim;
+}
diff --git a/rbtree.h b/rbtree.h
new file mode 100644
index 0000000..a8a0459
--- /dev/null
+++ b/rbtree.h
@@ -0,0 +1,165 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  Taken from the Linux 2.6.30 source.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/include/linux/rbtree.h
+
+  To use rbtrees you'll have to implement your own insert and search cores.
+  This will avoid us to use callbacks and to drop drammatically performances.
+  I know it's not the cleaner way,  but in C (not in C++) to get
+  performances and genericity...
+
+  Some example of insert and search follows here. The search is a plain
+  normal search over an ordered tree. The insert instead must be implemented
+  int two steps: as first thing the code must insert the element in
+  order as a red leaf in the tree, then the support library function
+  rb_insert_color() must be called. Such function will do the
+  not trivial work to rebalance the rbtree if necessary.
+
+-----------------------------------------------------------------------
+static inline struct page * rb_search_page_cache(struct inode * inode,
+						 unsigned long offset)
+{
+	struct rb_node * n = inode->i_rb_page_cache.rb_node;
+	struct page * page;
+
+	while (n)
+	{
+		page = rb_entry(n, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			n = n->rb_left;
+		else if (offset > page->offset)
+			n = n->rb_right;
+		else
+			return page;
+	}
+	return NULL;
+}
+
+static inline struct page * __rb_insert_page_cache(struct inode * inode,
+						   unsigned long offset,
+						   struct rb_node * node)
+{
+	struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
+	struct rb_node * parent = NULL;
+	struct page * page;
+
+	while (*p)
+	{
+		parent = *p;
+		page = rb_entry(parent, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			p = &(*p)->rb_left;
+		else if (offset > page->offset)
+			p = &(*p)->rb_right;
+		else
+			return page;
+	}
+
+	rb_link_node(node, parent, p);
+
+	return NULL;
+}
+
+static inline struct page * rb_insert_page_cache(struct inode * inode,
+						 unsigned long offset,
+						 struct rb_node * node)
+{
+	struct page * ret;
+	if ((ret = __rb_insert_page_cache(inode, offset, node)))
+		goto out;
+	rb_insert_color(node, &inode->i_rb_page_cache);
+ out:
+	return ret;
+}
+-----------------------------------------------------------------------
+*/
+
+#ifndef	_LINUX_RBTREE_H
+#define	_LINUX_RBTREE_H
+
+#include <stddef.h>
+
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+struct rb_node
+{
+	unsigned long  rb_parent_color;
+#define	RB_RED		0
+#define	RB_BLACK	1
+	struct rb_node *rb_right;
+	struct rb_node *rb_left;
+} __attribute__((aligned(sizeof(long))));
+    /* The alignment might seem pointless, but allegedly CRIS needs it */
+
+struct rb_root
+{
+	struct rb_node *rb_node;
+};
+
+
+#define rb_parent(r)   ((struct rb_node *)((r)->rb_parent_color & ~3))
+#define rb_color(r)   ((r)->rb_parent_color & 1)
+#define rb_is_red(r)   (!rb_color(r))
+#define rb_is_black(r) rb_color(r)
+#define rb_set_red(r)  do { (r)->rb_parent_color &= ~1; } while (0)
+#define rb_set_black(r)  do { (r)->rb_parent_color |= 1; } while (0)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+	rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
+}
+static inline void rb_set_color(struct rb_node *rb, int color)
+{
+	rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
+}
+
+#define RB_ROOT	(struct rb_root) { NULL, }
+#define	rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)
+#define RB_EMPTY_NODE(node)	(rb_parent(node) == node)
+#define RB_CLEAR_NODE(node)	(rb_set_parent(node, node))
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
+
+/* Fast replacement of a single node without remove/rebalance/add/rebalance */
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+			    struct rb_root *root);
+
+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
+				struct rb_node ** rb_link)
+{
+	node->rb_parent_color = (unsigned long )parent;
+	node->rb_left = node->rb_right = NULL;
+
+	*rb_link = node;
+}
+
+#endif	/* _LINUX_RBTREE_H */
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 2/2] rasdaemon: add support for memory Corrected Error predictive failure analysis
  2020-05-29  8:24 [PATCH 0/2] rasdaemon: add support for memory Corrected Error predictive failure analysis lvying6
  2020-05-29  8:24 ` [PATCH 1/2] rasdaemon: add rbtree support for page record lvying6
@ 2020-05-29  8:24 ` lvying6
  2020-05-29 14:31   ` Mauro Carvalho Chehab
  2020-06-20 12:26 ` [PATCH v2 rasdaemon 0/2] support memory CE " lvying6
  2020-07-21 12:26 ` [PATCH 0/2] " Mauro Carvalho Chehab
  3 siblings, 1 reply; 11+ messages in thread
From: lvying6 @ 2020-05-29  8:24 UTC (permalink / raw)
  To: mchehab, linux-edac; +Cc: guanyalong, wuyun.wu, tanxiaofei

From: wuyun <wuyun.wu@huawei.com>

Memory Corrected Error was corrected by hardware. These errors do not
require immediate software actions, but are still reported for
accounting and predictive failure analysis.

Based on statistical results, some actions can be taken to prevent
Corrected Error from evoluting to Uncorrected Error.

Signed-off-by: lvying <lvying6@huawei.com>
---
 Makefile.am               |   4 +-
 misc/rasdaemon.env        |  29 +++++
 misc/rasdaemon.service.in |   1 +
 ras-events.c              |   3 +
 ras-mc-handler.c          |   5 +
 ras-page-isolation.c      | 308 ++++++++++++++++++++++++++++++++++++++++++++++
 ras-page-isolation.h      |  68 ++++++++++
 7 files changed, 416 insertions(+), 2 deletions(-)
 create mode 100644 misc/rasdaemon.env
 create mode 100644 ras-page-isolation.c
 create mode 100644 ras-page-isolation.h

diff --git a/Makefile.am b/Makefile.am
index 2ff742d..6fc39f2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -17,7 +17,7 @@ all-local: $(SYSTEMD_SERVICES)
 
 sbin_PROGRAMS = rasdaemon
 rasdaemon_SOURCES = rasdaemon.c ras-events.c ras-mc-handler.c \
-		    bitfield.c rbtree.c
+		    bitfield.c rbtree.c ras-page-isolation.c
 if WITH_SQLITE3
    rasdaemon_SOURCES += ras-record.c
 endif
@@ -59,7 +59,7 @@ rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
 include_HEADERS = config.h  ras-events.h  ras-logger.h  ras-mc-handler.h \
 		  ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \
 		  ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \
-		  ras-devlink-handler.h ras-diskerror-handler.h rbtree.h
+		  ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h
 
 # This rule can't be called with more than one Makefile job (like make -j8)
 # I can't figure out a way to fix that
diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env
new file mode 100644
index 0000000..0c05af0
--- /dev/null
+++ b/misc/rasdaemon.env
@@ -0,0 +1,29 @@
+# Page Isolation
+# Note: Run-time configuration is unsupported, service restart needed.
+# Note: this file should be installed at /etc/sysconfig/rasdaemon
+
+# Specify the threshold of isolating buggy pages.
+#
+# Format:
+#   [0-9]+[unit]
+# WARNING: please make sure perfectly match this format.
+#
+# Supported units:
+# PAGE_CE_REFRESH_CYCLE: D|d (day), H|h (hour), M|m (min), default is in hour
+# PAGE_CE_THRESHOLD: K|k (x1000), M|m (x1000k), default is none
+#
+# The two configs will only take no effect when PAGE_CE_ACTION is "off".
+PAGE_CE_REFRESH_CYCLE="24h"
+PAGE_CE_THRESHOLD="50"
+
+# Specify the internal action in rasdaemon to exceeding a page error threshold.
+#
+# off      no action
+# account  only account errors
+# soft     try to soft-offline page without killing any processes
+#          This requires an uptodate kernel. Might not be successfull.
+# hard     try to hard-offline page by killing processes
+#          Requires an uptodate kernel. Might not be successfull.
+# soft-then-hard   First try to soft offline, then try hard offlining.
+# Note: default offline choice is "soft".
+PAGE_CE_ACTION="soft"
diff --git a/misc/rasdaemon.service.in b/misc/rasdaemon.service.in
index be9ad5a..e73a08a 100644
--- a/misc/rasdaemon.service.in
+++ b/misc/rasdaemon.service.in
@@ -3,6 +3,7 @@ Description=RAS daemon to log the RAS events
 After=syslog.target
 
 [Service]
+EnvironmentFile=/etc/sysconfig/rasdaemon
 ExecStart=@sbindir@/rasdaemon -f -r
 ExecStartPost=@sbindir@/rasdaemon --enable
 ExecStop=@sbindir@/rasdaemon --disable
diff --git a/ras-events.c b/ras-events.c
index 511c93d..c034f6c 100644
--- a/ras-events.c
+++ b/ras-events.c
@@ -798,6 +798,9 @@ int handle_ras_events(int record_events)
 	ras->page_size = page_size;
 	ras->record_events = record_events;
 
+	/* FIXME: enable memory isolation unconditionally */
+	ras_page_account_init();
+
 	rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event",
 			       ras_mc_event_handler, NULL, MC_EVENT);
 	if (!rc)
diff --git a/ras-mc-handler.c b/ras-mc-handler.c
index deb7e05..bfbe1ef 100644
--- a/ras-mc-handler.c
+++ b/ras-mc-handler.c
@@ -23,6 +23,7 @@
 #include "ras-mc-handler.h"
 #include "ras-record.h"
 #include "ras-logger.h"
+#include "ras-page-isolation.h"
 #include "ras-report.h"
 
 int ras_mc_event_handler(struct trace_seq *s,
@@ -183,6 +184,10 @@ int ras_mc_event_handler(struct trace_seq *s,
 
 	ras_store_mc_event(ras, &ev);
 
+	/* Account page corrected errors */
+	if (!strcmp(ev.error_type, "Corrected"))
+		ras_record_page_error(ev.address, ev.error_count, now);
+
 #ifdef HAVE_ABRT_REPORT
 	/* Report event to ABRT */
 	ras_report_mc_event(ras, &ev);
diff --git a/ras-page-isolation.c b/ras-page-isolation.c
new file mode 100644
index 0000000..1bd04e4
--- /dev/null
+++ b/ras-page-isolation.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2015 Yun Wu (Abel)  <wuyun.wu@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "ras-logger.h"
+#include "ras-page-isolation.h"
+
+static const struct config threshold_units[] = {
+	{ "m",	1000 },
+	{ "k",	1000 },
+	{ "",	1    },
+	{}
+};
+
+static const struct config cycle_units[] = {
+	{ "d",	24 },
+	{ "h",	60 },
+	{ "m",	60 },
+	{}
+};
+
+static struct isolation threshold = {
+	.name = "PAGE_CE_THRESHOLD",
+	.units = threshold_units,
+	.env = "50",
+	.unit = "",
+};
+
+static struct isolation cycle = {
+	.name = "PAGE_CE_REFRESH_CYCLE",
+	.units = cycle_units,
+	.env = "24h",
+	.unit = "h",
+};
+
+static const char *kernel_offline[] = {
+	[OFFLINE_SOFT]		 = "/sys/devices/system/memory/soft_offline_page",
+	[OFFLINE_HARD]		 = "/sys/devices/system/memory/hard_offline_page",
+	[OFFLINE_SOFT_THEN_HARD] = "/sys/devices/system/memory/soft_offline_page",
+};
+
+static const struct config offline_choice[] = {
+	{ "off",		OFFLINE_OFF },
+	{ "account",		OFFLINE_ACCOUNT },
+	{ "soft",		OFFLINE_SOFT },
+	{ "hard",		OFFLINE_HARD },
+	{ "soft-then-hard",	OFFLINE_SOFT_THEN_HARD },
+	{}
+};
+
+static const char *page_state[] = {
+	[PAGE_ONLINE]		= "online",
+	[PAGE_OFFLINE]		= "offlined",
+	[PAGE_OFFLINE_FAILED]	= "offline-failed",
+};
+
+static enum otype offline = OFFLINE_SOFT;
+static struct rb_root page_records;
+
+static void page_offline_init(void)
+{
+	const char *env = "PAGE_CE_ACTION";
+	char *choice = getenv(env);
+	const struct config *c = NULL;
+	int matched = 0;
+
+	if (choice) {
+		for (c = offline_choice; c->name; c++) {
+			if (!strcasecmp(choice, c->name)) {
+				offline = c->val;
+				matched = 1;
+				break;
+			}
+		}
+	}
+
+	if (!matched)
+		log(TERM, LOG_INFO, "Improper %s, set to default soft\n", env);
+
+	if (offline > OFFLINE_ACCOUNT && access(kernel_offline[offline], W_OK)) {
+		log(TERM, LOG_INFO, "Kernel does not support page offline interface\n");
+		offline = OFFLINE_ACCOUNT;
+	}
+
+	log(TERM, LOG_INFO, "Page offline choice on Corrected Errors is %s\n",
+	    offline_choice[offline].name);
+}
+
+static void parse_isolation_env(struct isolation *config)
+{
+	char *env = getenv(config->name), *unit = NULL;
+	const struct config *units = NULL;
+	unsigned long value;
+	int no_unit, unit_matched;
+	int last, i;
+
+reparse:
+	/* Start a new round */
+	no_unit = unit_matched = 0;
+
+	/* Environments could be un-configured */
+	if (!env || !strlen(env))
+		goto use_default;
+
+	/* Index of the last char of environment */
+	last = strlen(env) - 1;
+	unit = env + last;
+	if (isdigit(*unit)) {
+		unit = config->unit;
+		no_unit = 1;
+	}
+
+	/* Only decimal digit can be accepted */
+	for (i = 0; i < last; i++) {
+		if (!isdigit(env[i]))
+			goto use_default;
+	}
+
+	/* Check if value is valid or not */
+	if (sscanf(env, "%lu", &value) < 1 || !value)
+		goto use_default;
+
+	for (units = config->units; units->name; units++) {
+		if (!strcasecmp(unit, units->name))
+			unit_matched = 1;
+		if (unit_matched)
+			value *= units->val;
+	}
+
+	/* Improper unit */
+	if (!unit_matched)
+		goto use_default;
+
+	config->env = env;
+	config->val = value;
+	config->unit = no_unit ? unit : "";
+	return;
+
+use_default:
+	log(TERM, LOG_INFO, "Improper %s, set to default %s.\n",
+	    config->name, config->env);
+
+	env = config->env;
+	goto reparse;
+}
+
+static void page_isolation_init(void)
+{
+	/**
+	 * It's unnecessary to parse threshold configuration when offline
+	 * choice is off.
+	 */
+	if (offline == OFFLINE_OFF)
+		return;
+
+	parse_isolation_env(&threshold);
+	parse_isolation_env(&cycle);
+	log(TERM, LOG_INFO, "Threshold of memory Corrected Errors is %s%s / %s%s\n",
+	    threshold.env, threshold.unit, cycle.env, cycle.unit);
+}
+
+void ras_page_account_init(void)
+{
+	page_offline_init();
+	page_isolation_init();
+}
+
+static int do_page_offline(unsigned long long addr, enum otype type)
+{
+	FILE *offline_file;
+	int err;
+
+	offline_file = fopen(kernel_offline[type], "w");
+	if (!offline_file)
+		return -1;
+
+	fprintf(offline_file, "%#llx", addr);
+	err = ferror(offline_file) ? -1 : 0;
+	fclose(offline_file);
+
+	return err;
+}
+
+static void page_offline(struct page_record *pr)
+{
+	unsigned long long addr = pr->addr;
+	int ret;
+
+	/* Offlining page is not required */
+	if (offline <= OFFLINE_ACCOUNT)
+		return;
+
+	/* Ignore offlined pages */
+	if (pr->offlined != PAGE_ONLINE)
+		return;
+
+	/* Time to silence this noisy page */
+	if (offline == OFFLINE_SOFT_THEN_HARD) {
+		ret = do_page_offline(addr, OFFLINE_SOFT);
+		if (ret < 0)
+			ret = do_page_offline(addr, OFFLINE_HARD);
+	} else {
+		ret = do_page_offline(addr, offline);
+	}
+
+	pr->offlined = ret < 0 ? PAGE_OFFLINE_FAILED : PAGE_OFFLINE;
+
+	log(TERM, LOG_INFO, "Result of offlining page at %#llx: %s\n",
+	    addr, page_state[pr->offlined]);
+}
+
+static void page_record(struct page_record *pr, unsigned count, time_t time)
+{
+	unsigned long period = time - pr->start;
+	unsigned long tolerate;
+
+	if (period >= cycle.val) {
+		/**
+		 * Since we don't refresh automatically, it is possible that the period
+		 * between two occurences longer than the pre-configured refresh cycle.
+		 * In this case, we tolerate the frequency of the whole period up to
+		 * the pre-configured threshold.
+		 */
+		tolerate = (period / (double)cycle.val) * threshold.val;
+		pr->count -= (tolerate > pr->count) ? pr->count : tolerate;
+		pr->start = time;
+		pr->excess = 0;
+	}
+
+	pr->count += count;
+	if (pr->count >= threshold.val) {
+		log(TERM, LOG_INFO, "Corrected Errors at %#llx exceed threshold\n", pr->addr);
+
+		/**
+		 * Backup ce count of current cycle to enable next round, which actually
+		 * should never happen if we can disable overflow completely in the same
+		 * time unit (but sadly we can't).
+		 */
+		pr->excess += pr->count;
+		pr->count = 0;
+		page_offline(pr);
+	}
+}
+
+static struct page_record *page_lookup_insert(unsigned long long addr)
+{
+	struct rb_node **entry = &page_records.rb_node;
+	struct rb_node *parent = NULL;
+	struct page_record *pr = NULL, *find = NULL;
+
+	while (*entry) {
+		parent = *entry;
+		pr = rb_entry(parent, struct page_record, entry);
+		if (addr == pr->addr) {
+			return pr;
+		} else if (addr < pr->addr) {
+			entry = &(*entry)->rb_left;
+		} else {
+			entry = &(*entry)->rb_right;
+		}
+	}
+
+	find = calloc(1, sizeof(struct page_record));
+	if (!find) {
+		log(TERM, LOG_ERR, "No memory for page records\n");
+		return NULL;
+	}
+
+	find->addr = addr;
+	rb_link_node(&find->entry, parent, entry);
+	rb_insert_color(&find->entry, &page_records);
+
+	return find;
+}
+
+void ras_record_page_error(unsigned long long addr, unsigned count, time_t time)
+{
+	struct page_record *pr = NULL;
+
+	if (offline == OFFLINE_OFF)
+		return;
+
+	pr = page_lookup_insert(addr & PAGE_MASK);
+	if (pr) {
+		if (!pr->start)
+			pr->start = time;
+		page_record(pr, count, time);
+	}
+}
diff --git a/ras-page-isolation.h b/ras-page-isolation.h
new file mode 100644
index 0000000..6aefa1e
--- /dev/null
+++ b/ras-page-isolation.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2015 Yun Wu (Abel)  <wuyun.wu@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef __RAS_PAGE_ISOLATION_H
+#define __RAS_PAGE_ISOLATION_H
+
+#include <time.h>
+#include "rbtree.h"
+
+#define PAGE_SHIFT		12
+#define PAGE_SIZE		(1 << PAGE_SHIFT)
+#define PAGE_MASK		(~(PAGE_SIZE-1))
+
+struct config {
+	char			*name;
+	int			val;
+};
+
+enum otype {
+	OFFLINE_OFF,
+	OFFLINE_ACCOUNT,
+	OFFLINE_SOFT,
+	OFFLINE_HARD,
+	OFFLINE_SOFT_THEN_HARD,
+};
+
+enum pstate {
+	PAGE_ONLINE,
+	PAGE_OFFLINE,
+	PAGE_OFFLINE_FAILED,
+};
+
+struct page_record {
+	struct rb_node		entry;
+	unsigned long long	addr;
+	time_t			start;
+	enum pstate		offlined;
+	unsigned long		count;
+	unsigned long		excess;
+};
+
+struct isolation {
+	char			*name;
+	char			*env;
+	const struct config	*units;
+	unsigned long		val;
+	char			*unit;
+};
+
+void ras_page_account_init(void);
+void ras_record_page_error(unsigned long long addr, unsigned count, time_t time);
+
+#endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] rasdaemon: add rbtree support for page record
  2020-05-29  8:24 ` [PATCH 1/2] rasdaemon: add rbtree support for page record lvying6
@ 2020-05-29 12:24   ` Mauro Carvalho Chehab
  2020-06-20 11:57     ` lvying
  0 siblings, 1 reply; 11+ messages in thread
From: Mauro Carvalho Chehab @ 2020-05-29 12:24 UTC (permalink / raw)
  To: lvying6; +Cc: linux-edac, guanyalong, wuyun.wu, tanxiaofei

Em Fri, 29 May 2020 16:24:22 +0800
lvying6 <lvying6@huawei.com> escreveu:

> From: wuyun <wuyun.wu@huawei.com>
> 
> The rbtree is very efficient for recording and querying fault page info.
> 
> Signed-off-by: lvying <lvying6@huawei.com>
> ---
>  Makefile.am |   4 +-
>  rbtree.c    | 384 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  rbtree.h    | 165 ++++++++++++++++++++++++++
>  3 files changed, 551 insertions(+), 2 deletions(-)
>  create mode 100644 rbtree.c
>  create mode 100644 rbtree.h
> 
> diff --git a/Makefile.am b/Makefile.am
> index 843b538..2ff742d 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -17,7 +17,7 @@ all-local: $(SYSTEMD_SERVICES)
>  
>  sbin_PROGRAMS = rasdaemon
>  rasdaemon_SOURCES = rasdaemon.c ras-events.c ras-mc-handler.c \
> -		    bitfield.c
> +		    bitfield.c rbtree.c

I would move the change at Makefile.am to the next patch.

As I'll comment there, I'd like to have a separate configure
option for each feature provided by the rasdaemon.

So, I would like to see something like:

	if WITH_PG_RECORD
	   rasdaemon_SOURCES += rbtree.c ras-page-isolation.c
	endif

at the Makefile.am (after applying both patches)

>  if WITH_SQLITE3
>     rasdaemon_SOURCES += ras-record.c
>  endif
> @@ -59,7 +59,7 @@ rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
>  include_HEADERS = config.h  ras-events.h  ras-logger.h  ras-mc-handler.h \
>  		  ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \
>  		  ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \
> -		  ras-devlink-handler.h ras-diskerror-handler.h
> +		  ras-devlink-handler.h ras-diskerror-handler.h rbtree.h
>  
>  # This rule can't be called with more than one Makefile job (like make -j8)
>  # I can't figure out a way to fix that
> diff --git a/rbtree.c b/rbtree.c
> new file mode 100644
> index 0000000..d9b1bd4
> --- /dev/null
> +++ b/rbtree.c
> @@ -0,0 +1,384 @@
> +/*
> +  Red Black Trees
> +  (C) 1999  Andrea Arcangeli <andrea@suse.de>
> +  (C) 2002  David Woodhouse <dwmw2@infradead.org>
> +  Taken from the Linux 2.6.30 source with some minor modificatons.
> +
> +  This program is free software; you can redistribute it and/or modify
> +  it under the terms of the GNU General Public License as published by
> +  the Free Software Foundation; either version 2 of the License, or
> +  (at your option) any later version.
> +
> +  This program is distributed in the hope that it will be useful,
> +  but WITHOUT ANY WARRANTY; without even the implied warranty of
> +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +  GNU General Public License for more details.
> +
> +  You should have received a copy of the GNU General Public License
> +  along with this program; if not, write to the Free Software
> +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> +
> +  linux/lib/rbtree.c
> +*/
> +
> +#include "rbtree.h"
> +
> +static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
> +{
> +	struct rb_node *right = node->rb_right;
> +	struct rb_node *parent = rb_parent(node);
> +
> +	if ((node->rb_right = right->rb_left))
> +		rb_set_parent(right->rb_left, node);
> +	right->rb_left = node;
> +
> +	rb_set_parent(right, parent);
> +
> +	if (parent)
> +	{
> +		if (node == parent->rb_left)
> +			parent->rb_left = right;
> +		else
> +			parent->rb_right = right;
> +	}
> +	else
> +		root->rb_node = right;
> +	rb_set_parent(node, right);
> +}
> +
> +static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
> +{
> +	struct rb_node *left = node->rb_left;
> +	struct rb_node *parent = rb_parent(node);
> +
> +	if ((node->rb_left = left->rb_right))
> +		rb_set_parent(left->rb_right, node);
> +	left->rb_right = node;
> +
> +	rb_set_parent(left, parent);
> +
> +	if (parent)
> +	{
> +		if (node == parent->rb_right)
> +			parent->rb_right = left;
> +		else
> +			parent->rb_left = left;
> +	}
> +	else
> +		root->rb_node = left;
> +	rb_set_parent(node, left);
> +}
> +
> +void rb_insert_color(struct rb_node *node, struct rb_root *root)
> +{
> +	struct rb_node *parent, *gparent;
> +
> +	while ((parent = rb_parent(node)) && rb_is_red(parent))
> +	{
> +		gparent = rb_parent(parent);
> +
> +		if (parent == gparent->rb_left)
> +		{
> +			{
> +				register struct rb_node *uncle = gparent->rb_right;
> +				if (uncle && rb_is_red(uncle))
> +				{
> +					rb_set_black(uncle);
> +					rb_set_black(parent);
> +					rb_set_red(gparent);
> +					node = gparent;
> +					continue;
> +				}
> +			}
> +
> +			if (parent->rb_right == node)
> +			{
> +				struct rb_node *tmp;
> +				__rb_rotate_left(parent, root);
> +				tmp = parent;
> +				parent = node;
> +				node = tmp;
> +			}
> +
> +			rb_set_black(parent);
> +			rb_set_red(gparent);
> +			__rb_rotate_right(gparent, root);
> +		} else {
> +			{
> +				struct rb_node *uncle = gparent->rb_left;
> +				if (uncle && rb_is_red(uncle))
> +				{
> +					rb_set_black(uncle);
> +					rb_set_black(parent);
> +					rb_set_red(gparent);
> +					node = gparent;
> +					continue;
> +				}
> +			}
> +
> +			if (parent->rb_left == node)
> +			{
> +				struct rb_node *tmp;
> +				__rb_rotate_right(parent, root);
> +				tmp = parent;
> +				parent = node;
> +				node = tmp;
> +			}
> +
> +			rb_set_black(parent);
> +			rb_set_red(gparent);
> +			__rb_rotate_left(gparent, root);
> +		}
> +	}
> +
> +	rb_set_black(root->rb_node);
> +}
> +
> +static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
> +			     struct rb_root *root)
> +{
> +	struct rb_node *other;
> +
> +	while ((!node || rb_is_black(node)) && node != root->rb_node)
> +	{
> +		if (parent->rb_left == node)
> +		{
> +			other = parent->rb_right;
> +			if (rb_is_red(other))
> +			{
> +				rb_set_black(other);
> +				rb_set_red(parent);
> +				__rb_rotate_left(parent, root);
> +				other = parent->rb_right;
> +			}
> +			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
> +			    (!other->rb_right || rb_is_black(other->rb_right)))
> +			{
> +				rb_set_red(other);
> +				node = parent;
> +				parent = rb_parent(node);
> +			}
> +			else
> +			{
> +				if (!other->rb_right || rb_is_black(other->rb_right))
> +				{
> +					rb_set_black(other->rb_left);
> +					rb_set_red(other);
> +					__rb_rotate_right(other, root);
> +					other = parent->rb_right;
> +				}
> +				rb_set_color(other, rb_color(parent));
> +				rb_set_black(parent);
> +				rb_set_black(other->rb_right);
> +				__rb_rotate_left(parent, root);
> +				node = root->rb_node;
> +				break;
> +			}
> +		}
> +		else
> +		{
> +			other = parent->rb_left;
> +			if (rb_is_red(other))
> +			{
> +				rb_set_black(other);
> +				rb_set_red(parent);
> +				__rb_rotate_right(parent, root);
> +				other = parent->rb_left;
> +			}
> +			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
> +			    (!other->rb_right || rb_is_black(other->rb_right)))
> +			{
> +				rb_set_red(other);
> +				node = parent;
> +				parent = rb_parent(node);
> +			}
> +			else
> +			{
> +				if (!other->rb_left || rb_is_black(other->rb_left))
> +				{
> +					rb_set_black(other->rb_right);
> +					rb_set_red(other);
> +					__rb_rotate_left(other, root);
> +					other = parent->rb_left;
> +				}
> +				rb_set_color(other, rb_color(parent));
> +				rb_set_black(parent);
> +				rb_set_black(other->rb_left);
> +				__rb_rotate_right(parent, root);
> +				node = root->rb_node;
> +				break;
> +			}
> +		}
> +	}
> +	if (node)
> +		rb_set_black(node);
> +}
> +
> +void rb_erase(struct rb_node *node, struct rb_root *root)
> +{
> +	struct rb_node *child, *parent;
> +	int color;
> +
> +	if (!node->rb_left)
> +		child = node->rb_right;
> +	else if (!node->rb_right)
> +		child = node->rb_left;
> +	else
> +	{
> +		struct rb_node *old = node, *left;
> +
> +		node = node->rb_right;
> +		while ((left = node->rb_left) != NULL)
> +			node = left;
> +		child = node->rb_right;
> +		parent = rb_parent(node);
> +		color = rb_color(node);
> +
> +		if (child)
> +			rb_set_parent(child, parent);
> +		if (parent == old) {
> +			parent->rb_right = child;
> +			parent = node;
> +		} else
> +			parent->rb_left = child;
> +
> +		node->rb_parent_color = old->rb_parent_color;
> +		node->rb_right = old->rb_right;
> +		node->rb_left = old->rb_left;
> +
> +		if (rb_parent(old))
> +		{
> +			if (rb_parent(old)->rb_left == old)
> +				rb_parent(old)->rb_left = node;
> +			else
> +				rb_parent(old)->rb_right = node;
> +		} else
> +			root->rb_node = node;
> +
> +		rb_set_parent(old->rb_left, node);
> +		if (old->rb_right)
> +			rb_set_parent(old->rb_right, node);
> +		goto color;
> +	}
> +
> +	parent = rb_parent(node);
> +	color = rb_color(node);
> +
> +	if (child)
> +		rb_set_parent(child, parent);
> +	if (parent)
> +	{
> +		if (parent->rb_left == node)
> +			parent->rb_left = child;
> +		else
> +			parent->rb_right = child;
> +	}
> +	else
> +		root->rb_node = child;
> +
> + color:
> +	if (color == RB_BLACK)
> +		__rb_erase_color(child, parent, root);
> +}
> +
> +/*
> + * This function returns the first node (in sort order) of the tree.
> + */
> +struct rb_node *rb_first(const struct rb_root *root)
> +{
> +	struct rb_node	*n;
> +
> +	n = root->rb_node;
> +	if (!n)
> +		return NULL;
> +	while (n->rb_left)
> +		n = n->rb_left;
> +	return n;
> +}
> +
> +struct rb_node *rb_last(const struct rb_root *root)
> +{
> +	struct rb_node	*n;
> +
> +	n = root->rb_node;
> +	if (!n)
> +		return NULL;
> +	while (n->rb_right)
> +		n = n->rb_right;
> +	return n;
> +}
> +
> +struct rb_node *rb_next(const struct rb_node *node)
> +{
> +	struct rb_node *parent;
> +
> +	if (rb_parent(node) == node)
> +		return NULL;
> +
> +	/* If we have a right-hand child, go down and then left as far
> +	   as we can. */
> +	if (node->rb_right) {
> +		node = node->rb_right;
> +		while (node->rb_left)
> +			node=node->rb_left;
> +		return (struct rb_node *)node;
> +	}
> +
> +	/* No right-hand children.  Everything down and left is
> +	   smaller than us, so any 'next' node must be in the general
> +	   direction of our parent. Go up the tree; any time the
> +	   ancestor is a right-hand child of its parent, keep going
> +	   up. First time it's a left-hand child of its parent, said
> +	   parent is our 'next' node. */
> +	while ((parent = rb_parent(node)) && node == parent->rb_right)
> +		node = parent;
> +
> +	return parent;
> +}
> +
> +struct rb_node *rb_prev(const struct rb_node *node)
> +{
> +	struct rb_node *parent;
> +
> +	if (rb_parent(node) == node)
> +		return NULL;
> +
> +	/* If we have a left-hand child, go down and then right as far
> +	   as we can. */
> +	if (node->rb_left) {
> +		node = node->rb_left;
> +		while (node->rb_right)
> +			node=node->rb_right;
> +		return (struct rb_node *)node;
> +	}
> +
> +	/* No left-hand children. Go up till we find an ancestor which
> +	   is a right-hand child of its parent */
> +	while ((parent = rb_parent(node)) && node == parent->rb_left)
> +		node = parent;
> +
> +	return parent;
> +}
> +
> +void rb_replace_node(struct rb_node *victim, struct rb_node *new,
> +		     struct rb_root *root)
> +{
> +	struct rb_node *parent = rb_parent(victim);
> +
> +	/* Set the surrounding nodes to point to the replacement */
> +	if (parent) {
> +		if (victim == parent->rb_left)
> +			parent->rb_left = new;
> +		else
> +			parent->rb_right = new;
> +	} else {
> +		root->rb_node = new;
> +	}
> +	if (victim->rb_left)
> +		rb_set_parent(victim->rb_left, new);
> +	if (victim->rb_right)
> +		rb_set_parent(victim->rb_right, new);
> +
> +	/* Copy the pointers/colour from the victim to the replacement */
> +	*new = *victim;
> +}
> diff --git a/rbtree.h b/rbtree.h
> new file mode 100644
> index 0000000..a8a0459
> --- /dev/null
> +++ b/rbtree.h
> @@ -0,0 +1,165 @@
> +/*
> +  Red Black Trees
> +  (C) 1999  Andrea Arcangeli <andrea@suse.de>
> +  Taken from the Linux 2.6.30 source.
> +
> +  This program is free software; you can redistribute it and/or modify
> +  it under the terms of the GNU General Public License as published by
> +  the Free Software Foundation; either version 2 of the License, or
> +  (at your option) any later version.
> +
> +  This program is distributed in the hope that it will be useful,
> +  but WITHOUT ANY WARRANTY; without even the implied warranty of
> +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +  GNU General Public License for more details.
> +
> +  You should have received a copy of the GNU General Public License
> +  along with this program; if not, write to the Free Software
> +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> +
> +  linux/include/linux/rbtree.h
> +
> +  To use rbtrees you'll have to implement your own insert and search cores.
> +  This will avoid us to use callbacks and to drop drammatically performances.
> +  I know it's not the cleaner way,  but in C (not in C++) to get
> +  performances and genericity...
> +
> +  Some example of insert and search follows here. The search is a plain
> +  normal search over an ordered tree. The insert instead must be implemented
> +  int two steps: as first thing the code must insert the element in
> +  order as a red leaf in the tree, then the support library function
> +  rb_insert_color() must be called. Such function will do the
> +  not trivial work to rebalance the rbtree if necessary.
> +
> +-----------------------------------------------------------------------
> +static inline struct page * rb_search_page_cache(struct inode * inode,
> +						 unsigned long offset)
> +{
> +	struct rb_node * n = inode->i_rb_page_cache.rb_node;
> +	struct page * page;
> +
> +	while (n)
> +	{
> +		page = rb_entry(n, struct page, rb_page_cache);
> +
> +		if (offset < page->offset)
> +			n = n->rb_left;
> +		else if (offset > page->offset)
> +			n = n->rb_right;
> +		else
> +			return page;
> +	}
> +	return NULL;
> +}
> +
> +static inline struct page * __rb_insert_page_cache(struct inode * inode,
> +						   unsigned long offset,
> +						   struct rb_node * node)
> +{
> +	struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
> +	struct rb_node * parent = NULL;
> +	struct page * page;
> +
> +	while (*p)
> +	{
> +		parent = *p;
> +		page = rb_entry(parent, struct page, rb_page_cache);
> +
> +		if (offset < page->offset)
> +			p = &(*p)->rb_left;
> +		else if (offset > page->offset)
> +			p = &(*p)->rb_right;
> +		else
> +			return page;
> +	}
> +
> +	rb_link_node(node, parent, p);
> +
> +	return NULL;
> +}
> +
> +static inline struct page * rb_insert_page_cache(struct inode * inode,
> +						 unsigned long offset,
> +						 struct rb_node * node)
> +{
> +	struct page * ret;
> +	if ((ret = __rb_insert_page_cache(inode, offset, node)))
> +		goto out;
> +	rb_insert_color(node, &inode->i_rb_page_cache);
> + out:
> +	return ret;
> +}
> +-----------------------------------------------------------------------
> +*/
> +
> +#ifndef	_LINUX_RBTREE_H
> +#define	_LINUX_RBTREE_H
> +
> +#include <stddef.h>
> +
> +#define container_of(ptr, type, member) ({			\
> +	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
> +	(type *)( (char *)__mptr - offsetof(type,member) );})
> +
> +struct rb_node
> +{
> +	unsigned long  rb_parent_color;
> +#define	RB_RED		0
> +#define	RB_BLACK	1
> +	struct rb_node *rb_right;
> +	struct rb_node *rb_left;
> +} __attribute__((aligned(sizeof(long))));
> +    /* The alignment might seem pointless, but allegedly CRIS needs it */
> +
> +struct rb_root
> +{
> +	struct rb_node *rb_node;
> +};
> +
> +
> +#define rb_parent(r)   ((struct rb_node *)((r)->rb_parent_color & ~3))
> +#define rb_color(r)   ((r)->rb_parent_color & 1)
> +#define rb_is_red(r)   (!rb_color(r))
> +#define rb_is_black(r) rb_color(r)
> +#define rb_set_red(r)  do { (r)->rb_parent_color &= ~1; } while (0)
> +#define rb_set_black(r)  do { (r)->rb_parent_color |= 1; } while (0)
> +
> +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
> +{
> +	rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
> +}
> +static inline void rb_set_color(struct rb_node *rb, int color)
> +{
> +	rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
> +}
> +
> +#define RB_ROOT	(struct rb_root) { NULL, }
> +#define	rb_entry(ptr, type, member) container_of(ptr, type, member)
> +
> +#define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)
> +#define RB_EMPTY_NODE(node)	(rb_parent(node) == node)
> +#define RB_CLEAR_NODE(node)	(rb_set_parent(node, node))
> +
> +extern void rb_insert_color(struct rb_node *, struct rb_root *);
> +extern void rb_erase(struct rb_node *, struct rb_root *);
> +
> +/* Find logical next and previous nodes in a tree */
> +extern struct rb_node *rb_next(const struct rb_node *);
> +extern struct rb_node *rb_prev(const struct rb_node *);
> +extern struct rb_node *rb_first(const struct rb_root *);
> +extern struct rb_node *rb_last(const struct rb_root *);
> +
> +/* Fast replacement of a single node without remove/rebalance/add/rebalance */
> +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
> +			    struct rb_root *root);
> +
> +static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
> +				struct rb_node ** rb_link)
> +{
> +	node->rb_parent_color = (unsigned long )parent;
> +	node->rb_left = node->rb_right = NULL;
> +
> +	*rb_link = node;
> +}
> +
> +#endif	/* _LINUX_RBTREE_H */



Thanks,
Mauro

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] rasdaemon: add support for memory Corrected Error predictive failure analysis
  2020-05-29  8:24 ` [PATCH 2/2] rasdaemon: add support for memory Corrected Error predictive failure analysis lvying6
@ 2020-05-29 14:31   ` Mauro Carvalho Chehab
  2020-06-20 11:59     ` lvying
  0 siblings, 1 reply; 11+ messages in thread
From: Mauro Carvalho Chehab @ 2020-05-29 14:31 UTC (permalink / raw)
  To: lvying6; +Cc: linux-edac, guanyalong, wuyun.wu, tanxiaofei

Em Fri, 29 May 2020 16:24:23 +0800
lvying6 <lvying6@huawei.com> escreveu:

> From: wuyun <wuyun.wu@huawei.com>
> 
> Memory Corrected Error was corrected by hardware. These errors do not
> require immediate software actions, but are still reported for
> accounting and predictive failure analysis.
> 
> Based on statistical results, some actions can be taken to prevent
> Corrected Error from evoluting to Uncorrected Error.
> 
> Signed-off-by: lvying <lvying6@huawei.com>
> ---
>  Makefile.am               |   4 +-
>  misc/rasdaemon.env        |  29 +++++
>  misc/rasdaemon.service.in |   1 +
>  ras-events.c              |   3 +
>  ras-mc-handler.c          |   5 +
>  ras-page-isolation.c      | 308 ++++++++++++++++++++++++++++++++++++++++++++++
>  ras-page-isolation.h      |  68 ++++++++++
>  7 files changed, 416 insertions(+), 2 deletions(-)
>  create mode 100644 misc/rasdaemon.env
>  create mode 100644 ras-page-isolation.c
>  create mode 100644 ras-page-isolation.h
> 
> diff --git a/Makefile.am b/Makefile.am
> index 2ff742d..6fc39f2 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -17,7 +17,7 @@ all-local: $(SYSTEMD_SERVICES)
>  
>  sbin_PROGRAMS = rasdaemon
>  rasdaemon_SOURCES = rasdaemon.c ras-events.c ras-mc-handler.c \
> -		    bitfield.c rbtree.c
> +		    bitfield.c rbtree.c ras-page-isolation.c
>  if WITH_SQLITE3
>     rasdaemon_SOURCES += ras-record.c
>  endif
> @@ -59,7 +59,7 @@ rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
>  include_HEADERS = config.h  ras-events.h  ras-logger.h  ras-mc-handler.h \
>  		  ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \
>  		  ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \
> -		  ras-devlink-handler.h ras-diskerror-handler.h rbtree.h
> +		  ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h
>  
>  # This rule can't be called with more than one Makefile job (like make -j8)
>  # I can't figure out a way to fix that
> diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env
> new file mode 100644
> index 0000000..0c05af0
> --- /dev/null
> +++ b/misc/rasdaemon.env
> @@ -0,0 +1,29 @@
> +# Page Isolation
> +# Note: Run-time configuration is unsupported, service restart needed.


> +# Note: this file should be installed at /etc/sysconfig/rasdaemon

If so, be sure to add it to Makefile.am, at "install-data-local"
target.

It would also be nice if you patch could touch[1]:

	misc/rasdaemon.spec.in

[1] I'll end doing this if your patch series doesn't, as
    my release scripts depend on having it built into
    Fedora rawhide, as I want to ensure that all build
    time dependencies are properly mapped - at least at the
    *.spec file.

> +
> +# Specify the threshold of isolating buggy pages.
> +#
> +# Format:
> +#   [0-9]+[unit]


> +# WARNING: please make sure perfectly match this format.

I would improve the parser and remove this warning.

> +#
> +# Supported units:
> +# PAGE_CE_REFRESH_CYCLE: D|d (day), H|h (hour), M|m (min), default is in hour
> +# PAGE_CE_THRESHOLD: K|k (x1000), M|m (x1000k), default is none
> +#
> +# The two configs will only take no effect when PAGE_CE_ACTION is "off".
> +PAGE_CE_REFRESH_CYCLE="24h"
> +PAGE_CE_THRESHOLD="50"
> +
> +# Specify the internal action in rasdaemon to exceeding a page error threshold.
> +#
> +# off      no action
> +# account  only account errors
> +# soft     try to soft-offline page without killing any processes
> +#          This requires an uptodate kernel. Might not be successfull.
> +# hard     try to hard-offline page by killing processes
> +#          Requires an uptodate kernel. Might not be successfull.
> +# soft-then-hard   First try to soft offline, then try hard offlining.
> +# Note: default offline choice is "soft".
> +PAGE_CE_ACTION="soft"
> diff --git a/misc/rasdaemon.service.in b/misc/rasdaemon.service.in
> index be9ad5a..e73a08a 100644
> --- a/misc/rasdaemon.service.in
> +++ b/misc/rasdaemon.service.in
> @@ -3,6 +3,7 @@ Description=RAS daemon to log the RAS events
>  After=syslog.target
>  
>  [Service]
> +EnvironmentFile=/etc/sysconfig/rasdaemon
>  ExecStart=@sbindir@/rasdaemon -f -r
>  ExecStartPost=@sbindir@/rasdaemon --enable
>  ExecStop=@sbindir@/rasdaemon --disable
> diff --git a/ras-events.c b/ras-events.c
> index 511c93d..c034f6c 100644
> --- a/ras-events.c
> +++ b/ras-events.c
> @@ -798,6 +798,9 @@ int handle_ras_events(int record_events)
>  	ras->page_size = page_size;
>  	ras->record_events = record_events;
>  


> +	/* FIXME: enable memory isolation unconditionally */
> +	ras_page_account_init();
> +

As I commented on patch 1, I'd like to be able to allow building
rasdaemon with a feature subset.

>  	rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event",
>  			       ras_mc_event_handler, NULL, MC_EVENT);
>  	if (!rc)
> diff --git a/ras-mc-handler.c b/ras-mc-handler.c
> index deb7e05..bfbe1ef 100644
> --- a/ras-mc-handler.c
> +++ b/ras-mc-handler.c
> @@ -23,6 +23,7 @@
>  #include "ras-mc-handler.h"
>  #include "ras-record.h"
>  #include "ras-logger.h"
> +#include "ras-page-isolation.h"
>  #include "ras-report.h"
>  
>  int ras_mc_event_handler(struct trace_seq *s,
> @@ -183,6 +184,10 @@ int ras_mc_event_handler(struct trace_seq *s,
>  
>  	ras_store_mc_event(ras, &ev);
>  
> +	/* Account page corrected errors */
> +	if (!strcmp(ev.error_type, "Corrected"))
> +		ras_record_page_error(ev.address, ev.error_count, now);
> +

Same here: the code should be called only if the predictive
failure analysis feature is enabled, e. g.:

	#ifdef HAVE_feature_name
		/* Account page corrected errors */
		if (!strcmp(ev.error_type, "Corrected"))
			ras_record_page_error(ev.address, ev.error_count, now);

	#endif

>  #ifdef HAVE_ABRT_REPORT
>  	/* Report event to ABRT */
>  	ras_report_mc_event(ras, &ev);
> diff --git a/ras-page-isolation.c b/ras-page-isolation.c
> new file mode 100644
> index 0000000..1bd04e4
> --- /dev/null
> +++ b/ras-page-isolation.c
> @@ -0,0 +1,308 @@
> +/*
> + * Copyright (C) 2015 Yun Wu (Abel)  <wuyun.wu@huawei.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +*/
> +
> +#include <ctype.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include "ras-logger.h"
> +#include "ras-page-isolation.h"
> +
> +static const struct config threshold_units[] = {
> +	{ "m",	1000 },
> +	{ "k",	1000 },
> +	{ "",	1    },
> +	{}
> +};
> +
> +static const struct config cycle_units[] = {
> +	{ "d",	24 },
> +	{ "h",	60 },
> +	{ "m",	60 },
> +	{}
> +};
> +
> +static struct isolation threshold = {
> +	.name = "PAGE_CE_THRESHOLD",
> +	.units = threshold_units,
> +	.env = "50",
> +	.unit = "",
> +};
> +
> +static struct isolation cycle = {
> +	.name = "PAGE_CE_REFRESH_CYCLE",
> +	.units = cycle_units,
> +	.env = "24h",
> +	.unit = "h",
> +};
> +
> +static const char *kernel_offline[] = {
> +	[OFFLINE_SOFT]		 = "/sys/devices/system/memory/soft_offline_page",
> +	[OFFLINE_HARD]		 = "/sys/devices/system/memory/hard_offline_page",
> +	[OFFLINE_SOFT_THEN_HARD] = "/sys/devices/system/memory/soft_offline_page",
> +};
> +
> +static const struct config offline_choice[] = {
> +	{ "off",		OFFLINE_OFF },
> +	{ "account",		OFFLINE_ACCOUNT },
> +	{ "soft",		OFFLINE_SOFT },
> +	{ "hard",		OFFLINE_HARD },
> +	{ "soft-then-hard",	OFFLINE_SOFT_THEN_HARD },
> +	{}
> +};
> +
> +static const char *page_state[] = {
> +	[PAGE_ONLINE]		= "online",
> +	[PAGE_OFFLINE]		= "offlined",
> +	[PAGE_OFFLINE_FAILED]	= "offline-failed",
> +};
> +
> +static enum otype offline = OFFLINE_SOFT;
> +static struct rb_root page_records;
> +
> +static void page_offline_init(void)
> +{
> +	const char *env = "PAGE_CE_ACTION";
> +	char *choice = getenv(env);
> +	const struct config *c = NULL;
> +	int matched = 0;
> +
> +	if (choice) {
> +		for (c = offline_choice; c->name; c++) {
> +			if (!strcasecmp(choice, c->name)) {
> +				offline = c->val;
> +				matched = 1;
> +				break;
> +			}
> +		}
> +	}
> +
> +	if (!matched)
> +		log(TERM, LOG_INFO, "Improper %s, set to default soft\n", env);
> +
> +	if (offline > OFFLINE_ACCOUNT && access(kernel_offline[offline], W_OK)) {
> +		log(TERM, LOG_INFO, "Kernel does not support page offline interface\n");
> +		offline = OFFLINE_ACCOUNT;
> +	}
> +
> +	log(TERM, LOG_INFO, "Page offline choice on Corrected Errors is %s\n",
> +	    offline_choice[offline].name);
> +}
> +
> +static void parse_isolation_env(struct isolation *config)
> +{
> +	char *env = getenv(config->name), *unit = NULL;
> +	const struct config *units = NULL;
> +	unsigned long value;
> +	int no_unit, unit_matched;
> +	int last, i;
> +


> +reparse:

Instead of a goto-based loop, could you please change the code to use
a normal loop?

E. g. Something like:

	do {
		done = try_set_isolation_env();
	} while(!done);

> +	/* Start a new round */
> +	no_unit = unit_matched = 0;

Nitpick: Please place one statement per line, e. g.:

	no_unit = 0;
	unit_patched = 0;

> +
> +	/* Environments could be un-configured */
> +	if (!env || !strlen(env))
> +		goto use_default;

Could you please verify if your code won't have endless loops?

On a first glance, it seems that, if env is zero, the loop
will run forever.

> +
> +	/* Index of the last char of environment */
> +	last = strlen(env) - 1;
> +	unit = env + last;
> +	if (isdigit(*unit)) {
> +		unit = config->unit;
> +		no_unit = 1;
> +	}

I guess you need to add:

	} else {
		last--;
	}

> +
> +	/* Only decimal digit can be accepted */
> +	for (i = 0; i < last; i++) {
> +		if (!isdigit(env[i]))
> +			goto use_default;
> +	}

.. as the last character won't be a digit if no_unit == 0.

> +
> +	/* Check if value is valid or not */
> +	if (sscanf(env, "%lu", &value) < 1 || !value)
> +		goto use_default;

Also here, you should discard the last byte, if it is an unit.

> +
> +	for (units = config->units; units->name; units++) {
> +		if (!strcasecmp(unit, units->name))
> +			unit_matched = 1;
> +		if (unit_matched)
> +			value *= units->val;
> +	}
> +
> +	/* Improper unit */
> +	if (!unit_matched)
> +		goto use_default;
> +
> +	config->env = env;
> +	config->val = value;
> +	config->unit = no_unit ? unit : "";
> +	return;
> +
> +use_default:
> +	log(TERM, LOG_INFO, "Improper %s, set to default %s.\n",
> +	    config->name, config->env);
> +
> +	env = config->env;
> +	goto reparse;
> +}

Btw, after re-reading this code a couple of times, I guess this logic 
could be re-written on a different way, with should avoid the reparse
loop and making it simpler for reviewers to analyze it (code untested):

	static int parse_isolation_env(struct isolation *config)
	{
		char *env = getenv(config->name), *unit = NULL;
		unsigned long value;
		char *s[255];
		int len;

		strncpy(s, env, sizeof(s));
		s[255] = '\0';

		len = strlen(s);

		if (!isdigit(s[len - 1])) {
			for (units = config->units; units->name; units++) {
				if (!strcasecmp(&s[len - 1], units->name)) {
					unit = units->val;
					break;
				}
			}

			/* Return error if the unit is unknown */

			if (!unit)
				return -1;
			else
				s[len - 1] = '\0';
		}

		/* Return error if value is not valid or if it is zero */
		if (sscanf(s, "%lu", &value) < 1 || !value)
			return -1;

		value *= unit->val;
	
		config->env = env;
		config->val = value;
		config->unit = unit ? unit : "";

		return 0;
	}		

Btw, instead of setting a default, I would instead print some error message,
as some action will need to be taken in order to avoid parsing it wrongly.

> +
> +static void page_isolation_init(void)
> +{
> +	/**
> +	 * It's unnecessary to parse threshold configuration when offline
> +	 * choice is off.
> +	 */
> +	if (offline == OFFLINE_OFF)
> +		return;
> +
> +	parse_isolation_env(&threshold);
> +	parse_isolation_env(&cycle);
> +	log(TERM, LOG_INFO, "Threshold of memory Corrected Errors is %s%s / %s%s\n",
> +	    threshold.env, threshold.unit, cycle.env, cycle.unit);
> +}
> +
> +void ras_page_account_init(void)
> +{
> +	page_offline_init();
> +	page_isolation_init();
> +}
> +
> +static int do_page_offline(unsigned long long addr, enum otype type)
> +{
> +	FILE *offline_file;
> +	int err;
> +
> +	offline_file = fopen(kernel_offline[type], "w");
> +	if (!offline_file)
> +		return -1;
> +
> +	fprintf(offline_file, "%#llx", addr);
> +	err = ferror(offline_file) ? -1 : 0;
> +	fclose(offline_file);
> +
> +	return err;
> +}
> +
> +static void page_offline(struct page_record *pr)
> +{
> +	unsigned long long addr = pr->addr;
> +	int ret;
> +
> +	/* Offlining page is not required */
> +	if (offline <= OFFLINE_ACCOUNT)
> +		return;
> +
> +	/* Ignore offlined pages */
> +	if (pr->offlined != PAGE_ONLINE)
> +		return;
> +
> +	/* Time to silence this noisy page */
> +	if (offline == OFFLINE_SOFT_THEN_HARD) {
> +		ret = do_page_offline(addr, OFFLINE_SOFT);
> +		if (ret < 0)
> +			ret = do_page_offline(addr, OFFLINE_HARD);
> +	} else {
> +		ret = do_page_offline(addr, offline);
> +	}
> +
> +	pr->offlined = ret < 0 ? PAGE_OFFLINE_FAILED : PAGE_OFFLINE;
> +
> +	log(TERM, LOG_INFO, "Result of offlining page at %#llx: %s\n",
> +	    addr, page_state[pr->offlined]);
> +}
> +
> +static void page_record(struct page_record *pr, unsigned count, time_t time)
> +{
> +	unsigned long period = time - pr->start;
> +	unsigned long tolerate;
> +
> +	if (period >= cycle.val) {
> +		/**
> +		 * Since we don't refresh automatically, it is possible that the period
> +		 * between two occurences longer than the pre-configured refresh cycle.

Nitpick:
		longer than -> will be longer than

> +		 * In this case, we tolerate the frequency of the whole period up to
> +		 * the pre-configured threshold.
> +		 */
> +		tolerate = (period / (double)cycle.val) * threshold.val;
> +		pr->count -= (tolerate > pr->count) ? pr->count : tolerate;
> +		pr->start = time;
> +		pr->excess = 0;
> +	}
> +
> +	pr->count += count;
> +	if (pr->count >= threshold.val) {
> +		log(TERM, LOG_INFO, "Corrected Errors at %#llx exceed threshold\n", pr->addr);

Nitpick:
	exceed -> exceeded

> +
> +		/**
> +		 * Backup ce count of current cycle to enable next round, which actually
> +		 * should never happen if we can disable overflow completely in the same
> +		 * time unit (but sadly we can't).
> +		 */
> +		pr->excess += pr->count;
> +		pr->count = 0;
> +		page_offline(pr);
> +	}
> +}
> +
> +static struct page_record *page_lookup_insert(unsigned long long addr)
> +{
> +	struct rb_node **entry = &page_records.rb_node;
> +	struct rb_node *parent = NULL;
> +	struct page_record *pr = NULL, *find = NULL;
> +
> +	while (*entry) {
> +		parent = *entry;
> +		pr = rb_entry(parent, struct page_record, entry);
> +		if (addr == pr->addr) {
> +			return pr;
> +		} else if (addr < pr->addr) {
> +			entry = &(*entry)->rb_left;
> +		} else {
> +			entry = &(*entry)->rb_right;
> +		}
> +	}
> +
> +	find = calloc(1, sizeof(struct page_record));
> +	if (!find) {
> +		log(TERM, LOG_ERR, "No memory for page records\n");
> +		return NULL;
> +	}
> +
> +	find->addr = addr;
> +	rb_link_node(&find->entry, parent, entry);
> +	rb_insert_color(&find->entry, &page_records);
> +
> +	return find;
> +}
> +
> +void ras_record_page_error(unsigned long long addr, unsigned count, time_t time)
> +{
> +	struct page_record *pr = NULL;
> +
> +	if (offline == OFFLINE_OFF)
> +		return;
> +
> +	pr = page_lookup_insert(addr & PAGE_MASK);
> +	if (pr) {
> +		if (!pr->start)
> +			pr->start = time;
> +		page_record(pr, count, time);
> +	}
> +}
> diff --git a/ras-page-isolation.h b/ras-page-isolation.h
> new file mode 100644
> index 0000000..6aefa1e
> --- /dev/null
> +++ b/ras-page-isolation.h
> @@ -0,0 +1,68 @@
> +/*
> + * Copyright (C) 2015 Yun Wu (Abel)  <wuyun.wu@huawei.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +*/
> +
> +#ifndef __RAS_PAGE_ISOLATION_H
> +#define __RAS_PAGE_ISOLATION_H
> +
> +#include <time.h>
> +#include "rbtree.h"
> +
> +#define PAGE_SHIFT		12
> +#define PAGE_SIZE		(1 << PAGE_SHIFT)
> +#define PAGE_MASK		(~(PAGE_SIZE-1))
> +
> +struct config {
> +	char			*name;
> +	int			val;
> +};
> +
> +enum otype {
> +	OFFLINE_OFF,
> +	OFFLINE_ACCOUNT,
> +	OFFLINE_SOFT,
> +	OFFLINE_HARD,
> +	OFFLINE_SOFT_THEN_HARD,
> +};
> +
> +enum pstate {
> +	PAGE_ONLINE,
> +	PAGE_OFFLINE,
> +	PAGE_OFFLINE_FAILED,
> +};
> +
> +struct page_record {
> +	struct rb_node		entry;
> +	unsigned long long	addr;
> +	time_t			start;
> +	enum pstate		offlined;
> +	unsigned long		count;
> +	unsigned long		excess;
> +};
> +
> +struct isolation {
> +	char			*name;
> +	char			*env;
> +	const struct config	*units;
> +	unsigned long		val;
> +	char			*unit;
> +};
> +
> +void ras_page_account_init(void);
> +void ras_record_page_error(unsigned long long addr, unsigned count, time_t time);
> +
> +#endif



Thanks,
Mauro

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] rasdaemon: add rbtree support for page record
  2020-05-29 12:24   ` Mauro Carvalho Chehab
@ 2020-06-20 11:57     ` lvying
  0 siblings, 0 replies; 11+ messages in thread
From: lvying @ 2020-06-20 11:57 UTC (permalink / raw)
  To: Mauro Carvalho Chehab; +Cc: linux-edac, guanyalong, wuyun.wu, tanxiaofei

>> diff --git a/Makefile.am b/Makefile.am
>> index 843b538..2ff742d 100644
>> --- a/Makefile.am
>> +++ b/Makefile.am
>> @@ -17,7 +17,7 @@ all-local: $(SYSTEMD_SERVICES)
>>   
>>   sbin_PROGRAMS = rasdaemon
>>   rasdaemon_SOURCES = rasdaemon.c ras-events.c ras-mc-handler.c \
>> -		    bitfield.c
>> +		    bitfield.c rbtree.c
> 
> I would move the change at Makefile.am to the next patch.
> 
> As I'll comment there, I'd like to have a separate configure
> option for each feature provided by the rasdaemon.
> 
> So, I would like to see something like:
> 
> 	if WITH_PG_RECORD
> 	   rasdaemon_SOURCES += rbtree.c ras-page-isolation.c
> 	endif
> 
> at the Makefile.am (after applying both patches)
> 
Ok, I will add a configure option(WITH_MEMORY_CE_PFA) to the 
configure.ac to isolate this feature. This modification will be moved to 
next patch.



-- 
Thanks!
Lv Ying

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] rasdaemon: add support for memory Corrected Error predictive failure analysis
  2020-05-29 14:31   ` Mauro Carvalho Chehab
@ 2020-06-20 11:59     ` lvying
  0 siblings, 0 replies; 11+ messages in thread
From: lvying @ 2020-06-20 11:59 UTC (permalink / raw)
  To: Mauro Carvalho Chehab; +Cc: linux-edac, guanyalong, wuyun.wu, tanxiaofei

>> diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env
>> new file mode 100644
>> index 0000000..0c05af0
>> --- /dev/null
>> +++ b/misc/rasdaemon.env
>> @@ -0,0 +1,29 @@
>> +# Page Isolation
>> +# Note: Run-time configuration is unsupported, service restart needed.
> 
> 
>> +# Note: this file should be installed at /etc/sysconfig/rasdaemon
> 
> If so, be sure to add it to Makefile.am, at "install-data-local"
> target.
> 
> It would also be nice if you patch could touch[1]:
> 
> 	misc/rasdaemon.spec.in
> 
> [1] I'll end doing this if your patch series doesn't, as
>      my release scripts depend on having it built into
>      Fedora rawhide, as I want to ensure that all build
>      time dependencies are properly mapped - at least at the
>      *.spec file.
> 

OK, I add %install and %files content:
%install
...
install -D -p -m 0644 misc/rasdaemon.env 
%{buildroot}%{_sysconfdir}/sysconfig/%{name}
...
%file
...
%config(noreplace) %{_sysconfdir}/sysconfig/%{name}
...
Also, I will add instructions for use into man file.

>> +
>> +# Specify the threshold of isolating buggy pages.
>> +#
>> +# Format:
>> +#   [0-9]+[unit]
> 
> 
>> +# WARNING: please make sure perfectly match this format.
> 
> I would improve the parser and remove this warning.
> 

the parser can handle exception input cases. This message is ambiguous.
Just like this:
# Notice: please make sure match this format, rasdaemon will use default 
value for exception input cases.

>> diff --git a/ras-events.c b/ras-events.c
>> index 511c93d..c034f6c 100644
>> --- a/ras-events.c
>> +++ b/ras-events.c
>> @@ -798,6 +798,9 @@ int handle_ras_events(int record_events)
>>   	ras->page_size = page_size;
>>   	ras->record_events = record_events;
>>   
> 
> 
>> +	/* FIXME: enable memory isolation unconditionally */
>> +	ras_page_account_init();
>> +
> 
> As I commented on patch 1, I'd like to be able to allow building
> rasdaemon with a feature subset.
> 

Ok, I rewrite:
#ifdef HAVE_MEMORY_CE_PFA
     /* FIXME: enable memory isolation unconditionally */
     ras_page_account_init();
#endif

>>   
>> +	/* Account page corrected errors */
>> +	if (!strcmp(ev.error_type, "Corrected"))
>> +		ras_record_page_error(ev.address, ev.error_count, now);
>> +
> 
> Same here: the code should be called only if the predictive
> failure analysis feature is enabled, e. g.:
> 
> 	#ifdef HAVE_feature_name
> 		/* Account page corrected errors */
> 		if (!strcmp(ev.error_type, "Corrected"))
> 			ras_record_page_error(ev.address, ev.error_count, now);
> 
> 	#endif
> 
Ok, I use same macro "HAVE_MEMORY_CE_PFA" here.

> 
> Instead of a goto-based loop, could you please change the code to use
> a normal loop?
> 

Ok, I will use less goto-based loop to make this code logic clearly, 
maybe only use goto-base loop to handle exception case.

> E. g. Something like:
> 
> 	do {
> 		done = try_set_isolation_env();
> 	} while(!done);
> 
>> +	/* Start a new round */
>> +	no_unit = unit_matched = 0;
> 
> Nitpick: Please place one statement per line, e. g.:
> 
> 	no_unit = 0;
> 	unit_patched = 0;

OK, I will follow this rule

> 
>> +
>> +	/* Environments could be un-configured */
>> +	if (!env || !strlen(env))
>> +		goto use_default;
> 
> Could you please verify if your code won't have endless loops?
> 
> On a first glance, it seems that, if env is zero, the loop
> will run forever.

No, when the env is zero, it will use default env value:
if (sscanf(env, "%lu", &value) < 1 || !value)
	goto use_default;

> 
>> +
>> +	/* Index of the last char of environment */
>> +	last = strlen(env) - 1;
>> +	unit = env + last;
>> +	if (isdigit(*unit)) {
>> +		unit = config->unit;
>> +		no_unit = 1;
>> +	}
> 
> I guess you need to add:
> 
> 	} else {
> 		last--;
> 	}
> 

No need here, this place just judge whether the env has valid unit, if 
not use the default unit.

>> +
>> +	/* Only decimal digit can be accepted */
>> +	for (i = 0; i < last; i++) {
>> +		if (!isdigit(env[i]))
>> +			goto use_default;
>> +	}
> 
> .. as the last character won't be a digit if no_unit == 0.
> 

It doesn't matter, here only check the characters before the last 
character are all digit.

> 
> Btw, after re-reading this code a couple of times, I guess this logic
> could be re-written on a different way, with should avoid the reparse
> loop and making it simpler for reviewers to analyze it (code untested):
> 
> 	static int parse_isolation_env(struct isolation *config)
> 	{
> 		char *env = getenv(config->name), *unit = NULL;
> 		unsigned long value;
> 		char *s[255];
> 		int len;
> 
> 		strncpy(s, env, sizeof(s));
> 		s[255] = '\0';
> 
> 		len = strlen(s);
> 
> 		if (!isdigit(s[len - 1])) {
> 			for (units = config->units; units->name; units++) {
> 				if (!strcasecmp(&s[len - 1], units->name)) {
> 					unit = units->val;
> 					break;
> 				}
> 			}
> 
> 			/* Return error if the unit is unknown */
> 
> 			if (!unit)
> 				return -1;
> 			else
> 				s[len - 1] = '\0';
> 		}
> 
> 		/* Return error if value is not valid or if it is zero */
> 		if (sscanf(s, "%lu", &value) < 1 || !value)
> 			return -1;
> 
> 		value *= unit->val;
> 	
> 		config->env = env;
> 		config->val = value;
> 		config->unit = unit ? unit : "";
> 
> 		return 0;
> 	}		
> 
> Btw, instead of setting a default, I would instead print some error message,
> as some action will need to be taken in order to avoid parsing it wrongly.

And I rewrite this function and reduce the reparse goto-base statement 
to make it clearly. The first part to check if env characters are valid. 
The second part to parse the env value(if not valid use the default value):

static void parse_isolation_env(struct isolation *config)
{
     char *env = getenv(config->name);
     char *unit = NULL;
     const struct config *units = NULL;
     int i, no_unit;
     int valid = 0;
     int unit_matched = 0;
     unsigned long value, tmp;

     /* check if env is vaild */
     if (env && strlen(env)) {
         /* All the character before unit must be digit */
         for (i = 0; i < strlen(env) - 1; i++) {
             if (!isdigit(env[i]))
                 goto parse;
         }
         if (sscanf(env, "%lu", &value) < 1 || !value)
             goto parse;
         /* check if the unit is vaild */
         unit = env + strlen(env) - 1;
         /* no unit, all the character are value character */
         if (isdigit(*unit)) {
             valid = 1;
             no_unit = 1;
             goto parse;
         }
         for (units = config->units; units->name; units++) {
             /* value character and unit character are both valid */
             if (!strcasecmp(unit, units->name)) {
                 valid = 1;
                 no_unit = 0;
                 break;
             }
         }
     }

parse:
     /* if invalid, use default env */
     if (valid) {
         config->env = env;
         if (!no_unit)
             config->unit = unit;
     } else {
          log(TERM, LOG_INFO, "Improper %s, set to default %s.\n",
                  config->name, config->env);
     }

     /* if env value string is greater than ulong_max, truncate the last 
digit */
     sscanf(config->env, "%lu", &value);
     for (units = config->units; units->name; units++) {
         if (!strcasecmp(config->unit, units->name))
             unit_matched = 1;
         if (unit_matched) {
             tmp = value;
             value *= units->val;
             if (tmp != 0 && value / tmp != units->val)
                 config->overflow = true;
         }
     }
     config->val = value;
     /* In order to output value and unit perfectly */
     config->unit = no_unit ? config->unit : "";
}

I will send patch version2 to give more detail about env parse.

>> +	if (period >= cycle.val) {
>> +		/**
>> +		 * Since we don't refresh automatically, it is possible that the period
>> +		 * between two occurences longer than the pre-configured refresh cycle.
> 
> Nitpick:
> 		longer than -> will be longer than
> 
Ok, I will fix it


>> +	pr->count += count;
>> +	if (pr->count >= threshold.val) {
>> +		log(TERM, LOG_INFO, "Corrected Errors at %#llx exceed threshold\n", pr->addr);
> 
> Nitpick:
> 	exceed -> exceeded
> 
Ok, I will fix it


> 
> Thanks,
> Mauro
> .
> 


-- 
Thanks!
Lv Ying

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 rasdaemon 0/2] support memory CE predictive failure analysis
  2020-05-29  8:24 [PATCH 0/2] rasdaemon: add support for memory Corrected Error predictive failure analysis lvying6
  2020-05-29  8:24 ` [PATCH 1/2] rasdaemon: add rbtree support for page record lvying6
  2020-05-29  8:24 ` [PATCH 2/2] rasdaemon: add support for memory Corrected Error predictive failure analysis lvying6
@ 2020-06-20 12:26 ` lvying6
  2020-06-20 12:26   ` [PATCH v2 rasdaemon 1/2] rasdaemon: add rbtree support for page record lvying6
  2020-06-20 12:26   ` [PATCH v2 rasdaemon 2/2] rasdaemon: add support for memory Corrected Error predictive failure analysis lvying6
  2020-07-21 12:26 ` [PATCH 0/2] " Mauro Carvalho Chehab
  3 siblings, 2 replies; 11+ messages in thread
From: lvying6 @ 2020-06-20 12:26 UTC (permalink / raw)
  To: mchehab+huawei, linux-edac; +Cc: guanyalong, wuyun.wu, tanxiaofei

The patch v2 has been modified according to Mauro's code review.
Changelog:
v1 -- v2
1. add a configure option(WITH_MEMORY_CE_PFA) to isolate this feature.
2. add %install and %files content into misc/rasdaemon.spec.in
3. add config file man page
4. rewrite parse_isolation_env function to make it clearly

lvying6 (2):
  rasdaemon: add rbtree support for page record
  rasdaemon: add support for memory Corrected Error predictive failure
    analysis

 .travis.yml               |   2 +-
 Makefile.am               |   5 +-
 configure.ac              |  11 ++
 man/rasdaemon.1.in        |   7 +
 misc/rasdaemon.env        |  29 ++++
 misc/rasdaemon.service.in |   1 +
 misc/rasdaemon.spec.in    |   2 +
 ras-events.c              |   6 +
 ras-mc-handler.c          |   7 +
 ras-page-isolation.c      | 332 +++++++++++++++++++++++++++++++++++++++
 ras-page-isolation.h      |  66 ++++++++
 rbtree.c                  | 384 ++++++++++++++++++++++++++++++++++++++++++++++
 rbtree.h                  | 165 ++++++++++++++++++++
 13 files changed, 1015 insertions(+), 2 deletions(-)
 create mode 100644 misc/rasdaemon.env
 create mode 100644 ras-page-isolation.c
 create mode 100644 ras-page-isolation.h
 create mode 100644 rbtree.c
 create mode 100644 rbtree.h

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 rasdaemon 1/2] rasdaemon: add rbtree support for page record
  2020-06-20 12:26 ` [PATCH v2 rasdaemon 0/2] support memory CE " lvying6
@ 2020-06-20 12:26   ` lvying6
  2020-06-20 12:26   ` [PATCH v2 rasdaemon 2/2] rasdaemon: add support for memory Corrected Error predictive failure analysis lvying6
  1 sibling, 0 replies; 11+ messages in thread
From: lvying6 @ 2020-06-20 12:26 UTC (permalink / raw)
  To: mchehab+huawei, linux-edac; +Cc: guanyalong, wuyun.wu, tanxiaofei

From: wuyun <wuyun.wu@huawei.com>

The rbtree is very efficient for recording and querying fault page info.

Signed-off-by: wuyun <wuyun.wu@huawei.com>
Signed-off-by: lvying6 <lvying6@huawei.com>
---
 rbtree.c | 384 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 rbtree.h | 165 +++++++++++++++++++++++++++
 2 files changed, 549 insertions(+)
 create mode 100644 rbtree.c
 create mode 100644 rbtree.h

diff --git a/rbtree.c b/rbtree.c
new file mode 100644
index 0000000..d9b1bd4
--- /dev/null
+++ b/rbtree.c
@@ -0,0 +1,384 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  Taken from the Linux 2.6.30 source with some minor modificatons.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/lib/rbtree.c
+*/
+
+#include "rbtree.h"
+
+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *right = node->rb_right;
+	struct rb_node *parent = rb_parent(node);
+
+	if ((node->rb_right = right->rb_left))
+		rb_set_parent(right->rb_left, node);
+	right->rb_left = node;
+
+	rb_set_parent(right, parent);
+
+	if (parent)
+	{
+		if (node == parent->rb_left)
+			parent->rb_left = right;
+		else
+			parent->rb_right = right;
+	}
+	else
+		root->rb_node = right;
+	rb_set_parent(node, right);
+}
+
+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *left = node->rb_left;
+	struct rb_node *parent = rb_parent(node);
+
+	if ((node->rb_left = left->rb_right))
+		rb_set_parent(left->rb_right, node);
+	left->rb_right = node;
+
+	rb_set_parent(left, parent);
+
+	if (parent)
+	{
+		if (node == parent->rb_right)
+			parent->rb_right = left;
+		else
+			parent->rb_left = left;
+	}
+	else
+		root->rb_node = left;
+	rb_set_parent(node, left);
+}
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *parent, *gparent;
+
+	while ((parent = rb_parent(node)) && rb_is_red(parent))
+	{
+		gparent = rb_parent(parent);
+
+		if (parent == gparent->rb_left)
+		{
+			{
+				register struct rb_node *uncle = gparent->rb_right;
+				if (uncle && rb_is_red(uncle))
+				{
+					rb_set_black(uncle);
+					rb_set_black(parent);
+					rb_set_red(gparent);
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_right == node)
+			{
+				struct rb_node *tmp;
+				__rb_rotate_left(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			rb_set_black(parent);
+			rb_set_red(gparent);
+			__rb_rotate_right(gparent, root);
+		} else {
+			{
+				struct rb_node *uncle = gparent->rb_left;
+				if (uncle && rb_is_red(uncle))
+				{
+					rb_set_black(uncle);
+					rb_set_black(parent);
+					rb_set_red(gparent);
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_left == node)
+			{
+				struct rb_node *tmp;
+				__rb_rotate_right(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			rb_set_black(parent);
+			rb_set_red(gparent);
+			__rb_rotate_left(gparent, root);
+		}
+	}
+
+	rb_set_black(root->rb_node);
+}
+
+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
+			     struct rb_root *root)
+{
+	struct rb_node *other;
+
+	while ((!node || rb_is_black(node)) && node != root->rb_node)
+	{
+		if (parent->rb_left == node)
+		{
+			other = parent->rb_right;
+			if (rb_is_red(other))
+			{
+				rb_set_black(other);
+				rb_set_red(parent);
+				__rb_rotate_left(parent, root);
+				other = parent->rb_right;
+			}
+			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+			    (!other->rb_right || rb_is_black(other->rb_right)))
+			{
+				rb_set_red(other);
+				node = parent;
+				parent = rb_parent(node);
+			}
+			else
+			{
+				if (!other->rb_right || rb_is_black(other->rb_right))
+				{
+					rb_set_black(other->rb_left);
+					rb_set_red(other);
+					__rb_rotate_right(other, root);
+					other = parent->rb_right;
+				}
+				rb_set_color(other, rb_color(parent));
+				rb_set_black(parent);
+				rb_set_black(other->rb_right);
+				__rb_rotate_left(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+		else
+		{
+			other = parent->rb_left;
+			if (rb_is_red(other))
+			{
+				rb_set_black(other);
+				rb_set_red(parent);
+				__rb_rotate_right(parent, root);
+				other = parent->rb_left;
+			}
+			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+			    (!other->rb_right || rb_is_black(other->rb_right)))
+			{
+				rb_set_red(other);
+				node = parent;
+				parent = rb_parent(node);
+			}
+			else
+			{
+				if (!other->rb_left || rb_is_black(other->rb_left))
+				{
+					rb_set_black(other->rb_right);
+					rb_set_red(other);
+					__rb_rotate_left(other, root);
+					other = parent->rb_left;
+				}
+				rb_set_color(other, rb_color(parent));
+				rb_set_black(parent);
+				rb_set_black(other->rb_left);
+				__rb_rotate_right(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+	}
+	if (node)
+		rb_set_black(node);
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *child, *parent;
+	int color;
+
+	if (!node->rb_left)
+		child = node->rb_right;
+	else if (!node->rb_right)
+		child = node->rb_left;
+	else
+	{
+		struct rb_node *old = node, *left;
+
+		node = node->rb_right;
+		while ((left = node->rb_left) != NULL)
+			node = left;
+		child = node->rb_right;
+		parent = rb_parent(node);
+		color = rb_color(node);
+
+		if (child)
+			rb_set_parent(child, parent);
+		if (parent == old) {
+			parent->rb_right = child;
+			parent = node;
+		} else
+			parent->rb_left = child;
+
+		node->rb_parent_color = old->rb_parent_color;
+		node->rb_right = old->rb_right;
+		node->rb_left = old->rb_left;
+
+		if (rb_parent(old))
+		{
+			if (rb_parent(old)->rb_left == old)
+				rb_parent(old)->rb_left = node;
+			else
+				rb_parent(old)->rb_right = node;
+		} else
+			root->rb_node = node;
+
+		rb_set_parent(old->rb_left, node);
+		if (old->rb_right)
+			rb_set_parent(old->rb_right, node);
+		goto color;
+	}
+
+	parent = rb_parent(node);
+	color = rb_color(node);
+
+	if (child)
+		rb_set_parent(child, parent);
+	if (parent)
+	{
+		if (parent->rb_left == node)
+			parent->rb_left = child;
+		else
+			parent->rb_right = child;
+	}
+	else
+		root->rb_node = child;
+
+ color:
+	if (color == RB_BLACK)
+		__rb_erase_color(child, parent, root);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_left)
+		n = n->rb_left;
+	return n;
+}
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_right)
+		n = n->rb_right;
+	return n;
+}
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (rb_parent(node) == node)
+		return NULL;
+
+	/* If we have a right-hand child, go down and then left as far
+	   as we can. */
+	if (node->rb_right) {
+		node = node->rb_right;
+		while (node->rb_left)
+			node=node->rb_left;
+		return (struct rb_node *)node;
+	}
+
+	/* No right-hand children.  Everything down and left is
+	   smaller than us, so any 'next' node must be in the general
+	   direction of our parent. Go up the tree; any time the
+	   ancestor is a right-hand child of its parent, keep going
+	   up. First time it's a left-hand child of its parent, said
+	   parent is our 'next' node. */
+	while ((parent = rb_parent(node)) && node == parent->rb_right)
+		node = parent;
+
+	return parent;
+}
+
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (rb_parent(node) == node)
+		return NULL;
+
+	/* If we have a left-hand child, go down and then right as far
+	   as we can. */
+	if (node->rb_left) {
+		node = node->rb_left;
+		while (node->rb_right)
+			node=node->rb_right;
+		return (struct rb_node *)node;
+	}
+
+	/* No left-hand children. Go up till we find an ancestor which
+	   is a right-hand child of its parent */
+	while ((parent = rb_parent(node)) && node == parent->rb_left)
+		node = parent;
+
+	return parent;
+}
+
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+		     struct rb_root *root)
+{
+	struct rb_node *parent = rb_parent(victim);
+
+	/* Set the surrounding nodes to point to the replacement */
+	if (parent) {
+		if (victim == parent->rb_left)
+			parent->rb_left = new;
+		else
+			parent->rb_right = new;
+	} else {
+		root->rb_node = new;
+	}
+	if (victim->rb_left)
+		rb_set_parent(victim->rb_left, new);
+	if (victim->rb_right)
+		rb_set_parent(victim->rb_right, new);
+
+	/* Copy the pointers/colour from the victim to the replacement */
+	*new = *victim;
+}
diff --git a/rbtree.h b/rbtree.h
new file mode 100644
index 0000000..a8a0459
--- /dev/null
+++ b/rbtree.h
@@ -0,0 +1,165 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  Taken from the Linux 2.6.30 source.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/include/linux/rbtree.h
+
+  To use rbtrees you'll have to implement your own insert and search cores.
+  This will avoid us to use callbacks and to drop drammatically performances.
+  I know it's not the cleaner way,  but in C (not in C++) to get
+  performances and genericity...
+
+  Some example of insert and search follows here. The search is a plain
+  normal search over an ordered tree. The insert instead must be implemented
+  int two steps: as first thing the code must insert the element in
+  order as a red leaf in the tree, then the support library function
+  rb_insert_color() must be called. Such function will do the
+  not trivial work to rebalance the rbtree if necessary.
+
+-----------------------------------------------------------------------
+static inline struct page * rb_search_page_cache(struct inode * inode,
+						 unsigned long offset)
+{
+	struct rb_node * n = inode->i_rb_page_cache.rb_node;
+	struct page * page;
+
+	while (n)
+	{
+		page = rb_entry(n, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			n = n->rb_left;
+		else if (offset > page->offset)
+			n = n->rb_right;
+		else
+			return page;
+	}
+	return NULL;
+}
+
+static inline struct page * __rb_insert_page_cache(struct inode * inode,
+						   unsigned long offset,
+						   struct rb_node * node)
+{
+	struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
+	struct rb_node * parent = NULL;
+	struct page * page;
+
+	while (*p)
+	{
+		parent = *p;
+		page = rb_entry(parent, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			p = &(*p)->rb_left;
+		else if (offset > page->offset)
+			p = &(*p)->rb_right;
+		else
+			return page;
+	}
+
+	rb_link_node(node, parent, p);
+
+	return NULL;
+}
+
+static inline struct page * rb_insert_page_cache(struct inode * inode,
+						 unsigned long offset,
+						 struct rb_node * node)
+{
+	struct page * ret;
+	if ((ret = __rb_insert_page_cache(inode, offset, node)))
+		goto out;
+	rb_insert_color(node, &inode->i_rb_page_cache);
+ out:
+	return ret;
+}
+-----------------------------------------------------------------------
+*/
+
+#ifndef	_LINUX_RBTREE_H
+#define	_LINUX_RBTREE_H
+
+#include <stddef.h>
+
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+struct rb_node
+{
+	unsigned long  rb_parent_color;
+#define	RB_RED		0
+#define	RB_BLACK	1
+	struct rb_node *rb_right;
+	struct rb_node *rb_left;
+} __attribute__((aligned(sizeof(long))));
+    /* The alignment might seem pointless, but allegedly CRIS needs it */
+
+struct rb_root
+{
+	struct rb_node *rb_node;
+};
+
+
+#define rb_parent(r)   ((struct rb_node *)((r)->rb_parent_color & ~3))
+#define rb_color(r)   ((r)->rb_parent_color & 1)
+#define rb_is_red(r)   (!rb_color(r))
+#define rb_is_black(r) rb_color(r)
+#define rb_set_red(r)  do { (r)->rb_parent_color &= ~1; } while (0)
+#define rb_set_black(r)  do { (r)->rb_parent_color |= 1; } while (0)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+	rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
+}
+static inline void rb_set_color(struct rb_node *rb, int color)
+{
+	rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
+}
+
+#define RB_ROOT	(struct rb_root) { NULL, }
+#define	rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)
+#define RB_EMPTY_NODE(node)	(rb_parent(node) == node)
+#define RB_CLEAR_NODE(node)	(rb_set_parent(node, node))
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
+
+/* Fast replacement of a single node without remove/rebalance/add/rebalance */
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+			    struct rb_root *root);
+
+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
+				struct rb_node ** rb_link)
+{
+	node->rb_parent_color = (unsigned long )parent;
+	node->rb_left = node->rb_right = NULL;
+
+	*rb_link = node;
+}
+
+#endif	/* _LINUX_RBTREE_H */
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 rasdaemon 2/2] rasdaemon: add support for memory Corrected Error predictive failure analysis
  2020-06-20 12:26 ` [PATCH v2 rasdaemon 0/2] support memory CE " lvying6
  2020-06-20 12:26   ` [PATCH v2 rasdaemon 1/2] rasdaemon: add rbtree support for page record lvying6
@ 2020-06-20 12:26   ` lvying6
  1 sibling, 0 replies; 11+ messages in thread
From: lvying6 @ 2020-06-20 12:26 UTC (permalink / raw)
  To: mchehab+huawei, linux-edac; +Cc: guanyalong, wuyun.wu, tanxiaofei

From: wuyun <wuyun.wu@huawei.com>

Memory Corrected Error was corrected by hardware. These errors do not
require immediate software actions, but are still reported for
accounting and predictive failure analysis.

Based on statistical results, some actions can be taken to prevent
Corrected Error from evoluting to Uncorrected Error.

Signed-off-by: wuyun <wuyun.wu@huawei.com>
Signed-off-by: lvying6 <lvying6@huawei.com>
---
 .travis.yml               |   2 +-
 Makefile.am               |   5 +-
 configure.ac              |  11 ++
 man/rasdaemon.1.in        |   7 +
 misc/rasdaemon.env        |  29 ++++
 misc/rasdaemon.service.in |   1 +
 misc/rasdaemon.spec.in    |   2 +
 ras-events.c              |   6 +
 ras-mc-handler.c          |   7 +
 ras-page-isolation.c      | 332 ++++++++++++++++++++++++++++++++++++++++++++++
 ras-page-isolation.h      |  66 +++++++++
 11 files changed, 466 insertions(+), 2 deletions(-)
 create mode 100644 misc/rasdaemon.env
 create mode 100644 ras-page-isolation.c
 create mode 100644 ras-page-isolation.h

diff --git a/.travis.yml b/.travis.yml
index 846e8f6..79cf4ca 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,7 +20,7 @@ before_install:
 - sudo apt-get install -y sqlite3
 install:
 - autoreconf -vfi
-- ./configure --enable-sqlite3 --enable-aer --enable-non-standard --enable-arm --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-hisi-ns-decode
+- ./configure --enable-sqlite3 --enable-aer --enable-non-standard --enable-arm --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-hisi-ns-decode --enable-memory-ce-pfa
 
 script:
 - make && sudo make install
diff --git a/Makefile.am b/Makefile.am
index 843b538..51ef4de 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -54,12 +54,15 @@ endif
 if WITH_HISI_NS_DECODE
    rasdaemon_SOURCES += non-standard-hisi_hip07.c non-standard-hisi_hip08.c
 endif
+if WITH_MEMORY_CE_PFA
+   rasdaemon_SOURCES += rbtree.c ras-page-isolation.c
+endif
 rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
 
 include_HEADERS = config.h  ras-events.h  ras-logger.h  ras-mc-handler.h \
 		  ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \
 		  ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \
-		  ras-devlink-handler.h ras-diskerror-handler.h
+		  ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h
 
 # This rule can't be called with more than one Makefile job (like make -j8)
 # I can't figure out a way to fix that
diff --git a/configure.ac b/configure.ac
index a5a04dc..c50f818 100644
--- a/configure.ac
+++ b/configure.ac
@@ -131,6 +131,16 @@ AS_IF([test "x$enable_hisi_ns_decode" = "xyes" || test "x$enable_all" == "xyes"]
 AM_CONDITIONAL([WITH_HISI_NS_DECODE], [test x$enable_hisi_ns_decode = xyes || test x$enable_all == xyes])
 AM_COND_IF([WITH_HISI_NS_DECODE], [USE_HISI_NS_DECODE="yes"], [USE_HISI_NS_DECODE="no"])
 
+AC_ARG_ENABLE([memory_ce_pfa],
+    AS_HELP_STRING([--enable-memory-ce-pfa], [enable memory Corrected Error predictive failure analysis]))
+
+AS_IF([test "x$enable_memory_ce_pfa" = "xyes" || test "x$enable_all" == "xyes"], [
+  AC_DEFINE(HAVE_MEMORY_CE_PFA,1,"have memory corrected error predictive failure analysis")
+  AC_SUBST([WITH_MEMORY_CE_PFA])
+])
+AM_CONDITIONAL([WITH_MEMORY_CE_PFA], [test x$enable_memory_ce_pfa = xyes || test x$enable_all == xyes])
+AM_COND_IF([WITH_MEMORY_CE_PFA], [USE_MEMORY_CE_PFA="yes"], [USE_MEMORY_CE_PFA="no"])
+
 test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc
 
 CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes"
@@ -162,4 +172,5 @@ compile time options summary
     ARM events          : $USE_ARM
     DEVLINK             : $USE_DEVLINK
     Disk I/O errors     : $USE_DISKERROR
+    Memory CE PFA       : $USE_MEMORY_CE_PFA
 EOF
diff --git a/man/rasdaemon.1.in b/man/rasdaemon.1.in
index 834df16..833c8e1 100644
--- a/man/rasdaemon.1.in
+++ b/man/rasdaemon.1.in
@@ -62,6 +62,13 @@ feature.
 .BI "--version"
 Print the program version and exit.
 
+.SH CONFIG FILE
+
+The \fBrasdaemon\fR program supports a config file to set rasdaemon systemd service
+environment variables. By default the config file is read from /etc/sysconfig/rasdaemon.
+
+The general format is environmentname=value.
+
 .SH SEE ALSO
 \fBras-mc-ctl\fR(8)
 
diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env
new file mode 100644
index 0000000..12fd766
--- /dev/null
+++ b/misc/rasdaemon.env
@@ -0,0 +1,29 @@
+# Page Isolation
+# Note: Run-time configuration is unsupported, service restart needed.
+# Note: this file should be installed at /etc/sysconfig/rasdaemon
+
+# Specify the threshold of isolating buggy pages.
+#
+# Format:
+#   [0-9]+[unit]
+# Notice: please make sure match this format, rasdaemon will use default value for exception input cases.
+#
+# Supported units:
+# PAGE_CE_REFRESH_CYCLE: D|d (day), H|h (hour), M|m (min), default is in hour
+# PAGE_CE_THRESHOLD: K|k (x1000), M|m (x1000k), default is none
+#
+# The two configs will only take no effect when PAGE_CE_ACTION is "off".
+PAGE_CE_REFRESH_CYCLE="24h"
+PAGE_CE_THRESHOLD="50"
+
+# Specify the internal action in rasdaemon to exceeding a page error threshold.
+#
+# off      no action
+# account  only account errors
+# soft     try to soft-offline page without killing any processes
+#          This requires an uptodate kernel. Might not be successfull.
+# hard     try to hard-offline page by killing processes
+#          Requires an uptodate kernel. Might not be successfull.
+# soft-then-hard   First try to soft offline, then try hard offlining.
+# Note: default offline choice is "soft".
+PAGE_CE_ACTION="soft"
diff --git a/misc/rasdaemon.service.in b/misc/rasdaemon.service.in
index be9ad5a..e73a08a 100644
--- a/misc/rasdaemon.service.in
+++ b/misc/rasdaemon.service.in
@@ -3,6 +3,7 @@ Description=RAS daemon to log the RAS events
 After=syslog.target
 
 [Service]
+EnvironmentFile=/etc/sysconfig/rasdaemon
 ExecStart=@sbindir@/rasdaemon -f -r
 ExecStartPost=@sbindir@/rasdaemon --enable
 ExecStop=@sbindir@/rasdaemon --disable
diff --git a/misc/rasdaemon.spec.in b/misc/rasdaemon.spec.in
index 48dd311..378bbcd 100644
--- a/misc/rasdaemon.spec.in
+++ b/misc/rasdaemon.spec.in
@@ -44,6 +44,7 @@ make %{?_smp_mflags}
 
 %install
 make install DESTDIR=%{buildroot}
+install -D -p -m 0644 misc/rasdaemon.env %{buildroot}%{_sysconfdir}/sysconfig/%{name}
 install -D -p -m 0644 misc/rasdaemon.service %{buildroot}/%{_unitdir}/rasdaemon.service
 install -D -p -m 0644 misc/ras-mc-ctl.service %{buildroot}%{_unitdir}/ras-mc-ctl.service
 rm INSTALL %{buildroot}/usr/include/*.h
@@ -56,6 +57,7 @@ rm INSTALL %{buildroot}/usr/include/*.h
 %{_unitdir}/*.service
 %{_sharedstatedir}/rasdaemon
 %{_sysconfdir}/ras/dimm_labels.d
+%config(noreplace) %{_sysconfdir}/sysconfig/%{name}
 
 %changelog
 
diff --git a/ras-events.c b/ras-events.c
index 511c93d..8a56887 100644
--- a/ras-events.c
+++ b/ras-events.c
@@ -39,6 +39,7 @@
 #include "ras-diskerror-handler.h"
 #include "ras-record.h"
 #include "ras-logger.h"
+#include "ras-page-isolation.h"
 
 /*
  * Polling time, if read() doesn't block. Currently, trace_pipe_raw never
@@ -798,6 +799,11 @@ int handle_ras_events(int record_events)
 	ras->page_size = page_size;
 	ras->record_events = record_events;
 
+#ifdef HAVE_MEMORY_CE_PFA
+	/* FIXME: enable memory isolation unconditionally */
+	ras_page_account_init();
+#endif
+
 	rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event",
 			       ras_mc_event_handler, NULL, MC_EVENT);
 	if (!rc)
diff --git a/ras-mc-handler.c b/ras-mc-handler.c
index deb7e05..42b05cd 100644
--- a/ras-mc-handler.c
+++ b/ras-mc-handler.c
@@ -23,6 +23,7 @@
 #include "ras-mc-handler.h"
 #include "ras-record.h"
 #include "ras-logger.h"
+#include "ras-page-isolation.h"
 #include "ras-report.h"
 
 int ras_mc_event_handler(struct trace_seq *s,
@@ -183,6 +184,12 @@ int ras_mc_event_handler(struct trace_seq *s,
 
 	ras_store_mc_event(ras, &ev);
 
+#ifdef HAVE_MEMORY_CE_PFA
+	/* Account page corrected errors */
+	if (!strcmp(ev.error_type, "Corrected"))
+		ras_record_page_error(ev.address, ev.error_count, now);
+#endif
+
 #ifdef HAVE_ABRT_REPORT
 	/* Report event to ABRT */
 	ras_report_mc_event(ras, &ev);
diff --git a/ras-page-isolation.c b/ras-page-isolation.c
new file mode 100644
index 0000000..50e4406
--- /dev/null
+++ b/ras-page-isolation.c
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+*/
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "ras-logger.h"
+#include "ras-page-isolation.h"
+
+#define PARSED_ENV_LEN 50
+static const struct config threshold_units[] = {
+	{ "m",	1000 },
+	{ "k",	1000 },
+	{ "",	1    },
+	{}
+};
+
+static const struct config cycle_units[] = {
+	{ "d",	24 },
+	{ "h",	60 },
+	{ "m",	60 },
+	{ "s",  1  },
+	{}
+};
+
+static struct isolation threshold = {
+	.name = "PAGE_CE_THRESHOLD",
+	.units = threshold_units,
+	.env = "50",
+	.unit = "",
+};
+
+static struct isolation cycle = {
+	.name = "PAGE_CE_REFRESH_CYCLE",
+	.units = cycle_units,
+	.env = "24h",
+	.unit = "h",
+};
+
+static const char *kernel_offline[] = {
+	[OFFLINE_SOFT]		 = "/sys/devices/system/memory/soft_offline_page",
+	[OFFLINE_HARD]		 = "/sys/devices/system/memory/hard_offline_page",
+	[OFFLINE_SOFT_THEN_HARD] = "/sys/devices/system/memory/soft_offline_page",
+};
+
+static const struct config offline_choice[] = {
+	{ "off",		OFFLINE_OFF },
+	{ "account",		OFFLINE_ACCOUNT },
+	{ "soft",		OFFLINE_SOFT },
+	{ "hard",		OFFLINE_HARD },
+	{ "soft-then-hard",	OFFLINE_SOFT_THEN_HARD },
+	{}
+};
+
+static const char *page_state[] = {
+	[PAGE_ONLINE]		= "online",
+	[PAGE_OFFLINE]		= "offlined",
+	[PAGE_OFFLINE_FAILED]	= "offline-failed",
+};
+
+static enum otype offline = OFFLINE_SOFT;
+static struct rb_root page_records;
+
+static void page_offline_init(void)
+{
+	const char *env = "PAGE_CE_ACTION";
+	char *choice = getenv(env);
+	const struct config *c = NULL;
+	int matched = 0;
+
+	if (choice) {
+		for (c = offline_choice; c->name; c++) {
+			if (!strcasecmp(choice, c->name)) {
+				offline = c->val;
+				matched = 1;
+				break;
+			}
+		}
+	}
+
+	if (!matched)
+		log(TERM, LOG_INFO, "Improper %s, set to default soft\n", env);
+
+	if (offline > OFFLINE_ACCOUNT && access(kernel_offline[offline], W_OK)) {
+		log(TERM, LOG_INFO, "Kernel does not support page offline interface\n");
+		offline = OFFLINE_ACCOUNT;
+	}
+
+	log(TERM, LOG_INFO, "Page offline choice on Corrected Errors is %s\n",
+	    offline_choice[offline].name);
+}
+
+static void parse_isolation_env(struct isolation *config)
+{
+	char *env = getenv(config->name);
+	char *unit = NULL;
+	const struct config *units = NULL;
+	int i, no_unit;
+	int valid = 0;
+	int unit_matched = 0;
+	unsigned long value, tmp;
+
+	/* check if env is vaild */
+	if (env && strlen(env)) {
+		/* All the character before unit must be digit */
+		for (i = 0; i < strlen(env) - 1; i++) {
+			if (!isdigit(env[i]))
+				goto parse;
+		}
+		if (sscanf(env, "%lu", &value) < 1 || !value)
+			goto parse;
+		/* check if the unit is vaild */
+		unit = env + strlen(env) - 1;
+		/* no unit, all the character are value character */
+		if (isdigit(*unit)) {
+			valid = 1;
+			no_unit = 1;
+			goto parse;
+		}
+		for (units = config->units; units->name; units++) {
+			/* value character and unit character are both valid */
+			if (!strcasecmp(unit, units->name)) {
+				valid = 1;
+				no_unit = 0;
+				break;
+			}
+		}
+	}
+
+parse:
+	/* if invalid, use default env */
+	if (valid) {
+		config->env = env;
+		if (!no_unit)
+			config->unit = unit;
+	} else {
+		 log(TERM, LOG_INFO, "Improper %s, set to default %s.\n",
+				 config->name, config->env);
+	}
+
+	/* if env value string is greater than ulong_max, truncate the last digit */
+	sscanf(config->env, "%lu", &value);
+	for (units = config->units; units->name; units++) {
+		if (!strcasecmp(config->unit, units->name))
+			unit_matched = 1;
+		if (unit_matched) {
+			tmp = value;
+			value *= units->val;
+			if (tmp != 0 && value / tmp != units->val)
+				config->overflow = true;
+		}
+	}
+	config->val = value;
+	/* In order to output value and unit perfectly */
+	config->unit = no_unit ? config->unit : "";
+}
+
+static void parse_env_string(struct isolation *config, char *str)
+{
+	int i;
+
+	if (config->overflow) {
+		/* when overflow, use basic unit */
+		for (i = 0; config->units[i].name; i++) ;
+		sprintf(str, "%lu%s", config->val, config->units[i-1].name);
+		log(TERM, LOG_INFO, "%s is set overflow(%s), truncate it\n",
+				config->name, config->env);
+	} else {
+		sprintf(str, "%s%s", config->env, config->unit);
+	}
+}
+
+static void page_isolation_init(void)
+{
+	char threshold_string[PARSED_ENV_LEN];
+	char cycle_string[PARSED_ENV_LEN];
+	/**
+	 * It's unnecessary to parse threshold configuration when offline
+	 * choice is off.
+	 */
+	if (offline == OFFLINE_OFF)
+		return;
+
+	parse_isolation_env(&threshold);
+	parse_isolation_env(&cycle);
+	parse_env_string(&threshold, threshold_string);
+	parse_env_string(&cycle, cycle_string);
+	log(TERM, LOG_INFO, "Threshold of memory Corrected Errors is %s / %s\n",
+			threshold_string, cycle_string);
+}
+
+void ras_page_account_init(void)
+{
+	page_offline_init();
+	page_isolation_init();
+}
+
+static int do_page_offline(unsigned long long addr, enum otype type)
+{
+	FILE *offline_file;
+	int err;
+
+	offline_file = fopen(kernel_offline[type], "w");
+	if (!offline_file)
+		return -1;
+
+	fprintf(offline_file, "%#llx", addr);
+	err = ferror(offline_file) ? -1 : 0;
+	fclose(offline_file);
+
+	return err;
+}
+
+static void page_offline(struct page_record *pr)
+{
+	unsigned long long addr = pr->addr;
+	int ret;
+
+	/* Offlining page is not required */
+	if (offline <= OFFLINE_ACCOUNT)
+		return;
+
+	/* Ignore offlined pages */
+	if (pr->offlined != PAGE_ONLINE)
+		return;
+
+	/* Time to silence this noisy page */
+	if (offline == OFFLINE_SOFT_THEN_HARD) {
+		ret = do_page_offline(addr, OFFLINE_SOFT);
+		if (ret < 0)
+			ret = do_page_offline(addr, OFFLINE_HARD);
+	} else {
+		ret = do_page_offline(addr, offline);
+	}
+
+	pr->offlined = ret < 0 ? PAGE_OFFLINE_FAILED : PAGE_OFFLINE;
+
+	log(TERM, LOG_INFO, "Result of offlining page at %#llx: %s\n",
+	    addr, page_state[pr->offlined]);
+}
+
+static void page_record(struct page_record *pr, unsigned count, time_t time)
+{
+	unsigned long period = time - pr->start;
+	unsigned long tolerate;
+
+	if (period >= cycle.val) {
+		/**
+		 * Since we don't refresh automatically, it is possible that the period
+		 * between two occurences will be longer than the pre-configured refresh cycle.
+		 * In this case, we tolerate the frequency of the whole period up to
+		 * the pre-configured threshold.
+		 */
+		tolerate = (period / (double)cycle.val) * threshold.val;
+		pr->count -= (tolerate > pr->count) ? pr->count : tolerate;
+		pr->start = time;
+		pr->excess = 0;
+	}
+
+	pr->count += count;
+	if (pr->count >= threshold.val) {
+		log(TERM, LOG_INFO, "Corrected Errors at %#llx exceeded threshold\n", pr->addr);
+
+		/**
+		 * Backup ce count of current cycle to enable next round, which actually
+		 * should never happen if we can disable overflow completely in the same
+		 * time unit (but sadly we can't).
+		 */
+		pr->excess += pr->count;
+		pr->count = 0;
+		page_offline(pr);
+	}
+}
+
+static struct page_record *page_lookup_insert(unsigned long long addr)
+{
+	struct rb_node **entry = &page_records.rb_node;
+	struct rb_node *parent = NULL;
+	struct page_record *pr = NULL, *find = NULL;
+
+	while (*entry) {
+		parent = *entry;
+		pr = rb_entry(parent, struct page_record, entry);
+		if (addr == pr->addr) {
+			return pr;
+		} else if (addr < pr->addr) {
+			entry = &(*entry)->rb_left;
+		} else {
+			entry = &(*entry)->rb_right;
+		}
+	}
+
+	find = calloc(1, sizeof(struct page_record));
+	if (!find) {
+		log(TERM, LOG_ERR, "No memory for page records\n");
+		return NULL;
+	}
+
+	find->addr = addr;
+	rb_link_node(&find->entry, parent, entry);
+	rb_insert_color(&find->entry, &page_records);
+
+	return find;
+}
+
+void ras_record_page_error(unsigned long long addr, unsigned count, time_t time)
+{
+	struct page_record *pr = NULL;
+
+	if (offline == OFFLINE_OFF)
+		return;
+
+	pr = page_lookup_insert(addr & PAGE_MASK);
+	if (pr) {
+		if (!pr->start)
+			pr->start = time;
+		page_record(pr, count, time);
+	}
+}
diff --git a/ras-page-isolation.h b/ras-page-isolation.h
new file mode 100644
index 0000000..3d03cef
--- /dev/null
+++ b/ras-page-isolation.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+*/
+
+#ifndef __RAS_PAGE_ISOLATION_H
+#define __RAS_PAGE_ISOLATION_H
+
+#include <time.h>
+#include <stdbool.h>
+#include "rbtree.h"
+
+#define PAGE_SHIFT		12
+#define PAGE_SIZE		(1 << PAGE_SHIFT)
+#define PAGE_MASK		(~(PAGE_SIZE-1))
+
+struct config {
+	char			*name;
+	unsigned long   val;
+};
+
+enum otype {
+	OFFLINE_OFF,
+	OFFLINE_ACCOUNT,
+	OFFLINE_SOFT,
+	OFFLINE_HARD,
+	OFFLINE_SOFT_THEN_HARD,
+};
+
+enum pstate {
+	PAGE_ONLINE,
+	PAGE_OFFLINE,
+	PAGE_OFFLINE_FAILED,
+};
+
+struct page_record {
+	struct rb_node		entry;
+	unsigned long long	addr;
+	time_t			start;
+	enum pstate		offlined;
+	unsigned long		count;
+	unsigned long		excess;
+};
+
+struct isolation {
+	char			*name;
+	char			*env;
+	const struct config	*units;
+	unsigned long		val;
+	bool			overflow;
+	char			*unit;
+};
+
+void ras_page_account_init(void);
+void ras_record_page_error(unsigned long long addr, unsigned count, time_t time);
+
+#endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/2] rasdaemon: add support for memory Corrected Error predictive failure analysis
  2020-05-29  8:24 [PATCH 0/2] rasdaemon: add support for memory Corrected Error predictive failure analysis lvying6
                   ` (2 preceding siblings ...)
  2020-06-20 12:26 ` [PATCH v2 rasdaemon 0/2] support memory CE " lvying6
@ 2020-07-21 12:26 ` Mauro Carvalho Chehab
  3 siblings, 0 replies; 11+ messages in thread
From: Mauro Carvalho Chehab @ 2020-07-21 12:26 UTC (permalink / raw)
  To: lvying6; +Cc: linux-edac, guanyalong, wuyun.wu, tanxiaofei

Em Fri, 29 May 2020 16:24:21 +0800
lvying6 <lvying6@huawei.com> escreveu:

> rasdaemon: add support for memory Corrected Error predictive failure analysis
> 
> wuyun (2):
>   rasdaemon: add rbtree support for page record
>   rasdaemon: add support for memory Corrected Error predictive failure
>     analysis

Patches merged, thanks.

Thanks,
Mauro

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, back to index

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-29  8:24 [PATCH 0/2] rasdaemon: add support for memory Corrected Error predictive failure analysis lvying6
2020-05-29  8:24 ` [PATCH 1/2] rasdaemon: add rbtree support for page record lvying6
2020-05-29 12:24   ` Mauro Carvalho Chehab
2020-06-20 11:57     ` lvying
2020-05-29  8:24 ` [PATCH 2/2] rasdaemon: add support for memory Corrected Error predictive failure analysis lvying6
2020-05-29 14:31   ` Mauro Carvalho Chehab
2020-06-20 11:59     ` lvying
2020-06-20 12:26 ` [PATCH v2 rasdaemon 0/2] support memory CE " lvying6
2020-06-20 12:26   ` [PATCH v2 rasdaemon 1/2] rasdaemon: add rbtree support for page record lvying6
2020-06-20 12:26   ` [PATCH v2 rasdaemon 2/2] rasdaemon: add support for memory Corrected Error predictive failure analysis lvying6
2020-07-21 12:26 ` [PATCH 0/2] " Mauro Carvalho Chehab

Linux-EDAC Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-edac/0 linux-edac/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-edac linux-edac/ https://lore.kernel.org/linux-edac \
		linux-edac@vger.kernel.org
	public-inbox-index linux-edac

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-edac


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git