* [PATCH V4 2/4] mm: frontswap: core code
From: Dan Magenheimer @ 2011-05-27 19:48 UTC
  To: linux-kernel, linux-mm, jeremy, hughd, ngupta, konrad.wilk,
	JBeulich, kurt.hackel, npiggin, akpm, riel, hannes, matthew,
	chris.mason, dan.magenheimer

[PATCH V4 2/4] mm: frontswap: core code

Core frontswap interface between swap subsystem hooks and
frontswap backend via frontswap_ops.
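
A rough sketch of how a backend might hook in (illustrative only, not
part of this patch; the stub_* names are hypothetical placeholders for
a real backend such as a tmem driver):

#include <linux/module.h>
#include <linux/frontswap.h>

/* stub callbacks: refuse every put so swap falls back to the device */
static void stub_init(unsigned type)
{
}

static int stub_put_page(unsigned type, pgoff_t offset, struct page *page)
{
	return -1;	/* not stored; caller writes to the real swap device */
}

static int stub_get_page(unsigned type, pgoff_t offset, struct page *page)
{
	return -1;	/* nothing was ever stored, so gets always fail */
}

static void stub_flush_page(unsigned type, pgoff_t offset)
{
}

static void stub_flush_area(unsigned type)
{
}

static struct frontswap_ops stub_ops = {
	.init		= stub_init,
	.put_page	= stub_put_page,
	.get_page	= stub_get_page,
	.flush_page	= stub_flush_page,
	.flush_area	= stub_flush_area,
};

static int __init stub_backend_init(void)
{
	/* frontswap_register_ops returns the previously registered ops */
	frontswap_register_ops(&stub_ops);
	return 0;
}
module_init(stub_backend_init);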

Credits: frontswap_ops design derived from Jeremy Fitzhardinge's
design for tmem; sysfs code modelled after mm/ksm.c

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>

Diffstat:
 include/linux/frontswap.h                |   86 +++++
 mm/frontswap.c                           |  331 +++++++++++++++++++++
 2 files changed, 417 insertions(+)

--- linux-2.6.39/include/linux/frontswap.h	1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.39-frontswap/include/linux/frontswap.h	2011-05-26 15:37:25.198065401 -0600
@@ -0,0 +1,86 @@
+#ifndef _LINUX_FRONTSWAP_H
+#define _LINUX_FRONTSWAP_H
+
+#include <linux/swap.h>
+#include <linux/mm.h>
+
+struct frontswap_ops {
+	void (*init)(unsigned);
+	int (*put_page)(unsigned, pgoff_t, struct page *);
+	int (*get_page)(unsigned, pgoff_t, struct page *);
+	void (*flush_page)(unsigned, pgoff_t);
+	void (*flush_area)(unsigned);
+};
+
+extern int frontswap_enabled;
+extern struct frontswap_ops
+	frontswap_register_ops(struct frontswap_ops *ops);
+extern void frontswap_shrink(unsigned long);
+extern unsigned long frontswap_curr_pages(void);
+
+extern void frontswap_init(unsigned type);
+extern int __frontswap_put_page(struct page *page);
+extern int __frontswap_get_page(struct page *page);
+extern void __frontswap_flush_page(unsigned, pgoff_t);
+extern void __frontswap_flush_area(unsigned);
+
+#ifndef CONFIG_FRONTSWAP
+/* all inline routines become no-ops and all externs are ignored */
+#define frontswap_enabled (0)
+#endif
+
+static inline int frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
+{
+	int ret = 0;
+
+	if (frontswap_enabled && sis->frontswap_map)
+		ret = test_bit(offset % BITS_PER_LONG,
+			&sis->frontswap_map[offset/BITS_PER_LONG]);
+	return ret;
+}
+
+static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
+{
+	if (frontswap_enabled && sis->frontswap_map)
+		set_bit(offset % BITS_PER_LONG,
+			&sis->frontswap_map[offset/BITS_PER_LONG]);
+}
+
+static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
+{
+	if (frontswap_enabled && sis->frontswap_map)
+		clear_bit(offset % BITS_PER_LONG,
+			&sis->frontswap_map[offset/BITS_PER_LONG]);
+}
+
+static inline int frontswap_put_page(struct page *page)
+{
+	int ret = -1;
+
+	if (frontswap_enabled)
+		ret = __frontswap_put_page(page);
+	return ret;
+}
+
+static inline int frontswap_get_page(struct page *page)
+{
+	int ret = -1;
+
+	if (frontswap_enabled)
+		ret = __frontswap_get_page(page);
+	return ret;
+}
+
+static inline void frontswap_flush_page(unsigned type, pgoff_t offset)
+{
+	if (frontswap_enabled)
+		__frontswap_flush_page(type, offset);
+}
+
+static inline void frontswap_flush_area(unsigned type)
+{
+	if (frontswap_enabled)
+		__frontswap_flush_area(type);
+}
+
+#endif /* _LINUX_FRONTSWAP_H */
--- linux-2.6.39/mm/frontswap.c	1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.39-frontswap/mm/frontswap.c	2011-05-26 15:37:25.254816281 -0600
@@ -0,0 +1,331 @@
+/*
+ * Frontswap frontend
+ *
+ * This code provides the generic "frontend" layer to call a matching
+ * "backend" driver implementation of frontswap.  See
+ * Documentation/vm/frontswap.txt for more information.
+ *
+ * Copyright (C) 2009-2010 Oracle Corp.  All rights reserved.
+ * Author: Dan Magenheimer
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/sysctl.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/proc_fs.h>
+#include <linux/security.h>
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/frontswap.h>
+#include <linux/swapfile.h>
+
+/*
+ * frontswap_ops is set by frontswap_register_ops to contain the pointers
+ * to the frontswap "backend" implementation functions.
+ */
+static struct frontswap_ops frontswap_ops;
+
+/*
+ * This global enablement flag reduces overhead on systems where frontswap_ops
+ * has not been registered, so is preferred to the slower alternative: a
+ * function call that checks a non-global.
+ */
+int frontswap_enabled;
+EXPORT_SYMBOL(frontswap_enabled);
+
+/* useful stats available in /sys/kernel/mm/frontswap */
+static unsigned long frontswap_gets;
+static unsigned long frontswap_succ_puts;
+static unsigned long frontswap_failed_puts;
+static unsigned long frontswap_flushes;
+
+/*
+ * register operations for frontswap; the previous ops are returned so
+ * that multiple backends and possible nesting can be detected
+ */
+struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops)
+{
+	struct frontswap_ops old = frontswap_ops;
+
+	frontswap_ops = *ops;
+	frontswap_enabled = 1;
+	return old;
+}
+EXPORT_SYMBOL(frontswap_register_ops);
+
+/* Called when a swap device is swapon'd */
+void frontswap_init(unsigned type)
+{
+	if (frontswap_enabled)
+		(*frontswap_ops.init)(type);
+}
+EXPORT_SYMBOL(frontswap_init);
+
+/*
+ * "Put" data from a page to frontswap and associate it with the page's
+ * swaptype and offset.  Page must be locked and in the swap cache.
+ * If frontswap already contains a page with matching swaptype and
+ * offset, the frontswap implementation may either overwrite the data
+ * and return success or flush the page from frontswap and return failure.
+ */
+int __frontswap_put_page(struct page *page)
+{
+	int ret = -1, dup = 0;
+	swp_entry_t entry = { .val = page_private(page), };
+	int type = swp_type(entry);
+	struct swap_info_struct *sis = swap_info[type];
+	pgoff_t offset = swp_offset(entry);
+
+	BUG_ON(!PageLocked(page));
+	if (frontswap_test(sis, offset))
+		dup = 1;
+	ret = (*frontswap_ops.put_page)(type, offset, page);
+	if (ret == 0) {
+		frontswap_set(sis, offset);
+		frontswap_succ_puts++;
+		if (!dup)
+			sis->frontswap_pages++;
+	} else if (dup) {
+		/*
+		  failed dup always results in automatic flush of
+		  the (older) page from frontswap
+		 */
+		frontswap_clear(sis, offset);
+		sis->frontswap_pages--;
+		frontswap_failed_puts++;
+	} else
+		frontswap_failed_puts++;
+	return ret;
+}
+
+/*
+ * "Get" data from frontswap associated with swaptype and offset that were
+ * specified when the data was put to frontswap and use it to fill the
+ * specified page with data. Page must be locked and in the swap cache
+ */
+int __frontswap_get_page(struct page *page)
+{
+	int ret = -1;
+	swp_entry_t entry = { .val = page_private(page), };
+	int type = swp_type(entry);
+	struct swap_info_struct *sis = swap_info[type];
+	pgoff_t offset = swp_offset(entry);
+
+	BUG_ON(!PageLocked(page));
+	if (frontswap_test(sis, offset))
+		ret = (*frontswap_ops.get_page)(type, offset, page);
+	if (ret == 0)
+		frontswap_gets++;
+	return ret;
+}
+
+/*
+ * Flush any data from frontswap associated with the specified swaptype
+ * and offset so that a subsequent "get" will fail.
+ */
+void __frontswap_flush_page(unsigned type, pgoff_t offset)
+{
+	struct swap_info_struct *sis = swap_info[type];
+
+	if (frontswap_test(sis, offset)) {
+		(*frontswap_ops.flush_page)(type, offset);
+		sis->frontswap_pages--;
+		frontswap_clear(sis, offset);
+		frontswap_flushes++;
+	}
+}
+
+/*
+ * Flush all data from frontswap associated with all offsets for the
+ * specified swaptype.
+ */
+void __frontswap_flush_area(unsigned type)
+{
+	struct swap_info_struct *sis = swap_info[type];
+
+	(*frontswap_ops.flush_area)(type);
+	sis->frontswap_pages = 0;
+	memset(sis->frontswap_map, 0, sis->max / sizeof(long));
+}
+
+/*
+ * Frontswap, like a true swap device, may unnecessarily retain pages
+ * under certain circumstances; "shrink" frontswap is essentially a
+ * "partial swapoff" and works by calling try_to_unuse to unuse enough
+ * frontswap pages to -- subject to memory constraints -- reduce the
+ * number of pages in frontswap.
+ */
+void frontswap_shrink(unsigned long target_pages)
+{
+	int wrapped = 0;
+	bool locked = false;
+
+	for (wrapped = 0; wrapped <= 3; wrapped++) {
+
+		struct swap_info_struct *si = NULL;
+		unsigned long total_pages = 0, total_pages_to_unuse;
+		unsigned long pages = 0, unuse_pages = 0;
+		int type;
+
+		/*
+		 * we don't want to hold swap_lock while doing a very
+		 * lengthy try_to_unuse, but swap_list may change
+		 * so restart scan from swap_list.head each time
+		 */
+		spin_lock(&swap_lock);
+		locked = true;
+		total_pages = 0;
+		for (type = swap_list.head; type >= 0; type = si->next) {
+			si = swap_info[type];
+			total_pages += si->frontswap_pages;
+		}
+		if (total_pages <= target_pages)
+			goto out;
+		total_pages_to_unuse = total_pages - target_pages;
+		for (type = swap_list.head; type >= 0; type = si->next) {
+			si = swap_info[type];
+			if (total_pages_to_unuse < si->frontswap_pages)
+				pages = unuse_pages = total_pages_to_unuse;
+			else {
+				pages = si->frontswap_pages;
+				unuse_pages = 0; /* unuse all */
+			}
+			if (security_vm_enough_memory_kern(pages))
+				continue;
+			vm_unacct_memory(pages);
+			break;
+		}
+		if (type < 0)
+			goto out;
+		locked = false;
+		spin_unlock(&swap_lock);
+		try_to_unuse(type, true, unuse_pages);
+	}
+
+out:
+	if (locked)
+		spin_unlock(&swap_lock);
+	return;
+}
+EXPORT_SYMBOL(frontswap_shrink);
+
+/*
+ * count and return the number of frontswap pages across all
+ * swap devices.  This is exported so that a kernel module can
+ * determine current usage without reading sysfs.
+ */
+unsigned long frontswap_curr_pages(void)
+{
+	int type;
+	unsigned long totalpages = 0;
+	struct swap_info_struct *si = NULL;
+
+	spin_lock(&swap_lock);
+	for (type = swap_list.head; type >= 0; type = si->next) {
+		si = swap_info[type];
+		totalpages += si->frontswap_pages;
+	}
+	spin_unlock(&swap_lock);
+	return totalpages;
+}
+EXPORT_SYMBOL(frontswap_curr_pages);
+
+#ifdef CONFIG_SYSFS
+
+/* see Documentation/ABI/xxx/sysfs-kernel-mm-frontswap */
+
+#define FRONTSWAP_ATTR_RO(_name) \
+	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+#define FRONTSWAP_ATTR(_name) \
+	static struct kobj_attribute _name##_attr = \
+		__ATTR(_name, 0644, _name##_show, _name##_store)
+
+static ssize_t curr_pages_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", frontswap_curr_pages());
+}
+
+static ssize_t curr_pages_store(struct kobject *kobj,
+			       struct kobj_attribute *attr,
+			       const char *buf, size_t count)
+{
+	unsigned long target_pages;
+	int err;
+
+	err = strict_strtoul(buf, 10, &target_pages);
+	if (err)
+		return -EINVAL;
+
+	frontswap_shrink(target_pages);
+
+	return count;
+}
+FRONTSWAP_ATTR(curr_pages);
+
+static ssize_t succ_puts_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", frontswap_succ_puts);
+}
+FRONTSWAP_ATTR_RO(succ_puts);
+
+static ssize_t failed_puts_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", frontswap_failed_puts);
+}
+FRONTSWAP_ATTR_RO(failed_puts);
+
+static ssize_t gets_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", frontswap_gets);
+}
+FRONTSWAP_ATTR_RO(gets);
+
+static ssize_t flushes_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", frontswap_flushes);
+}
+FRONTSWAP_ATTR_RO(flushes);
+
+static struct attribute *frontswap_attrs[] = {
+	&curr_pages_attr.attr,
+	&succ_puts_attr.attr,
+	&failed_puts_attr.attr,
+	&gets_attr.attr,
+	&flushes_attr.attr,
+	NULL,
+};
+
+static struct attribute_group frontswap_attr_group = {
+	.attrs = frontswap_attrs,
+	.name = "frontswap",
+};
+
+#endif /* CONFIG_SYSFS */
+
+static int __init init_frontswap(void)
+{
+#ifdef CONFIG_SYSFS
+	int err;
+
+	err = sysfs_create_group(mm_kobj, &frontswap_attr_group);
+#endif /* CONFIG_SYSFS */
+	return 0;
+}
+
+static void __exit exit_frontswap(void)
+{
+	frontswap_shrink(0UL);
+}
+
+module_init(init_frontswap);
+module_exit(exit_frontswap);


* Re: [PATCH V4 2/4] mm: frontswap: core code
From: Konrad Rzeszutek Wilk @ 2011-06-07 17:53 UTC
  To: Dan Magenheimer
  Cc: linux-kernel, linux-mm, jeremy, hughd, ngupta, JBeulich,
	kurt.hackel, npiggin, akpm, riel, hannes, matthew, chris.mason

On Fri, May 27, 2011 at 12:48:45PM -0700, Dan Magenheimer wrote:
> [PATCH V4 2/4] mm: frontswap: core code
> 
> Core frontswap interface between swap subsystem hooks and
> frontswap backend via frontswap_ops.
> 
> Credits: Frontswap_ops design derived from Jeremy Fitzhardinge
> design for tmem; sysfs code modelled after mm/ksm.c
> 
> Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
> 
> Diffstat:
>  include/linux/frontswap.h                |   86 +++++
>  mm/frontswap.c                           |  331 +++++++++++++++++++++
>  2 files changed, 417 insertions(+)
> 
> --- linux-2.6.39/include/linux/frontswap.h	1969-12-31 17:00:00.000000000 -0700
> +++ linux-2.6.39-frontswap/include/linux/frontswap.h	2011-05-26 15:37:25.198065401 -0600
> @@ -0,0 +1,86 @@
> +#ifndef _LINUX_FRONTSWAP_H
> +#define _LINUX_FRONTSWAP_H
> +
> +#include <linux/swap.h>
> +#include <linux/mm.h>
> +
> +struct frontswap_ops {
> +	void (*init)(unsigned);
> +	int (*put_page)(unsigned, pgoff_t, struct page *);
> +	int (*get_page)(unsigned, pgoff_t, struct page *);
> +	void (*flush_page)(unsigned, pgoff_t);
> +	void (*flush_area)(unsigned);
> +};
> +
> +extern int frontswap_enabled;
> +extern struct frontswap_ops
> +	frontswap_register_ops(struct frontswap_ops *ops);
> +extern void frontswap_shrink(unsigned long);
> +extern unsigned long frontswap_curr_pages(void);
> +
> +extern void frontswap_init(unsigned type);
> +extern int __frontswap_put_page(struct page *page);
> +extern int __frontswap_get_page(struct page *page);
> +extern void __frontswap_flush_page(unsigned, pgoff_t);
> +extern void __frontswap_flush_area(unsigned);
> +
> +#ifndef CONFIG_FRONTSWAP
> +/* all inline routines become no-ops and all externs are ignored */
> +#define frontswap_enabled (0)
> +#endif
> +
> +static inline int frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
> +{
> +	int ret = 0;
> +
> +	if (frontswap_enabled && sis->frontswap_map)
> +		ret = test_bit(offset % BITS_PER_LONG,
> +			&sis->frontswap_map[offset/BITS_PER_LONG]);
> +	return ret;
> +}
> +
> +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
> +{
> +	if (frontswap_enabled && sis->frontswap_map)
> +		set_bit(offset % BITS_PER_LONG,
> +			&sis->frontswap_map[offset/BITS_PER_LONG]);
> +}
> +
> +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
> +{
> +	if (frontswap_enabled && sis->frontswap_map)
> +		clear_bit(offset % BITS_PER_LONG,
> +			&sis->frontswap_map[offset/BITS_PER_LONG]);
> +}
> +
> +static inline int frontswap_put_page(struct page *page)
> +{
> +	int ret = -1;
> +
> +	if (frontswap_enabled)
> +		ret = __frontswap_put_page(page);
> +	return ret;
> +}
> +
> +static inline int frontswap_get_page(struct page *page)
> +{
> +	int ret = -1;
> +
> +	if (frontswap_enabled)
> +		ret = __frontswap_get_page(page);
> +	return ret;
> +}
> +
> +static inline void frontswap_flush_page(unsigned type, pgoff_t offset)
> +{
> +	if (frontswap_enabled)
> +		__frontswap_flush_page(type, offset);
> +}
> +
> +static inline void frontswap_flush_area(unsigned type)
> +{
> +	if (frontswap_enabled)
> +		__frontswap_flush_area(type);
> +}
> +
> +#endif /* _LINUX_FRONTSWAP_H */
> --- linux-2.6.39/mm/frontswap.c	1969-12-31 17:00:00.000000000 -0700
> +++ linux-2.6.39-frontswap/mm/frontswap.c	2011-05-26 15:37:25.254816281 -0600
> @@ -0,0 +1,331 @@
> +/*
> + * Frontswap frontend
> + *
> + * This code provides the generic "frontend" layer to call a matching
> + * "backend" driver implementation of frontswap.  See
> + * Documentation/vm/frontswap.txt for more information.
> + *
> + * Copyright (C) 2009-2010 Oracle Corp.  All rights reserved.
> + * Author: Dan Magenheimer
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/mman.h>
> +#include <linux/sysctl.h>
> +#include <linux/swap.h>
> +#include <linux/swapops.h>
> +#include <linux/proc_fs.h>
> +#include <linux/security.h>
> +#include <linux/capability.h>
> +#include <linux/module.h>
> +#include <linux/uaccess.h>
> +#include <linux/frontswap.h>
> +#include <linux/swapfile.h>
> +
> +/*
> + * frontswap_ops is set by frontswap_register_ops to contain the pointers
> + * to the frontswap "backend" implementation functions.
> + */
> +static struct frontswap_ops frontswap_ops;
> +
> +/*
> + * This global enablement flag reduces overhead on systems where frontswap_ops
> + * has not been registered, so is preferred to the slower alternative: a
> + * function call that checks a non-global.
> + */
> +int frontswap_enabled;
> +EXPORT_SYMBOL(frontswap_enabled);
> +
> +/* useful stats available in /sys/kernel/mm/frontswap */
> +static unsigned long frontswap_gets;
> +static unsigned long frontswap_succ_puts;
> +static unsigned long frontswap_failed_puts;
> +static unsigned long frontswap_flushes;
> +
> +/*
> + * register operations for frontswap, returning previous thus allowing
> + * detection of multiple backends and possible nesting
> + */
> +struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops)
> +{
> +	struct frontswap_ops old = frontswap_ops;
> +
> +	frontswap_ops = *ops;
> +	frontswap_enabled = 1;
> +	return old;
> +}
> +EXPORT_SYMBOL(frontswap_register_ops);
> +
> +/* Called when a swap device is swapon'd */
> +void frontswap_init(unsigned type)
> +{
> +	if (frontswap_enabled)
> +		(*frontswap_ops.init)(type);
> +}
> +EXPORT_SYMBOL(frontswap_init);
> +
> +/*
> + * "Put" data from a page to frontswap and associate it with the page's
> + * swaptype and offset.  Page must be locked and in the swap cache.
> + * If frontswap already contains a page with matching swaptype and
> + * offset, the frontswap implementation may either overwrite the data
> + * and return success or flush the page from frontswap and return failure.
> + */
> +int __frontswap_put_page(struct page *page)
> +{
> +	int ret = -1, dup = 0;
> +	swp_entry_t entry = { .val = page_private(page), };
> +	int type = swp_type(entry);
> +	struct swap_info_struct *sis = swap_info[type];

There is no danger that swap_info[type] returns NULL?
Should you take the swap_lock? Or is that not necessary because
the caller already holds the lock? If so, it might be better to
name the function __frontswap_put_page_locked()?
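
Something along these lines is what I have in mind (just a sketch):

	struct swap_info_struct *sis = swap_info[type];

	if (sis == NULL)	/* no such swap device (any more)? */
		return -1;

though maybe the callers in the swap path already rule that out.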


> +	pgoff_t offset = swp_offset(entry);
> +
> +	BUG_ON(!PageLocked(page));
> +	if (frontswap_test(sis, offset))
> +		dup = 1;
> +	ret = (*frontswap_ops.put_page)(type, offset, page);
> +	if (ret == 0) {
> +		frontswap_set(sis, offset);
> +		frontswap_succ_puts++;
> +		if (!dup)
> +			sis->frontswap_pages++;
> +	} else if (dup) {
> +		/*
> +		  failed dup always results in automatic flush of
> +		  the (older) page from frontswap
> +		 */
> +		frontswap_clear(sis, offset);
> +		sis->frontswap_pages--;
> +		frontswap_failed_puts++;
> +	} else
> +		frontswap_failed_puts++;
> +	return ret;
> +}
> +
> +/*
> + * "Get" data from frontswap associated with swaptype and offset that were
> + * specified when the data was put to frontswap and use it to fill the
> + * specified page with data. Page must be locked and in the swap cache
> + */
> +int __frontswap_get_page(struct page *page)
> +{
> +	int ret = -1;
> +	swp_entry_t entry = { .val = page_private(page), };
> +	int type = swp_type(entry);
> +	struct swap_info_struct *sis = swap_info[type];
> +	pgoff_t offset = swp_offset(entry);
> +
> +	BUG_ON(!PageLocked(page));

Looks like swap_readpage already does this?

> +	if (frontswap_test(sis, offset))

So no checking if sis == NULL b/c the caller already does that? You might
want to stick a comment here.
> +		ret = (*frontswap_ops.get_page)(type, offset, page);
> +	if (ret == 0)
> +		frontswap_gets++;
> +	return ret;
> +}
> +
> +/*
> + * Flush any data from frontswap associated with the specified swaptype
> + * and offset so that a subsequent "get" will fail.
> + */
> +void __frontswap_flush_page(unsigned type, pgoff_t offset)
> +{
> +	struct swap_info_struct *sis = swap_info[type];
> +
> +	if (frontswap_test(sis, offset)) {
> +		(*frontswap_ops.flush_page)(type, offset);
> +		sis->frontswap_pages--;
> +		frontswap_clear(sis, offset);
> +		frontswap_flushes++;
> +	}
> +}
> +
> +/*
> + * Flush all data from frontswap associated with all offsets for the
> + * specified swaptype.
> + */
> +void __frontswap_flush_area(unsigned type)
> +{
> +	struct swap_info_struct *sis = swap_info[type];
> +
> +	(*frontswap_ops.flush_area)(type);
> +	sis->frontswap_pages = 0;
> +	memset(sis->frontswap_map, 0, sis->max / sizeof(long));
> +}
> +
> +/*
> + * Frontswap, like a true swap device, may unnecessarily retain pages
> + * under certain circumstances; "shrink" frontswap is essentially a
> + * "partial swapoff" and works by calling try_to_unuse to attempt to
> + * unuse enough frontswap pages to attempt to -- subject to memory
> + * constraints -- reduce the number of pages in frontswap
> + */
> +void frontswap_shrink(unsigned long target_pages)
> +{
> +	int wrapped = 0;
> +	bool locked = false;
> +
> +	for (wrapped = 0; wrapped <= 3; wrapped++) {

3? Is this value special? You might want to provide a comment
on why '3' is _the_ choice.
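
E.g. something like (sketch):

	/* rescan swap_list at most a few times if it keeps changing */
	#define FRONTSWAP_SHRINK_MAX_RESCANS	3

	for (wrapped = 0; wrapped <= FRONTSWAP_SHRINK_MAX_RESCANS; wrapped++) {

with the reason for the limit spelled out in the comment.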

> +
> +		struct swap_info_struct *si = NULL;
> +		unsigned long total_pages = 0, total_pages_to_unuse;
> +		unsigned long pages = 0, unuse_pages = 0;
> +		int type;
> +
> +		/*
> +		 * we don't want to hold swap_lock while doing a very
> +		 * lengthy try_to_unuse, but swap_list may change
> +		 * so restart scan from swap_list.head each time
> +		 */
> +		spin_lock(&swap_lock);
> +		locked = true;
> +		total_pages = 0;
> +		for (type = swap_list.head; type >= 0; type = si->next) {
> +			si = swap_info[type];
> +			total_pages += si->frontswap_pages;
> +		}
> +		if (total_pages <= target_pages)
> +			goto out;
> +		total_pages_to_unuse = total_pages - target_pages;


> +		for (type = swap_list.head; type >= 0; type = si->next) {
> +			si = swap_info[type];
> +			if (total_pages_to_unuse < si->frontswap_pages)
> +				pages = unuse_pages = total_pages_to_unuse;
> +			else {
> +				pages = si->frontswap_pages;

unused_pages sounds better than 'pages'.

> +				unuse_pages = 0; /* unuse all */

pages_unuse?  pages_to_unuse? Yeah, that sounds better.
> +			}
> +			if (security_vm_enough_memory_kern(pages))
> +				continue;
> +			vm_unacct_memory(pages);
> +			break;
> +		}
> +		if (type < 0)
> +			goto out;
> +		locked = false;
> +		spin_unlock(&swap_lock);
> +		try_to_unuse(type, true, unuse_pages);
> +	}
> +
> +out:
> +	if (locked)
> +		spin_unlock(&swap_lock);
> +	return;
> +}
> +EXPORT_SYMBOL(frontswap_shrink);
> +
> +/*
> + * count and return the number of frontswap pages across all
> + * swap devices.  This is exported so that a kernel module can
> + * determine current usage without reading sysfs.
> + */
> +unsigned long frontswap_curr_pages(void)
> +{
> +	int type;
> +	unsigned long totalpages = 0;
> +	struct swap_info_struct *si = NULL;
> +
> +	spin_lock(&swap_lock);
> +	for (type = swap_list.head; type >= 0; type = si->next) {
> +		si = swap_info[type];
> +		totalpages += si->frontswap_pages;

Uh, you don't need to check 'si' for NULL?
> +	}
> +	spin_unlock(&swap_lock);
> +	return totalpages;
> +}
> +EXPORT_SYMBOL(frontswap_curr_pages);
> +
> +#ifdef CONFIG_SYSFS
> +
> +/* see Documentation/ABI/xxx/sysfs-kernel-mm-frontswap */
> +
> +#define FRONTSWAP_ATTR_RO(_name) \
> +	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
> +#define FRONTSWAP_ATTR(_name) \
> +	static struct kobj_attribute _name##_attr = \
> +		__ATTR(_name, 0644, _name##_show, _name##_store)
> +
> +static ssize_t curr_pages_show(struct kobject *kobj,
> +			       struct kobj_attribute *attr, char *buf)
> +{
> +	return sprintf(buf, "%lu\n", frontswap_curr_pages());
> +}
> +
> +static ssize_t curr_pages_store(struct kobject *kobj,
> +			       struct kobj_attribute *attr,
> +			       const char *buf, size_t count)
> +{
> +	unsigned long target_pages;
> +	int err;
> +
> +	err = strict_strtoul(buf, 10, &target_pages);
> +	if (err)
> +		return -EINVAL;

How about just "if (strict..) return -EINVAL" and ditch the 'int err'?
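
i.e. (sketch):

	if (strict_strtoul(buf, 10, &target_pages))
		return -EINVAL;

	frontswap_shrink(target_pages);
	return count;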

> +
> +	frontswap_shrink(target_pages);
> +
> +	return count;
> +}
> +FRONTSWAP_ATTR(curr_pages);
> +
> +static ssize_t succ_puts_show(struct kobject *kobj,
> +			       struct kobj_attribute *attr, char *buf)
> +{
> +	return sprintf(buf, "%lu\n", frontswap_succ_puts);
> +}
> +FRONTSWAP_ATTR_RO(succ_puts);
> +
> +static ssize_t failed_puts_show(struct kobject *kobj,
> +			       struct kobj_attribute *attr, char *buf)
> +{
> +	return sprintf(buf, "%lu\n", frontswap_failed_puts);
> +}
> +FRONTSWAP_ATTR_RO(failed_puts);
> +
> +static ssize_t gets_show(struct kobject *kobj,
> +			       struct kobj_attribute *attr, char *buf)
> +{
> +	return sprintf(buf, "%lu\n", frontswap_gets);
> +}
> +FRONTSWAP_ATTR_RO(gets);
> +
> +static ssize_t flushes_show(struct kobject *kobj,
> +			       struct kobj_attribute *attr, char *buf)
> +{
> +	return sprintf(buf, "%lu\n", frontswap_flushes);
> +}
> +FRONTSWAP_ATTR_RO(flushes);
> +
> +static struct attribute *frontswap_attrs[] = {
> +	&curr_pages_attr.attr,
> +	&succ_puts_attr.attr,
> +	&failed_puts_attr.attr,
> +	&gets_attr.attr,
> +	&flushes_attr.attr,
> +	NULL,
> +};
> +
> +static struct attribute_group frontswap_attr_group = {
> +	.attrs = frontswap_attrs,
> +	.name = "frontswap",
> +};
> +
> +#endif /* CONFIG_SYSFS */
> +
> +static int __init init_frontswap(void)
> +{
> +#ifdef CONFIG_SYSFS
> +	int err;
> +
> +	err = sysfs_create_group(mm_kobj, &frontswap_attr_group);

So shouldn't you return 'err'?
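
Something like (sketch):

	static int __init init_frontswap(void)
	{
		int err = 0;
	#ifdef CONFIG_SYSFS
		err = sysfs_create_group(mm_kobj, &frontswap_attr_group);
	#endif
		return err;
	}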

> +#endif /* CONFIG_SYSFS */
> +	return 0;
> +}
> +
> +static void __exit exit_frontswap(void)
> +{
> +	frontswap_shrink(0UL);
> +}
> +
> +module_init(init_frontswap);
> +module_exit(exit_frontswap);

