linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] f2fs crypto: use per-inode tfm structure
@ 2015-05-22  0:40 Jaegeuk Kim
  2015-05-22  0:40 ` [PATCH 2/3] f2fs crypto: use bounce pages from mempool first Jaegeuk Kim
  2015-05-22  0:40 ` [PATCH 3/3] f2fs crypto: preallocate BIO_MAX_PAGES for writeback Jaegeuk Kim
  0 siblings, 2 replies; 11+ messages in thread
From: Jaegeuk Kim @ 2015-05-22  0:40 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel
  Cc: Jaegeuk Kim, Theodore Ts'o

This patch applies the following ext4 patch:

  ext4 crypto: use per-inode tfm structure

As suggested by Herbert Xu, we shouldn't allocate a new tfm each time
we read or write a page.  Instead we can use a single tfm hanging off
the inode's crypt_info structure for all of our encryption needs for
that inode, since the tfm can be used by multiple crypto requests in
parallel.

Also use cmpxchg() to avoid races that could result in crypt_info
structure getting doubly allocated or doubly freed.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/crypto.c       |  82 +++----------------------------------
 fs/f2fs/crypto_fname.c |  48 +---------------------
 fs/f2fs/crypto_key.c   | 107 ++++++++++++++++++++++++++++++++++++-------------
 fs/f2fs/dir.c          |   8 ++--
 fs/f2fs/f2fs.h         |   5 +--
 fs/f2fs/f2fs_crypto.h  |   4 --
 fs/f2fs/inode.c        |   2 +-
 fs/f2fs/namei.c        |   4 +-
 fs/f2fs/super.c        |   3 +-
 9 files changed, 96 insertions(+), 167 deletions(-)

diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c
index c6d1122..2c7819a 100644
--- a/fs/f2fs/crypto.c
+++ b/fs/f2fs/crypto.c
@@ -91,8 +91,6 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx)
 	}
 	ctx->w.control_page = NULL;
 	if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) {
-		if (ctx->tfm)
-			crypto_free_tfm(ctx->tfm);
 		kmem_cache_free(f2fs_crypto_ctx_cachep, ctx);
 	} else {
 		spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags);
@@ -113,7 +111,6 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx)
 struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode)
 {
 	struct f2fs_crypto_ctx *ctx = NULL;
-	int res = 0;
 	unsigned long flags;
 	struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
 
@@ -138,56 +135,13 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode)
 	spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags);
 	if (!ctx) {
 		ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_NOFS);
-		if (!ctx) {
-			res = -ENOMEM;
-			goto out;
-		}
+		if (!ctx)
+			return ERR_PTR(-ENOMEM);
 		ctx->flags |= F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
 	} else {
 		ctx->flags &= ~F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
 	}
 	ctx->flags &= ~F2FS_WRITE_PATH_FL;
-
-	/*
-	 * Allocate a new Crypto API context if we don't already have
-	 * one or if it isn't the right mode.
-	 */
-	if (ctx->tfm && (ctx->mode != ci->ci_data_mode)) {
-		crypto_free_tfm(ctx->tfm);
-		ctx->tfm = NULL;
-		ctx->mode = F2FS_ENCRYPTION_MODE_INVALID;
-	}
-	if (!ctx->tfm) {
-		switch (ci->ci_data_mode) {
-		case F2FS_ENCRYPTION_MODE_AES_256_XTS:
-			ctx->tfm = crypto_ablkcipher_tfm(
-				crypto_alloc_ablkcipher("xts(aes)", 0, 0));
-			break;
-		case F2FS_ENCRYPTION_MODE_AES_256_GCM:
-			/*
-			 * TODO(mhalcrow): AEAD w/ gcm(aes);
-			 * crypto_aead_setauthsize()
-			 */
-			ctx->tfm = ERR_PTR(-ENOTSUPP);
-			break;
-		default:
-			BUG();
-		}
-		if (IS_ERR_OR_NULL(ctx->tfm)) {
-			res = PTR_ERR(ctx->tfm);
-			ctx->tfm = NULL;
-			goto out;
-		}
-		ctx->mode = ci->ci_data_mode;
-	}
-	BUG_ON(ci->ci_size != f2fs_encryption_key_size(ci->ci_data_mode));
-
-out:
-	if (res) {
-		if (!IS_ERR_OR_NULL(ctx))
-			f2fs_release_crypto_ctx(ctx);
-		ctx = ERR_PTR(res);
-	}
 	return ctx;
 }
 
@@ -229,11 +183,8 @@ static void f2fs_crypto_destroy(void)
 {
 	struct f2fs_crypto_ctx *pos, *n;
 
-	list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) {
-		if (pos->tfm)
-			crypto_free_tfm(pos->tfm);
+	list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list)
 		kmem_cache_free(f2fs_crypto_ctx_cachep, pos);
-	}
 	INIT_LIST_HEAD(&f2fs_free_crypto_ctxs);
 	if (f2fs_bounce_page_pool)
 		mempool_destroy(f2fs_bounce_page_pool);
@@ -383,32 +334,11 @@ static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx,
 	struct ablkcipher_request *req = NULL;
 	DECLARE_F2FS_COMPLETION_RESULT(ecr);
 	struct scatterlist dst, src;
-	struct f2fs_inode_info *fi = F2FS_I(inode);
-	struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm);
+	struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
+	struct crypto_ablkcipher *tfm = ci->ci_ctfm;
 	int res = 0;
 
-	BUG_ON(!ctx->tfm);
-	BUG_ON(ctx->mode != fi->i_crypt_info->ci_data_mode);
-
-	if (ctx->mode != F2FS_ENCRYPTION_MODE_AES_256_XTS) {
-		printk_ratelimited(KERN_ERR
-				"%s: unsupported crypto algorithm: %d\n",
-				__func__, ctx->mode);
-		return -ENOTSUPP;
-	}
-
-	crypto_ablkcipher_clear_flags(atfm, ~0);
-	crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
-
-	res = crypto_ablkcipher_setkey(atfm, fi->i_crypt_info->ci_raw,
-					fi->i_crypt_info->ci_size);
-	if (res) {
-		printk_ratelimited(KERN_ERR
-				"%s: crypto_ablkcipher_setkey() failed\n",
-				__func__);
-		return res;
-	}
-	req = ablkcipher_request_alloc(atfm, GFP_NOFS);
+	req = ablkcipher_request_alloc(tfm, GFP_NOFS);
 	if (!req) {
 		printk_ratelimited(KERN_ERR
 				"%s: crypto_request_alloc() failed\n",
diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c
index 016c4b6..81852cc 100644
--- a/fs/f2fs/crypto_fname.c
+++ b/fs/f2fs/crypto_fname.c
@@ -249,52 +249,6 @@ static int digest_decode(const char *src, int len, char *dst)
 	return cp - dst;
 }
 
-int f2fs_setup_fname_crypto(struct inode *inode)
-{
-	struct f2fs_inode_info *fi = F2FS_I(inode);
-	struct f2fs_crypt_info *ci = fi->i_crypt_info;
-	struct crypto_ablkcipher *ctfm;
-	int res;
-
-	/* Check if the crypto policy is set on the inode */
-	res = f2fs_encrypted_inode(inode);
-	if (res == 0)
-		return 0;
-
-	res = f2fs_get_encryption_info(inode);
-	if (res < 0)
-		return res;
-	ci = fi->i_crypt_info;
-
-	if (!ci || ci->ci_ctfm)
-		return 0;
-
-	if (ci->ci_filename_mode != F2FS_ENCRYPTION_MODE_AES_256_CTS) {
-		printk_once(KERN_WARNING "f2fs: unsupported key mode %d\n",
-				ci->ci_filename_mode);
-		return -ENOKEY;
-	}
-
-	ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))", 0, 0);
-	if (!ctfm || IS_ERR(ctfm)) {
-		res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
-		printk(KERN_DEBUG "%s: error (%d) allocating crypto tfm\n",
-				__func__, res);
-		return res;
-	}
-	crypto_ablkcipher_clear_flags(ctfm, ~0);
-	crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
-			     CRYPTO_TFM_REQ_WEAK_KEY);
-
-	res = crypto_ablkcipher_setkey(ctfm, ci->ci_raw, ci->ci_size);
-	if (res) {
-		crypto_free_ablkcipher(ctfm);
-		return -EIO;
-	}
-	ci->ci_ctfm = ctfm;
-	return 0;
-}
-
 /**
  * f2fs_fname_crypto_round_up() -
  *
@@ -427,7 +381,7 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname,
 		fname->disk_name.len = iname->len;
 		return 0;
 	}
-	ret = f2fs_setup_fname_crypto(dir);
+	ret = f2fs_get_encryption_info(dir);
 	if (ret)
 		return ret;
 	ci = F2FS_I(dir)->i_crypt_info;
diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c
index 8a10569..95b8f93 100644
--- a/fs/f2fs/crypto_key.c
+++ b/fs/f2fs/crypto_key.c
@@ -87,20 +87,31 @@ out:
 	return res;
 }
 
-void f2fs_free_encryption_info(struct inode *inode)
+static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci)
 {
-	struct f2fs_inode_info *fi = F2FS_I(inode);
-	struct f2fs_crypt_info *ci = fi->i_crypt_info;
-
 	if (!ci)
 		return;
 
 	if (ci->ci_keyring_key)
 		key_put(ci->ci_keyring_key);
 	crypto_free_ablkcipher(ci->ci_ctfm);
-	memzero_explicit(&ci->ci_raw, sizeof(ci->ci_raw));
 	kmem_cache_free(f2fs_crypt_info_cachep, ci);
-	fi->i_crypt_info = NULL;
+}
+
+void f2fs_free_encryption_info(struct inode *inode, struct f2fs_crypt_info *ci)
+{
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	struct f2fs_crypt_info *prev;
+
+	if (ci == NULL)
+		ci = ACCESS_ONCE(fi->i_crypt_info);
+	if (ci == NULL)
+		return;
+	prev = cmpxchg(&fi->i_crypt_info, ci, NULL);
+	if (prev != ci)
+		return;
+
+	f2fs_free_crypt_info(ci);
 }
 
 int _f2fs_get_encryption_info(struct inode *inode)
@@ -113,17 +124,23 @@ int _f2fs_get_encryption_info(struct inode *inode)
 	struct f2fs_encryption_key *master_key;
 	struct f2fs_encryption_context ctx;
 	struct user_key_payload *ukp;
+	struct crypto_ablkcipher *ctfm;
+	const char *cipher_str;
+	char raw_key[F2FS_MAX_KEY_SIZE];
+	char mode;
 	int res;
 
 	res = f2fs_crypto_initialize();
 	if (res)
 		return res;
-
-	if (fi->i_crypt_info) {
-		if (!fi->i_crypt_info->ci_keyring_key ||
-			key_validate(fi->i_crypt_info->ci_keyring_key) == 0)
+retry:
+	crypt_info = ACCESS_ONCE(fi->i_crypt_info);
+	if (crypt_info) {
+		if (!crypt_info->ci_keyring_key ||
+				key_validate(crypt_info->ci_keyring_key) == 0)
 			return 0;
-		f2fs_free_encryption_info(inode);
+		f2fs_free_encryption_info(inode, crypt_info);
+		goto retry;
 	}
 
 	res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
@@ -143,18 +160,30 @@ int _f2fs_get_encryption_info(struct inode *inode)
 	crypt_info->ci_data_mode = ctx.contents_encryption_mode;
 	crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
 	crypt_info->ci_ctfm = NULL;
+	crypt_info->ci_keyring_key = NULL;
 	memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
 				sizeof(crypt_info->ci_master_key));
 	if (S_ISREG(inode->i_mode))
-		crypt_info->ci_size =
-			f2fs_encryption_key_size(crypt_info->ci_data_mode);
+		mode = crypt_info->ci_data_mode;
 	else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-		crypt_info->ci_size =
-			f2fs_encryption_key_size(crypt_info->ci_filename_mode);
+		mode = crypt_info->ci_filename_mode;
 	else
 		BUG();
 
-	BUG_ON(!crypt_info->ci_size);
+	switch (mode) {
+	case F2FS_ENCRYPTION_MODE_AES_256_XTS:
+		cipher_str = "xts(aes)";
+		break;
+	case F2FS_ENCRYPTION_MODE_AES_256_CTS:
+		cipher_str = "cts(cbc(aes))";
+		break;
+	default:
+		printk_once(KERN_WARNING
+			    "f2fs: unsupported key mode %d (ino %u)\n",
+			    mode, (unsigned) inode->i_ino);
+		res = -ENOKEY;
+		goto out;
+	}
 
 	memcpy(full_key_descriptor, F2FS_KEY_DESC_PREFIX,
 					F2FS_KEY_DESC_PREFIX_SIZE);
@@ -169,6 +198,7 @@ int _f2fs_get_encryption_info(struct inode *inode)
 		keyring_key = NULL;
 		goto out;
 	}
+	crypt_info->ci_keyring_key = keyring_key;
 	BUG_ON(keyring_key->type != &key_type_logon);
 	ukp = ((struct user_key_payload *)keyring_key->payload.data);
 	if (ukp->datalen != sizeof(struct f2fs_encryption_key)) {
@@ -180,19 +210,40 @@ int _f2fs_get_encryption_info(struct inode *inode)
 				F2FS_KEY_DERIVATION_NONCE_SIZE);
 	BUG_ON(master_key->size != F2FS_AES_256_XTS_KEY_SIZE);
 	res = f2fs_derive_key_aes(ctx.nonce, master_key->raw,
-					crypt_info->ci_raw);
-out:
-	if (res < 0) {
-		if (res == -ENOKEY)
-			res = 0;
-		kmem_cache_free(f2fs_crypt_info_cachep, crypt_info);
-	} else {
-		fi->i_crypt_info = crypt_info;
-		crypt_info->ci_keyring_key = keyring_key;
-		keyring_key = NULL;
+				  raw_key);
+	if (res)
+		goto out;
+
+	ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
+	if (!ctfm || IS_ERR(ctfm)) {
+		res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
+		printk(KERN_DEBUG
+		       "%s: error %d (inode %u) allocating crypto tfm\n",
+		       __func__, res, (unsigned) inode->i_ino);
+		goto out;
 	}
-	if (keyring_key)
-		key_put(keyring_key);
+	crypt_info->ci_ctfm = ctfm;
+	crypto_ablkcipher_clear_flags(ctfm, ~0);
+	crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
+			     CRYPTO_TFM_REQ_WEAK_KEY);
+	res = crypto_ablkcipher_setkey(ctfm, raw_key,
+					f2fs_encryption_key_size(mode));
+	if (res)
+		goto out;
+
+	memzero_explicit(raw_key, sizeof(raw_key));
+	if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) != NULL) {
+		f2fs_free_crypt_info(crypt_info);
+		goto retry;
+	}
+	return 0;
+
+out:
+	if (res == -ENOKEY && !S_ISREG(inode->i_mode))
+		res = 0;
+
+	f2fs_free_crypt_info(crypt_info);
+	memzero_explicit(raw_key, sizeof(raw_key));
 	return res;
 }
 
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 3e92376..a34ebd8 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -825,11 +825,11 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
 	struct f2fs_str fstr = FSTR_INIT(NULL, 0);
 	int err = 0;
 
-	err = f2fs_setup_fname_crypto(inode);
-	if (err)
-		return err;
-
 	if (f2fs_encrypted_inode(inode)) {
+		err = f2fs_get_encryption_info(inode);
+		if (err)
+			return err;
+
 		err = f2fs_fname_crypto_alloc_buffer(inode, F2FS_NAME_LEN,
 								&fstr);
 		if (err < 0)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7e93fcf..70cdf7b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2016,7 +2016,7 @@ int f2fs_decrypt_one(struct inode *, struct page *);
 void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *);
 
 /* crypto_key.c */
-void f2fs_free_encryption_info(struct inode *);
+void f2fs_free_encryption_info(struct inode *, struct f2fs_crypt_info *);
 int _f2fs_get_encryption_info(struct inode *inode);
 
 /* crypto_fname.c */
@@ -2051,7 +2051,6 @@ static inline int f2fs_get_encryption_info(struct inode *inode)
 	return 0;
 }
 
-int f2fs_setup_fname_crypto(struct inode *);
 void f2fs_fname_crypto_free_buffer(struct f2fs_str *);
 int f2fs_fname_setup_filename(struct inode *, const struct qstr *,
 				int lookup, struct f2fs_filename *);
@@ -2065,8 +2064,6 @@ static inline void f2fs_exit_crypto(void) { }
 
 static inline int f2fs_has_encryption_key(struct inode *i) { return 0; }
 static inline int f2fs_get_encryption_info(struct inode *i) { return 0; }
-
-static inline int f2fs_setup_fname_crypto(struct inode *i) { return 0; }
 static inline void f2fs_fname_crypto_free_buffer(struct f2fs_str *p) { }
 
 static inline int f2fs_fname_setup_filename(struct inode *dir,
diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h
index e33cec9..be59d91 100644
--- a/fs/f2fs/f2fs_crypto.h
+++ b/fs/f2fs/f2fs_crypto.h
@@ -75,13 +75,11 @@ struct f2fs_encryption_key {
 } __attribute__((__packed__));
 
 struct f2fs_crypt_info {
-	unsigned char	ci_size;
 	char		ci_data_mode;
 	char		ci_filename_mode;
 	char		ci_flags;
 	struct crypto_ablkcipher *ci_ctfm;
 	struct key	*ci_keyring_key;
-	char		ci_raw[F2FS_MAX_KEY_SIZE];
 	char		ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE];
 };
 
@@ -90,7 +88,6 @@ struct f2fs_crypt_info {
 #define F2FS_WRITE_PATH_FL			      0x00000004
 
 struct f2fs_crypto_ctx {
-	struct crypto_tfm *tfm;         /* Crypto API context */
 	union {
 		struct {
 			struct page *bounce_page;       /* Ciphertext page */
@@ -103,7 +100,6 @@ struct f2fs_crypto_ctx {
 		struct list_head free_list;     /* Free list */
 	};
 	char flags;                      /* Flags */
-	char mode;                       /* Encryption mode for tfm */
 };
 
 struct f2fs_completion_result {
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index ea6ba3b..2550868 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -364,7 +364,7 @@ no_delete:
 out_clear:
 #ifdef CONFIG_F2FS_FS_ENCRYPTION
 	if (F2FS_I(inode)->i_crypt_info)
-		f2fs_free_encryption_info(inode);
+		f2fs_free_encryption_info(inode, F2FS_I(inode)->i_crypt_info);
 #endif
 	clear_inode(inode);
 }
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 26e68ad..daed09c 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -364,7 +364,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
 		if (err)
 			goto err_out;
 
-		err = f2fs_setup_fname_crypto(inode);
+		err = f2fs_get_encryption_info(inode);
 		if (err)
 			goto err_out;
 
@@ -927,7 +927,7 @@ static void *f2fs_encrypted_follow_link(struct dentry *dentry,
 	u32 max_size = inode->i_sb->s_blocksize;
 	int res;
 
-	res = f2fs_setup_fname_crypto(inode);
+	res = f2fs_get_encryption_info(inode);
 	if (res)
 		return ERR_PTR(res);
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f596b24..38bb7c6 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -449,7 +449,8 @@ static int f2fs_drop_inode(struct inode *inode)
 
 #ifdef CONFIG_F2FS_FS_ENCRYPTION
 			if (F2FS_I(inode)->i_crypt_info)
-				f2fs_free_encryption_info(inode);
+				f2fs_free_encryption_info(inode,
+					F2FS_I(inode)->i_crypt_info);
 #endif
 			spin_lock(&inode->i_lock);
 		}
-- 
2.1.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/3] f2fs crypto: use bounce pages from mempool first
  2015-05-22  0:40 [PATCH 1/3] f2fs crypto: use per-inode tfm structure Jaegeuk Kim
@ 2015-05-22  0:40 ` Jaegeuk Kim
  2015-05-25 10:00   ` [f2fs-dev] " Chao Yu
                     ` (2 more replies)
  2015-05-22  0:40 ` [PATCH 3/3] f2fs crypto: preallocate BIO_MAX_PAGES for writeback Jaegeuk Kim
  1 sibling, 3 replies; 11+ messages in thread
From: Jaegeuk Kim @ 2015-05-22  0:40 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

If a lot of write streams are triggered, alloc_page and __free_page are
costly called, resulting in high memory pressure.

In order to avoid that, let's reuse mempool pages for writeback pages.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/crypto.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c
index 2c7819a..2ceee68 100644
--- a/fs/f2fs/crypto.c
+++ b/fs/f2fs/crypto.c
@@ -408,20 +408,12 @@ struct page *f2fs_encrypt(struct inode *inode,
 		return (struct page *)ctx;
 
 	/* The encryption operation will require a bounce page. */
-	ciphertext_page = alloc_page(GFP_NOFS);
+	ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOFS);
 	if (!ciphertext_page) {
-		/*
-		 * This is a potential bottleneck, but at least we'll have
-		 * forward progress.
-		 */
-		ciphertext_page = mempool_alloc(f2fs_bounce_page_pool,
-							GFP_NOFS);
-		if (WARN_ON_ONCE(!ciphertext_page))
-			ciphertext_page = mempool_alloc(f2fs_bounce_page_pool,
-						GFP_NOFS | __GFP_WAIT);
-		ctx->flags &= ~F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
-	} else {
+		ciphertext_page = alloc_page(GFP_NOFS);
 		ctx->flags |= F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
+	} else {
+		ctx->flags &= ~F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
 	}
 	ctx->flags |= F2FS_WRITE_PATH_FL;
 	ctx->w.bounce_page = ciphertext_page;
-- 
2.1.1


------------------------------------------------------------------------------
One dashboard for servers and applications across Physical-Virtual-Cloud 
Widest out-of-the-box monitoring support with 50+ applications
Performance metrics, stats and reports that give you Actionable Insights
Deep dive visibility with transaction tracing using APM Insight.
http://ad.doubleclick.net/ddm/clk/290420510;117567292;y

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/3] f2fs crypto: preallocate BIO_MAX_PAGES for writeback
  2015-05-22  0:40 [PATCH 1/3] f2fs crypto: use per-inode tfm structure Jaegeuk Kim
  2015-05-22  0:40 ` [PATCH 2/3] f2fs crypto: use bounce pages from mempool first Jaegeuk Kim
@ 2015-05-22  0:40 ` Jaegeuk Kim
  1 sibling, 0 replies; 11+ messages in thread
From: Jaegeuk Kim @ 2015-05-22  0:40 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch changes the number of preallocated pages to BIO_MAX_PAGES to prepare
writeback encryption.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c
index 2ceee68..cfe5a04 100644
--- a/fs/f2fs/crypto.c
+++ b/fs/f2fs/crypto.c
@@ -48,7 +48,7 @@
 
 /* Encryption added and removed here! (L: */
 
-static unsigned int num_prealloc_crypto_pages = 32;
+static unsigned int num_prealloc_crypto_pages = BIO_MAX_PAGES;
 static unsigned int num_prealloc_crypto_ctxs = 128;
 
 module_param(num_prealloc_crypto_pages, uint, 0444);
-- 
2.1.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* RE: [f2fs-dev] [PATCH 2/3] f2fs crypto: use bounce pages from mempool first
  2015-05-22  0:40 ` [PATCH 2/3] f2fs crypto: use bounce pages from mempool first Jaegeuk Kim
@ 2015-05-25 10:00   ` Chao Yu
  2015-05-27 19:09     ` Jaegeuk Kim
  2015-05-25 19:55   ` Theodore Ts'o
  2015-05-28  4:21   ` [PATCH 2/3 v2] " Jaegeuk Kim
  2 siblings, 1 reply; 11+ messages in thread
From: Chao Yu @ 2015-05-25 10:00 UTC (permalink / raw)
  To: 'Jaegeuk Kim'; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Friday, May 22, 2015 8:40 AM
> To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 2/3] f2fs crypto: use bounce pages from mempool first
> 
> If a lot of write streams are triggered, alloc_page and __free_page are
> costly called, resulting in high memory pressure.
> 
> In order to avoid that, let's reuse mempool pages for writeback pages.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/crypto.c | 16 ++++------------
>  1 file changed, 4 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c
> index 2c7819a..2ceee68 100644
> --- a/fs/f2fs/crypto.c
> +++ b/fs/f2fs/crypto.c
> @@ -408,20 +408,12 @@ struct page *f2fs_encrypt(struct inode *inode,
>  		return (struct page *)ctx;
> 
>  	/* The encryption operation will require a bounce page. */
> -	ciphertext_page = alloc_page(GFP_NOFS);
> +	ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOFS);
>  	if (!ciphertext_page) {
> -		/*
> -		 * This is a potential bottleneck, but at least we'll have
> -		 * forward progress.
> -		 */
> -		ciphertext_page = mempool_alloc(f2fs_bounce_page_pool,
> -							GFP_NOFS);
> -		if (WARN_ON_ONCE(!ciphertext_page))
> -			ciphertext_page = mempool_alloc(f2fs_bounce_page_pool,
> -						GFP_NOFS | __GFP_WAIT);
> -		ctx->flags &= ~F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
> -	} else {
> +		ciphertext_page = alloc_page(GFP_NOFS);

Using alloc_page(GFP_NOFS | __GFP_WAIT) to avoid failure?

Thanks,

>  		ctx->flags |= F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
> +	} else {
> +		ctx->flags &= ~F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
>  	}
>  	ctx->flags |= F2FS_WRITE_PATH_FL;
>  	ctx->w.bounce_page = ciphertext_page;
> --
> 2.1.1
> 
> 
> ------------------------------------------------------------------------------
> One dashboard for servers and applications across Physical-Virtual-Cloud
> Widest out-of-the-box monitoring support with 50+ applications
> Performance metrics, stats and reports that give you Actionable Insights
> Deep dive visibility with transaction tracing using APM Insight.
> http://ad.doubleclick.net/ddm/clk/290420510;117567292;y
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] f2fs crypto: use bounce pages from mempool first
  2015-05-22  0:40 ` [PATCH 2/3] f2fs crypto: use bounce pages from mempool first Jaegeuk Kim
  2015-05-25 10:00   ` [f2fs-dev] " Chao Yu
@ 2015-05-25 19:55   ` Theodore Ts'o
  2015-05-27 21:18     ` Jaegeuk Kim
  2015-05-28  4:21   ` [PATCH 2/3 v2] " Jaegeuk Kim
  2 siblings, 1 reply; 11+ messages in thread
From: Theodore Ts'o @ 2015-05-25 19:55 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

On Thu, May 21, 2015 at 05:40:24PM -0700, Jaegeuk Kim wrote:
> If a lot of write streams are triggered, alloc_page and __free_page are
> costly called, resulting in high memory pressure.
> 
> In order to avoid that, let's reuse mempool pages for writeback pages.

The reason why the mempool pages was used as a fallback was because
once we are deep in the writeback code, handling memory allocation
failures is close to impossible, since we've already made enough
changes that unwinding them would be extremely difficult.  So the
basic idea was to use the mempool as an emergency reserve, since
Failure Is Not An Option, and the alternative, which is to simply loop
until the mm subsystem sees fit to give us a page, has sometimes led
to deadlock.

The primary source of write streams should be either (a) fsync
operations, or (b) calls from the writeback thread.  Are there any
additional sources for f2fs?  If they are calls from fsync operations,
and we have more than a threshold number of write operations in play,
we might want to think about blocking the fsync/fdatasync writeback,
**before** the operation starts taking locks, so other write
operations can proceed.  And the writeback thread should keep the
number of write operations to a reasonable number, especially given
that we are treating page encryption as a blocking operation.  Or is
there something else going on which is making this to be more of a
problem for f2fs?

Regards,

						- Ted

------------------------------------------------------------------------------
One dashboard for servers and applications across Physical-Virtual-Cloud 
Widest out-of-the-box monitoring support with 50+ applications
Performance metrics, stats and reports that give you Actionable Insights
Deep dive visibility with transaction tracing using APM Insight.
http://ad.doubleclick.net/ddm/clk/290420510;117567292;y

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3] f2fs crypto: use bounce pages from mempool first
  2015-05-25 10:00   ` [f2fs-dev] " Chao Yu
@ 2015-05-27 19:09     ` Jaegeuk Kim
  2015-05-29  2:45       ` Chao Yu
  0 siblings, 1 reply; 11+ messages in thread
From: Jaegeuk Kim @ 2015-05-27 19:09 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Chao,

On Mon, May 25, 2015 at 06:00:47PM +0800, Chao Yu wrote:
> Hi Jaegeuk,
> 
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > Sent: Friday, May 22, 2015 8:40 AM
> > To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > linux-f2fs-devel@lists.sourceforge.net
> > Cc: Jaegeuk Kim
> > Subject: [f2fs-dev] [PATCH 2/3] f2fs crypto: use bounce pages from mempool first
> > 
> > If a lot of write streams are triggered, alloc_page and __free_page are
> > costly called, resulting in high memory pressure.
> > 
> > In order to avoid that, let's reuse mempool pages for writeback pages.
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  fs/f2fs/crypto.c | 16 ++++------------
> >  1 file changed, 4 insertions(+), 12 deletions(-)
> > 
> > diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c
> > index 2c7819a..2ceee68 100644
> > --- a/fs/f2fs/crypto.c
> > +++ b/fs/f2fs/crypto.c
> > @@ -408,20 +408,12 @@ struct page *f2fs_encrypt(struct inode *inode,
> >  		return (struct page *)ctx;
> > 
> >  	/* The encryption operation will require a bounce page. */
> > -	ciphertext_page = alloc_page(GFP_NOFS);
> > +	ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOFS);
> >  	if (!ciphertext_page) {
> > -		/*
> > -		 * This is a potential bottleneck, but at least we'll have
> > -		 * forward progress.
> > -		 */
> > -		ciphertext_page = mempool_alloc(f2fs_bounce_page_pool,
> > -							GFP_NOFS);
> > -		if (WARN_ON_ONCE(!ciphertext_page))
> > -			ciphertext_page = mempool_alloc(f2fs_bounce_page_pool,
> > -						GFP_NOFS | __GFP_WAIT);
> > -		ctx->flags &= ~F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
> > -	} else {
> > +		ciphertext_page = alloc_page(GFP_NOFS);
> 
> Using alloc_page(GFP_NOFS | __GFP_WAIT) to avoid failure?

#define GFP_NOFS	(__GFP_WAIT | __GFP_IO)

Thanks,

> 
> Thanks,
> 
> >  		ctx->flags |= F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
> > +	} else {
> > +		ctx->flags &= ~F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
> >  	}
> >  	ctx->flags |= F2FS_WRITE_PATH_FL;
> >  	ctx->w.bounce_page = ciphertext_page;
> > --
> > 2.1.1
> > 
> > 
> > ------------------------------------------------------------------------------
> > One dashboard for servers and applications across Physical-Virtual-Cloud
> > Widest out-of-the-box monitoring support with 50+ applications
> > Performance metrics, stats and reports that give you Actionable Insights
> > Deep dive visibility with transaction tracing using APM Insight.
> > http://ad.doubleclick.net/ddm/clk/290420510;117567292;y
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > Linux-f2fs-devel@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] f2fs crypto: use bounce pages from mempool first
  2015-05-25 19:55   ` Theodore Ts'o
@ 2015-05-27 21:18     ` Jaegeuk Kim
  2015-05-28 18:18       ` Theodore Ts'o
  0 siblings, 1 reply; 11+ messages in thread
From: Jaegeuk Kim @ 2015-05-27 21:18 UTC (permalink / raw)
  To: Theodore Ts'o, linux-kernel, linux-fsdevel, linux-f2fs-devel

On Mon, May 25, 2015 at 03:55:51PM -0400, Theodore Ts'o wrote:
> On Thu, May 21, 2015 at 05:40:24PM -0700, Jaegeuk Kim wrote:
> > If a lot of write streams are triggered, alloc_page and __free_page are
> > costly called, resulting in high memory pressure.
> > 
> > In order to avoid that, let's reuse mempool pages for writeback pages.
> 
> The reason why the mempool pages was used as a fallback was because
> once we are deep in the writeback code, handling memory allocation
> failures is close to impossible, since we've already made enough
> changes that unwinding them would be extremely difficult.  So the
> basic idea was to use the mempool as an emergency reserve, since
> Failure Is Not An Option, and the alternative, which is to simply loop
> until the mm subsystem sees fit to give us a page, has sometimes led
> to deadlock.

So, in the current flow,

  ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOFS);

  if (WARN_ON_ONCE(!ciphertext_page))
    ciphertext_page = mempool_alloc(f2fs_bounce_page_pool,
                                            GFP_NOFS | __GFP_WAIT);
                                                      ^^^^^^^^^^^^
Was it intended                                       __GFP_NOFAIL?

Anyway, f2fs handles ENOMEM in ->writepage by:

...
redirty_out:
  redirty_page_for_writepage();
  return AOP_WRITEPAGE_ACTIVATE;
}

> 
> The primary source of write streams should be either (a) fsync
> operations, or (b) calls from the writeback thread.  Are there any
> additional sources for f2fs?  If they are calls from fsync operations,
> and we have more than a threshold number of write operations in play,
> we might want to think about blocking the fsync/fdatasync writeback,
> **before** the operation starts taking locks, so other write
> operations can proceed.  And the writeback thread should keep the
> number of write operations to a reasonable number, especially given
> that we are treating page encryption as a blocking operation.  Or is
> there something else going on which is making this to be more of a
> problem for f2fs?

The problem that I'd like to address here is to reduce the call counts of
allocating and freeing a number of pages in pairs.

When I conduct xfstests/224 under 1GB DRAM, I've seen triggering several oom
killers, and in that moment, a huge number of inactive anonymous pages are
registered in the page cache. Not sure why those pages are not reclaimed
seamlessly though.

Neverthelss, once I changed the flow to reuse the pages in the mempool for
encryption/decryption, I could have resolved that issue.
And, I thought that there is no reason to allocate new pages for every requests.

For general purpose, it may need an additional mempool too.

Thanks,

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3 v2] f2fs crypto: use bounce pages from mempool first
  2015-05-22  0:40 ` [PATCH 2/3] f2fs crypto: use bounce pages from mempool first Jaegeuk Kim
  2015-05-25 10:00   ` [f2fs-dev] " Chao Yu
  2015-05-25 19:55   ` Theodore Ts'o
@ 2015-05-28  4:21   ` Jaegeuk Kim
  2 siblings, 0 replies; 11+ messages in thread
From: Jaegeuk Kim @ 2015-05-28  4:21 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel

Change log from v1:
 o remain the existing emergecy mempool
 o add a new mempool for writeback pages

>From ab8c49a79c4a6cd0ca1093d5e42cb93b55b35bfd Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 20 May 2015 19:12:30 -0700
Subject: [PATCH] f2fs crypto: introduce a mempool for bounce pages

If a lot of write streams are triggered, alloc_page and __free_page are
costly called, resulting in high memory pressure.

In order to avoid that, this patch introduces an additional mempool for writeback pages.
Note that, the existing mempool is used for the emergency purpose.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/crypto.c      | 43 +++++++++++++++++++++++++++++++------------
 fs/f2fs/f2fs_crypto.h | 10 +++++++---
 2 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c
index 2c7819a..e823593 100644
--- a/fs/f2fs/crypto.c
+++ b/fs/f2fs/crypto.c
@@ -48,8 +48,9 @@
 
 /* Encryption added and removed here! (L: */
 
-static unsigned int num_prealloc_crypto_pages = 32;
 static unsigned int num_prealloc_crypto_ctxs = 128;
+static unsigned int num_prealloc_crypto_pages = BIO_MAX_PAGES;
+static unsigned int num_prealloc_emergent_pages = 32;
 
 module_param(num_prealloc_crypto_pages, uint, 0444);
 MODULE_PARM_DESC(num_prealloc_crypto_pages,
@@ -58,7 +59,7 @@ module_param(num_prealloc_crypto_ctxs, uint, 0444);
 MODULE_PARM_DESC(num_prealloc_crypto_ctxs,
 		"Number of crypto contexts to preallocate");
 
-static mempool_t *f2fs_bounce_page_pool;
+static mempool_t *f2fs_bounce_page_pool, *f2fs_emergent_page_pool;
 
 static LIST_HEAD(f2fs_free_crypto_ctxs);
 static DEFINE_SPINLOCK(f2fs_crypto_ctx_lock);
@@ -83,10 +84,13 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx)
 	unsigned long flags;
 
 	if (ctx->flags & F2FS_WRITE_PATH_FL && ctx->w.bounce_page) {
-		if (ctx->flags & F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL)
-			__free_page(ctx->w.bounce_page);
-		else
+		if (ctx->flags & F2FS_BOUNCE_PAGE_POOL_FREE_ENCRYPT_FL)
 			mempool_free(ctx->w.bounce_page, f2fs_bounce_page_pool);
+		else if (ctx->flags & F2FS_EMERGENT_PAGE_POOL_FREE_ENCRYPT_FL)
+			mempool_free(ctx->w.bounce_page,
+					f2fs_emergent_page_pool);
+		else
+			__free_page(ctx->w.bounce_page);
 		ctx->w.bounce_page = NULL;
 	}
 	ctx->w.control_page = NULL;
@@ -189,6 +193,9 @@ static void f2fs_crypto_destroy(void)
 	if (f2fs_bounce_page_pool)
 		mempool_destroy(f2fs_bounce_page_pool);
 	f2fs_bounce_page_pool = NULL;
+	if (f2fs_emergent_page_pool)
+		mempool_destroy(f2fs_emergent_page_pool);
+	f2fs_emergent_page_pool = NULL;
 }
 
 /**
@@ -225,6 +232,11 @@ int f2fs_crypto_initialize(void)
 	if (!f2fs_bounce_page_pool)
 		goto fail;
 
+	f2fs_emergent_page_pool =
+		mempool_create_page_pool(num_prealloc_emergent_pages, 0);
+	if (!f2fs_emergent_page_pool)
+		goto fail;
+
 already_initialized:
 	mutex_unlock(&crypto_init);
 	return 0;
@@ -408,21 +420,28 @@ struct page *f2fs_encrypt(struct inode *inode,
 		return (struct page *)ctx;
 
 	/* The encryption operation will require a bounce page. */
+	ctx->flags &= ~F2FS_MASK_PAGE_POOL_FREE_ENCRYPT_FL;
+
+	ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOFS);
+	if (ciphertext_page) {
+		ctx->flags |= F2FS_BOUNCE_PAGE_POOL_FREE_ENCRYPT_FL;
+		goto got_it;
+	}
+
 	ciphertext_page = alloc_page(GFP_NOFS);
 	if (!ciphertext_page) {
 		/*
 		 * This is a potential bottleneck, but at least we'll have
 		 * forward progress.
 		 */
-		ciphertext_page = mempool_alloc(f2fs_bounce_page_pool,
-							GFP_NOFS);
+		ciphertext_page = mempool_alloc(f2fs_emergent_page_pool,
+								GFP_NOFS);
 		if (WARN_ON_ONCE(!ciphertext_page))
-			ciphertext_page = mempool_alloc(f2fs_bounce_page_pool,
-						GFP_NOFS | __GFP_WAIT);
-		ctx->flags &= ~F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
-	} else {
-		ctx->flags |= F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
+			ciphertext_page = mempool_alloc(f2fs_emergent_page_pool,
+						GFP_NOFS | __GFP_NOFAIL);
+		ctx->flags |= F2FS_EMERGENT_PAGE_POOL_FREE_ENCRYPT_FL;
 	}
+got_it:
 	ctx->flags |= F2FS_WRITE_PATH_FL;
 	ctx->w.bounce_page = ciphertext_page;
 	ctx->w.control_page = plaintext_page;
diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h
index be59d91..5b05a53 100644
--- a/fs/f2fs/f2fs_crypto.h
+++ b/fs/f2fs/f2fs_crypto.h
@@ -83,9 +83,13 @@ struct f2fs_crypt_info {
 	char		ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE];
 };
 
-#define F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL             0x00000001
-#define F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL     0x00000002
-#define F2FS_WRITE_PATH_FL			      0x00000004
+#define F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL		0x00000001
+#define F2FS_WRITE_PATH_FL				0x00000002
+#define F2FS_BOUNCE_PAGE_POOL_FREE_ENCRYPT_FL		0x00000004
+#define F2FS_EMERGENT_PAGE_POOL_FREE_ENCRYPT_FL		0x00000008
+#define F2FS_MASK_PAGE_POOL_FREE_ENCRYPT_FL				\
+		(F2FS_BOUNCE_PAGE_POOL_FREE_ENCRYPT_FL |		\
+		F2FS_EMERGENT_PAGE_POOL_FREE_ENCRYPT_FL)
 
 struct f2fs_crypto_ctx {
 	union {
-- 
2.1.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] f2fs crypto: use bounce pages from mempool first
  2015-05-27 21:18     ` Jaegeuk Kim
@ 2015-05-28 18:18       ` Theodore Ts'o
  2015-05-29 19:10         ` nick
  0 siblings, 1 reply; 11+ messages in thread
From: Theodore Ts'o @ 2015-05-28 18:18 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On Wed, May 27, 2015 at 02:18:54PM -0700, Jaegeuk Kim wrote:
> The problem that I'd like to address here is to reduce the call counts of
> allocating and freeing a number of pages in pairs.
> 
> When I conduct xfstests/224 under 1GB DRAM, I've seen triggering several oom
> killers, and in that moment, a huge number of inactive anonymous pages are
> registered in the page cache. Not sure why those pages are not reclaimed
> seamlessly though.

If the system is running 8 fio processes, each one writing 1 meg
(BIO_MAX pages) at a time, one of the things that is going on is that
we need to grab 256 4k paegs before the submitting the bio, and then
if there are a large number of bio's queued, we can have potentially a
very large number of pages allocated until the I/O has been completed.

So the problem is it's extremely difficult to determine ahead of time
how many pages that need to be reserved in a mempool.  Simply
increasing the number of in the mempool from 32 to 256 is no guarantee
that it will be enough.  We originally only reserved 32 pages so that
in the case of an extreme memory crunch, we could make at least some
amount of forward progress.

I can imagine a number of different solutions (and these are not
mutally exclusive):

1) Try to dynamically adjust the number of pages we keep in the
mempool so that we ramp up under I/O load and then gradually ramp down
when the I/O pressure decreases.

2) Keep track of how many temporary encryption outstanding bounce
pages are outstanding, if we exceed some number, push back in
writepages for encrypted inode.  That way we can make it be a tunable
so that we don't end up using a huge number of pages, and can start
throttling encrypted writeback even before we start getting allocation
failures.

I'm currently leaning towards #2, myself.  I haven't tried doing some
kernel performance measurements to see how much CPU time we're
spending in alloc_page() and free_page() when under a very heavy
memory load.  I assume you've done some measurements and this has been
very heavy.  Can you give more details about how much CPU time is
getting burned by alloc_page() and free_page()?  I had been assuming
that if we're I/O bound, the extra CPU time to allocate and free the
pages wouldn't be really onerous.  If you're seeing something
different, I'd love to see some data (perf traces, etc.) to correct my
impressions.

Cheers,

					- Ted

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [f2fs-dev] [PATCH 2/3] f2fs crypto: use bounce pages from mempool first
  2015-05-27 19:09     ` Jaegeuk Kim
@ 2015-05-29  2:45       ` Chao Yu
  0 siblings, 0 replies; 11+ messages in thread
From: Chao Yu @ 2015-05-29  2:45 UTC (permalink / raw)
  To: 'Jaegeuk Kim'; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Thursday, May 28, 2015 3:10 AM
> To: Chao Yu
> Cc: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Subject: Re: [f2fs-dev] [PATCH 2/3] f2fs crypto: use bounce pages from mempool first
> 
> Hi Chao,
> 
> On Mon, May 25, 2015 at 06:00:47PM +0800, Chao Yu wrote:
> > Hi Jaegeuk,
> >

[snip]

> >
> > Using alloc_page(GFP_NOFS | __GFP_WAIT) to avoid failure?
> 
> #define GFP_NOFS	(__GFP_WAIT | __GFP_IO)

That's right, my miss.

Thanks,

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] f2fs crypto: use bounce pages from mempool first
  2015-05-28 18:18       ` Theodore Ts'o
@ 2015-05-29 19:10         ` nick
  0 siblings, 0 replies; 11+ messages in thread
From: nick @ 2015-05-29 19:10 UTC (permalink / raw)
  To: Theodore Ts'o, Jaegeuk Kim, linux-kernel, linux-fsdevel,
	linux-f2fs-devel



On 2015-05-28 02:18 PM, Theodore Ts'o wrote:
> On Wed, May 27, 2015 at 02:18:54PM -0700, Jaegeuk Kim wrote:
>> The problem that I'd like to address here is to reduce the call counts of
>> allocating and freeing a number of pages in pairs.
>>
>> When I conduct xfstests/224 under 1GB DRAM, I've seen triggering several oom
>> killers, and in that moment, a huge number of inactive anonymous pages are
>> registered in the page cache. Not sure why those pages are not reclaimed
>> seamlessly though.
> 
> If the system is running 8 fio processes, each one writing 1 meg
> (BIO_MAX pages) at a time, one of the things that is going on is that
> we need to grab 256 4k paegs before the submitting the bio, and then
> if there are a large number of bio's queued, we can have potentially a
> very large number of pages allocated until the I/O has been completed.
> 
> So the problem is it's extremely difficult to determine ahead of time
> how many pages that need to be reserved in a mempool.  Simply
> increasing the number of in the mempool from 32 to 256 is no guarantee
> that it will be enough.  We originally only reserved 32 pages so that
> in the case of an extreme memory crunch, we could make at least some
> amount of forward progress.
> 
> I can imagine a number of different solutions (and these are not
> mutally exclusive):
> 
> 1) Try to dynamically adjust the number of pages we keep in the
> mempool so that we ramp up under I/O load and then gradually ramp down
> when the I/O pressure decreases.
> 
> 2) Keep track of how many temporary encryption outstanding bounce
> pages are outstanding, if we exceed some number, push back in
> writepages for encrypted inode.  That way we can make it be a tunable
> so that we don't end up using a huge number of pages, and can start
> throttling encrypted writeback even before we start getting allocation
> failures.
> 
> I'm currently leaning towards #2, myself.  I haven't tried doing some
> kernel performance measurements to see how much CPU time we're
> spending in alloc_page() and free_page() when under a very heavy
> memory load.  I assume you've done some measurements and this has been
> very heavy.  Can you give more details about how much CPU time is
> getting burned by alloc_page() and free_page()?  I had been assuming
> that if we're I/O bound, the extra CPU time to allocate and free the
> pages wouldn't be really onerous.  If you're seeing something
> different, I'd love to see some data (perf traces, etc.) to correct my
> impressions.
> 
> Cheers,
> 
> 					- Ted
> 
> ------------------------------------------------------------------------------
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> 
Ted,
After reading this and thinking over the last few days you seem to have a correct idea
at least from my understanding of cache write back in the Linux kernel. However this
really needs to be bench marked first. I would be glad to benchmark this with perf 
on my system if you can give me the run down of what needs to be bench marked, I
have am a idea but would like a little more clarity.
Cheers,
Nick

------------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-05-29 19:10 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-22  0:40 [PATCH 1/3] f2fs crypto: use per-inode tfm structure Jaegeuk Kim
2015-05-22  0:40 ` [PATCH 2/3] f2fs crypto: use bounce pages from mempool first Jaegeuk Kim
2015-05-25 10:00   ` [f2fs-dev] " Chao Yu
2015-05-27 19:09     ` Jaegeuk Kim
2015-05-29  2:45       ` Chao Yu
2015-05-25 19:55   ` Theodore Ts'o
2015-05-27 21:18     ` Jaegeuk Kim
2015-05-28 18:18       ` Theodore Ts'o
2015-05-29 19:10         ` nick
2015-05-28  4:21   ` [PATCH 2/3 v2] " Jaegeuk Kim
2015-05-22  0:40 ` [PATCH 3/3] f2fs crypto: preallocate BIO_MAX_PAGES for writeback Jaegeuk Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).