[PATCH] io_uring: provide fallback request for OOM situations
From: Jens Axboe @ 2019-11-08 15:22 UTC
  To: io-uring

One thing that really sucks for userspace APIs is when the kernel passes
back -ENOMEM/-EAGAIN for resource shortages. The application really has
no idea what to do in those cases. Should it try to reap completions?
Probably a good idea. Will that solve the issue? Who knows.

This patch adds a simple fallback mechanism for when we fail to allocate
memory for a request. We first go to the atomic pool and see if we can
get memory that way; if that fails, we punt to a pre-allocated request.
There's just one of these, but the important part is that if we ever
return -EBUSY to the application, the application knows it can wait for
events and make forward progress once events have completed.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

---
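
To illustrate the -EBUSY contract from the userspace side: a submitter can
treat it as "reap a completion, then retry". The sketch below is
hypothetical and not part of this patch; it assumes a liburing-based
submitter and its io_uring_submit()/io_uring_wait_cqe()/io_uring_cqe_seen()
helpers.

#include <liburing.h>

/* Submit pending SQEs; on -EBUSY, wait for one completion to free up a
 * request and try again instead of failing the submission outright. */
static int submit_with_backoff(struct io_uring *ring)
{
        struct io_uring_cqe *cqe;
        int ret;

        for (;;) {
                ret = io_uring_submit(ring);
                if (ret != -EBUSY)
                        return ret;     /* submitted count, or a hard error */

                /* The kernel is out of request memory: completing an
                 * inflight request frees one up, so block for a CQE
                 * and retry the submission. */
                ret = io_uring_wait_cqe(ring, &cqe);
                if (ret < 0)
                        return ret;
                /* ... handle cqe->res for the completed request here ... */
                io_uring_cqe_seen(ring, cqe);
        }
}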

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 94ec44caac00..fb25cce9d580 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -238,6 +238,9 @@ struct io_ring_ctx {
 	/* 0 is for ctx quiesce/reinit/free, 1 is for sqo_thread started */
 	struct completion	*completions;
 
+	/* if all else fails... */
+	struct io_kiocb		*fallback_req;
+
 #if defined(CONFIG_UNIX)
 	struct socket		*ring_sock;
 #endif
@@ -407,6 +410,10 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	if (!ctx)
 		return NULL;
 
+	ctx->fallback_req = kmem_cache_alloc(req_cachep, GFP_KERNEL);
+	if (!ctx->fallback_req)
+		goto err;
+
 	ctx->completions = kmalloc(2 * sizeof(struct completion), GFP_KERNEL);
 	if (!ctx->completions)
 		goto err;
@@ -432,6 +439,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->inflight_list);
 	return ctx;
 err:
+	if (ctx->fallback_req)
+		kmem_cache_free(req_cachep, ctx->fallback_req);
 	kfree(ctx->completions);
 	kfree(ctx);
 	return NULL;
@@ -732,6 +741,27 @@ static bool io_req_over_limit(struct io_ring_ctx *ctx)
 	return inflight >= limit;
 }
 
+static inline bool io_is_fallback_req(struct io_kiocb *req)
+{
+	return req == (struct io_kiocb *)
+			((unsigned long) req->ctx->fallback_req & ~1UL);
+}
+
+static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx)
+{
+	struct io_kiocb *req;
+
+	req = kmem_cache_alloc(req_cachep, GFP_ATOMIC | __GFP_NOWARN);
+	if (req)
+		return req;
+
+	req = ctx->fallback_req;
+	if (!test_and_set_bit_lock(0, (unsigned long *) ctx->fallback_req))
+		return req;
+
+	return NULL;
+}
+
 static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 				   struct io_submit_state *state, bool force)
 {
@@ -742,21 +772,17 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		return ERR_PTR(-ENXIO);
 
 	if (!state) {
-		if (unlikely(!force && io_req_over_limit(ctx))) {
-			req = ERR_PTR(-EBUSY);
+		if (unlikely(!force && io_req_over_limit(ctx)))
 			goto out_limit;
-		}
 		req = kmem_cache_alloc(req_cachep, gfp);
 		if (unlikely(!req))
-			goto out;
+			goto fallback;
 	} else if (!state->free_reqs) {
 		size_t sz;
 		int ret;
 
-		if (unlikely(!force && io_req_over_limit(ctx))) {
-			req = ERR_PTR(-EBUSY);
+		if (unlikely(!force && io_req_over_limit(ctx)))
 			goto out_limit;
-		}
 		sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
 		ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
 
@@ -767,7 +793,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		if (unlikely(ret <= 0)) {
 			state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
 			if (!state->reqs[0])
-				goto out;
+				goto fallback;
 			ret = 1;
 		}
 		state->free_reqs = ret - 1;
@@ -779,6 +805,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		state->cur_req++;
 	}
 
+got_it:
 	req->file = NULL;
 	req->ctx = ctx;
 	req->flags = 0;
@@ -787,11 +814,13 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 	req->result = 0;
 	INIT_IO_WORK(&req->work, io_wq_submit_work);
 	return req;
-out:
-	req = ERR_PTR(-EAGAIN);
+fallback:
+	req = io_get_fallback_req(ctx);
+	if (req)
+		goto got_it;
 out_limit:
 	percpu_ref_put(&ctx->refs);
-	return req;
+	return ERR_PTR(-EBUSY);
 }
 
 static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
@@ -819,7 +848,10 @@ static void __io_free_req(struct io_kiocb *req)
 		spin_unlock_irqrestore(&ctx->inflight_lock, flags);
 	}
 	percpu_ref_put(&ctx->refs);
-	kmem_cache_free(req_cachep, req);
+	if (likely(!io_is_fallback_req(req)))
+		kmem_cache_free(req_cachep, req);
+	else
+		clear_bit_unlock(0, (unsigned long *) ctx->fallback_req);
 }
 
 static bool io_link_cancel_timeout(struct io_kiocb *req)
@@ -1025,8 +1057,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			 * completions for those, only batch free for fixed
 			 * file and non-linked commands.
 			 */
-			if ((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
-			    REQ_F_FIXED_FILE) {
+			if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
+			    REQ_F_FIXED_FILE) && !io_is_fallback_req(req)) {
 				reqs[to_free++] = req;
 				if (to_free == ARRAY_SIZE(reqs))
 					io_free_req_many(ctx, reqs, &to_free);
@@ -4143,6 +4175,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 				ring_pages(ctx->sq_entries, ctx->cq_entries));
 	free_uid(ctx->user);
 	kfree(ctx->completions);
+	kmem_cache_free(req_cachep, ctx->fallback_req);
 	kfree(ctx);
 }

-- 
Jens Axboe

