/** * io_uring cancel miss reproducer * Copyright 2022 Florian Fischer * * io_urings's IORING_OP_ASYNC_CANCEL sometimes does not cancel inflight requests. * * Tested systems: * - Xeon E7-4830 Debian 11 * - liburing 2.1 * - reproducable on: linux-block/io_uring-5.17 * * Building the reproducer: * $ gcc -Werror -Wall -g -O3 io_uring-cancel.c -o io_uring-cancel -pthread -luring * * Steps to reproduce on the above system: * 1. Compile the reproducer * 2. Run the reproducer in a loop * $ for i in $(seq 100); do echo $i; ./io_uring-cancel; done * * (I need < ~30 runs to reproduce a stuck thread with 96 threads) * * Explanation: * The reproducer opens #CPUs event file descriptors and starts #CPUs threads. * Each thread creates an io_uring and enters a loop. * In each iteration the thread submits a read request from evfds[thread_id]. * Submits a second read request from evfds[(thread_id + 1) % #CPUs]. * And issues a write request for evfds[(thread_id + 2) % #CPUs] but the write * request does not increase the eventfds count thus it will not complete any read. * The write request is awaited. * Then two cancel requests for the reads are submitted and the outstanding * requests are awaited. * * The reproducer gets stuck because a cancel request returning with -EALREADY * did not properly cancel the read the read request is never completed. * * WhenpPassing a non-zero value to the reproducer as write increment * $ ./io_uring-cancel 1 * I could not longer reproduce stuck threads. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #define IORING_ENTRIES 2 #define ITERATIONS 1000 _Thread_local size_t thread_id; static pthread_t* threads; static int* evfds; static unsigned nthreads; static unsigned write_increment = 0; static struct io_uring_sqe* checked_get_sqe(struct io_uring* ring) { struct io_uring_sqe* sqe = io_uring_get_sqe(ring); assert(sqe); return sqe; } static void prep_evfd_read(struct io_uring* ring, int evfd, uint64_t* buf, uintptr_t tag) { struct io_uring_sqe* sqe = checked_get_sqe(ring); io_uring_prep_read(sqe, evfds[evfd], buf, sizeof(*buf), 0); io_uring_sqe_set_data(sqe, (void*)tag); } static void prep_evfd_write(struct io_uring* ring, int evfd, uint64_t* buf) { struct io_uring_sqe* sqe = checked_get_sqe(ring); io_uring_prep_write(sqe, evfds[evfd], buf, sizeof(*buf), 0); } static void prep_cancel(struct io_uring* ring, uintptr_t tag) { struct io_uring_sqe* sqe = checked_get_sqe(ring); io_uring_prep_cancel(sqe, (void*)tag, 0); } static void checked_submit_n(struct io_uring* ring, int n) { int res = io_uring_submit(ring); if (res < 0) err(EXIT_FAILURE, "io_uring_submit failed"); if (res != n) errx(EXIT_FAILURE, "io_uring_submit submitted less sqes than preprared"); } static void checked_submit_one(struct io_uring* ring) { checked_submit_n(ring, 1); } static void collect_n_completions(struct io_uring* ring, int n) { struct io_uring_cqe* cqe; for (int i = 0; i < n; ++i) { int res = io_uring_wait_cqe(ring, &cqe); if (res) err(EXIT_FAILURE, "io_uring_wait_cqe failed"); io_uring_cqe_seen(ring, cqe); } } void* thread_func(void* arg) { thread_id = (uintptr_t)(arg); struct io_uring ring; int res = io_uring_queue_init(IORING_ENTRIES, &ring, 0); if (res) err(EXIT_FAILURE, "io_uring_queue_init failed"); for (unsigned i = 0; i < ITERATIONS; ++i) { uint64_t rbuf = 0; uint64_t wbuf = write_increment; prep_evfd_read(&ring, thread_id, &rbuf, 1); checked_submit_one(&ring); prep_evfd_read(&ring, (thread_id + 1) % nthreads, &rbuf, 2); checked_submit_one(&ring); prep_evfd_write(&ring, (thread_id + 2) % nthreads, &wbuf); checked_submit_one(&ring); collect_n_completions(&ring, 1); prep_cancel(&ring, 1); prep_cancel(&ring, 2); checked_submit_n(&ring, 2); collect_n_completions(&ring, 4); } return NULL; } int main(int argc, char* argv[]) { if (argc > 1) write_increment = atoi(argv[1]); nthreads = get_nprocs(); // nthreads = 10; evfds = malloc(sizeof(int) * nthreads); if (!evfds) err(EXIT_FAILURE, "malloc failed"); for (unsigned i = 0; i < nthreads; ++i) { evfds[i] = eventfd(0, 0); if(!evfds[i]) err(EXIT_FAILURE, "eventfd failed"); } threads = malloc(sizeof(pthread_t) * nthreads); if (!threads) err(EXIT_FAILURE, "malloc failed"); for (unsigned i = 0; i < nthreads; ++i) { errno = pthread_create(&threads[i], NULL, thread_func, (void*)(uintptr_t)i); if (errno) err(EXIT_FAILURE, "pthread_create failed"); } for (unsigned i = 0; i < nthreads; ++i) { errno = pthread_join(threads[i], NULL); if (errno) err(EXIT_FAILURE, "pthread_join failed"); } exit(EXIT_SUCCESS); }