On Wed, Jul 07, 2021 at 06:58:12PM +0200, Emanuele Giuseppe Esposito wrote:
> This lock is going to replace most of the AioContext locks
> in the job and blockjob, so that a Job can run in an arbitrary
> AioContext.
>
> Signed-off-by: Emanuele Giuseppe Esposito
> ---
> include/block/blockjob_int.h | 1 +
> include/qemu/job.h | 2 +
> block/backup.c | 4 +
> block/mirror.c | 11 +-
> blockdev.c | 62 ++++----
> blockjob.c | 67 +++++++--
> job-qmp.c | 55 +++----
> job.c | 284 +++++++++++++++++++++++++++--------
> qemu-img.c | 15 +-
> 9 files changed, 350 insertions(+), 151 deletions(-)
>
> diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
> index 6633d83da2..8b91126506 100644
> --- a/include/block/blockjob_int.h
> +++ b/include/block/blockjob_int.h
> @@ -53,6 +53,7 @@ struct BlockJobDriver {
> */
> void (*attached_aio_context)(BlockJob *job, AioContext *new_context);
>
> + /* Called with job mutex *not* held. */
> void (*set_speed)(BlockJob *job, int64_t speed);
> };
>
> diff --git a/include/qemu/job.h b/include/qemu/job.h
> index 4421d08d93..359f4e6b3a 100644
> --- a/include/qemu/job.h
> +++ b/include/qemu/job.h
> @@ -49,6 +49,8 @@ typedef struct Job {
> /**
> * The type of this job.
> * Set it in job_create and just read.
> + * All calls to the driver function must be not locked by job_mutex,
> + * to avoid deadlocks.
> */
> const JobDriver *driver;
>
> diff --git a/block/backup.c b/block/backup.c
> index bd3614ce70..80ce956299 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -315,6 +315,10 @@ static void coroutine_fn backup_pause(Job *job)
> }
> }
>
> +/*
> + * Called with job mutex *not* held (we don't want to call block_copy_kick
> + * with the lock held!)
> + */
> static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed)
> {
> BackupBlockJob *s = container_of(job, BackupBlockJob, common);
> diff --git a/block/mirror.c b/block/mirror.c
> index 49aaaafffa..deefaa6a39 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -1150,9 +1150,11 @@ static void mirror_complete(Job *job, Error **errp)
> s->should_complete = true;
>
> /* If the job is paused, it will be re-entered when it is resumed */
> + job_lock();
> if (!job_is_paused(job)) {
> - job_enter(job);
> + job_enter_locked(job);
> }
> + job_unlock();
> }
>
> static void coroutine_fn mirror_pause(Job *job)
> @@ -1171,10 +1173,13 @@ static bool mirror_drained_poll(BlockJob *job)
> * from one of our own drain sections, to avoid a deadlock waiting for
> * ourselves.
> */
> - if (!job_is_paused(&s->common.job) && !job_is_cancelled(&s->common.job) &&
> - !s->in_drain) {
> + job_lock();
> + if (!job_is_paused(&s->common.job) &&
> + !job_is_cancelled_locked(&s->common.job) && !s->in_drain) {
> + job_unlock();
> return true;
> }
> + job_unlock();
>
> return !!s->in_flight;
> }
> diff --git a/blockdev.c b/blockdev.c
> index 8e2c15370e..9255aea6a2 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -150,9 +150,11 @@ void blockdev_mark_auto_del(BlockBackend *blk)
> AioContext *aio_context = job_get_aiocontext(&job->job);
> aio_context_acquire(aio_context);
>
> + job_lock();
> job_cancel(&job->job, false);
>
> aio_context_release(aio_context);
> + job_unlock();

This looks strange. The way it's written suggests there is a reason why
job_unlock() has to be called after aio_context_release(). Can
job_unlock() be called immediately after job_cancel()?

> }
> }
>
> @@ -3309,48 +3311,44 @@ out:
> aio_context_release(aio_context);
> }
>
> -/* Get a block job using its ID and acquire its AioContext */
> -static BlockJob *find_block_job(const char *id, AioContext **aio_context,
> - Error **errp)
> +/* Get a block job using its ID and acquire its job_lock */

"its" suggests job_lock is per-Job. I suggest saying something like
"Returns with job_lock held on success" instead.