All of lore.kernel.org
 help / color / mirror / Atom feed
From: Max Reitz <mreitz@redhat.com>
To: John Snow <jsnow@redhat.com>, qemu-block@nongnu.org
Cc: kwolf@redhat.com, famz@redhat.com, qemu-devel@nongnu.org,
	armbru@redhat.com, vsementsov@parallels.com, stefanha@redhat.com
Subject: Re: [Qemu-devel] [PATCH 07/11] block: drive_backup transaction callback support
Date: Wed, 18 Mar 2015 09:41:43 -0400	[thread overview]
Message-ID: <55098097.8060401@redhat.com> (raw)
In-Reply-To: <5508B85E.40604@redhat.com>

On 2015-03-17 at 19:27, John Snow wrote:
>
>
> On 03/17/2015 03:49 PM, Max Reitz wrote:
>> On 2015-03-04 at 23:15, John Snow wrote:
>>> This patch actually implements the transactional callback system
>>> for the drive_backup transaction.
>>>
>>> (1) We manually pick up a reference to the bitmap if present to allow
>>>      its cleanup to be delayed until after all drive_backup jobs 
>>> launched
>>>      by the transaction have fully completed.
>>>
>>> (2) We create a functional closure that envelops the original
>>> drive_backup
>>>      callback, to be able to intercept the completion status and
>>> return code
>>>      for the job.
>>>
>>> (3) We add the drive_backup_cb method for the drive_backup action, 
>>> which
>>>      unpacks the completion information and invokes the final cleanup.
>>>
>>> (4) backup_transaction_complete will perform the final cleanup on the
>>>      backup job.
>>>
>>> (5) In the case of transaction cancellation, drive_backup_cb is still
>>>      responsible for cleaning up the mess we may have already made.
>>>
>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>> ---
>>>   block/backup.c            |  9 +++++++
>>>   blockdev.c                | 64
>>> ++++++++++++++++++++++++++++++++++++++---------
>>>   include/block/block_int.h |  8 ++++++
>>>   3 files changed, 69 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/block/backup.c b/block/backup.c
>>> index 4332df4..3673fb0 100644
>>> --- a/block/backup.c
>>> +++ b/block/backup.c
>>> @@ -233,6 +233,15 @@ typedef struct {
>>>       int ret;
>>>   } BackupCompleteData;
>>> +void backup_transaction_complete(BlockJob *job, int ret)
>>> +{
>>> +    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>>> +
>>> +    if (s->sync_bitmap) {
>>> +        bdrv_dirty_bitmap_decref(job->bs, s->sync_bitmap, ret, NULL);
>>> +    }
>>> +}
>>> +
>>>   static void backup_complete(BlockJob *job, void *opaque)
>>>   {
>>>       BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>>> diff --git a/blockdev.c b/blockdev.c
>>> index 9e3b9e9..3ff14a7 100644
>>> --- a/blockdev.c
>>> +++ b/blockdev.c
>>> @@ -1363,14 +1363,6 @@ static void transaction_callback(void *opaque,
>>> int ret)
>>>   typedef void (CallbackFn)(void *opaque, int ret);
>>> -/* Temporary. Removed in the next patch. */
>>> -CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>>> -                                    void *opaque,
>>> -                                    void (*callback)(void *, int),
>>> -                                    void **new_opaque);
>>> -
>>> -void undo_transaction_wrapper(BlkTransactionState *common);
>>> -
>>>   /**
>>>    * Create a new transactional callback wrapper.
>>>    *
>>> @@ -1389,7 +1381,7 @@ void
>>> undo_transaction_wrapper(BlkTransactionState *common);
>>>    *
>>>    * @return The callback to be used instead of @callback.
>>>    */
>>> -CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>>> +static CallbackFn *new_transaction_wrapper(BlkTransactionState 
>>> *common,
>>>                                              void *opaque,
>>>                                              CallbackFn *callback,
>>>                                              void **new_opaque)
>>> @@ -1416,7 +1408,7 @@ CallbackFn
>>> *new_transaction_wrapper(BlkTransactionState *common,
>>>   /**
>>>    * Undo any actions performed by the above call.
>>>    */
>>> -void undo_transaction_wrapper(BlkTransactionState *common)
>>> +static void undo_transaction_wrapper(BlkTransactionState *common)
>>>   {
>>>       BlkTransactionList *btl = common->list;
>>>       BlkTransactionState *bts;
>>> @@ -1449,6 +1441,7 @@ void
>>> undo_transaction_wrapper(BlkTransactionState *common)
>>>       blk_put_transaction_state(common);
>>>   }
>>> +static void block_job_cb(void *opaque, int ret);
>>>   static void _drive_backup(const char *device, const char *target,
>>>                             bool has_format, const char *format,
>>>                             enum MirrorSyncMode sync,
>>> @@ -1767,6 +1760,9 @@ static void
>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>       BlockDriverState *bs;
>>>       DriveBackup *backup;
>>>       Error *local_err = NULL;
>>> +    CallbackFn *cb;
>>> +    void *opaque;
>>> +    BdrvDirtyBitmap *bmap = NULL;
>>>       assert(common->action->kind ==
>>> TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
>>>       backup = common->action->drive_backup;
>>> @@ -1777,6 +1773,19 @@ static void
>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>           return;
>>>       }
>>> +    /* BackupBlockJob is opaque to us, so look up the bitmap
>>> ourselves */
>>> +    if (backup->has_bitmap) {
>>> +        bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
>>> +        if (!bmap) {
>>> +            error_setg(errp, "Bitmap '%s' could not be found",
>>> backup->bitmap);
>>> +            return;
>>> +        }
>>> +    }
>>> +
>>> +    /* Create our transactional callback wrapper,
>>> +       and register that we'd like to call .cb() later. */
>>> +    cb = new_transaction_wrapper(common, bs, block_job_cb, &opaque);
>>> +
>>>       /* AioContext is released in .clean() */
>>>       state->aio_context = bdrv_get_aio_context(bs);
>>>       aio_context_acquire(state->aio_context);
>>> @@ -1789,7 +1798,7 @@ static void
>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>                     backup->has_bitmap, backup->bitmap,
>>>                     backup->has_on_source_error, 
>>> backup->on_source_error,
>>>                     backup->has_on_target_error, 
>>> backup->on_target_error,
>>> -                  NULL, NULL,
>>> +                  cb, opaque,
>>>                     &local_err);
>>>       if (local_err) {
>>>           error_propagate(errp, local_err);
>>> @@ -1798,6 +1807,11 @@ static void
>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>       state->bs = bs;
>>>       state->job = state->bs->job;
>>> +    /* Keep the job alive until .cb(), too. */
>>> +    block_job_incref(state->job);
>>> +    if (bmap) {
>>> +        bdrv_dirty_bitmap_incref(bmap);
>>> +    }
>>>   }
>>>   static void drive_backup_abort(BlkTransactionState *common)
>>> @@ -1809,6 +1823,10 @@ static void
>>> drive_backup_abort(BlkTransactionState *common)
>>>       if (bs && bs->job && bs->job == state->job) {
>>>           block_job_cancel_sync(bs->job);
>>>       }
>>> +
>>> +    /* Undo any callback actions we may have done. Putting down
>>> references
>>> +     * obtained during prepare() is handled by cb(). */
>>> +    undo_transaction_wrapper(common);
>>>   }
>>>   static void drive_backup_clean(BlkTransactionState *common)
>>> @@ -1820,6 +1838,27 @@ static void
>>> drive_backup_clean(BlkTransactionState *common)
>>>       }
>>>   }
>>> +static void drive_backup_cb(BlkTransactionState *common)
>>> +{
>>> +    BlkTransactionData *btd = common->opaque;
>>> +    BlockDriverState *bs = btd->opaque;
>>> +    DriveBackupState *state = DO_UPCAST(DriveBackupState, common,
>>> common);
>>> +
>>> +    assert(state->bs == bs);
>>> +    if (bs->job) {
>>> +        assert(state->job == bs->job);
>>> +    }
>>> +
>>> +    state->aio_context = bdrv_get_aio_context(bs);
>>> +    aio_context_acquire(state->aio_context);
>>> +
>>> +    /* Note: We also have the individual job's return code in
>>> btd->ret */
>>> +    backup_transaction_complete(state->job, common->list->status);
>>> +    block_job_decref(state->job);
>>> +
>>> +    aio_context_release(state->aio_context);
>>> +}
>>> +
>>>   typedef struct BlockdevBackupState {
>>>       BlkTransactionState common;
>>>       BlockDriverState *bs;
>>> @@ -2004,7 +2043,8 @@ static const BdrvActionOps actions[] = {
>>>           .instance_size = sizeof(DriveBackupState),
>>>           .prepare = drive_backup_prepare,
>>>           .abort = drive_backup_abort,
>>> -        .clean = drive_backup_clean
>>> +        .clean = drive_backup_clean,
>>> +        .cb = drive_backup_cb
>>>       },
>>>       [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
>>>           .instance_size = sizeof(BlockdevBackupState),
>>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>>> index e0d5561..731684d 100644
>>> --- a/include/block/block_int.h
>>> +++ b/include/block/block_int.h
>>> @@ -619,6 +619,14 @@ void backup_start(BlockDriverState *bs,
>>> BlockDriverState *target,
>>>                     BlockCompletionFunc *cb, void *opaque,
>>>                     Error **errp);
>>> +/*
>>> + * backup_transaction_complete
>>> + * @job The BackupJob that was associated with a transaction
>>
>> s/BackupJob/backup block job/ or s/BackupJob/backup job/? (there is no
>> structure named "BackupJob", but this looks like there might be one)
>>
>>> + * @ret Amalgamated return code for the entire transaction
>>
>> Hm. The call to this function you're introducing in this patch will
>> probably stay the only one so there won't be anyone who'll have to worry
>> about what this means, but if there was, they probably won't reach a
>> conclusive result.
>>
>> I know what it means because I've seen patch 3 (right now it means
>> "everything OR-ed together so it's 0 for success or some non-zero (maybe
>> positive, maybe negative, depending on whether you have an even or an
>> odd number of errors, and depending on whether the jobs return negative
>> values for errors or not) for error"), but I wouldn't be able to infer
>> it from this. At the least you should add that 0 means success and
>> everything else means error (if you take my suggestion for patch 3, it
>> would be 0 for success and -errno for error, where that error number is
>> one of the errors encountered).
>>
>> Other than that, looks good (as far as I can tell with my still limited
>> insights into patch 3).
>>
>> Max
>>
>
> It's my opinion that this patch should give you insight into patch #3, 
> instead of the other way around. This patch is useful for 
> demonstrating the general flow, because I kept all drive_backup 
> specific concerns strictly separated from patch #3.

It only truly helps me understand patch 3 under the assumption that this 
patch is correct. For reviewing, I cannot really take that assumption. ;-)

I mean, it does help me with the interfaces patch 3 offers, but it still 
doesn't help me with the objects introduced in patch 3, for instance, 
because those are all contained in patch 3 itself. And if I cannot 
verify patch 3 on its own, I cannot really verify that what this patch 
does is correct in regards to the interfaces offered there.

> I'll write a more comprehensive document for the docs/ folder soon, 
> but the general shape of the idea is "The cleanup actions defined in 
> the .cb() method will be executed after every job in the transactional 
> group has completed."

Right, again, assuming your implementation is correct, but questioning 
that is part of the natural reviewing process. :-)

> But there's some fine print:
>
> "'every job' refers only to jobs that have the capability to register 
> a post-transaction callback, which currently means only drive_backup."
>
> The general approach to this is, mechanically:
>
> (1) Extend the life of objects of interest with reference counts, 
> including Jobs, Bitmaps, and BlkTransactionStates.
>
> (2) "steal" the callback from a BlockJob and, when invoked, update our 
> management information for the transaction group in BlkTransactionList.
>
> (3) Once all jobs we have sent out return, execute the .cb() methods 
> as indicated in the BlkTransactionList.
>
>
>
> So, if you were adding a callback to a different QMP transaction:
> - Look at new_transaction_wrapper; you'll use this to bump up various 
> references used internally for this whole system, and it'll keep 
> qmp_transaction from being able to delete the list and state objects.
>
>   This function will give to you (as a gimmick) a new callback and 
> opaque data structure pointer for you to give to the job that you are 
> starting. I obfuscate this just a bit to make it clear that you should 
> be using this function every time to help manage the transactional state.
>
> - Now, the job will run on its own happy way, When it finishes, it 
> will call transaction_callback, which is the function that 
> "intercepts" the callbacks, and dispatches the original enveloped 
> callbacks. It ditches the original data we used to know how to call 
> the original callback, and begins storing completion information for 
> jobs instead.
>
> - transaction_callback will call blk_transaction_complete to store 
> completion info for one job. In turn, it calls 
> put_blk_transaction_list to decrement the pending jobs count (the 
> "jobs" refcount) and once that hits zero ...
>
> - All of the callback mechanisms associated with each 
> BlockTransactionState are run via blk_run_transaction_callbacks.
>
> - This is where drive_backup_cb is going to get invoked again, and 
> then we will splinter back out into backup-specific concerns, with 
> Jobs and Bitmaps and what not.

Yes, I can see the control flow; my main problem lies in the three 
different structs introduced in patch 3. I'll tell you why I'm confused 
by them:

First, there is BlkTransactionList. Sounds to me like it should be a 
list of transactions. Judging from its members, it is rather a list of 
operations ("actions") for a single transaction, however. Apparently, 
there is a double-use for the word "transaction": I'd consider a 
transaction to be the thing started by the QMP command which is a group 
of atomic operations. However, that one structure is called 
BlkTransactionState, but it apparently this is only the state of a 
single operation in the transaction. Pre-existing, but doesn't make it 
better. You added comments for its list entry members, with the first 
one stating "All transactions in the current group" and the second being 
a subset of that. However, again, I consider these to be operations and 
not transactions. The group is the transaction.

So we have that adding to my confusion. Furthermore, I think we don't 
need to list entry members (entry and list_entry) in 
BlkTransactionState. I think it should be fine to drop the 
snap_bdrv_states list in qmp_transaction() and just add all 
BlkTransactionStates to the BlkTransactionList (or however you name it 
if you rename it) in the preparation while () loop there. I don't think 
we need to add them only in blk_transaction_complete() (and we should 
drop that from there if it's been done in qmp_transaction() already, of 
course). Then we can really just use that list generally as *the* list 
of operations in a transaction.

Second, there is BlkTransactionData. From the name I cannot judge at all 
how it is supposed to differ from BlkTransactionState, because, well, it 
contains some data about a transaction... Maybe, because I consider 
BlkTransactionState to be misnamed, BlkTransactionData actually contains 
data about the transaction and not about a single operation? But from 
looking at the members, I have no idea. It has "opaque" and "ret"... I 
don't know how that is vital data to the transaction itself (and I don't 
think it is, it is rather data for the completion callback, I guess?), 
it only looks like some sort of pattern I have seen for AIO callbacks 
and the like.

Third, there is BlkTransactionWrapper. I suppose it wraps a 
transaction...? Or maybe at least an operation, because apparently I can 
no longer be sure whether a transaction is a transaction or an 
operation... But there is no transaction or even a single operation in 
there. It's just a structure describing a callback. I don't know how 
that is a "transaction wrapper"...

So, I don't understand any of the names given to the objects, and while 
I might understand what they are used for by looking at them, I have a 
*really* hard time understanding the code that uses them, because if I 
don't stare at their definitions all the time, I will immediately forget 
what they contain and what they are used for because I don't understand 
the names at all (and there is nothing which explicitly tells me what 
they are used for either). The fact that all of them are starting with 
BlkTransaction makes mixing them up even easier.

> From the point of view of the "transaction action developer," the 
> interface to the new feature is found via the 
> "new_transaction_wrapper" function, But some care must be taken to 
> make sure this callback actually gets used once it has been requested. 
> The whole thing can be aborted with the "undo_transaction_wrapper" 
> function.
>
> All of the other functions that got added to blockdev.c in Patch #3 
> are there to assist these two "interface" functions in doing what they 
> claim to.
>
> Everything in patch 4 and 5 just adds more reference count flexibility 
> to things that are only of interest to drive_backup.
>
> Patch 6 allows us to inject our special callback wrapper into the 
> qmp_drive_backup function.
>
>
> Clear as mud?

Control flow and concept, clear. Implementation, still very muddy, sorry...

Max

> --js
>
>>> + */
>>> +void backup_transaction_complete(BlockJob *job, int ret);
>>> +
>>> +
>>>   void blk_dev_change_media_cb(BlockBackend *blk, bool load);
>>>   bool blk_dev_has_removable_media(BlockBackend *blk);
>>>   void blk_dev_eject_request(BlockBackend *blk, bool force);
>>
>>

  reply	other threads:[~2015-03-18 13:41 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-05  4:15 [Qemu-devel] [PATCH 00/11] block: incremental backup transactions John Snow
2015-03-05  4:15 ` [Qemu-devel] [PATCH 01/11] qapi: Add transaction support to block-dirty-bitmap operations John Snow
2015-03-17 15:14   ` Max Reitz
2015-03-05  4:15 ` [Qemu-devel] [PATCH 02/11] iotests: add transactional incremental backup test John Snow
2015-03-11 12:11   ` Kashyap Chamarthy
2015-03-11 14:25     ` John Snow
2015-03-11 16:18       ` Kashyap Chamarthy
2015-03-05  4:15 ` [Qemu-devel] [PATCH 03/11] block: add transactional callbacks feature John Snow
2015-03-17 17:47   ` Max Reitz
2015-03-17 18:04     ` John Snow
2015-03-17 18:18       ` Eric Blake
2015-03-17 18:23         ` John Snow
2015-03-17 18:19       ` Max Reitz
2015-03-05  4:15 ` [Qemu-devel] [PATCH 04/11] block: add refcount to Job object John Snow
2015-03-17 17:54   ` Max Reitz
2015-03-05  4:15 ` [Qemu-devel] [PATCH 05/11] block: add delayed bitmap successor cleanup John Snow
2015-03-17 18:44   ` Max Reitz
2015-03-17 19:12     ` John Snow
2015-03-17 22:46     ` John Snow
2015-03-18 13:03       ` Max Reitz
2015-03-05  4:15 ` [Qemu-devel] [PATCH 06/11] qmp: Add an implementation wrapper for qmp_drive_backup John Snow
2015-03-17 18:51   ` Max Reitz
2015-03-17 19:16     ` John Snow
2015-03-17 19:33       ` Max Reitz
2015-03-17 20:15       ` Eric Blake
2015-03-05  4:15 ` [Qemu-devel] [PATCH 07/11] block: drive_backup transaction callback support John Snow
2015-03-17 19:49   ` Max Reitz
2015-03-17 23:27     ` John Snow
2015-03-18 13:41       ` Max Reitz [this message]
2015-03-18 19:51         ` John Snow
2015-03-18 20:20           ` Max Reitz
2015-03-05  4:15 ` [Qemu-devel] [PATCH 08/11] iotests: add QMP event waiting queue John Snow
2015-03-17 20:04   ` Max Reitz
2015-03-05  4:15 ` [Qemu-devel] [PATCH 09/11] iotests: test 124 - drive object refactoring John Snow
2015-03-17 20:44   ` Max Reitz
2015-03-17 23:40     ` John Snow
2015-03-18 13:44       ` Max Reitz
2015-03-05  4:15 ` [Qemu-devel] [PATCH 10/11] iotests: 124 - backup_prepare refactoring John Snow
2015-03-17 20:50   ` Max Reitz
2015-03-17 23:44     ` John Snow
2015-03-05  4:15 ` [Qemu-devel] [PATCH 11/11] iotests: 124 - transactional failure test John Snow
2015-03-17 20:59   ` Max Reitz
2015-03-17 21:04     ` John Snow

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55098097.8060401@redhat.com \
    --to=mreitz@redhat.com \
    --cc=armbru@redhat.com \
    --cc=famz@redhat.com \
    --cc=jsnow@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    --cc=vsementsov@parallels.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.