* [PATCH 1/1] fast-import: checkpoint: only write out refs and tags if we changed them
@ 2018-10-19 16:31 Eric Rannaud
2018-10-19 23:25 ` Eric Rannaud
0 siblings, 1 reply; 2+ messages in thread
From: Eric Rannaud @ 2018-10-19 16:31 UTC (permalink / raw)
To: git; +Cc: jeremy.serror, Junio C Hamano, Eric Rannaud
At a checkpoint, fast-import resets all branches and tags on disk to the
OID that we have in memory. If --force is not given, only branch resets
that result in fast forwards with respect to the state on disk are
allowed.
With this approach, even for refs that fast-import has not been
commanded to change for a long time (or ever), at each checkpoint, we
will systematically reset them to the last state this process knows
about, a state which may have been set before the previous checkpoint.
Any changes made to these refs by a different process since the last
checkpoint will be overwritten.
1> is one process, 2> is another:
1> $ git fast-import --force
1> reset refs/heads/master
1> from $A
1> checkpoint
2> $ git branch -f refs/heads/master $B
1> reset refs/heads/tip
1> from $C
1> checkpoint
At this point master points again to $A.
This problem is mitigated somewhat for branches when --force is not
specified (the default), as requiring a fast forward means in practice
that concurrent external changes are likely to be preserved. But it's
not foolproof either:
1> $ git fast-import
1> reset refs/heads/master
1> from $A
1> checkpoint
2> $ git branch -f refs/heads/master refs/heads/master~1
1> reset refs/heads/tip
1> from $C
1> checkpoint
At this point, master points again to $A, not $A~1 as requested by the
second process.
We now keep track of whether branches and tags have been changed by this
fast-import process since our last checkpoint (or startup). At the next
checkpoint, only refs and tags that new commands have changed are
written to disk.
---
fast-import.c | 14 +++++++++++
t/t9300-fast-import.sh | 53 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 67 insertions(+)
Please let me know what you think of the change itself, before I spend
some quality time bashing out a way to avoid using sleep(1) in the
tests.
Thanks.
diff --git a/fast-import.c b/fast-import.c
index 95600c78e048..d25be5c83110 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -250,6 +250,7 @@ struct branch {
uintmax_t num_notes;
unsigned active : 1;
unsigned delete : 1;
+ unsigned changed : 1;
unsigned pack_id : PACK_ID_BITS;
struct object_id oid;
};
@@ -257,6 +258,7 @@ struct branch {
struct tag {
struct tag *next_tag;
const char *name;
+ unsigned changed : 1;
unsigned int pack_id;
struct object_id oid;
};
@@ -728,6 +730,7 @@ static struct branch *new_branch(const char *name)
b->branch_tree.versions[1].mode = S_IFDIR;
b->num_notes = 0;
b->active = 0;
+ b->changed = 0;
b->pack_id = MAX_PACK_ID;
branch_table[hc] = b;
branch_count++;
@@ -1715,6 +1718,10 @@ static int update_branch(struct branch *b)
struct object_id old_oid;
struct strbuf err = STRBUF_INIT;
+ if (!b->changed)
+ return 0;
+ b->changed = 0;
+
if (is_null_oid(&b->oid)) {
if (b->delete)
delete_ref(NULL, b->name, NULL, 0);
@@ -1780,6 +1787,10 @@ static void dump_tags(void)
goto cleanup;
}
for (t = first_tag; t; t = t->next_tag) {
+ if (!t->changed)
+ continue;
+ t->changed = 0;
+
strbuf_reset(&ref_name);
strbuf_addf(&ref_name, "refs/tags/%s", t->name);
@@ -2813,6 +2824,7 @@ static void parse_new_commit(const char *arg)
if (!store_object(OBJ_COMMIT, &new_data, NULL, &b->oid, next_mark))
b->pack_id = pack_id;
+ b->changed = 1;
b->last_commit = object_count_by_type[OBJ_COMMIT];
}
@@ -2894,6 +2906,7 @@ static void parse_new_tag(const char *arg)
t->pack_id = MAX_PACK_ID;
else
t->pack_id = pack_id;
+ t->changed = 1;
}
static void parse_reset_branch(const char *arg)
@@ -2914,6 +2927,7 @@ static void parse_reset_branch(const char *arg)
b = new_branch(arg);
read_next_command();
parse_from(b);
+ b->changed = 1;
if (command_buf.len > 0)
unread_command_buf = 1;
}
diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh
index 40fe7e49767a..548994dfbeb3 100755
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@ -3123,6 +3123,9 @@ test_expect_success 'U: validate root delete result' '
# The commands in input_file should not produce any output on the file
# descriptor set with --cat-blob-fd (or stdout if unspecified).
+#
+# If input_file2 is specified, sleep 2 seconds before writing it to fast-import
+# stdin.
#
# To make sure you're observing the side effects of checkpoint *before*
# fast-import terminates (and thus writes out its state), check that the
@@ -3131,6 +3134,7 @@ test_expect_success 'U: validate root delete result' '
background_import_then_checkpoint () {
options=$1
input_file=$2
+ input_file2=$3
mkfifo V.input
exec 8<>V.input
@@ -3155,6 +3159,7 @@ background_import_then_checkpoint () {
# because there is no reader on &9 yet.
(
cat "$input_file"
+ [ -n "$input_file2" ] && sleep 2 && cat "$input_file2"
echo "checkpoint"
echo "progress checkpoint"
) >&8 &
@@ -3262,4 +3267,52 @@ test_expect_success PIPE 'V: checkpoint updates tags after tag' '
background_import_still_running
'
+test_expect_success PIPE 'V: checkpoint only resets changed branches' '
+ cat >input <<-INPUT_END &&
+ reset refs/heads/V3
+ from refs/heads/U
+
+ checkpoint
+ INPUT_END
+
+ cat >input2 <<-INPUT_END &&
+ reset refs/heads/V4
+ from refs/heads/U
+
+ INPUT_END
+
+ background_import_then_checkpoint "" input input2 &&
+ sleep 1 &&
+ git branch -f V3 refs/heads/V &&
+ sleep 2 &&
+ test "$(git rev-parse --verify V3)" = "$(git rev-parse --verify V)" &&
+ background_import_still_running
+'
+
+test_expect_success PIPE 'V: checkpoint only updates changed tags' '
+ cat >input <<-INPUT_END &&
+ tag Vtag2
+ from refs/heads/U
+ tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data 0
+
+ checkpoint
+ INPUT_END
+
+ cat >input2 <<-INPUT_END &&
+ tag Vtag3
+ from refs/heads/U
+ tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data 0
+
+ INPUT_END
+
+ background_import_then_checkpoint "" input input2 &&
+ sleep 1 &&
+ git tag -f Vtag2 refs/heads/V &&
+ sleep 2 &&
+ test "$(git rev-parse --verify Vtag2)" = "$(git rev-parse --verify V)" &&
+ background_import_still_running
+'
+
test_done
--
2.19.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH 1/1] fast-import: checkpoint: only write out refs and tags if we changed them
2018-10-19 16:31 [PATCH 1/1] fast-import: checkpoint: only write out refs and tags if we changed them Eric Rannaud
@ 2018-10-19 23:25 ` Eric Rannaud
0 siblings, 0 replies; 2+ messages in thread
From: Eric Rannaud @ 2018-10-19 23:25 UTC (permalink / raw)
To: git; +Cc: jeremy.serror, Junio C Hamano, Eric Rannaud
At a checkpoint, fast-import resets all branches and tags on disk to the
OID that we have in memory. If --force is not given, only branch resets
that result in fast forwards with respect to the state on disk are
allowed.
With this approach, even for refs that fast-import has not been
commanded to change for a long time (or ever), at each checkpoint, we
will systematically reset them to the last state this process knows
about, a state which may have been set before the previous checkpoint.
Any changes made to these refs by a different process since the last
checkpoint will be overwritten.
1> is one process, 2> is another:
1> $ git fast-import --force
1> reset refs/heads/master
1> from $A
1> checkpoint
2> $ git branch -f refs/heads/master $B
1> reset refs/heads/tip
1> from $C
1> checkpoint
At this point master points again to $A.
This problem is mitigated somewhat for branches when --force is not
specified (the default), as requiring a fast forward means in practice
that concurrent external changes are likely to be preserved. But it's
not foolproof either:
1> $ git fast-import
1> reset refs/heads/master
1> from $A
1> checkpoint
2> $ git branch -f refs/heads/master refs/heads/master~1
1> reset refs/heads/tip
1> from $C
1> checkpoint
At this point, master points again to $A, not $A~1 as requested by the
second process.
We now keep track of whether branches and tags have been changed by this
fast-import process since our last checkpoint (or startup). At the next
checkpoint, only refs and tags that new commands have changed are
written to disk.
---
fast-import.c | 14 +++++++++++
t/t9300-fast-import.sh | 55 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 69 insertions(+)
Changed since v1:
The testcases failed to enforce the new behavior.
- t9300: Fixed tests to properly interleave operations between the
fast-import in the background and the concurrent "git branch" and "git
tag" command.
I misremembered how background_import_then_checkpoint() works: it
return only after the whole set of commands provided in input_file
(and now input_file2) are run by fast-import.
In v2, the competing git commands are started first in the background
and are run after a delay of 1 second. input_file is sent right away
to fast-import, followed 2 seconds later by input_file2. That is:
second 0: input_file
second 1: git branch or git tag command
second 2: input_file2
- t9300: Use --force for the branch testcase: without it, earlier
versions of Git don't fail this testcase (as resetting V3 to U is not
a FF, so the second checkpoint skips updating it in earlier versions
of Git, failing to demonstrate the problematic behavior).
Checked that the new testcases do fail with earlier versions of Git.
TODO: The testcases still rely on sleep(1).
diff --git a/fast-import.c b/fast-import.c
index 95600c78e048..d25be5c83110 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -250,6 +250,7 @@ struct branch {
uintmax_t num_notes;
unsigned active : 1;
unsigned delete : 1;
+ unsigned changed : 1;
unsigned pack_id : PACK_ID_BITS;
struct object_id oid;
};
@@ -257,6 +258,7 @@ struct branch {
struct tag {
struct tag *next_tag;
const char *name;
+ unsigned changed : 1;
unsigned int pack_id;
struct object_id oid;
};
@@ -728,6 +730,7 @@ static struct branch *new_branch(const char *name)
b->branch_tree.versions[1].mode = S_IFDIR;
b->num_notes = 0;
b->active = 0;
+ b->changed = 0;
b->pack_id = MAX_PACK_ID;
branch_table[hc] = b;
branch_count++;
@@ -1715,6 +1718,10 @@ static int update_branch(struct branch *b)
struct object_id old_oid;
struct strbuf err = STRBUF_INIT;
+ if (!b->changed)
+ return 0;
+ b->changed = 0;
+
if (is_null_oid(&b->oid)) {
if (b->delete)
delete_ref(NULL, b->name, NULL, 0);
@@ -1780,6 +1787,10 @@ static void dump_tags(void)
goto cleanup;
}
for (t = first_tag; t; t = t->next_tag) {
+ if (!t->changed)
+ continue;
+ t->changed = 0;
+
strbuf_reset(&ref_name);
strbuf_addf(&ref_name, "refs/tags/%s", t->name);
@@ -2813,6 +2824,7 @@ static void parse_new_commit(const char *arg)
if (!store_object(OBJ_COMMIT, &new_data, NULL, &b->oid, next_mark))
b->pack_id = pack_id;
+ b->changed = 1;
b->last_commit = object_count_by_type[OBJ_COMMIT];
}
@@ -2894,6 +2906,7 @@ static void parse_new_tag(const char *arg)
t->pack_id = MAX_PACK_ID;
else
t->pack_id = pack_id;
+ t->changed = 1;
}
static void parse_reset_branch(const char *arg)
@@ -2914,6 +2927,7 @@ static void parse_reset_branch(const char *arg)
b = new_branch(arg);
read_next_command();
parse_from(b);
+ b->changed = 1;
if (command_buf.len > 0)
unread_command_buf = 1;
}
diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh
index 40fe7e49767a..92e8c3d54112 100755
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@ -3123,6 +3123,9 @@ test_expect_success 'U: validate root delete result' '
# The commands in input_file should not produce any output on the file
# descriptor set with --cat-blob-fd (or stdout if unspecified).
+#
+# If input_file2 is specified, sleep 2 seconds before writing it to fast-import
+# stdin.
#
# To make sure you're observing the side effects of checkpoint *before*
# fast-import terminates (and thus writes out its state), check that the
@@ -3131,6 +3134,7 @@ test_expect_success 'U: validate root delete result' '
background_import_then_checkpoint () {
options=$1
input_file=$2
+ input_file2=$3
mkfifo V.input
exec 8<>V.input
@@ -3155,6 +3159,7 @@ background_import_then_checkpoint () {
# because there is no reader on &9 yet.
(
cat "$input_file"
+ [ -n "$input_file2" ] && sleep 2 && cat "$input_file2"
echo "checkpoint"
echo "progress checkpoint"
) >&8 &
@@ -3262,4 +3267,54 @@ test_expect_success PIPE 'V: checkpoint updates tags after tag' '
background_import_still_running
'
+background_delayed () {
+ (sleep 1 && exec "$@") &
+}
+
+test_expect_success PIPE 'V: checkpoint only resets changed branches' '
+ cat >input <<-INPUT_END &&
+ reset refs/heads/V3
+ from refs/heads/U
+
+ checkpoint
+
+ INPUT_END
+
+ cat >input2 <<-INPUT_END &&
+ reset refs/heads/V4
+ from refs/heads/U
+
+ INPUT_END
+
+ background_delayed git branch -f V3 V &&
+ background_import_then_checkpoint "--force" input input2 &&
+ test "$(git rev-parse --verify V3)" = "$(git rev-parse --verify V)" &&
+ background_import_still_running
+'
+
+test_expect_success PIPE 'V: checkpoint only updates changed tags' '
+ cat >input <<-INPUT_END &&
+ tag Vtag2
+ from refs/heads/U
+ tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data 0
+
+ checkpoint
+
+ INPUT_END
+
+ cat >input2 <<-INPUT_END &&
+ tag Vtag3
+ from refs/heads/U
+ tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data 0
+
+ INPUT_END
+
+ background_delayed git tag -f Vtag2 V &&
+ background_import_then_checkpoint "" input input2 &&
+ test "$(git show-ref -d Vtag2 |tail -1 |awk \{print\ \$1\})" = "$(git rev-parse --verify V)" &&
+ background_import_still_running
+'
+
test_done
--
2.19.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2018-10-19 23:25 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-19 16:31 [PATCH 1/1] fast-import: checkpoint: only write out refs and tags if we changed them Eric Rannaud
2018-10-19 23:25 ` Eric Rannaud
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.