All of lore.kernel.org
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Jonathan Tan <jonathantanmy@google.com>
Cc: git@vger.kernel.org
Subject: Re: [RFC PATCH] http: use --stdin and --keep when downloading pack
Date: Thu, 21 Feb 2019 13:26:40 -0800	[thread overview]
Message-ID: <xmqq5ztcu92n.fsf@gitster-ct.c.googlers.com> (raw)
In-Reply-To: <20190221001447.124088-1-jonathantanmy@google.com> (Jonathan Tan's message of "Wed, 20 Feb 2019 16:14:47 -0800")

Jonathan Tan <jonathantanmy@google.com> writes:

> When Git fetches a pack using dumb HTTP, it does at least 2 things
> differently from when it fetches using fetch-pack or receive-pack: (1)
> it reuses the server's name for the packfile (which incorporates a hash)
> for the packfile, and (2) it does not create a .keep file to avoid race
> conditions with repack.
>
> A subsequent patch will allow downloading packs over HTTP(S) as part of
> a fetch. These downloads will not necessarily be from a Git repository,
> and thus may not have a hash as part of its name. Also, generating a
> .keep file will be necessary to avoid race conditions with repack (until
> the fetch has successfully written the new refs).
>
> Thus, teach http to pass --stdin and --keep to index-pack, the former so
> that we have no reliance on the server's name for the packfile, and the
> latter so that we have the .keep file.

Makes sense.  Can a malicious dumb server mount a passive attack
that serves misnamed packfiles and wait for victims to come?

> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
> This is part of the work of CDN offloading of fetch responses.
>
> I have plans to use the http_pack_request suite of functions to
> implement the part where we download from CDN over HTTP(S), but need
> this change to be able to do so. I think it's better from the code
> quality perspective to reuse these functions, but this necessitates a
> behavior change in that we no longer use the filename as declared by the
> server, so I'm sending this as RFC to see what the community thinks.
> ---
>  http-push.c   |  7 ++++++-
>  http-walker.c |  5 ++++-
>  http.c        | 42 ++++++++++++++++++++----------------------
>  http.h        |  2 +-
>  4 files changed, 31 insertions(+), 25 deletions(-)
>
> diff --git a/http-push.c b/http-push.c
> index b22c7caea0..409b266b0c 100644
> --- a/http-push.c
> +++ b/http-push.c
> @@ -586,11 +586,16 @@ static void finish_request(struct transfer_request *request)
>  			fprintf(stderr, "Unable to get pack file %s\n%s",
>  				request->url, curl_errorstr);
>  		} else {
> +			char *lockfile;
> +
>  			preq = (struct http_pack_request *)request->userData;
>  
>  			if (preq) {
> -				if (finish_http_pack_request(preq) == 0)
> +				if (finish_http_pack_request(preq,
> +							     &lockfile) == 0) {
> +					unlink(lockfile);
>  					fail = 0;
> +				}
>  				release_http_pack_request(preq);
>  			}
>  		}
> diff --git a/http-walker.c b/http-walker.c
> index 8ae5d76c6a..804dc82304 100644
> --- a/http-walker.c
> +++ b/http-walker.c
> @@ -425,6 +425,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne
>  	int ret;
>  	struct slot_results results;
>  	struct http_pack_request *preq;
> +	char *lockfile;
>  
>  	if (fetch_indices(walker, repo))
>  		return -1;
> @@ -457,7 +458,9 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne
>  		goto abort;
>  	}
>  
> -	ret = finish_http_pack_request(preq);
> +	ret = finish_http_pack_request(preq, &lockfile);
> +	if (!ret)
> +		unlink(lockfile);
>  	release_http_pack_request(preq);
>  	if (ret)
>  		return ret;
> diff --git a/http.c b/http.c
> index a32ad36ddf..5f8e602cd2 100644
> --- a/http.c
> +++ b/http.c
> @@ -2200,13 +2200,13 @@ void release_http_pack_request(struct http_pack_request *preq)
>  	free(preq);
>  }
>  
> -int finish_http_pack_request(struct http_pack_request *preq)
> +int finish_http_pack_request(struct http_pack_request *preq, char **lockfile)
>  {
>  	struct packed_git **lst;
>  	struct packed_git *p = preq->target;
> -	char *tmp_idx;
> -	size_t len;
>  	struct child_process ip = CHILD_PROCESS_INIT;
> +	int tmpfile_fd;
> +	int ret = 0;
>  
>  	close_pack_index(p);
>  
> @@ -2218,35 +2218,33 @@ int finish_http_pack_request(struct http_pack_request *preq)
>  		lst = &((*lst)->next);
>  	*lst = (*lst)->next;
>  
> -	if (!strip_suffix(preq->tmpfile.buf, ".pack.temp", &len))
> -		BUG("pack tmpfile does not end in .pack.temp?");
> -	tmp_idx = xstrfmt("%.*s.idx.temp", (int)len, preq->tmpfile.buf);
> +	tmpfile_fd = xopen(preq->tmpfile.buf, O_RDONLY);
>  
>  	argv_array_push(&ip.args, "index-pack");
> -	argv_array_pushl(&ip.args, "-o", tmp_idx, NULL);
> -	argv_array_push(&ip.args, preq->tmpfile.buf);
> +	argv_array_push(&ip.args, "--stdin");
> +	argv_array_pushf(&ip.args, "--keep=git %"PRIuMAX, (uintmax_t)getpid());
>  	ip.git_cmd = 1;
> -	ip.no_stdin = 1;
> -	ip.no_stdout = 1;
> +	ip.in = tmpfile_fd;
> +	ip.out = -1;
>  
> -	if (run_command(&ip)) {
> -		unlink(preq->tmpfile.buf);
> -		unlink(tmp_idx);
> -		free(tmp_idx);
> -		return -1;
> +	if (start_command(&ip)) {
> +		ret = -1;
> +		goto cleanup;
>  	}
>  
> -	unlink(sha1_pack_index_name(p->sha1));
> +	*lockfile = index_pack_lockfile(ip.out);
> +	close(ip.out);
>  
> -	if (finalize_object_file(preq->tmpfile.buf, sha1_pack_name(p->sha1))
> -	 || finalize_object_file(tmp_idx, sha1_pack_index_name(p->sha1))) {
> -		free(tmp_idx);
> -		return -1;
> +	if (finish_command(&ip)) {
> +		ret = -1;
> +		goto cleanup;
>  	}
>  
>  	install_packed_git(the_repository, p);
> -	free(tmp_idx);
> -	return 0;
> +cleanup:
> +	close(tmpfile_fd);
> +	unlink(preq->tmpfile.buf);
> +	return ret;
>  }

Nicely done.

>  
>  struct http_pack_request *new_http_pack_request(
> diff --git a/http.h b/http.h
> index 4eb4e808e5..20d1c85d0b 100644
> --- a/http.h
> +++ b/http.h
> @@ -212,7 +212,7 @@ struct http_pack_request {
>  
>  extern struct http_pack_request *new_http_pack_request(
>  	struct packed_git *target, const char *base_url);
> -extern int finish_http_pack_request(struct http_pack_request *preq);
> +int finish_http_pack_request(struct http_pack_request *preq, char **lockfile);
>  extern void release_http_pack_request(struct http_pack_request *preq);
>  
>  /* Helpers for fetching object */

      parent reply	other threads:[~2019-02-21 21:26 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-21  0:14 [RFC PATCH] http: use --stdin and --keep when downloading pack Jonathan Tan
2019-02-21 14:08 ` Jeff King
2019-02-21 18:49   ` Jonathan Tan
2019-02-22  5:04     ` Jeff King
2019-02-21 21:26 ` Junio C Hamano [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=xmqq5ztcu92n.fsf@gitster-ct.c.googlers.com \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    --cc=jonathantanmy@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.