From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org, Jeff King <peff@peff.net>,
Stefan Beller <sbeller@google.com>,
Ramsay Jones <ramsay@ramsayjones.plus.com>
Subject: Re: [PATCH v3] fetch: replace string-list used as a look-up table with a hashmap
Date: Mon, 22 Oct 2018 11:57:46 +0200 (DST) [thread overview]
Message-ID: <nycvar.QRO.7.76.6.1810221140510.4546@tvgsbejvaqbjf.bet> (raw)
In-Reply-To: <xmqqy3au1tr1.fsf_-_@gitster-ct.c.googlers.com>
Hi Junio,
On Fri, 19 Oct 2018, Junio C Hamano wrote:
> In find_non_local_tags() helper function (used to implement the
> "follow tags"), we use string_list_has_string() on two string lists
> as a way to see if a refname has already been processed, etc.
>
> All this code predates more modern in-core lookup API like hashmap;
> replace them with two hashmaps and one string list---the hashmaps
> are used for look-ups and the string list is to keep the order of
> items in the returned result stable (which is the only single thing
> hashmap does worse than lookups on string-list).
This sounds as if you would need a linked hash map, i.e. a hash map whose
iterator remembers the order in which items were inserted.
I am fine, however, with your patch as a first step in that direction, and
only implementing a linked hash map when we realize that we need it
somewhere else, too.
Just one thing^W^Wa couple of things:
> Similarly, get_ref_map() uses another string-list as a look-up table
> for existing refs. Replace it with a hashmap.
>
> Signed-off-by: Junio C Hamano <gitster@pobox.com>
> ---
>
> * This converts another string-list user in the same file. For now
> I am done with this topic; I'm willing to fix bugs in this patch,
> but do not intend to spend time killing more string-lists used as
> look-up tables.
>
> builtin/fetch.c | 152 +++++++++++++++++++++++++++++++++---------------
> 1 file changed, 106 insertions(+), 46 deletions(-)
>
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index 0696abfc2a..0f8e333022 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -223,18 +223,6 @@ static void add_merge_config(struct ref **head,
> }
> }
>
> -static int add_existing(const char *refname, const struct object_id *oid,
> - int flag, void *cbdata)
> -{
> - struct string_list *list = (struct string_list *)cbdata;
> - struct string_list_item *item = string_list_insert(list, refname);
> - struct object_id *old_oid = xmalloc(sizeof(*old_oid));
> -
> - oidcpy(old_oid, oid);
> - item->util = old_oid;
> - return 0;
> -}
> -
> static int will_fetch(struct ref **head, const unsigned char *sha1)
> {
> struct ref *rm = *head;
> @@ -246,16 +234,76 @@ static int will_fetch(struct ref **head, const unsigned char *sha1)
> return 0;
> }
>
> +struct refname_hash_entry {
> + struct hashmap_entry ent; /* must be the first member */
> + struct object_id oid;
> + char refname[FLEX_ARRAY];
> +};
> +
> +static int refname_hash_entry_cmp(const void *hashmap_cmp_fn_data,
> + const void *e1_,
> + const void *e2_,
> + const void *keydata)
> +{
> + const struct refname_hash_entry *e1 = e1_;
> + const struct refname_hash_entry *e2 = e2_;
> +
> + return strcmp(e1->refname, keydata ? keydata : e2->refname);
> +}
> +
> +static struct refname_hash_entry *refname_hash_add(struct hashmap *map,
> + const char *refname,
> + const struct object_id *oid)
> +{
> + struct refname_hash_entry *ent;
> + size_t len = strlen(refname);
> +
> + FLEX_ALLOC_MEM(ent, refname, refname, len);
> + hashmap_entry_init(ent, memhash(refname, len));
> + oidcpy(&ent->oid, oid);
> + hashmap_add(map, ent);
> + return ent;
> +}
> +
> +static int add_one_refname(const char *refname,
> + const struct object_id *oid,
> + int flag, void *cbdata)
> +{
> + struct hashmap *refname_map = cbdata;
> +
> + (void) refname_hash_add(refname_map, refname, oid);
> + return 0;
> +}
> +
> +static void refname_hash_init(struct hashmap *map)
> +{
> + hashmap_init(map, refname_hash_entry_cmp, NULL, 0);
> +}
> +
> +static int refname_hash_exists(struct hashmap *map, const char *refname)
> +{
> + struct hashmap_entry key;
> + size_t len = strlen(refname);
> + hashmap_entry_init(&key, memhash(refname, len));
> +
> + return !!hashmap_get(map, &key, refname);
It would probably make more sense to `hashmap_get_from_hash()` and
`strhash()` here (and `strhash()` should probably be used everywhere
instead of `memhash(str, strlen(str))`).
> +}
> +
> static void find_non_local_tags(const struct ref *refs,
> struct ref **head,
> struct ref ***tail)
> {
> - struct string_list existing_refs = STRING_LIST_INIT_DUP;
> - struct string_list remote_refs = STRING_LIST_INIT_NODUP;
> + struct hashmap existing_refs;
> + struct hashmap remote_refs;
> + struct string_list remote_refs_list = STRING_LIST_INIT_NODUP;
> + struct string_list_item *remote_ref_item;
> const struct ref *ref;
> - struct string_list_item *item = NULL;
> + struct refname_hash_entry *item = NULL;
>
> - for_each_ref(add_existing, &existing_refs);
> + refname_hash_init(&existing_refs);
> + refname_hash_init(&remote_refs);
> +
> + for_each_ref(add_one_refname, &existing_refs);
> for (ref = refs; ref; ref = ref->next) {
> if (!starts_with(ref->name, "refs/tags/"))
> continue;
> @@ -271,10 +319,10 @@ static void find_non_local_tags(const struct ref *refs,
> !has_object_file_with_flags(&ref->old_oid,
> OBJECT_INFO_QUICK) &&
> !will_fetch(head, ref->old_oid.hash) &&
> - !has_sha1_file_with_flags(item->util,
> + !has_sha1_file_with_flags(item->oid.hash,
I am not sure that we need to test for null OIDs here, given that...
> OBJECT_INFO_QUICK) &&
> - !will_fetch(head, item->util))
> - item->util = NULL;
> + !will_fetch(head, item->oid.hash))
> + oidclr(&item->oid);
we no longer can set `util` to `NULL`, but have to use a null OID to
indicate that condition now.
Of course, `has_sha1_file_with_flags()` is supposed to return `false` for
null OIDs, I guess.
> item = NULL;
> continue;
> }
> @@ -286,48 +334,55 @@ static void find_non_local_tags(const struct ref *refs,
> * fetch.
> */
> if (item &&
> - !has_sha1_file_with_flags(item->util, OBJECT_INFO_QUICK) &&
> - !will_fetch(head, item->util))
> - item->util = NULL;
> + !has_sha1_file_with_flags(item->oid.hash, OBJECT_INFO_QUICK) &&
> + !will_fetch(head, item->oid.hash))
> + oidclr(&item->oid);
>
> item = NULL;
>
> /* skip duplicates and refs that we already have */
> - if (string_list_has_string(&remote_refs, ref->name) ||
> - string_list_has_string(&existing_refs, ref->name))
> + if (refname_hash_exists(&remote_refs, ref->name) ||
> + refname_hash_exists(&existing_refs, ref->name))
> continue;
>
> - item = string_list_insert(&remote_refs, ref->name);
> - item->util = (void *)&ref->old_oid;
> + item = refname_hash_add(&remote_refs, ref->name, &ref->old_oid);
> + string_list_insert(&remote_refs_list, ref->name);
> }
> - string_list_clear(&existing_refs, 1);
> + hashmap_free(&existing_refs, 1);
>
> /*
> * We may have a final lightweight tag that needs to be
> * checked to see if it needs fetching.
> */
> if (item &&
> - !has_sha1_file_with_flags(item->util, OBJECT_INFO_QUICK) &&
> - !will_fetch(head, item->util))
> - item->util = NULL;
> + !has_sha1_file_with_flags(item->oid.hash, OBJECT_INFO_QUICK) &&
> + !will_fetch(head, item->oid.hash))
> + oidclr(&item->oid);
>
> /*
> - * For all the tags in the remote_refs string list,
> + * For all the tags in the remote_refs_list,
> * add them to the list of refs to be fetched
> */
> - for_each_string_list_item(item, &remote_refs) {
> + for_each_string_list_item(remote_ref_item, &remote_refs_list) {
> + const char *refname = remote_ref_item->string;
> + struct hashmap_entry key;
> +
> + hashmap_entry_init(&key, memhash(refname, strlen(refname)));
> + item = hashmap_get(&remote_refs, &key, refname);
> + if (!item)
> + continue; /* can this happen??? */
This would indicate a BUG, no?
Thank you for working on this,
Dscho
> +
> /* Unless we have already decided to ignore this item... */
> - if (item->util)
> - {
> - struct ref *rm = alloc_ref(item->string);
> - rm->peer_ref = alloc_ref(item->string);
> - oidcpy(&rm->old_oid, item->util);
> + if (!is_null_oid(&item->oid)) {
> + struct ref *rm = alloc_ref(item->refname);
> + rm->peer_ref = alloc_ref(item->refname);
> + oidcpy(&rm->old_oid, &item->oid);
> **tail = rm;
> *tail = &rm->next;
> }
> }
> -
> - string_list_clear(&remote_refs, 0);
> + hashmap_free(&remote_refs, 1);
> + string_list_clear(&remote_refs_list, 0);
> }
>
> static struct ref *get_ref_map(struct remote *remote,
> @@ -343,7 +398,7 @@ static struct ref *get_ref_map(struct remote *remote,
> /* opportunistically-updated references: */
> struct ref *orefs = NULL, **oref_tail = &orefs;
>
> - struct string_list existing_refs = STRING_LIST_INIT_DUP;
> + struct hashmap existing_refs;
>
> if (rs->nr) {
> struct refspec *fetch_refspec;
> @@ -437,19 +492,24 @@ static struct ref *get_ref_map(struct remote *remote,
>
> ref_map = ref_remove_duplicates(ref_map);
>
> - for_each_ref(add_existing, &existing_refs);
> + refname_hash_init(&existing_refs);
> + for_each_ref(add_one_refname, &existing_refs);
> +
> for (rm = ref_map; rm; rm = rm->next) {
> if (rm->peer_ref) {
> - struct string_list_item *peer_item =
> - string_list_lookup(&existing_refs,
> - rm->peer_ref->name);
> + struct hashmap_entry key;
> + const char *refname = rm->peer_ref->name;
> + struct refname_hash_entry *peer_item;
> +
> + hashmap_entry_init(&key, memhash(refname, strlen(refname)));
> + peer_item = hashmap_get(&existing_refs, &key, refname);
> if (peer_item) {
> - struct object_id *old_oid = peer_item->util;
> + struct object_id *old_oid = &peer_item->oid;
> oidcpy(&rm->peer_ref->old_oid, old_oid);
> }
> }
> }
> - string_list_clear(&existing_refs, 1);
> + hashmap_free(&existing_refs, 1);
>
> return ref_map;
> }
> --
> 2.19.1-450-ga4b8ab5363
>
>
next prev parent reply other threads:[~2018-10-22 9:58 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-09-26 21:28 [PATCH] fetch: replace string-list used as a look-up table with a hashmap Junio C Hamano
2018-09-26 22:59 ` Stefan Beller
2018-09-27 5:54 ` Jeff King
2018-09-29 18:31 ` Junio C Hamano
2018-09-30 4:57 ` Jeff King
2018-09-27 5:34 ` Jeff King
2018-09-30 2:11 ` Junio C Hamano
2018-10-19 3:48 ` [PATCH v3] " Junio C Hamano
2018-10-22 9:57 ` Johannes Schindelin [this message]
2018-10-27 6:47 ` Re*: " Junio C Hamano
2018-10-31 14:50 ` Johannes Schindelin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=nycvar.QRO.7.76.6.1810221140510.4546@tvgsbejvaqbjf.bet \
--to=johannes.schindelin@gmx.de \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=peff@peff.net \
--cc=ramsay@ramsayjones.plus.com \
--cc=sbeller@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).