Git Mailing List Archive on lore.kernel.org
 help / color / Atom feed
From: "Elijah Newren via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Jeff King <peff@peff.net>, Elijah Newren <newren@gmail.com>,
	Elijah Newren <newren@gmail.com>
Subject: [PATCH 2/5] strmap: new utility functions
Date: Fri, 21 Aug 2020 18:52:26 +0000
Message-ID: <a86fd5fdcc47baf85046eb07257f4db9f9498084.1598035949.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.835.git.git.1598035949.gitgitgadget@gmail.com>

From: Elijah Newren <newren@gmail.com>

Add strmap as a new struct and associated utility functions,
specifically for hashmaps that map strings to some value.  The API is
taken directly from Peff's proposal at
https://lore.kernel.org/git/20180906191203.GA26184@sigill.intra.peff.net/

Peff only included the header, not the implementation, so it isn't clear what
the structure was he was going to use for the hash entries.  Instead of having
my str_entry struct have three subfields (the hashmap_entry, the string, and
the void* value), I made it only have two -- the hashmap_entry and a
string_list_item, for two reasons:

  1) a strmap is often the data structure we want where string_list has
     been used in the past.  Using the same building block for
     individual entries in both makes it easier to adopt and reuse
     parts of the string_list API in strmap.

  2) In some cases, after doing lots of other work, I want to be able
     to iterate over the items in my strmap in sorted order.  hashmap
     obviously doesn't support that, but I wanted to be able to export
     the strmap to a string_list easily and then use its functions.
     (Note: I do not need the data structure to both be sorted and have
     efficient lookup at all times.  If I did, I might use a B-tree
     instead, as suggested by brian in response to Peff in the thread
     noted above.  In my case, most strmaps will never need sorting, but
     in one special case at the very end of a bunch of other work I want
     to iterate over the items in sorted order without doing any more
     lookups afterward.)

Also, I removed the STRMAP_INIT macro, since it cannot be used to
correctly initialize a strmap; the underlying hashmap needs a call to
hashmap_init() to allocate the hash table first.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 Makefile |  1 +
 strmap.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 strmap.h | 47 ++++++++++++++++++++++++++++++++
 3 files changed, 129 insertions(+)
 create mode 100644 strmap.c
 create mode 100644 strmap.h

diff --git a/Makefile b/Makefile
index 65f8cfb236..0da15a9ee5 100644
--- a/Makefile
+++ b/Makefile
@@ -988,6 +988,7 @@ LIB_OBJS += strbuf.o
 LIB_OBJS += strvec.o
 LIB_OBJS += streaming.o
 LIB_OBJS += string-list.o
+LIB_OBJS += strmap.o
 LIB_OBJS += sub-process.o
 LIB_OBJS += submodule-config.o
 LIB_OBJS += submodule.o
diff --git a/strmap.c b/strmap.c
new file mode 100644
index 0000000000..1c9fdb3b1e
--- /dev/null
+++ b/strmap.c
@@ -0,0 +1,81 @@
+#include "git-compat-util.h"
+#include "strmap.h"
+
+static int cmp_str_entry(const void *hashmap_cmp_fn_data,
+			 const struct hashmap_entry *entry1,
+			 const struct hashmap_entry *entry2,
+			 const void *keydata)
+{
+	const struct str_entry *e1, *e2;
+
+	e1 = container_of(entry1, const struct str_entry, ent);
+	e2 = container_of(entry2, const struct str_entry, ent);
+	return strcmp(e1->item.string, e2->item.string);
+}
+
+static struct str_entry *find_str_entry(struct strmap *map,
+					const char *str)
+{
+	struct str_entry entry;
+	hashmap_entry_init(&entry.ent, strhash(str));
+	entry.item.string = (char *)str;
+	return hashmap_get_entry(&map->map, &entry, ent, NULL);
+}
+
+void strmap_init(struct strmap *map)
+{
+	hashmap_init(&map->map, cmp_str_entry, NULL, 0);
+}
+
+void strmap_clear(struct strmap *map, int free_util)
+{
+	struct hashmap_iter iter;
+	struct str_entry *e;
+
+	if (!map)
+		return;
+
+	hashmap_for_each_entry(&map->map, &iter, e, ent /* member name */) {
+		free(e->item.string);
+		if (free_util)
+			free(e->item.util);
+	}
+	hashmap_free_entries(&map->map, struct str_entry, ent);
+	strmap_init(map);
+}
+
+/*
+ * Insert "str" into the map, pointing to "data". A copy of "str" is made, so
+ * it does not need to persist after the this function is called.
+ *
+ * If an entry for "str" already exists, its data pointer is overwritten, and
+ * the original data pointer returned. Otherwise, returns NULL.
+ */
+void *strmap_put(struct strmap *map, const char *str, void *data)
+{
+	struct str_entry *entry = find_str_entry(map, str);
+	void *old = NULL;
+
+	if (entry) {
+		old = entry->item.util;
+		entry->item.util = data;
+	} else {
+		entry = xmalloc(sizeof(*entry));
+		hashmap_entry_init(&entry->ent, strhash(str));
+		entry->item.string = strdup(str);
+		entry->item.util = data;
+		hashmap_add(&map->map, &entry->ent);
+	}
+	return old;
+}
+
+void *strmap_get(struct strmap *map, const char *str)
+{
+	struct str_entry *entry = find_str_entry(map, str);
+	return entry ? entry->item.util : NULL;
+}
+
+int strmap_contains(struct strmap *map, const char *str)
+{
+	return find_str_entry(map, str) != NULL;
+}
diff --git a/strmap.h b/strmap.h
new file mode 100644
index 0000000000..eb5807f6fa
--- /dev/null
+++ b/strmap.h
@@ -0,0 +1,47 @@
+#ifndef STRMAP_H
+#define STRMAP_H
+
+#include "hashmap.h"
+#include "string-list.h"
+
+struct strmap {
+	struct hashmap map;
+};
+
+struct str_entry {
+	struct hashmap_entry ent;
+	struct string_list_item item;
+};
+
+/*
+ * Initialize an empty strmap
+ */
+void strmap_init(struct strmap *map);
+
+/*
+ * Remove all entries from the map, releasing any allocated resources.
+ */
+void strmap_clear(struct strmap *map, int free_values);
+
+/*
+ * Insert "str" into the map, pointing to "data". A copy of "str" is made, so
+ * it does not need to persist after the this function is called.
+ *
+ * If an entry for "str" already exists, its data pointer is overwritten, and
+ * the original data pointer returned. Otherwise, returns NULL.
+ */
+void *strmap_put(struct strmap *map, const char *str, void *data);
+
+/*
+ * Return the data pointer mapped by "str", or NULL if the entry does not
+ * exist.
+ */
+void *strmap_get(struct strmap *map, const char *str);
+
+/*
+ * Return non-zero iff "str" is present in the map. This differs from
+ * strmap_get() in that it can distinguish entries with a NULL data pointer.
+ */
+int strmap_contains(struct strmap *map, const char *str);
+
+#endif /* STRMAP_H */
-- 
gitgitgadget


  parent reply index

Thread overview: 144+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-21 18:52 [PATCH 0/5] Add struct strmap and associated " Elijah Newren via GitGitGadget
2020-08-21 18:52 ` [PATCH 1/5] hashmap: add usage documentation explaining hashmap_free[_entries]() Elijah Newren via GitGitGadget
2020-08-21 19:22   ` Jeff King
2020-08-21 18:52 ` Elijah Newren via GitGitGadget [this message]
2020-08-21 19:48   ` [PATCH 2/5] strmap: new utility functions Jeff King
2020-08-21 18:52 ` [PATCH 3/5] strmap: add more " Elijah Newren via GitGitGadget
2020-08-21 19:58   ` Jeff King
2020-08-21 18:52 ` [PATCH 4/5] strmap: add strdup_strings option Elijah Newren via GitGitGadget
2020-08-21 20:01   ` Jeff King
2020-08-21 20:41     ` Elijah Newren
2020-08-21 21:03       ` Jeff King
2020-08-21 22:25         ` Elijah Newren
2020-08-28  7:08           ` Jeff King
2020-08-28 17:20             ` Elijah Newren
2020-08-21 18:52 ` [PATCH 5/5] strmap: add functions facilitating use as a string->int map Elijah Newren via GitGitGadget
2020-08-21 20:10   ` Jeff King
2020-08-21 20:51     ` Elijah Newren
2020-08-21 21:05       ` Jeff King
2020-08-21 20:16 ` [PATCH 0/5] Add struct strmap and associated utility functions Jeff King
2020-08-21 21:33   ` Elijah Newren
2020-08-21 22:28     ` Elijah Newren
2020-08-28  7:03     ` Jeff King
2020-08-28 15:29       ` Elijah Newren
2020-09-01  9:27         ` Jeff King
2020-10-13  0:40 ` [PATCH v2 00/10] " Elijah Newren via GitGitGadget
2020-10-13  0:40   ` [PATCH v2 01/10] hashmap: add usage documentation explaining hashmap_free[_entries]() Elijah Newren via GitGitGadget
2020-10-30 12:50     ` Jeff King
2020-10-30 19:55       ` Elijah Newren
2020-11-03 16:26         ` Jeff King
2020-11-03 16:48           ` Elijah Newren
2020-10-13  0:40   ` [PATCH v2 02/10] hashmap: adjust spacing to fix argument alignment Elijah Newren via GitGitGadget
2020-10-30 12:51     ` Jeff King
2020-10-13  0:40   ` [PATCH v2 03/10] hashmap: allow re-use after hashmap_free() Elijah Newren via GitGitGadget
2020-10-30 13:35     ` Jeff King
2020-10-30 15:37       ` Elijah Newren
2020-11-03 16:08         ` Jeff King
2020-11-03 16:16           ` Elijah Newren
2020-10-13  0:40   ` [PATCH v2 04/10] hashmap: introduce a new hashmap_partial_clear() Elijah Newren via GitGitGadget
2020-10-30 13:41     ` Jeff King
2020-10-30 16:03       ` Elijah Newren
2020-11-03 16:10         ` Jeff King
2020-10-13  0:40   ` [PATCH v2 05/10] strmap: new utility functions Elijah Newren via GitGitGadget
2020-10-30 14:12     ` Jeff King
2020-10-30 16:26       ` Elijah Newren
2020-10-13  0:40   ` [PATCH v2 06/10] strmap: add more " Elijah Newren via GitGitGadget
2020-10-30 14:23     ` Jeff King
2020-10-30 16:43       ` Elijah Newren
2020-11-03 16:12         ` Jeff King
2020-10-13  0:40   ` [PATCH v2 07/10] strmap: enable faster clearing and reusing of strmaps Elijah Newren via GitGitGadget
2020-10-30 14:27     ` Jeff King
2020-10-13  0:40   ` [PATCH v2 08/10] strmap: add functions facilitating use as a string->int map Elijah Newren via GitGitGadget
2020-10-30 14:39     ` Jeff King
2020-10-30 17:28       ` Elijah Newren
2020-11-03 16:20         ` Jeff King
2020-11-03 16:46           ` Elijah Newren
2020-10-13  0:40   ` [PATCH v2 09/10] strmap: add a strset sub-type Elijah Newren via GitGitGadget
2020-10-30 14:44     ` Jeff King
2020-10-30 18:02       ` Elijah Newren
2020-10-13  0:40   ` [PATCH v2 10/10] strmap: enable allocations to come from a mem_pool Elijah Newren via GitGitGadget
2020-10-30 14:56     ` Jeff King
2020-10-30 19:31       ` Elijah Newren
2020-11-03 16:24         ` Jeff King
2020-11-02 18:55   ` [PATCH v3 00/13] Add struct strmap and associated utility functions Elijah Newren via GitGitGadget
2020-11-02 18:55     ` [PATCH v3 01/13] hashmap: add usage documentation explaining hashmap_free[_entries]() Elijah Newren via GitGitGadget
2020-11-02 18:55     ` [PATCH v3 02/13] hashmap: adjust spacing to fix argument alignment Elijah Newren via GitGitGadget
2020-11-02 18:55     ` [PATCH v3 03/13] hashmap: allow re-use after hashmap_free() Elijah Newren via GitGitGadget
2020-11-02 18:55     ` [PATCH v3 04/13] hashmap: introduce a new hashmap_partial_clear() Elijah Newren via GitGitGadget
2020-11-02 18:55     ` [PATCH v3 05/13] hashmap: provide deallocation function names Elijah Newren via GitGitGadget
2020-11-02 18:55     ` [PATCH v3 06/13] strmap: new utility functions Elijah Newren via GitGitGadget
2020-11-02 18:55     ` [PATCH v3 07/13] strmap: add more " Elijah Newren via GitGitGadget
2020-11-04 20:13       ` Jeff King
2020-11-04 20:24         ` Elijah Newren
2020-11-02 18:55     ` [PATCH v3 08/13] strmap: enable faster clearing and reusing of strmaps Elijah Newren via GitGitGadget
2020-11-02 18:55     ` [PATCH v3 09/13] strmap: add functions facilitating use as a string->int map Elijah Newren via GitGitGadget
2020-11-04 20:21       ` Jeff King
2020-11-02 18:55     ` [PATCH v3 10/13] strmap: add a strset sub-type Elijah Newren via GitGitGadget
2020-11-04 20:31       ` Jeff King
2020-11-02 18:55     ` [PATCH v3 11/13] strmap: enable allocations to come from a mem_pool Elijah Newren via GitGitGadget
2020-11-02 18:55     ` [PATCH v3 12/13] strmap: take advantage of FLEXPTR_ALLOC_STR when relevant Elijah Newren via GitGitGadget
2020-11-04 20:43       ` Jeff King
2020-11-02 18:55     ` [PATCH v3 13/13] Use new HASHMAP_INIT macro to simplify hashmap initialization Elijah Newren via GitGitGadget
2020-11-04 20:48       ` Jeff King
2020-11-04 20:52     ` [PATCH v3 00/13] Add struct strmap and associated utility functions Jeff King
2020-11-04 22:20       ` Elijah Newren
2020-11-05  0:22     ` [PATCH v4 " Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 01/13] hashmap: add usage documentation explaining hashmap_free[_entries]() Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 02/13] hashmap: adjust spacing to fix argument alignment Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 03/13] hashmap: allow re-use after hashmap_free() Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 04/13] hashmap: introduce a new hashmap_partial_clear() Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 05/13] hashmap: provide deallocation function names Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 06/13] strmap: new utility functions Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 07/13] strmap: add more " Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 08/13] strmap: enable faster clearing and reusing of strmaps Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 09/13] strmap: add functions facilitating use as a string->int map Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 10/13] strmap: add a strset sub-type Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 11/13] strmap: enable allocations to come from a mem_pool Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 12/13] strmap: take advantage of FLEXPTR_ALLOC_STR when relevant Elijah Newren via GitGitGadget
2020-11-05  0:22       ` [PATCH v4 13/13] Use new HASHMAP_INIT macro to simplify hashmap initialization Elijah Newren via GitGitGadget
2020-11-05 13:29       ` [PATCH v4 00/13] Add struct strmap and associated utility functions Jeff King
2020-11-05 20:25         ` Junio C Hamano
2020-11-05 21:17           ` Jeff King
2020-11-05 21:22           ` Elijah Newren
2020-11-05 22:15             ` Junio C Hamano
2020-11-06  0:24       ` [PATCH v5 00/15] " Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 01/15] hashmap: add usage documentation explaining hashmap_free[_entries]() Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 02/15] hashmap: adjust spacing to fix argument alignment Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 03/15] hashmap: allow re-use after hashmap_free() Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 04/15] hashmap: introduce a new hashmap_partial_clear() Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 05/15] hashmap: provide deallocation function names Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 06/15] strmap: new utility functions Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 07/15] strmap: add more " Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 08/15] strmap: enable faster clearing and reusing of strmaps Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 09/15] strmap: add functions facilitating use as a string->int map Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 10/15] strmap: split create_entry() out of strmap_put() Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 11/15] strmap: add a strset sub-type Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 12/15] strmap: enable allocations to come from a mem_pool Elijah Newren via GitGitGadget
2020-11-11 17:33           ` Phillip Wood
2020-11-11 18:49             ` Elijah Newren
2020-11-11 19:01             ` Jeff King
2020-11-11 20:34               ` Chris Torek
2020-11-06  0:24         ` [PATCH v5 13/15] strmap: take advantage of FLEXPTR_ALLOC_STR when relevant Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 14/15] Use new HASHMAP_INIT macro to simplify hashmap initialization Elijah Newren via GitGitGadget
2020-11-06  0:24         ` [PATCH v5 15/15] shortlog: use strset from strmap.h Elijah Newren via GitGitGadget
2020-11-06  2:00         ` [PATCH v5 00/15] Add struct strmap and associated utility functions Junio C Hamano
2020-11-06  2:42           ` Elijah Newren
2020-11-06  2:48             ` Jeff King
2020-11-06 17:32               ` Junio C Hamano
2020-11-11 20:02         ` [PATCH v6 " Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 01/15] hashmap: add usage documentation explaining hashmap_free[_entries]() Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 02/15] hashmap: adjust spacing to fix argument alignment Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 03/15] hashmap: allow re-use after hashmap_free() Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 04/15] hashmap: introduce a new hashmap_partial_clear() Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 05/15] hashmap: provide deallocation function names Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 06/15] strmap: new utility functions Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 07/15] strmap: add more " Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 08/15] strmap: enable faster clearing and reusing of strmaps Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 09/15] strmap: add functions facilitating use as a string->int map Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 10/15] strmap: split create_entry() out of strmap_put() Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 11/15] strmap: add a strset sub-type Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 12/15] strmap: enable allocations to come from a mem_pool Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 13/15] strmap: take advantage of FLEXPTR_ALLOC_STR when relevant Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 14/15] Use new HASHMAP_INIT macro to simplify hashmap initialization Elijah Newren via GitGitGadget
2020-11-11 20:02           ` [PATCH v6 15/15] shortlog: use strset from strmap.h Elijah Newren via GitGitGadget
2020-11-11 20:07           ` [PATCH v6 00/15] Add struct strmap and associated utility functions Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a86fd5fdcc47baf85046eb07257f4db9f9498084.1598035949.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=newren@gmail.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Mailing List Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/git/0 git/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 git git/ https://lore.kernel.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.git


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git