* [PATCH v4] cleanup duplicate name_compare() functions @ 2014-06-19 8:07 Jeremiah Mahler 2014-06-19 8:07 ` Jeremiah Mahler 0 siblings, 1 reply; 6+ messages in thread From: Jeremiah Mahler @ 2014-06-19 8:07 UTC (permalink / raw) To: Jonathan Nieder; +Cc: git, Jeremiah Mahler Version 4 of the patch series to clean up the duplicate name_compare() functions. The previous patch series would have left the system in a broken state if only part of the patches were applied. This version condenses all the patches into a single working patch as Jonathan Nieder suggested [1]. [1]: http://marc.info/?l=git&m=140311885416615&w=2 Jeremiah Mahler (1): cleanup duplicate name_compare() functions cache.h | 2 +- dir.c | 3 +-- name-hash.c | 2 +- read-cache.c | 23 +++++++++++++---------- tree-walk.c | 10 ---------- unpack-trees.c | 11 ----------- 6 files changed, 16 insertions(+), 35 deletions(-) -- 2.0.0 ^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH v4] cleanup duplicate name_compare() functions 2014-06-19 8:07 [PATCH v4] cleanup duplicate name_compare() functions Jeremiah Mahler @ 2014-06-19 8:07 ` Jeremiah Mahler 2014-06-19 18:03 ` Junio C Hamano 0 siblings, 1 reply; 6+ messages in thread From: Jeremiah Mahler @ 2014-06-19 8:07 UTC (permalink / raw) To: Jonathan Nieder; +Cc: git, Jeremiah Mahler Both tree-walk.c and unpack-trees.c have their own name_compare() functions, which are identical. And read-cache.c has a cache_name_compare() function which is nearly identical to name_compare() [1]. The cache_name_compare() function is not specific to a cache, other than by being part of cache.h. Generalize the cache_name_compare() function by renaming it to name_compare(). Simplify the cache_name_stage_compare() function using name_compare(). Then change the few instances which used cache_name_compare() to name_compare() [2]. [1] cache_name_compare() is not identical to name_compare(). The former returns +1, -1, whereas the latter returns +N, -N. But there is no place where name_compare() is used that needs the magnitude so this difference does not alter its behavior. [2] The instances where cache_name_compare() is used have nothing to do with a cache. The new name, name_compare(), makes it clear that no cache is involved. 
Signed-off-by: Jeremiah Mahler <jmmahler@gmail.com> --- cache.h | 2 +- dir.c | 3 +-- name-hash.c | 2 +- read-cache.c | 23 +++++++++++++---------- tree-walk.c | 10 ---------- unpack-trees.c | 11 ----------- 6 files changed, 16 insertions(+), 35 deletions(-) diff --git a/cache.h b/cache.h index c498a30..e3205fe 100644 --- a/cache.h +++ b/cache.h @@ -1027,7 +1027,7 @@ extern int validate_headref(const char *ref); extern int base_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2); extern int df_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2); -extern int cache_name_compare(const char *name1, int len1, const char *name2, int len2); +extern int name_compare(const char *name1, size_t len1, const char *name2, size_t len2); extern int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2); extern void *read_object_with_reference(const unsigned char *sha1, diff --git a/dir.c b/dir.c index 797805d..e65888d 100644 --- a/dir.c +++ b/dir.c @@ -1354,8 +1354,7 @@ static int cmp_name(const void *p1, const void *p2) const struct dir_entry *e1 = *(const struct dir_entry **)p1; const struct dir_entry *e2 = *(const struct dir_entry **)p2; - return cache_name_compare(e1->name, e1->len, - e2->name, e2->len); + return name_compare(e1->name, e1->len, e2->name, e2->len); } static struct path_simplify *create_simplify(const char **pathspec) diff --git a/name-hash.c b/name-hash.c index be7c4ae..e2bea88 100644 --- a/name-hash.c +++ b/name-hash.c @@ -179,7 +179,7 @@ static int same_name(const struct cache_entry *ce, const char *name, int namelen * Always do exact compare, even if we want a case-ignoring comparison; * we do the quick exact one first, because it will be the common case. 
*/ - if (len == namelen && !cache_name_compare(name, namelen, ce->name, len)) + if (len == namelen && !name_compare(name, namelen, ce->name, len)) return 1; if (!icase) diff --git a/read-cache.c b/read-cache.c index 9f56d76..158241d 100644 --- a/read-cache.c +++ b/read-cache.c @@ -434,18 +434,26 @@ int df_name_compare(const char *name1, int len1, int mode1, return c1 - c2; } -int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2) +int name_compare(const char *name1, size_t len1, const char *name2, size_t len2) { - int len = len1 < len2 ? len1 : len2; - int cmp; - - cmp = memcmp(name1, name2, len); + size_t min_len = (len1 < len2) ? len1 : len2; + int cmp = memcmp(name1, name2, min_len); if (cmp) return cmp; if (len1 < len2) return -1; if (len1 > len2) return 1; + return 0; +} + +int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2) +{ + int cmp; + + cmp = name_compare(name1, len1, name2, len2); + if (cmp) + return cmp; if (stage1 < stage2) return -1; @@ -454,11 +462,6 @@ int cache_name_stage_compare(const char *name1, int len1, int stage1, const char return 0; } -int cache_name_compare(const char *name1, int len1, const char *name2, int len2) -{ - return cache_name_stage_compare(name1, len1, 0, name2, len2, 0); -} - static int index_name_stage_pos(const struct index_state *istate, const char *name, int namelen, int stage) { int first, last; diff --git a/tree-walk.c b/tree-walk.c index 4dc86c7..5dd9a71 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -144,16 +144,6 @@ struct tree_desc_x { struct tree_desc_skip *skip; }; -static int name_compare(const char *a, int a_len, - const char *b, int b_len) -{ - int len = (a_len < b_len) ? 
a_len : b_len; - int cmp = memcmp(a, b, len); - if (cmp) - return cmp; - return (a_len - b_len); -} - static int check_entry_match(const char *a, int a_len, const char *b, int b_len) { /* diff --git a/unpack-trees.c b/unpack-trees.c index 4a9cdf2..c4a97ca 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -629,17 +629,6 @@ static int unpack_failed(struct unpack_trees_options *o, const char *message) return -1; } -/* NEEDSWORK: give this a better name and share with tree-walk.c */ -static int name_compare(const char *a, int a_len, - const char *b, int b_len) -{ - int len = (a_len < b_len) ? a_len : b_len; - int cmp = memcmp(a, b, len); - if (cmp) - return cmp; - return (a_len - b_len); -} - /* * The tree traversal is looking at name p. If we have a matching entry, * return it. If name p is a directory in the index, do not return -- 2.0.0 ^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH v4] cleanup duplicate name_compare() functions 2014-06-19 8:07 ` Jeremiah Mahler @ 2014-06-19 18:03 ` Junio C Hamano 2014-06-19 18:29 ` Junio C Hamano 2014-06-19 20:45 ` Jeremiah Mahler 0 siblings, 2 replies; 6+ messages in thread From: Junio C Hamano @ 2014-06-19 18:03 UTC (permalink / raw) To: Jeremiah Mahler; +Cc: Jonathan Nieder, git Jeremiah Mahler <jmmahler@gmail.com> writes: > Both unpack-trees.c and read-cache.c have their own name_compare() > function, which are identical. And read-cache.c has a > cache_name_compare() function which is nearly identical to > name_compare() [1]. The cache_name_compare() function is not specific > to a cache, other than by being part of cache.h. 'other than by designed to be used only for comparing names in the cache entries' is probably more accurate, I would think. > Generalize the cache_name_compare() function by renaming it to > name_compare(). Simplify the cache_name_stage_compare() function using > name_compare(). Then change the few instances which used > cache_name_compare() to name_compare() [2]. > > [1] cache_name_compare() is not identical to name_compare(). The former > returns +1, -1, whereas the latter returns +N, -N. But there is no > place where name_compare() is used that needs the magnitude so this > difference does not alter its behavior. You chose to use the one that loses the information by unifying these two into the variant that only returns -1/0/+1. We know that it does not matter for the current callers, but is it expected that no future callers will benefit by having the magnitude information? > [2] The instances where cache_name_compare() is used have nothing to do > with a cache. The new name, name_compare(), makes it clear that no > cache is involved. This is redundant and should be dropped, as you already said "is not specific to a cache" earlier. 
> Signed-off-by: Jeremiah Mahler <jmmahler@gmail.com> > --- > cache.h | 2 +- > dir.c | 3 +-- > name-hash.c | 2 +- > read-cache.c | 23 +++++++++++++---------- > tree-walk.c | 10 ---------- > unpack-trees.c | 11 ----------- > 6 files changed, 16 insertions(+), 35 deletions(-) > > diff --git a/cache.h b/cache.h > index c498a30..e3205fe 100644 > --- a/cache.h > +++ b/cache.h > @@ -1027,7 +1027,7 @@ extern int validate_headref(const char *ref); > > extern int base_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2); > extern int df_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2); > -extern int cache_name_compare(const char *name1, int len1, const char *name2, int len2); > +extern int name_compare(const char *name1, size_t len1, const char *name2, size_t len2); > extern int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2); > > extern void *read_object_with_reference(const unsigned char *sha1, > diff --git a/dir.c b/dir.c > index 797805d..e65888d 100644 > --- a/dir.c > +++ b/dir.c > @@ -1354,8 +1354,7 @@ static int cmp_name(const void *p1, const void *p2) > const struct dir_entry *e1 = *(const struct dir_entry **)p1; > const struct dir_entry *e2 = *(const struct dir_entry **)p2; > > - return cache_name_compare(e1->name, e1->len, > - e2->name, e2->len); > + return name_compare(e1->name, e1->len, e2->name, e2->len); > } > > static struct path_simplify *create_simplify(const char **pathspec) > diff --git a/name-hash.c b/name-hash.c > index be7c4ae..e2bea88 100644 > --- a/name-hash.c > +++ b/name-hash.c > @@ -179,7 +179,7 @@ static int same_name(const struct cache_entry *ce, const char *name, int namelen > * Always do exact compare, even if we want a case-ignoring comparison; > * we do the quick exact one first, because it will be the common case. 
> */ > - if (len == namelen && !cache_name_compare(name, namelen, ce->name, len)) > + if (len == namelen && !name_compare(name, namelen, ce->name, len)) > return 1; The existing code is somewhat strange; while the update is correct in the context of this patch, it may further want to be fixed in a later patch to either !name_compare(name, namelen, ce->name, len) or len == namelen && !memcmp(name, ce->name, len) The patch text looks good. Thanks. ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v4] cleanup duplicate name_compare() functions 2014-06-19 18:03 ` Junio C Hamano @ 2014-06-19 18:29 ` Junio C Hamano 2014-06-20 1:55 ` Jeremiah Mahler 2014-06-19 20:45 ` Jeremiah Mahler 1 sibling, 1 reply; 6+ messages in thread From: Junio C Hamano @ 2014-06-19 18:29 UTC (permalink / raw) To: Jeremiah Mahler; +Cc: Jonathan Nieder, Git Mailing List On Thu, Jun 19, 2014 at 11:03 AM, Junio C Hamano <gitster@pobox.com> wrote: > > You chose to use the one that loses the information by unifying > these two into the variant that only returns -1/0/+1. We know that > it does not matter for the current callers, but is it expected that > no future callers will benefit by having the magnitude information? Heh, I was being silly, partly fooled by your reference to "magnitude". You are not losing information at all, because the caller cannot tell if the return value came from an earlier memcmp(), whose only guarantee is that the sign of the returned value is all that matters, or from the later subtraction between lengths. So unifying to the -1/0/+1 variant is entirely justifiable. It is just your rationale was a bit misleading. We often represent our strings as a counted string, i.e. a pair of the pointer to the beginning of the string and its length, and the string may not be NUL terminated to that length. To compare a pair of such counted strings, unpack-trees.c and read-cache.c implement their own name_compare() functions identically. In addition, cache_name_compare() function in read-cache.c is nearly identical. The only difference is when one string is the prefix of the other string, in which case the former returns -1/+1 to show which one is longer and the latter returns the difference of the lengths to show the same information. Unify these three functions by using the implementation from cache_name_compare(). 
This does not make any difference to the existing and future callers, as they must be paying attention only to the sign of the returned value (and not the magnitude) because the original implementations of these two functions return values returned by memcmp(3) when the one string is not a prefix of the other string, and the only thing memcmp(3) guarantees its callers is the sign of the returned value, not the magnitude. or something like that, perhaps? ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v4] cleanup duplicate name_compare() functions 2014-06-19 18:29 ` Junio C Hamano @ 2014-06-20 1:55 ` Jeremiah Mahler 0 siblings, 0 replies; 6+ messages in thread From: Jeremiah Mahler @ 2014-06-20 1:55 UTC (permalink / raw) To: Junio C Hamano; +Cc: git Junio, On Thu, Jun 19, 2014 at 11:29:21AM -0700, Junio C Hamano wrote: > On Thu, Jun 19, 2014 at 11:03 AM, Junio C Hamano <gitster@pobox.com> wrote: > > > > You chose to use the one that loses the information by unifying > > these two into the variant that only returns -1/0/+1. We know that > > it does not matter for the current callers, but is it expected that > > no future callers will benefit by having the magnitude information? > > Heh, I was being silly, partly fooled by your reference to > "magnitude". > > You are not losing information at all, because the caller cannot > tell if the return value came from an earlier memcmp(), whose only > guarantee is that the sign of the returned value is all that > matters, or from the later subtraction between lengths. > > So unifying to the -1/0/+1 variant is entirely justifiable. It is > just your rationale was a bit misleading. > > We often represent our strings as a counted string, i.e. a pair of > the pointer to the beginning of the string and its length, and the > string may not be NUL terminated to that length. > > To compare a pair of such counted strings, unpack-trees.c and > read-cache.c implement their own name_compare() functions > identically. In addition, cache_name_compare() function in > read-cache.c is nearly identical. The only difference is when one > string is the prefix of the other string, in which case the former > returns -1/+1 to show which one is longer and the latter returns the > difference of the lengths to show the same information. > > Unify these three functions by using the implementation from > cache_name_compare(). 
This does not make any difference to the > existing and future callers, as they must be paying attention only > to the sign of the returned value (and not the magnitude) because > the original implementations of these two functions return values > returned by memcmp(3) when the one string is not a prefix of the > other string, and the only thing memcmp(3) guarantees its callers is > the sign of the returned value, not the magnitude. > > or something like that, perhaps? Yes, that looks good. It is a bit clearer than my message. I like how you used "the prefix of the other string" to describe when the two functions behave differently. -- Jeremiah Mahler jmmahler@gmail.com http://github.com/jmahler ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v4] cleanup duplicate name_compare() functions 2014-06-19 18:03 ` Junio C Hamano 2014-06-19 18:29 ` Junio C Hamano @ 2014-06-19 20:45 ` Jeremiah Mahler 1 sibling, 0 replies; 6+ messages in thread From: Jeremiah Mahler @ 2014-06-19 20:45 UTC (permalink / raw) To: Junio C Hamano; +Cc: git Junio, On Thu, Jun 19, 2014 at 11:03:03AM -0700, Junio C Hamano wrote: > Jeremiah Mahler <jmmahler@gmail.com> writes: > > > Both unpack-trees.c and read-cache.c have their own name_compare() > > function, which are identical. And read-cache.c has a > > cache_name_compare() function which is nearly identical to > > name_compare() [1]. The cache_name_compare() function is not specific > > to a cache, other than by being part of cache.h. > > 'other than by designed to be used only for comparing names in the > cache entries' is probably more accurate, I would think. > > > Generalize the cache_name_compare() function by renaming it to > > name_compare(). Simplify the cache_name_stage_compare() function using > > name_compare(). Then change the few instances which used > > cache_name_compare() to name_compare() [2]. > > > > [1] cache_name_compare() is not identical to name_compare(). The former > > returns +1, -1, whereas the latter returns +N, -N. But there is no > > place where name_compare() is used that needs the magnitude so this > > difference does not alter its behavior. > > You chose to use the one that loses the information by unifying > these two into the variant that only returns -1/0/+1. We know that > it does not matter for the current callers, but is it expected that > no future callers will benefit by having the magnitude information? > > > [2] The instances where cache_name_compare() is used have nothing to do > > with a cache. The new name, name_compare(), makes it clear that no > > cache is involved. > > This is redundant and should be dropped, as you already said "is not > specific to a cache" earlier. 
> > > Signed-off-by: Jeremiah Mahler <jmmahler@gmail.com> > > --- > > cache.h | 2 +- > > dir.c | 3 +-- > > name-hash.c | 2 +- > > read-cache.c | 23 +++++++++++++---------- > > tree-walk.c | 10 ---------- > > unpack-trees.c | 11 ----------- > > 6 files changed, 16 insertions(+), 35 deletions(-) > > > > diff --git a/cache.h b/cache.h > > index c498a30..e3205fe 100644 > > --- a/cache.h > > +++ b/cache.h > > @@ -1027,7 +1027,7 @@ extern int validate_headref(const char *ref); > > > > extern int base_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2); > > extern int df_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2); > > -extern int cache_name_compare(const char *name1, int len1, const char *name2, int len2); > > +extern int name_compare(const char *name1, size_t len1, const char *name2, size_t len2); > > extern int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2); > > > > extern void *read_object_with_reference(const unsigned char *sha1, > > diff --git a/dir.c b/dir.c > > index 797805d..e65888d 100644 > > --- a/dir.c > > +++ b/dir.c > > @@ -1354,8 +1354,7 @@ static int cmp_name(const void *p1, const void *p2) > > const struct dir_entry *e1 = *(const struct dir_entry **)p1; > > const struct dir_entry *e2 = *(const struct dir_entry **)p2; > > > > - return cache_name_compare(e1->name, e1->len, > > - e2->name, e2->len); > > + return name_compare(e1->name, e1->len, e2->name, e2->len); > > } > > > > static struct path_simplify *create_simplify(const char **pathspec) > > diff --git a/name-hash.c b/name-hash.c > > index be7c4ae..e2bea88 100644 > > --- a/name-hash.c > > +++ b/name-hash.c > > @@ -179,7 +179,7 @@ static int same_name(const struct cache_entry *ce, const char *name, int namelen > > * Always do exact compare, even if we want a case-ignoring comparison; > > * we do the quick exact one first, because it will be the common 
case. > > */ > > - if (len == namelen && !cache_name_compare(name, namelen, ce->name, len)) > > + if (len == namelen && !name_compare(name, namelen, ce->name, len)) > > return 1; > > The existing code is somewhat strange; while the update is correct > in the context of this patch, it may further want to be fixed in a > later patch to either > > !name_compare(name, namelen, ce->name, len) > > or > > len == namelen && !memcmp(name, ce->name, len) > I did not notice that, good catch. Since that line is going to be changed I can make a short fixup patch before the main patch and avoid the rename. > The patch text looks good. > > Thanks. -- Jeremiah Mahler jmmahler@gmail.com http://github.com/jmahler ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2014-06-20 1:55 UTC | newest] Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2014-06-19 8:07 [PATCH v4] cleanup duplicate name_compare() functions Jeremiah Mahler 2014-06-19 8:07 ` Jeremiah Mahler 2014-06-19 18:03 ` Junio C Hamano 2014-06-19 18:29 ` Junio C Hamano 2014-06-20 1:55 ` Jeremiah Mahler 2014-06-19 20:45 ` Jeremiah Mahler
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.