All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] dcache: faster dentry_cmp()
@ 2012-02-14 22:45 Alexey Dobriyan
  2012-02-14 22:58 ` Al Viro
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Alexey Dobriyan @ 2012-02-14 22:45 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, npiggin

1) consistently use "unsigned int" for the dentry name length,
2) reuse the subtraction result as the return value; the exact value
   doesn't matter because the function is only used in a boolean context,
3) use the *p++ idiom for even better generated code.

All of this results in a speedup of "git diff" that is well outside
statistical error (0.4% improvement vs. a 3-sigma margin of 0.15%).
The first set of counters below is from before the patch, the second
from after it:

$ PAGER= perf stat -r 256 git-diff

 Performance counter stats for 'git-diff' (256 runs):

        115.033582 task-clock                #    0.993 CPUs utilized            ( +-  0.06% )
                 0 context-switches          #    0.000 M/sec                    ( +- 17.95% )
                 0 CPU-migrations            #    0.000 M/sec                    ( +- 19.47% )
             2,321 page-faults               #    0.020 M/sec                    ( +-  0.00% )
       384,540,991 cycles                    #    3.343 GHz                      ( +-  0.05% )
       121,833,562 stalled-cycles-frontend   #   31.68% frontend cycles idle     ( +-  0.16% )
        51,731,784 stalled-cycles-backend    #   13.45% backend  cycles idle     ( +-  0.37% )
       586,327,441 instructions              #    1.52  insns per cycle
                                             #    0.21  stalled cycles per insn  ( +-  0.00% )
       155,449,246 branches                  # 1351.338 M/sec                    ( +-  0.00% )
           542,511 branch-misses             #    0.35% of all branches          ( +-  0.07% )

       0.115856505 seconds time elapsed                                          ( +-  0.06% )

----------------------------------
after

 Performance counter stats for 'git-diff' (256 runs):

        114.486145 task-clock                #    0.993 CPUs utilized            ( +-  0.05% )
                 0 context-switches          #    0.000 M/sec                    ( +- 15.46% )
                 0 CPU-migrations            #    0.000 M/sec                    ( +- 20.06% )
             2,282 page-faults               #    0.020 M/sec                    ( +-  0.00% )
       382,725,382 cycles                    #    3.343 GHz                      ( +-  0.05% )
       119,808,563 stalled-cycles-frontend   #   31.30% frontend cycles idle     ( +-  0.15% )
        51,780,030 stalled-cycles-backend    #   13.53% backend  cycles idle     ( +-  0.33% )
       585,114,727 instructions              #    1.53  insns per cycle
                                             #    0.20  stalled cycles per insn  ( +-  0.00% )
       155,146,262 branches                  # 1355.153 M/sec                    ( +-  0.00% )
           526,739 branch-misses             #    0.34% of all branches          ( +-  0.12% )

       0.115315823 seconds time elapsed                                          ( +-  0.05% )

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 fs/dcache.c            |    6 +++---
 include/linux/dcache.h |   14 +++++++-------
 2 files changed, 10 insertions(+), 10 deletions(-)

--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1368,7 +1368,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
 					     struct inode *inode)
 {
 	struct dentry *alias;
-	int len = entry->d_name.len;
+	unsigned int len = entry->d_name.len;
 	const char *name = entry->d_name.name;
 	unsigned int hash = entry->d_name.hash;
 
@@ -1750,7 +1750,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
 		struct inode *i;
 		const char *tname;
-		int tlen;
+		unsigned int tlen;
 
 		if (dentry->d_name.hash != hash)
 			continue;
@@ -1869,7 +1869,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 	
 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
 		const char *tname;
-		int tlen;
+		unsigned int tlen;
 
 		if (dentry->d_name.hash != hash)
 			continue;
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -51,18 +51,18 @@ extern struct dentry_stat_t dentry_stat;
  * Compare 2 name strings, return 0 if they match, otherwise non-zero.
  * The strings are both count bytes long, and count is non-zero.
  */
-static inline int dentry_cmp(const unsigned char *cs, size_t scount,
-				const unsigned char *ct, size_t tcount)
+static inline int dentry_cmp(const unsigned char *cs, unsigned int scount,
+			     const unsigned char *ct, unsigned int tcount)
 {
 	int ret;
-	if (scount != tcount)
-		return 1;
+
+	ret = scount - tcount;
+	if (ret)
+		return ret;
 	do {
-		ret = (*cs != *ct);
+		ret = *cs++ - *ct++;
 		if (ret)
 			break;
-		cs++;
-		ct++;
 		tcount--;
 	} while (tcount);
 	return ret;

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2012-04-20 12:07 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-14 22:45 [PATCH] dcache: faster dentry_cmp() Alexey Dobriyan
2012-02-14 22:58 ` Al Viro
2012-02-15  8:14   ` Alexey Dobriyan
2012-02-15  1:41 ` Matthew Wilcox
2012-02-15  8:14   ` Alexey Dobriyan
2012-02-15  1:46 ` Andi Kleen
2012-02-15  8:16   ` Alexey Dobriyan
2012-02-15 11:19     ` Alexey Dobriyan
2012-02-15 20:20       ` Andi Kleen
2012-04-20 12:06 ` Nick Piggin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.