git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [FYI PATCH] git wrapper: DWIM mistyped commands
@ 2008-07-22 20:01 Johannes Schindelin
  2008-07-22 20:16 ` [SCNR] " Pierre Habouzit
                   ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Johannes Schindelin @ 2008-07-22 20:01 UTC (permalink / raw)
  To: git


This patch introduces a modified Damerau-Levenshtein algorithm into
Git's code base, and uses it with the following penalties to show some
similar commands when an unknown command was encountered:

	swap = 0, insertion = 1, substitution = 2, deletion = 4

A typical output would now look like this:

	$ git sm
	git: 'sm' is not a git-command. See 'git --help'.

	Did you mean one of these?
		am
		rm

The cut-off is at similarity rating 6, which was empirically determined
to give sensible results.

As a convenience, if there is only one candidate, Git continues under
the assumption that the user mistyped it.  Example:

	$ git reabse
	WARNING: You called a Git program named 'reabse', which does
	not exist.
	Continuing under the assumption that you meant 'rebase'
	[...]

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---

	So I mistyped 'reabse' for the hundred trillionth time, but I
	will never have to correct my mistakes again.

	Note: this patch is _not_ meant for inclusion.

 Makefile      |    2 +
 builtin.h     |    2 +-
 git.c         |    4 ++-
 help.c        |   61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 levenshtein.c |   47 +++++++++++++++++++++++++++++++++++++++++++
 levenshtein.h |    8 +++++++
 6 files changed, 121 insertions(+), 3 deletions(-)
 create mode 100644 levenshtein.c
 create mode 100644 levenshtein.h

diff --git a/Makefile b/Makefile
index 19bdd03..7e114e0 100644
--- a/Makefile
+++ b/Makefile
@@ -347,6 +347,7 @@ LIB_H += git-compat-util.h
 LIB_H += graph.h
 LIB_H += grep.h
 LIB_H += hash.h
+LIB_H += levenshtein.h
 LIB_H += list-objects.h
 LIB_H += ll-merge.h
 LIB_H += log-tree.h
@@ -421,6 +422,7 @@ LIB_OBJS += hash.o
 LIB_OBJS += help.o
 LIB_OBJS += ident.o
 LIB_OBJS += interpolate.o
+LIB_OBJS += levenshtein.o
 LIB_OBJS += list-objects.o
 LIB_OBJS += ll-merge.o
 LIB_OBJS += lockfile.o
diff --git a/builtin.h b/builtin.h
index 0e605d4..fc5f108 100644
--- a/builtin.h
+++ b/builtin.h
@@ -11,7 +11,7 @@ extern const char git_usage_string[];
 extern const char git_more_info_string[];
 
 extern void list_common_cmds_help(void);
-extern void help_unknown_cmd(const char *cmd);
+extern const char *help_unknown_cmd(const char *cmd);
 extern void prune_packed_objects(int);
 extern int read_line_with_nul(char *buf, int size, FILE *file);
 extern int fmt_merge_msg(int merge_summary, struct strbuf *in,
diff --git a/git.c b/git.c
index 1bfd271..d7510ef 100644
--- a/git.c
+++ b/git.c
@@ -500,7 +500,9 @@ int main(int argc, const char **argv)
 				cmd, argv[0]);
 			exit(1);
 		}
-		help_unknown_cmd(cmd);
+		argv[0] = help_unknown_cmd(cmd);
+		handle_internal_command(argc, argv);
+		execv_dashed_external(argv);
 	}
 
 	fprintf(stderr, "Failed to run command '%s': %s\n",
diff --git a/help.c b/help.c
index bfc84ae..480befe 100644
--- a/help.c
+++ b/help.c
@@ -9,6 +9,7 @@
 #include "common-cmds.h"
 #include "parse-options.h"
 #include "run-command.h"
+#include "levenshtein.h"
 
 static struct man_viewer_list {
 	struct man_viewer_list *next;
@@ -666,9 +667,67 @@ static void show_html_page(const char *git_cmd)
 	open_html(page_path.buf);
 }
 
-void help_unknown_cmd(const char *cmd)
+static const char *levenshtein_cmd;
+static int similarity(const char *cmd) {
+	return levenshtein(levenshtein_cmd, cmd, 0, 2, 1, 4);
+}
+
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+	const struct cmdname *const *c1 = p1, *const *c2 = p2;
+	const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+	int l1 = similarity(s1);
+	int l2 = similarity(s2);
+	return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+const char *help_unknown_cmd(const char *cmd)
 {
+	int i, best_similarity = 0;
+	char cwd[PATH_MAX];
+
+	if (!getcwd(cwd, sizeof(cwd))) {
+		error("Could not get current working directory");
+		cwd[0] = '\0';
+	}
+
+	load_command_list();
+	ALLOC_GROW(main_cmds.names, main_cmds.cnt + other_cmds.cnt,
+			main_cmds.alloc);
+	memcpy(main_cmds.names + main_cmds.cnt, other_cmds.names,
+		other_cmds.cnt * sizeof(other_cmds.names[0]));
+	main_cmds.cnt += other_cmds.cnt;
+
+	levenshtein_cmd = cmd;
+	qsort(main_cmds.names, main_cmds.cnt,
+	      sizeof(*main_cmds.names), levenshtein_compare);
+
+	if (!main_cmds.cnt)
+		die ("Uh oh.  Your system reports no Git commands at all.");
+	best_similarity = similarity(main_cmds.names[0]->name);
+	if (main_cmds.cnt < 2 || best_similarity <
+			similarity(main_cmds.names[1]->name)) {
+		if (!*cwd)
+			exit(1);
+		if (chdir(cwd))
+			die ("Could not change directory back to '%s'", cwd);
+		fprintf(stderr, "WARNING: You called a Git program named '%s', "
+			"which does not exist.\n"
+			"Continuing under the assumption that you meant '%s'\n",
+			cmd, main_cmds.names[0]->name);
+		return main_cmds.names[0]->name;
+	}
+
 	fprintf(stderr, "git: '%s' is not a git-command. See 'git --help'.\n", cmd);
+
+	if (best_similarity < 6) {
+		fprintf(stderr, "\nDid you mean one of these?\n");
+
+		for (i = 0; i < main_cmds.cnt && best_similarity ==
+				similarity(main_cmds.names[i]->name); i++)
+			fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+	}
+
 	exit(1);
 }
 
diff --git a/levenshtein.c b/levenshtein.c
new file mode 100644
index 0000000..db52f2c
--- /dev/null
+++ b/levenshtein.c
@@ -0,0 +1,47 @@
+#include "cache.h"
+#include "levenshtein.h"
+
+int levenshtein(const char *string1, const char *string2,
+		int w, int s, int a, int d)
+{
+	int len1 = strlen(string1), len2 = strlen(string2);
+	int *row0 = xmalloc(sizeof(int) * (len2 + 1));
+	int *row1 = xmalloc(sizeof(int) * (len2 + 1));
+	int *row2 = xmalloc(sizeof(int) * (len2 + 1));
+	int i, j;
+
+	for (j = 0; j <= len2; j++)
+		row1[j] = j * a;
+	for (i = 0; i < len1; i++) {
+		int *dummy;
+
+		row2[0] = (i + 1) * d;
+		for (j = 0; j < len2; j++) {
+			/* substitution */
+			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+			/* swap */
+			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+					string1[i] == string2[j - 1] &&
+					row2[j + 1] > row0[j - 1] + w)
+				row2[j + 1] = row0[j - 1] + w;
+			/* deletion */
+			if (j + 1 < len2 && row2[j + 1] > row1[j + 1] + d)
+				row2[j + 1] = row1[j + 1] + d;
+			/* insertion */
+			if (row2[j + 1] > row2[j] + a)
+				row2[j + 1] = row2[j] + a;
+		}
+
+		dummy = row0;
+		row0 = row1;
+		row1 = row2;
+		row2 = dummy;
+	}
+
+	i = row1[len2];
+	free(row0);
+	free(row1);
+	free(row2);
+
+	return i;
+}
diff --git a/levenshtein.h b/levenshtein.h
new file mode 100644
index 0000000..0173abe
--- /dev/null
+++ b/levenshtein.h
@@ -0,0 +1,8 @@
+#ifndef LEVENSHTEIN_H
+#define LEVENSHTEIN_H
+
+int levenshtein(const char *string1, const char *string2,
+	int swap_penalty, int substition_penalty,
+	int insertion_penalty, int deletion_penalty);
+
+#endif
-- 
1.6.0.rc0.21.g91175

^ permalink raw reply related	[flat|nested] 26+ messages in thread
* [PATCH] Remove calculation of the longest command name from where it is not used
@ 2008-08-28 17:15 Alex Riesen, Alex Riesen
  2008-08-28 21:27 ` [PATCH updated] git wrapper: DWIM mistyped commands Alex Riesen
  0 siblings, 1 reply; 26+ messages in thread
From: Alex Riesen, Alex Riesen @ 2008-08-28 17:15 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Johannes Schindelin

Just calculate it where it is needed - it is cheap and trivial,
as all the lengths are already there (stored when creating the
command lists).

Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
---

And it is less code than before (there are even more deletions than
insertions).

BTW, Johannes' typo-guesser conflicts heavily with recent master.
Pity. I'm going to rebase it and send out the rebased version.

 builtin-help.c  |    4 ++--
 builtin-merge.c |    8 ++++----
 help.c          |   34 +++++++++++++++-------------------
 help.h          |    6 +++---
 4 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/builtin-help.c b/builtin-help.c
index 391f749..9225102 100644
--- a/builtin-help.c
+++ b/builtin-help.c
@@ -418,7 +418,7 @@ int cmd_help(int argc, const char **argv, const char *prefix)
 {
 	int nongit;
 	const char *alias;
-	unsigned int longest = load_command_list("git-", &main_cmds, &other_cmds);
+	load_command_list("git-", &main_cmds, &other_cmds);
 
 	setup_git_directory_gently(&nongit);
 	git_config(git_help_config, NULL);
@@ -428,7 +428,7 @@ int cmd_help(int argc, const char **argv, const char *prefix)
 
 	if (show_all) {
 		printf("usage: %s\n\n", git_usage_string);
-		list_commands("git commands", longest, &main_cmds, &other_cmds);
+		list_commands("git commands", &main_cmds, &other_cmds);
 		printf("%s\n", git_more_info_string);
 		return 0;
 	}
diff --git a/builtin-merge.c b/builtin-merge.c
index d6bcbec..dcd08f7 100644
--- a/builtin-merge.c
+++ b/builtin-merge.c
@@ -80,7 +80,7 @@ static struct strategy *get_strategy(const char *name)
 	int i;
 	struct strategy *ret;
 	static struct cmdnames main_cmds, other_cmds;
-	static int longest;
+	static int loaded;
 
 	if (!name)
 		return NULL;
@@ -89,14 +89,14 @@ static struct strategy *get_strategy(const char *name)
 		if (!strcmp(name, all_strategy[i].name))
 			return &all_strategy[i];
 
-	if (!longest) {
+	if (!loaded) {
 		struct cmdnames not_strategies;
+		loaded = 1;
 
 		memset(&main_cmds, 0, sizeof(struct cmdnames));
 		memset(&other_cmds, 0, sizeof(struct cmdnames));
 		memset(&not_strategies, 0, sizeof(struct cmdnames));
-		longest = load_command_list("git-merge-", &main_cmds,
-				&other_cmds);
+		load_command_list("git-merge-", &main_cmds, &other_cmds);
 		for (i = 0; i < main_cmds.cnt; i++) {
 			int j, found = 0;
 			struct cmdname *ent = main_cmds.names[i];
diff --git a/help.c b/help.c
index 1afbac0..a17a746 100644
--- a/help.c
+++ b/help.c
@@ -133,11 +133,10 @@ static int is_executable(const char *name)
 	return st.st_mode & S_IXUSR;
 }
 
-static unsigned int list_commands_in_dir(struct cmdnames *cmds,
+static void list_commands_in_dir(struct cmdnames *cmds,
 					 const char *path,
 					 const char *prefix)
 {
-	unsigned int longest = 0;
 	int prefix_len;
 	DIR *dir = opendir(path);
 	struct dirent *de;
@@ -145,7 +144,7 @@ static unsigned int list_commands_in_dir(struct cmdnames *cmds,
 	int len;
 
 	if (!dir)
-		return 0;
+		return;
 	if (!prefix)
 		prefix = "git-";
 	prefix_len = strlen(prefix);
@@ -168,29 +167,22 @@ static unsigned int list_commands_in_dir(struct cmdnames *cmds,
 		if (has_extension(de->d_name, ".exe"))
 			entlen -= 4;
 
-		if (longest < entlen)
-			longest = entlen;
-
 		add_cmdname(cmds, de->d_name + prefix_len, entlen);
 	}
 	closedir(dir);
 	strbuf_release(&buf);
-
-	return longest;
 }
 
-unsigned int load_command_list(const char *prefix,
+void load_command_list(const char *prefix,
 		struct cmdnames *main_cmds,
 		struct cmdnames *other_cmds)
 {
-	unsigned int longest = 0;
-	unsigned int len;
 	const char *env_path = getenv("PATH");
 	char *paths, *path, *colon;
 	const char *exec_path = git_exec_path();
 
 	if (exec_path)
-		longest = list_commands_in_dir(main_cmds, exec_path, prefix);
+		list_commands_in_dir(main_cmds, exec_path, prefix);
 
 	if (!env_path) {
 		fprintf(stderr, "PATH not set\n");
@@ -202,9 +194,7 @@ unsigned int load_command_list(const char *prefix,
 		if ((colon = strchr(path, PATH_SEP)))
 			*colon = 0;
 
-		len = list_commands_in_dir(other_cmds, path, prefix);
-		if (len > longest)
-			longest = len;
+		list_commands_in_dir(other_cmds, path, prefix);
 
 		if (!colon)
 			break;
@@ -220,14 +210,20 @@ unsigned int load_command_list(const char *prefix,
 	      sizeof(*other_cmds->names), cmdname_compare);
 	uniq(other_cmds);
 	exclude_cmds(other_cmds, main_cmds);
-
-	return longest;
 }
 
-void list_commands(const char *title, unsigned int longest,
-		struct cmdnames *main_cmds, struct cmdnames *other_cmds)
+void list_commands(const char *title, struct cmdnames *main_cmds,
+		   struct cmdnames *other_cmds)
 {
 	const char *exec_path = git_exec_path();
+	int i, longest = 0;
+
+	for (i = 0; i < main_cmds->cnt; i++)
+		if (longest < main_cmds->names[i]->len)
+			longest = main_cmds->names[i]->len;
+	for (i = 0; i < other_cmds->cnt; i++)
+		if (longest < other_cmds->names[i]->len)
+			longest = other_cmds->names[i]->len;
 
 	if (main_cmds->cnt) {
 		printf("available %s in '%s'\n", title, exec_path);
diff --git a/help.h b/help.h
index 3f1ae89..2733433 100644
--- a/help.h
+++ b/help.h
@@ -16,14 +16,14 @@ static inline void mput_char(char c, unsigned int num)
 		putchar(c);
 }
 
-unsigned int load_command_list(const char *prefix,
+void load_command_list(const char *prefix,
 		struct cmdnames *main_cmds,
 		struct cmdnames *other_cmds);
 void add_cmdname(struct cmdnames *cmds, const char *name, int len);
 /* Here we require that excludes is a sorted list. */
 void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);
 int is_in_cmdlist(struct cmdnames *c, const char *s);
-void list_commands(const char *title, unsigned int longest,
-		struct cmdnames *main_cmds, struct cmdnames *other_cmds);
+void list_commands(const char *title, struct cmdnames *main_cmds,
+		   struct cmdnames *other_cmds);
 
 #endif /* HELP_H */
-- 
1.6.0.1.150.g5966

^ permalink raw reply related	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2008-09-08  6:52 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-07-22 20:01 [FYI PATCH] git wrapper: DWIM mistyped commands Johannes Schindelin
2008-07-22 20:16 ` [SCNR] " Pierre Habouzit
2008-07-22 20:19   ` Johannes Schindelin
2008-07-22 20:34     ` Pierre Habouzit
2008-07-22 20:37 ` Alex Riesen
2008-07-22 21:03   ` [PATCH] Add help.autocorrect to enable/disable autocorrecting Alex Riesen
2008-07-22 21:08     ` Johannes Schindelin
2008-07-22 21:26       ` Alex Riesen
2008-07-22 21:44         ` Johannes Schindelin
2008-07-22 22:25           ` Alex Riesen
2008-07-23 16:44             ` Johannes Schindelin
2008-07-23 18:44               ` Alex Riesen
2008-07-23 19:00                 ` Johannes Schindelin
2008-07-23 19:04                   ` Johannes Schindelin
2008-07-22 23:08           ` Junio C Hamano
2008-07-23 16:41             ` [PATCH] Wait help.autocorrect deciseconds before running corrected command Alex Riesen
2008-07-23 16:57               ` Johannes Schindelin
2008-07-23 18:45                 ` Alex Riesen
2008-07-22 23:05 ` [FYI PATCH] git wrapper: DWIM mistyped commands Junio C Hamano
2008-07-22 23:10   ` Sverre Rabbelier
2008-08-28 17:15 [PATCH] Remove calculation of the longest command name from where it is not used Alex Riesen, Alex Riesen
2008-08-28 21:27 ` [PATCH updated] git wrapper: DWIM mistyped commands Alex Riesen
2008-08-28 21:28   ` [PATCH] Add help.autocorrect to enable/disable autocorrecting Alex Riesen
2008-08-29 10:11     ` Andreas Ericsson
2008-09-08  6:50     ` Junio C Hamano
2008-08-30 15:36   ` [PATCH updated] git wrapper: DWIM mistyped commands Junio C Hamano
2008-08-30 16:44     ` Alex Riesen
2008-08-30 17:13       ` [PATCH] Reuse cmdname->len to store pre-calculated similarity indexes Alex Riesen
2008-08-30 17:26         ` Junio C Hamano
2008-08-31 13:50           ` [PATCH] git wrapper: DWIM mistyped commands Alex Riesen
2008-08-31 13:54             ` [PATCH] Add help.autocorrect to enable/disable autocorrecting Alex Riesen
2008-08-31 14:49               ` Matthieu Moy
2008-08-31 16:33                 ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).