git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] Move pathspec matching from builtin-add.c into dir.c
@ 2006-05-19 23:07 Linus Torvalds
  2006-05-19 23:19 ` [PATCH 2/2] Add builtin "git rm" command Linus Torvalds
  0 siblings, 1 reply; 3+ messages in thread
From: Linus Torvalds @ 2006-05-19 23:07 UTC (permalink / raw)
  To: Junio C Hamano, Git Mailing List


I'll use it for builtin-rm.c too.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---

diff --git a/builtin-add.c b/builtin-add.c
index 2afb82d..02fe38b 100644
--- a/builtin-add.c
+++ b/builtin-add.c
@@ -13,86 +13,6 @@ #include "cache-tree.h"
 static const char builtin_add_usage[] =
 "git-add [-n] [-v] <filepattern>...";
 
-static int common_prefix(const char **pathspec)
-{
-	const char *path, *slash, *next;
-	int prefix;
-
-	if (!pathspec)
-		return 0;
-
-	path = *pathspec;
-	slash = strrchr(path, '/');
-	if (!slash)
-		return 0;
-
-	prefix = slash - path + 1;
-	while ((next = *++pathspec) != NULL) {
-		int len = strlen(next);
-		if (len >= prefix && !memcmp(path, next, len))
-			continue;
-		for (;;) {
-			if (!len)
-				return 0;
-			if (next[--len] != '/')
-				continue;
-			if (memcmp(path, next, len+1))
-				continue;
-			prefix = len + 1;
-			break;
-		}
-	}
-	return prefix;
-}
-
-static int match_one(const char *match, const char *name, int namelen)
-{
-	int matchlen;
-
-	/* If the match was just the prefix, we matched */
-	matchlen = strlen(match);
-	if (!matchlen)
-		return 1;
-
-	/*
-	 * If we don't match the matchstring exactly,
-	 * we need to match by fnmatch
-	 */
-	if (strncmp(match, name, matchlen))
-		return !fnmatch(match, name, 0);
-
-	/*
-	 * If we did match the string exactly, we still
-	 * need to make sure that it happened on a path
-	 * component boundary (ie either the last character
-	 * of the match was '/', or the next character of
-	 * the name was '/' or the terminating NUL.
-	 */
-	return	match[matchlen-1] == '/' ||
-		name[matchlen] == '/' ||
-		!name[matchlen];
-}
-
-static int match(const char **pathspec, const char *name, int namelen, int prefix, char *seen)
-{
-	int retval;
-	const char *match;
-
-	name += prefix;
-	namelen -= prefix;
-
-	for (retval = 0; (match = *pathspec++) != NULL; seen++) {
-		if (retval & *seen)
-			continue;
-		match += prefix;
-		if (match_one(match, name, namelen)) {
-			retval = 1;
-			*seen = 1;
-		}
-	}
-	return retval;
-}
-
 static void prune_directory(struct dir_struct *dir, const char **pathspec, int prefix)
 {
 	char *seen;
@@ -108,7 +28,7 @@ static void prune_directory(struct dir_s
 	i = dir->nr;
 	while (--i >= 0) {
 		struct dir_entry *entry = *src++;
-		if (!match(pathspec, entry->name, entry->len, prefix, seen)) {
+		if (!match_pathspec(pathspec, entry->name, entry->len, prefix, seen)) {
 			free(entry);
 			continue;
 		}
diff --git a/dir.c b/dir.c
index d40b62e..d778ecd 100644
--- a/dir.c
+++ b/dir.c
@@ -11,6 +11,86 @@ #include <fnmatch.h>
 #include "cache.h"
 #include "dir.h"
 
+int common_prefix(const char **pathspec)
+{
+	const char *path, *slash, *next;
+	int prefix;
+
+	if (!pathspec)
+		return 0;
+
+	path = *pathspec;
+	slash = strrchr(path, '/');
+	if (!slash)
+		return 0;
+
+	prefix = slash - path + 1;
+	while ((next = *++pathspec) != NULL) {
+		int len = strlen(next);
+		if (len >= prefix && !memcmp(path, next, len))
+			continue;
+		for (;;) {
+			if (!len)
+				return 0;
+			if (next[--len] != '/')
+				continue;
+			if (memcmp(path, next, len+1))
+				continue;
+			prefix = len + 1;
+			break;
+		}
+	}
+	return prefix;
+}
+
+static int match_one(const char *match, const char *name, int namelen)
+{
+	int matchlen;
+
+	/* If the match was just the prefix, we matched */
+	matchlen = strlen(match);
+	if (!matchlen)
+		return 1;
+
+	/*
+	 * If we don't match the matchstring exactly,
+	 * we need to match by fnmatch
+	 */
+	if (strncmp(match, name, matchlen))
+		return !fnmatch(match, name, 0);
+
+	/*
+	 * If we did match the string exactly, we still
+	 * need to make sure that it happened on a path
+	 * component boundary (ie either the last character
+	 * of the match was '/', or the next character of
+	 * the name was '/' or the terminating NUL.
+	 */
+	return	match[matchlen-1] == '/' ||
+		name[matchlen] == '/' ||
+		!name[matchlen];
+}
+
+int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen)
+{
+	int retval;
+	const char *match;
+
+	name += prefix;
+	namelen -= prefix;
+
+	for (retval = 0; (match = *pathspec++) != NULL; seen++) {
+		if (retval & *seen)
+			continue;
+		match += prefix;
+		if (match_one(match, name, namelen)) {
+			retval = 1;
+			*seen = 1;
+		}
+	}
+	return retval;
+}
+
 void add_exclude(const char *string, const char *base,
 		 int baselen, struct exclude_list *which)
 {
diff --git a/dir.h b/dir.h
index 4f65f57..56a1b7f 100644
--- a/dir.h
+++ b/dir.h
@@ -39,6 +39,9 @@ struct dir_struct {
 	struct exclude_list exclude_list[3];
 };
 
+extern int common_prefix(const char **pathspec);
+extern int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen);
+
 extern int read_directory(struct dir_struct *, const char *path, const char *base, int baselen);
 extern int excluded(struct dir_struct *, const char *);
 extern void add_excludes_from_file(struct dir_struct *, const char *fname);

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] Add builtin "git rm" command
  2006-05-19 23:07 [PATCH 1/2] Move pathspec matching from builtin-add.c into dir.c Linus Torvalds
@ 2006-05-19 23:19 ` Linus Torvalds
  2006-05-21  8:20   ` Junio C Hamano
  0 siblings, 1 reply; 3+ messages in thread
From: Linus Torvalds @ 2006-05-19 23:19 UTC (permalink / raw)
  To: Junio C Hamano, Git Mailing List


This changes semantics very subtly, because it adds a new atomicity 
guarantee.

In particular, if you "git rm" several files, it will now do all or 
nothing. The old shell-script really looped over the removed files one by 
one, and would basically randomly fail in the middle if "-f" was used and 
one of the files didn't exist in the working directory.

This C builtin one will not re-write the index after each remove, but 
instead remove all files at once. However, that means that if "-f" is used 
(to also force removal of the file from the working directory), and some 
files have already been removed from the workspace, it won't stop in the 
middle in some half-way state like the old one did.

So what happens is that if the _first_ file fails to be removed with "-f", 
we abort the whole "git rm". But once we've started removing, we don't 
leave anything half done. If some of the other files don't exist, we'll 
just ignore errors of removal from the working tree.

This is only an issue with "-f", of course.

I think the new behaviour is strictly an improvement, but perhaps more 
importantly, it is _different_. As a special case, the semantics are 
identical for the single-file case (which is the only one our test-suite 
seems to test).

The other question is what to do with leading directories. The old "git 
rm" script didn't do anything, which is somewhat inconsistent. This one 
will actually clean up directories that have become empty as a result of 
removing the last file, but maybe we want to have a flag to decide the 
behaviour?

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---

diff --git a/Makefile b/Makefile
index 5373986..06b31d8 100644
--- a/Makefile
+++ b/Makefile
@@ -120,7 +120,7 @@ SCRIPT_SH = \
 	git-merge-one-file.sh git-parse-remote.sh \
 	git-prune.sh git-pull.sh git-rebase.sh \
 	git-repack.sh git-request-pull.sh git-reset.sh \
-	git-resolve.sh git-revert.sh git-rm.sh git-sh-setup.sh \
+	git-resolve.sh git-revert.sh git-sh-setup.sh \
 	git-tag.sh git-verify-tag.sh \
 	git-applymbox.sh git-applypatch.sh git-am.sh \
 	git-merge.sh git-merge-stupid.sh git-merge-octopus.sh \
@@ -170,7 +170,8 @@ PROGRAMS = \
 
 BUILT_INS = git-log$X git-whatchanged$X git-show$X \
 	git-count-objects$X git-diff$X git-push$X \
-	git-grep$X git-add$X git-rev-list$X git-check-ref-format$X
+	git-grep$X git-add$X git-rm$X git-rev-list$X \
+	git-check-ref-format$X
 
 # what 'all' will build and 'install' will install, in gitexecdir
 ALL_PROGRAMS = $(PROGRAMS) $(SIMPLE_PROGRAMS) $(SCRIPTS)
@@ -218,7 +219,8 @@ LIB_OBJS = \
 
 BUILTIN_OBJS = \
 	builtin-log.o builtin-help.o builtin-count.o builtin-diff.o builtin-push.o \
-	builtin-grep.o builtin-add.o builtin-rev-list.o builtin-check-ref-format.o
+	builtin-grep.o builtin-add.o builtin-rev-list.o builtin-check-ref-format.o \
+	builtin-rm.o
 
 GITLIBS = $(LIB_FILE) $(XDIFF_LIB)
 LIBS = $(GITLIBS) -lz
diff --git a/builtin-rm.c b/builtin-rm.c
new file mode 100644
index 0000000..67e0f79
--- /dev/null
+++ b/builtin-rm.c
@@ -0,0 +1,150 @@
+/*
+ * "git rm" builtin command
+ *
+ * Copyright (C) Linus Torvalds 2006
+ */
+#include "cache.h"
+#include "builtin.h"
+#include "dir.h"
+
+static const char builtin_rm_usage[] =
+"git-rm [-n] [-v] [-f] <filepattern>...";
+
+static struct {
+	int nr, alloc;
+	const char **name;
+} list;
+
+static void add_list(const char *name)
+{
+	if (list.nr >= list.alloc) {
+		list.alloc = alloc_nr(list.alloc);
+		list.name = xrealloc(list.name, list.alloc * sizeof(const char *));
+	}
+	list.name[list.nr++] = name;
+}
+
+static int remove_file(const char *name)
+{
+	int ret;
+	char *slash;
+
+	ret = unlink(name);
+	if (!ret && (slash = strrchr(name, '/'))) {
+		char *n = strdup(name);
+		do {
+			n[slash - name] = 0;
+			name = n;
+		} while (!rmdir(name) && (slash = strrchr(name, '/')));
+	}
+	return ret;
+}
+
+static struct cache_file cache_file;
+
+int cmd_rm(int argc, const char **argv, char **envp)
+{
+	int i, newfd;
+	int verbose = 0, show_only = 0, force = 0;
+	const char *prefix = setup_git_directory();
+	const char **pathspec;
+	char *seen;
+
+	git_config(git_default_config);
+
+	newfd = hold_index_file_for_update(&cache_file, get_index_file());
+	if (newfd < 0)
+		die("unable to create new index file");
+
+	if (read_cache() < 0)
+		die("index file corrupt");
+
+	for (i = 1 ; i < argc ; i++) {
+		const char *arg = argv[i];
+
+		if (*arg != '-')
+			break;
+		if (!strcmp(arg, "--")) {
+			i++;
+			break;
+		}
+		if (!strcmp(arg, "-n")) {
+			show_only = 1;
+			continue;
+		}
+		if (!strcmp(arg, "-v")) {
+			verbose = 1;
+			continue;
+		}
+		if (!strcmp(arg, "-f")) {
+			force = 1;
+			continue;
+		}
+		die(builtin_rm_usage);
+	}
+	pathspec = get_pathspec(prefix, argv + i);
+
+	seen = NULL;
+	if (pathspec) {
+		for (i = 0; pathspec[i] ; i++)
+			/* nothing */;
+		seen = xmalloc(i);
+		memset(seen, 0, i);
+	}
+
+	for (i = 0; i < active_nr; i++) {
+		struct cache_entry *ce = active_cache[i];
+		if (!match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, seen))
+			continue;
+		add_list(ce->name);
+	}
+
+	if (pathspec) {
+		const char *match;
+		for (i = 0; (match = pathspec[i]) != NULL ; i++) {
+			if (*match && !seen[i])
+				die("pathspec '%s' did not match any files", match);
+		}
+	}
+
+	/*
+	 * First remove the names from the index: we won't commit
+	 * the index unless all of them succeed
+	 */
+	for (i = 0; i < list.nr; i++) {
+		const char *path = list.name[i];
+		printf("rm '%s'\n", path);
+
+		if (remove_file_from_cache(path))
+			die("git rm: unable to remove %s", path);
+	}
+
+	/*
+	 * Then, if we used "-f", remove the filenames from the
+	 * workspace. If we fail to remove the first one, we
+	 * abort the "git rm" (but once we've successfully removed
+	 * any file at all, we'll go ahead and commit to it all:
+	 * by then we've already committed ourself and can't fail
+	 * in the middle)
+	 */
+	if (force) {
+		int removed = 0;
+		for (i = 0; i < list.nr; i++) {  
+			const char *path = list.name[i];
+			if (!remove_file(path)) {
+				removed = 1;
+				continue;
+			}
+			if (!removed)
+				die("git rm: %s: %s", path, strerror(errno));
+		}
+	}
+
+	if (active_cache_changed) {
+		if (write_cache(newfd, active_cache, active_nr) ||
+		    commit_index_file(&cache_file))
+			die("Unable to write new index file");
+	}
+	
+	return 0;
+}
diff --git a/builtin.h b/builtin.h
index 78275ea..d150c7c 100644
--- a/builtin.h
+++ b/builtin.h
@@ -25,6 +25,7 @@ extern int cmd_count_objects(int argc, c
 
 extern int cmd_push(int argc, const char **argv, char **envp);
 extern int cmd_grep(int argc, const char **argv, char **envp);
+extern int cmd_rm(int argc, const char **argv, char **envp);
 extern int cmd_add(int argc, const char **argv, char **envp);
 extern int cmd_rev_list(int argc, const char **argv, char **envp);
 extern int cmd_check_ref_format(int argc, const char **argv, char **envp);
diff --git a/git-rm.sh b/git-rm.sh
deleted file mode 100755
index fda4541..0000000
--- a/git-rm.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/bin/sh
-
-USAGE='[-f] [-n] [-v] [--] <file>...'
-SUBDIRECTORY_OK='Yes'
-. git-sh-setup
-
-remove_files=
-show_only=
-verbose=
-while : ; do
-  case "$1" in
-    -f)
-	remove_files=true
-	;;
-    -n)
-	show_only=true
-	;;
-    -v)
-	verbose=--verbose
-	;;
-    --)
-	shift; break
-	;;
-    -*)
-	usage
-	;;
-    *)
-	break
-	;;
-  esac
-  shift
-done
-
-# This is typo-proofing. If some paths match and some do not, we want
-# to do nothing.
-case "$#" in
-0)	;;
-*)
-	git-ls-files --error-unmatch -- "$@" >/dev/null || {
-		echo >&2 "Maybe you misspelled it?"
-		exit 1
-	}
-	;;
-esac
-
-if test -f "$GIT_DIR/info/exclude"
-then
-	git-ls-files -z \
-	--exclude-from="$GIT_DIR/info/exclude" \
-	--exclude-per-directory=.gitignore -- "$@"
-else
-	git-ls-files -z \
-	--exclude-per-directory=.gitignore -- "$@"
-fi |
-case "$show_only,$remove_files" in
-true,*)
-	xargs -0 echo
-	;;
-*,true)
-	xargs -0 sh -c "
-		while [ \$# -gt 0 ]; do
-			file=\$1; shift
-			rm -- \"\$file\" && git-update-index --remove $verbose \"\$file\"
-		done
-	" inline
-	;;
-*)
-	git-update-index --force-remove $verbose -z --stdin
-	;;
-esac
diff --git a/git.c b/git.c
index 7db5cc1..63aa311 100644
--- a/git.c
+++ b/git.c
@@ -51,6 +51,7 @@ static void handle_internal_command(int 
 		{ "count-objects", cmd_count_objects },
 		{ "diff", cmd_diff },
 		{ "grep", cmd_grep },
+		{ "rm", cmd_rm },
 		{ "add", cmd_add },
 		{ "rev-list", cmd_rev_list },
 		{ "check-ref-format", cmd_check_ref_format }

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 2/2] Add builtin "git rm" command
  2006-05-19 23:19 ` [PATCH 2/2] Add builtin "git rm" command Linus Torvalds
@ 2006-05-21  8:20   ` Junio C Hamano
  0 siblings, 0 replies; 3+ messages in thread
From: Junio C Hamano @ 2006-05-21  8:20 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git

Linus Torvalds <torvalds@osdl.org> writes:

> I think the new behaviour is strictly an improvement, but perhaps more 
> importantly, it is _different_. As a special case, the semantics are 
> identical for the single-file case (which is the only one our test-suite 
> seems to test).
>
> The other question is what to do with leading directories. The old "git 
> rm" script didn't do anything, which is somewhat inconsistent. This one 
> will actually clean up directories that have become empty as a result of 
> removing the last file, but maybe we want to have a flag to decide the 
> behaviour?

I too think these are improvements.  Thanks for the patch.

BTW, this needed another "evil merge" into "next", so this time
I made a separate evil merge branch that I speculated as a
possibly better alternative approach in an earlier message.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2006-05-21  8:20 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-05-19 23:07 [PATCH 1/2] Move pathspec matching from builtin-add.c into dir.c Linus Torvalds
2006-05-19 23:19 ` [PATCH 2/2] Add builtin "git rm" command Linus Torvalds
2006-05-21  8:20   ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).