git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: "Kristian Høgsberg" <krh@redhat.com>
Cc: git@vger.kernel.org
Subject: Re: [PATCH 7/7] Implement git commit as a builtin command.
Date: Wed, 19 Sep 2007 18:27:29 -0700	[thread overview]
Message-ID: <7vk5qmm8hq.fsf@gitster.siamese.dyndns.org> (raw)
In-Reply-To: <11900740163661-git-send-email-krh@redhat.com> (Kristian Høgsberg's message of "Mon, 17 Sep 2007 20:06:48 -0400")

Kristian Høgsberg <krh@redhat.com> writes:

> diff --git a/builtin-commit.c b/builtin-commit.c
> new file mode 100644
> index 0000000..ee98de9
> --- /dev/null
> +++ b/builtin-commit.c
> @@ -0,0 +1,740 @@
> +/*
> + * Builtin "git commit"
> + *
> + * Copyright (c) 2007 Kristian Høgsberg <krh@redhat.com>
> + * Based on git-commit.sh by Junio C Hamano and Linus Torvalds
> + */
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <unistd.h>
> +

With 85023577a8f4b540aa64aa37f6f44578c0c305a3 (simplify
inclusion of system header files.), we introduced a rule against
these lines.  We probably would want a Coding Guideline (aka
"Hacking Git") document somewhere.

> +#include "cache.h"
> +#include "cache-tree.h"
> +#include "builtin.h"
> +#include "diff.h"
> +#include "diffcore.h"
> +#include "commit.h"
> +#include "revision.h"
> +#include "wt-status.h"
> +#include "run-command.h"
> +#include "refs.h"
> +#include "log-tree.h"
> +#include "strbuf.h"
> +#include "utf8.h"
> +
> +static const char builtin_commit_usage[] =
> +	"[-a | --interactive] [-s] [-v] [--no-verify] [-m <message> | -F <logfile> | (-C|-c) <commit> | --amend] [-u] [-e] [--author <author>] [--template <file>] [[-i | -o] <path>...]";
> +
> +static unsigned char head_sha1[20], merge_head_sha1[20];
> +static char *use_message_buffer;
> +static const char commit_editmsg[] = "COMMIT_EDITMSG";
> +static struct lock_file lock_file;
> +
> +enum option_type {
> +    OPTION_NONE,
> +    OPTION_STRING,
> +    OPTION_INTEGER,
> +    OPTION_LAST,
> +};
> +
> +struct option {
> +    enum option_type type;
> +    const char *long_name;
> +    char short_name;
> +    void *value;
> +};
> +
> +static int scan_options(const char ***argv, struct option *options)
> +{
> +	const char *value, *eq;
> +	int i;
> +
> +	if (**argv == NULL)
> +		return 0;
> +	if ((**argv)[0] != '-')
> +		return 0;
> +	if (!strcmp(**argv, "--"))
> +		return 0;
> +
> +	value = NULL;
> +	for (i = 0; options[i].type != OPTION_LAST; i++) {
> +		if ((**argv)[1] == '-') {
> +			if (!prefixcmp(options[i].long_name, **argv + 2)) {
> +				if (options[i].type != OPTION_NONE)
> +					value = *++(*argv);
> +				goto match;
> +			}
> +
> +			eq = strchr(**argv + 2, '=');
> +			if (eq && options[i].type != OPTION_NONE &&
> +			    !strncmp(**argv + 2, 
> +				     options[i].long_name, eq - **argv - 2)) {
> +				value = eq + 1;
> +				goto match;
> +			}
> +		}
> +
> +		if ((**argv)[1] == options[i].short_name) {
> +			if ((**argv)[2] == '\0') {
> +				if (options[i].type != OPTION_NONE)
> +					value = *++(*argv);
> +				goto match;
> +			}
> +
> +			if (options[i].type != OPTION_NONE) {
> +				value = **argv + 2;
> +				goto match;
> +			}
> +		}
> +	}

How do you disambiguate between "--author <me>" and "--amend"?
"The order in *options list matters" is an acceptable answer but
it needs to be documented.

> +
> +	usage(builtin_commit_usage);
> +
> + match:
> +	switch (options[i].type) {
> +	case OPTION_NONE:
> +		*(int *)options[i].value = 1;
> +		break;
> +	case OPTION_STRING:
> +		if (value == NULL)
> +			die("option %s requires a value.", (*argv)[-1]);
> +		*(const char **)options[i].value = value;
> +		break;
> +	case OPTION_INTEGER:
> +		if (value == NULL)
> +			die("option %s requires a value.", (*argv)[-1]);
> +		*(int *)options[i].value = atoi(value);
> +		break;
> +	default:
> +		assert(0);
> +	}
> +
> +	(*argv)++;
> +
> +	return 1;
> +}

I do not particularly like this OPTION_LAST convention, but that
is a minor detail.  I also suspect in the longer term we might
be better off using getopt() or popt(), but that would be a
larger project.

In any case, if you want to use this option parser, you would
need to add a new file, perhaps parse_options.c, and move this
part there, so that other parts of the system can reuse it.

And the same comment goes for the launch_editor still in
builtin-tag.c.  We should move it to editor.c or something.

> +
> +static char *logfile, *force_author, *message, *template_file;
> +static char *edit_message, *use_message;
> +static int all, edit_flag, also, interactive, only, no_verify, amend, signoff;
> +static int quiet, verbose, untracked_files;
> +
> +static int no_edit, initial_commit, in_merge;
> +const char *only_include_assumed;
> +
> +static struct option commit_options[] = {
> +	{ OPTION_STRING, "file", 'F', (void *) &logfile },
> +	{ OPTION_NONE, "all", 'a', &all },
> +	{ OPTION_STRING, "author", 0, (void *) &force_author },
> +	{ OPTION_NONE, "edit", 'e', &edit_flag },

Why do some get casted to (void*) and others don't?  It doesn't
seem to have any pattern.  I am puzzled...

> +	{ OPTION_NONE, "include", 'i', &also },
> +	{ OPTION_NONE, "interactive", 0, &interactive },
> +	{ OPTION_NONE, "only", 'o', &only },
> +	{ OPTION_STRING, "message", 'm', &message },
> +	{ OPTION_NONE, "no-verify", 'n', &no_verify },
> +	{ OPTION_NONE, "amend", 0, &amend },
> +	{ OPTION_STRING, "reedit-message", 'c', &edit_message },
> +	{ OPTION_STRING, "reuse-message", 'C', &use_message },
> +	{ OPTION_NONE, "signoff", 's', &signoff },
> +	{ OPTION_NONE, "quiet", 'q', &quiet },
> +	{ OPTION_NONE, "verbose", 'v', &verbose },
> +	{ OPTION_NONE, "untracked-files", 0, &untracked_files },
> +	{ OPTION_STRING, "template", 't', &template_file },
> +	{ OPTION_LAST },
> +};
> +
> +/* FIXME: Taken from builtin-add, should be shared. */

You're darn right.  I thought you have some other patch that
touches builtin-add already...

> +
> +static void update_callback(struct diff_queue_struct *q,
> +			    struct diff_options *opt, void *cbdata)
> +{
> +	int i, verbose;
> +
> +	verbose = *((int *)cbdata);
> +	for (i = 0; i < q->nr; i++) {
> +		struct diff_filepair *p = q->queue[i];
> +		const char *path = p->one->path;
> +		switch (p->status) {
> +		default:
> +			die("unexpacted diff status %c", p->status);
> +		case DIFF_STATUS_UNMERGED:
> +		case DIFF_STATUS_MODIFIED:
> +		case DIFF_STATUS_TYPE_CHANGED:
> +			add_file_to_cache(path, verbose);
> +			break;
> +		case DIFF_STATUS_DELETED:
> +			remove_file_from_cache(path);
> +			cache_tree_invalidate_path(active_cache_tree, path);

I've updated remove_file_from_cache() to invalidate the path in
cache-tree so this does not hurt but is no longer necessary.  I
removed this line in the version I queued in 'pu'.

> +			if (verbose)
> +				printf("remove '%s'\n", path);
> +			break;
> +		}
> +	}
> +}
> +
> +static void
> +add_files_to_cache(int fd, const char **files, const char *prefix)
> +{
> +	struct rev_info rev;
> +
> +	init_revisions(&rev, "");
> +	setup_revisions(0, NULL, &rev, NULL);
> +	rev.prune_data = get_pathspec(prefix, files);
> +	rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK;
> +	rev.diffopt.format_callback = update_callback;
> +	rev.diffopt.format_callback_data = &verbose;
> +
> +	run_diff_files(&rev, 0);
> +	refresh_cache(REFRESH_QUIET);

Your update_callback() does add_file_to_cache() which picks up
the stat information from the filesystem already, so I do not
see much point calling refresh_cache() afterwards.  On the other
hand, refreshing before running diff_files() might make sense,
as you can avoid a lot of unnecessary work when you are told to
do "git commit ."

Have you tested this with an unmerged index?

> +
> +	if (write_cache(fd, active_cache, active_nr) || close(fd))
> +		die("unable to write new index file");
> +}
> +
> +static char *
> +prepare_index(const char **files, const char *prefix)
> +{
> +	int fd;
> +	struct tree *tree;
> +	struct lock_file *next_index_lock;
> +
> +	fd = hold_locked_index(&lock_file, 1);
> +	if (read_cache() < 0)
> +		die("index file corrupt");
> +
> +	if (all) {
> +		add_files_to_cache(fd, files, NULL);
> +		return lock_file.filename;

Should you be passing files, which is used as pathspec to decide
if your update_callback() should be called, under --all option?

> +	} else if (also) {
> +		add_files_to_cache(fd, files, prefix);
> +		return lock_file.filename;
> +	}
> +
> +	if (interactive)
> +		interactive_add();

Don't you need to

 (1) abort if the user aborts out of interactive add with ^C?
 (2) re-read the index after interactive_add() returns?

> +	if (*files == NULL) {
> +		/* Commit index as-is. */
> +		rollback_lock_file(&lock_file);
> +		return get_index_file();
> +	}
> +
> +	/*
> +	 * FIXME: Warn on unknown files.  Shell script does
> +	 *
> +	 *   commit_only=`git-ls-files --error-unmatch -- "$@"`
> +	 */

This should be much easier to do than from the shell script, as
you have active_cache[] (aka "the_index.cache[]") in-core.

> +	/*
> +	 * FIXME: shell script does
> +	 *
> +	 *   git-read-tree --index-output="$TMP_INDEX" -i -m HEAD
> +	 *
> +	 * which warns about unmerged files in the index.
> +	 */

I think "unmerged warning" is an unintended side effect.  The
point of the command is to have a copy of index, as there is no
way for shell script to work with more than one index at a time
without using a temporary file.

> +
> +	/* update the user index file */
> +	add_files_to_cache(fd, files, prefix);
> +
> +	if (!initial_commit) {
> +		tree = parse_tree_indirect(head_sha1);
> +		if (!tree)
> +			die("failed to unpack HEAD tree object");
> +		if (read_tree(tree, 0, NULL))
> +			die("failed to read HEAD tree object");
> +	}

Huh?  Doesn't this read_tree() defeat the add_files_to_cache()
you did earlier?

> +
> +	/* Uh oh, abusing lock_file to create a garbage collected file */
> +	next_index_lock = xmalloc(sizeof(*next_index_lock));
> +	fd = hold_lock_file_for_update(next_index_lock,
> +				       git_path("next-index-%d", getpid()), 1);

That's not an abuse, but I wonder what happened to the fd you
got at the beginning of the function.

> +	add_files_to_cache(fd, files, prefix);

and then this is puzzling to me.

I am starting to suspect that you might be better off if you do
not follow the use of temporary index file that was in the shell
script version to the letter.  You can use more than one index
in the core at the same time, now you are built-in.

> +
> +	return next_index_lock->filename;
> +}

... and if we were to go that route, wt_status structure would
have a pointer to the "struct index_state" instead of the
filename of the index... oops, wt_status_print() will discard
and re-read the cache from file, so that approach would not work
right now without fixing wt-status first.  Hmm.

> +static int run_status(FILE *fp, const char *index_file)
> +{
> +	struct wt_status s;
> +
> +	wt_status_prepare(&s);
> +
> +	if (amend) {
> +		s.amend = 1;
> +		s.reference = "HEAD^1";
> +	}
> +	s.verbose = verbose;
> +	s.untracked = untracked_files;
> +	s.index_file = index_file;
> +	s.fp = fp;
> +
> +	wt_status_print(&s);
> +
> +	return s.commitable;
> +}

I did not look at the rest of the patch; it should be either
obviously correct or outright incorrect --- anybody would notice
the breakage immediately.

The prepare_index() part is the most (and only) "interesting"
part of the puzzle.  I need to look at this again and think
about it a bit more.  It needs to:

 * save the current index and restore in case the whole
   "git-commit" is aborted in any way (e.g. ^C, empty log
   message from the editor); This is easy to do from C with
   hold_locked_index().

 * when doing a partial commit,
   - read HEAD in a temporary index, if not initial commit;
   - update the temporary index with the given paths;
   - write out the temporary index as a tree to build a commit;

   - update the real index the same way with the given paths;
   - if all goes well, commit the updated real index; again,
     this is easy in C with commit_locked_index();

 * when not doing a partial commit,
   - update the real index with given paths (if "--also") or all
     what diff-files reports (if "--all");
   - write out the real index as a tree to build a commit;
   - if all goes well, commit the updated real index; again,
     this is easy in C with commit_locked_index();

A thing to keep in mind is that you can write out the temporary
index as a tree from the core without writing it out first to a
temporary index file at all.  Perhaps the code should be
structured like this.

    - parse options and all the other necessary setup;

    - hold_locked_index() to lock the "real" index;

    - prepare_index() is responsibile for doing two things

      1. build, write out and return the tree object to be
         committed;

      2. leave the_index[] in the state to become the "real"
         index if this commit command is not aborted;

    - take the tree object, put commit log message around it and
      make a commit object;

    - commit_locked_index() to write out the "real" index.

Now, the task #1 for prepare_index() is simpler for "also" and
"all" case.  You do the equivalent of "git-add -u" or "git add
<path>" and run cache_tree_update() to write out the tree, and
when you are done, both the_index.cache and the_index.cache_tree
are up-to-date, ready to be written out by the caller at the
very end.

For a partial commit, the task is a bit more convoluted, as
writing out the tree needs to be done from outside the_index.  I
would say something like this:

	int prepare_index(unsigned char *sha1) {
		if (partial commit) {
                       struct index_state tmp_index;

                       /* "temporary index" only in-core */
                       memset(&tmp_index, 0, sizeof(tmp_index));
                       read_index(&tmp_index);
                       add_files_to_index(files, prefix, &tmp_index);
                       write_index_as_tree(&tmp_index, sha1);
                       discard_index(&tmp_index);

                       /* update the index the same way */
                       add_files_to_index(files, prefix, &the_index);
                       return ok;
		}
                /* otherwise */
                if (files && *files)
                	add_files_to_index(files, prefix, &the_index);
		else if (all)
                	add_files_to_index(NULL, prefix, &the_index);
		write_index_as_tree(&the_index, sha1);
                return ok;
	}

where

 * add_files_to_index() is similar to your add_files_to_cache()
   but takes struct index_state; the callback can still diff
   with the_index.cache (aka active_cache) as that diff is what
   we are interested in updating anyway;

 * write_index_as_tree() would be a new function that takes
   struct index_state and writes that as a tree.  It would
   essentially be the builtin-write-tree.c::write_tree()
   function except the opening and reading the index (or
   updating the cache-tree) part, something like:

write_index_as_tree(struct index_state *istate, unsigned char *sha1)
{
       	if (!cache_tree_fully_valid(istate->cache_tree))
        	cache_tree_update(istate->cache_tree,
                		  istate->cache,
                                  istate->cache_nr, 0, 0);
	hashcpy(sha1, istate->cache_tree->sha1);
}

  parent reply	other threads:[~2007-09-20  1:27 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-09-18  0:06 [PATCH 1/7] Enable wt-status output to a given FILE pointer Kristian Høgsberg
2007-09-18  0:06 ` [PATCH 2/7] Enable wt-status to run against non-standard index file Kristian Høgsberg
2007-09-18  0:06   ` [PATCH 3/7] Introduce entry point for launching add--interactive Kristian Høgsberg
2007-09-18  0:06     ` [PATCH 4/7] Clean up stripspace a bit, use strbuf even more Kristian Høgsberg
2007-09-18  0:06       ` [PATCH 5/7] Add strbuf_read_file() Kristian Høgsberg
2007-09-18  0:06         ` [PATCH 6/7] Export rerere() and launch_editor() Kristian Høgsberg
2007-09-18  0:06           ` [PATCH 7/7] Implement git commit as a builtin command Kristian Høgsberg
2007-09-18 13:58             ` Johannes Schindelin
2007-09-18 15:07               ` Kristian Høgsberg
2007-09-20  1:27             ` Junio C Hamano [this message]
2007-09-21 17:18               ` Kristian Høgsberg
2007-09-21 19:32                 ` Junio C Hamano
2007-09-24 20:27                   ` Kristian Høgsberg
2007-09-18 13:14           ` [PATCH 6/7] Export rerere() and launch_editor() Johannes Schindelin
2007-09-19 23:52           ` Junio C Hamano
2007-09-21 18:01             ` Kristian Høgsberg
2007-09-18 13:12       ` [PATCH 4/7] Clean up stripspace a bit, use strbuf even more Johannes Schindelin
2007-09-18 13:52         ` Kristian Høgsberg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vk5qmm8hq.fsf@gitster.siamese.dyndns.org \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    --cc=krh@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).