git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Matthieu Moy <Matthieu.Moy@imag.fr>
To: git@vger.kernel.org
Subject: Re: [RFC][PATCH] remarks about custom diff driver
Date: Tue, 28 Aug 2007 01:29:11 +0200	[thread overview]
Message-ID: <vpqabsc8sl4.fsf@bauges.imag.fr> (raw)
In-Reply-To: <vpq8x7x5knh.fsf@bauges.imag.fr> (Matthieu Moy's message of "Mon\, 27 Aug 2007 12\:36\:34 +0200")

Matthieu Moy <Matthieu.Moy@imag.fr> writes:

> Indeed, what I would have needed is a custum text converter. In my
> case, I would have said something like
>
> # ~/.gitconfig
> [textconv "odt2txt"]
> 	command=odt2txt
>
> Then, in .gitattributes
>
> *.ods textconv=odt2txt
> *.odp textconv=odt2txt
> *.odt textconv=odt2txt

I started implementing something like this. The code is rather ugly
(I'm not so fluent in C, and not familiar with the git codebase ...),
but it gives an idea, and seems to work.

If you're interested in having a look (but I'll have to cleanup the
code before asking for a detailed review), here's the patch.

In the particular case of OpenDocument files, that makes a few tens of
lines of C in git itself (once and for all), but it makes it totally
trivial for the user to configure git to get all of "git diff". Sounds
promising to me.


>From 7e1bbb5fc6dc5c0adb8cad5b875d57ee17247f4a Mon Sep 17 00:00:00 2001
From: Matthieu Moy <Matthieu.Moy@imag.fr>
Date: Tue, 28 Aug 2007 00:46:07 +0200
Subject: [PATCH] Prototype for a textconverter filter in git-diff.

That feature is similar to the custom diff driver, but the user only has
to provide a text filter (a command to convert a file into a plain-text
representation). Git takes care of diffing, mode change, ...

In particular, with

[textconv "odt2txt"]
          command=odt2txt

*.ods textconv=odt2txt
*.odp textconv=odt2txt
*.odt textconv=odt2txt

One can use "git diff" on OpenOffice files (including "git diff --color"
and friends).

This could be extended so that "git blame" can use it also.
---
 diff.c |  193 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 171 insertions(+), 22 deletions(-)

diff --git a/diff.c b/diff.c
index a7e7671..ad99f27 100644
--- a/diff.c
+++ b/diff.c
@@ -50,11 +50,12 @@ static int parse_diff_color_slot(const char *var, int ofs)
 	die("bad config variable '%s'", var);
 }
 
-static struct ll_diff_driver {
+static struct ll_cmd_driver {
 	const char *name;
-	struct ll_diff_driver *next;
+	struct ll_cmd_driver *next;
 	char *cmd;
-} *user_diff, **user_diff_tail;
+} *user_diff, **user_diff_tail,
+	*user_textconv, **user_textconv_tail;
 
 static void read_config_if_needed(void)
 {
@@ -70,29 +71,30 @@ static void read_config_if_needed(void)
  * this in a bit convoluted way to allow low level diff driver
  * called "color".
  */
-static int parse_lldiff_command(const char *var, const char *ep, const char *value)
+static int parse_ll_command(const char *var, const char *name,
+			    const char *ep, const char *value,
+			    struct ll_cmd_driver **user_driver,
+			    struct ll_cmd_driver ***user_driver_tail)
 {
-	const char *name;
 	int namelen;
-	struct ll_diff_driver *drv;
+	struct ll_cmd_driver *drv;
 
-	name = var + 5;
 	namelen = ep - name;
-	for (drv = user_diff; drv; drv = drv->next)
+	for (drv = *user_driver; drv; drv = drv->next)
 		if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
 			break;
 	if (!drv) {
 		char *namebuf;
-		drv = xcalloc(1, sizeof(struct ll_diff_driver));
+		drv = xcalloc(1, sizeof(struct ll_cmd_driver));
 		namebuf = xmalloc(namelen + 1);
 		memcpy(namebuf, name, namelen);
 		namebuf[namelen] = 0;
 		drv->name = namebuf;
 		drv->next = NULL;
-		if (!user_diff_tail)
-			user_diff_tail = &user_diff;
-		*user_diff_tail = drv;
-		user_diff_tail = &(drv->next);
+		if (!*user_driver_tail)
+			*user_driver_tail = &*user_driver;
+		**user_driver_tail = drv;
+		*user_driver_tail = &(drv->next);
 	}
 
 	if (!value)
@@ -101,6 +103,38 @@ static int parse_lldiff_command(const char *var, const char *ep, const char *val
 	return 0;
 }
 
+static void setup_textconv_attr_check(struct git_attr_check *check)
+{
+	static struct git_attr *attr_textconv;
+
+	if (!attr_textconv) {
+		attr_textconv = git_attr("textconv", 8);
+	}
+	check[0].attr = attr_textconv;
+}
+
+static const char *external_textconv_attr(const char *name)
+{
+	struct git_attr_check attr_textconv_check;
+
+	setup_textconv_attr_check(&attr_textconv_check);
+	if (!git_checkattr(name, 1, &attr_textconv_check)) {
+		const char *value = attr_textconv_check.value;
+		if (!ATTR_TRUE(value) &&
+		    !ATTR_FALSE(value) &&
+		    !ATTR_UNSET(value)) {
+			struct ll_cmd_driver *drv;
+
+			read_config_if_needed();
+			for (drv = user_textconv; drv; drv = drv->next) {
+				if (!strcmp(drv->name, value))
+					return drv->cmd;
+			}
+		}
+	}
+	return NULL;
+}
+
 /*
  * 'diff.<what>.funcname' attribute can be specified in the configuration
  * to define a customized regexp to find the beginning of a function to
@@ -171,11 +205,21 @@ int git_diff_ui_config(const char *var, const char *value)
 
 		if (ep != var + 4) {
 			if (!strcmp(ep, ".command"))
-				return parse_lldiff_command(var, ep, value);
+				return parse_ll_command(var, var + 5, ep, value,
+							&user_diff, &user_diff_tail);
 			if (!strcmp(ep, ".funcname"))
 				return parse_funcname_pattern(var, ep, value);
 		}
 	}
+	if (!prefixcmp(var, "textconv.")) {
+		const char *ep = strrchr(var, '.');
+
+		if (ep != var + 8) {
+			if (!strcmp(ep, ".command"))
+				return parse_ll_command(var, var + 9, ep, value,
+							&user_textconv, &user_textconv_tail);
+		}
+	}
 	if (!prefixcmp(var, "diff.color.") || !prefixcmp(var, "color.diff.")) {
 		int slot = parse_diff_color_slot(var, 11);
 		color_parse(value, var, diff_colors[slot]);
@@ -244,6 +288,13 @@ static struct diff_tempfile {
 	char tmp_path[PATH_MAX];
 } diff_temp[2];
 
+/* Forward declarations */
+static void prepare_temp_file(const char *name,
+			      struct diff_tempfile *temp,
+			      struct diff_filespec *one);
+static int spawn_prog(const char *pgm, const char **arg);
+/* End forward declarations */
+
 static int count_lines(const char *data, int size)
 {
 	int count, ch, completely_empty = 1, nl_just_seen = 0;
@@ -1260,6 +1311,72 @@ static const char *diff_funcname_pattern(struct diff_filespec *one)
 	return NULL;
 }
 
+static int spawn_prog_to_buf(const char *pgm, const char **arg, char **buf, unsigned long * size)
+{
+	pid_t pid;
+	int status;
+	int fd[2];
+
+#define INCREMENT 1024
+	ssize_t got;
+
+	fflush(NULL);
+	if (pipe(fd) < 0)
+		die("unable to create pipe");
+	pid = fork();
+	if (pid < 0)
+		die("unable to fork");
+	if (!pid) {
+		dup2(fd[1], 1);
+		close(fd[0]);
+		close(fd[1]);
+		execvp(pgm, (char *const*) arg);
+		exit(255);
+	}
+	close(fd[1]);
+
+	*size = 0;
+	*buf = NULL;
+	while (1) {
+		*buf = xrealloc(*buf, *size + INCREMENT);
+		got = xread(fd[0], *buf + *size, INCREMENT);
+		if (!got)
+			break; /* EOF */
+		if (got < 0)
+			return error("error while reading from stdin %s",
+				     strerror(errno));
+		*size += got;
+	}
+
+	while (waitpid(pid, &status, 0) < 0) {
+		if (errno == EINTR)
+			continue;
+		return -1;
+	}
+
+#if 0
+	/*
+	 * copied from the external diff spawn_prog. Is it needed
+	 * here, too ?
+	 */
+
+	/* Earlier we did not check the exit status because
+	 * diff exits non-zero if files are different, and
+	 * we are not interested in knowing that.  It was a
+	 * mistake which made it harder to quit a diff-*
+	 * session that uses the git-apply-patch-script as
+	 * the GIT_EXTERNAL_DIFF.  A custom GIT_EXTERNAL_DIFF
+	 * should also exit non-zero only when it wants to
+	 * abort the entire diff-* session.
+	 */
+	if (WIFEXITED(status) && !WEXITSTATUS(status))
+		return 0;
+	return -1;
+#endif
+}
+
+
+
 static void builtin_diff(const char *name_a,
 			 const char *name_b,
 			 struct diff_filespec *one,
@@ -1314,6 +1431,38 @@ static void builtin_diff(const char *name_a,
 	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
 		die("unable to read files to diff");
 
+	const char * textconv = external_textconv_attr(name_a);
+	const char * textconvb = external_textconv_attr(name_b);
+
+	/*
+	 * Only use the textconv driver is it is set, and is the same
+	 * for source and destination file.
+	 */
+	if (textconv && textconvb && !strcmp(textconv, textconvb)) {
+		const char * spawn_arg[3];
+		const char ** arg = &spawn_arg[0];
+		char *bufa, *bufb;
+		unsigned long sizea, sizeb;
+		struct diff_tempfile *temp = diff_temp;
+		prepare_temp_file(name_a, &temp[0], one);
+		*arg++ = textconv;
+		*arg++ = temp[0].name;
+		*arg++ = NULL;
+		spawn_prog_to_buf(textconv, spawn_arg, &(mf1.ptr), &(mf1.size));
+		one->data = mf1.ptr;
+		one->size = mf1.size;
+		one->should_free = 1;
+		one->should_munmap = 1;
+
+		prepare_temp_file(name_b, &temp[1], two);
+		spawn_arg[1] = temp[1].name;
+		spawn_prog_to_buf(textconv, spawn_arg, &(mf2.ptr), &(mf2.size));
+		two->data = mf2.ptr;
+		two->size = mf2.size;
+		two->should_free = 1;
+		two->should_munmap = 1;
+	}
+
 	if (!o->text &&
 	    (diff_filespec_is_binary(one) || diff_filespec_is_binary(two))) {
 		/* Quite common confusing case */
@@ -1890,7 +2039,7 @@ static const char *external_diff_attr(const char *name)
 		if (!ATTR_TRUE(value) &&
 		    !ATTR_FALSE(value) &&
 		    !ATTR_UNSET(value)) {
-			struct ll_diff_driver *drv;
+			struct ll_cmd_driver *drv;
 
 			read_config_if_needed();
 			for (drv = user_diff; drv; drv = drv->next)
@@ -2853,14 +3002,14 @@ struct patch_id_t {
 static int remove_space(char *line, int len)
 {
 	int i;
-        char *dst = line;
-        unsigned char c;
+	char *dst = line;
+	unsigned char c;
 
-        for (i = 0; i < len; i++)
-                if (!isspace((c = line[i])))
-                        *dst++ = c;
+	for (i = 0; i < len; i++)
+		if (!isspace((c = line[i])))
+			*dst++ = c;
 
-        return dst - line;
+	return dst - line;
 }
 
 static void patch_id_consume(void *priv, char *line, unsigned long len)
-- 
1.5.3.rc6.21.g153d-dirty



-- 
Matthieu

      parent reply	other threads:[~2007-08-27 23:29 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-08-27 10:36 [RFC] remarks about custom diff driver Matthieu Moy
2007-08-27 10:45 ` Jeff King
2007-08-27 23:29 ` Matthieu Moy [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=vpqabsc8sl4.fsf@bauges.imag.fr \
    --to=matthieu.moy@imag.fr \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).