git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] git-archive: accept --owner and --group like GNU tar
@ 2017-12-29 14:05 suzuki toshiya
  2018-01-01 18:29 ` René Scharfe
       [not found] ` <df39f62558314cf6a9d9df3e23f31dd8@OS2PR01MB1147.jpnprd01.prod.outlook.com>
  0 siblings, 2 replies; 13+ messages in thread
From: suzuki toshiya @ 2017-12-29 14:05 UTC (permalink / raw)
  To: git; +Cc: suzuki toshiya

The ownership of files created by git-archive is always
root:root. Add --owner and --group options which work
like the GNU tar equivalent to allow overriding these
defaults.

Signed-off-by: suzuki toshiya <mpsuzuki@hiroshima-u.ac.jp>
---
 Documentation/git-archive.txt |  13 +++
 archive-tar.c                 |   8 +-
 archive.c                     | 224 ++++++++++++++++++++++++++++++++++++++++++
 archive.h                     |   4 +
 t/t5005-archive-uid-gid.sh    | 140 ++++++++++++++++++++++++++
 t/t5005/parse-tar-file.py     |  60 +++++++++++
 tar.h                         |   2 +
 7 files changed, 447 insertions(+), 4 deletions(-)
 create mode 100755 t/t5005-archive-uid-gid.sh
 create mode 100755 t/t5005/parse-tar-file.py

diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
index cfa1e4ebe..0d156f6c1 100644
--- a/Documentation/git-archive.txt
+++ b/Documentation/git-archive.txt
@@ -11,6 +11,7 @@ SYNOPSIS
 [verse]
 'git archive' [--format=<fmt>] [--list] [--prefix=<prefix>/] [<extra>]
 	      [-o <file> | --output=<file>] [--worktree-attributes]
+	      [--owner [username[:uid]] [--group [groupname[:gid]]
 	      [--remote=<repo> [--exec=<git-upload-archive>]] <tree-ish>
 	      [<path>...]
 
@@ -63,6 +64,18 @@ OPTIONS
 	This can be any options that the archiver backend understands.
 	See next section.
 
+--owner=<name>[:<uid>]::
+	Force <name> as owner and <uid> as uid for the files in the tar
+	archive.  If <uid> is not supplied, <name> can be either a user
+	name or numeric UID.  In this case the missing part (UID or
+	name) will be inferred from the current host's user database.
+
+--group=<name>[:<gid>]::
+	Force <name> as group and <gid> as gid for the files in the tar
+	archive.  If <gid> is not supplied, <name> can be either a group
+	name or numeric GID.  In this case the missing part (GID or
+	name) will be inferred from the current host's group database.
+
 --remote=<repo>::
 	Instead of making a tar archive from the local repository,
 	retrieve a tar archive from a remote repository. Note that the
diff --git a/archive-tar.c b/archive-tar.c
index c6ed96ee7..ca6471870 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -204,10 +204,10 @@ static void prepare_header(struct archiver_args *args,
 	xsnprintf(header->size, sizeof(header->size), "%011lo", S_ISREG(mode) ? size : 0);
 	xsnprintf(header->mtime, sizeof(header->mtime), "%011lo", (unsigned long) args->time);
 
-	xsnprintf(header->uid, sizeof(header->uid), "%07o", 0);
-	xsnprintf(header->gid, sizeof(header->gid), "%07o", 0);
-	strlcpy(header->uname, "root", sizeof(header->uname));
-	strlcpy(header->gname, "root", sizeof(header->gname));
+	xsnprintf(header->uid, sizeof(header->uid), "%07lo", args->uid);
+	xsnprintf(header->gid, sizeof(header->gid), "%07lo", args->gid);
+	strlcpy(header->uname, args->uname, sizeof(header->uname));
+	strlcpy(header->gname, args->gname, sizeof(header->gname));
 	xsnprintf(header->devmajor, sizeof(header->devmajor), "%07o", 0);
 	xsnprintf(header->devminor, sizeof(header->devminor), "%07o", 0);
 
diff --git a/archive.c b/archive.c
index 0b7b62af0..aa4b16b75 100644
--- a/archive.c
+++ b/archive.c
@@ -8,6 +8,7 @@
 #include "parse-options.h"
 #include "unpack-trees.h"
 #include "dir.h"
+#include "tar.h"
 
 static char const * const archive_usage[] = {
 	N_("git archive [<options>] <tree-ish> [<path>...]"),
@@ -417,6 +418,223 @@ static void parse_treeish_arg(const char **argv,
 	{ OPTION_SET_INT, (s), NULL, (v), NULL, "", \
 	  PARSE_OPT_NOARG | PARSE_OPT_NONEG | PARSE_OPT_HIDDEN, NULL, (p) }
 
+/*
+ * GNU tar --owner, --group options reject hexdigit, signed int values.
+ * strtol(), atoi() are too permissive to simulate the behaviour.
+ */
+#define STR_IS_DIGIT_OK 0
+#define STR_IS_NOT_DIGIT -1
+#define STR_IS_DIGIT_TOO_LARGE -2
+
+static int try_as_simple_digit(const char *s, unsigned long *dst)
+{
+	unsigned long ul;
+	char *endptr;
+
+	if (strlen(s) != strspn(s, "0123456789"))
+		return STR_IS_NOT_DIGIT;
+
+	errno = 0;
+	ul = strtoul(s, &endptr, 10);
+
+	/* catch ERANGE */
+	if (errno) {
+		errno = 0;
+		return STR_IS_DIGIT_TOO_LARGE;
+	}
+
+#if ULONG_MAX > 0xFFFFFFFFUL
+	/*
+	 * --owner, --group rejects uid/gid greater than 32-bit
+	 * limits, even on 64-bit platforms.
+	 */
+	if (ul > 0xFFFFFFFFUL)
+		return STR_IS_DIGIT_TOO_LARGE;
+#endif
+
+	if (dst)
+		*dst = ul;
+	return STR_IS_DIGIT_OK;
+}
+
+static const char *skip_leading_colon(const char *s)
+{
+	const char *col_pos;
+
+	col_pos = strchr(s, ':');
+	if (!col_pos)
+		return s;
+
+	return (col_pos + 1);
+}
+
+#define STR_IS_NAME_COLON_DIGIT 0
+#define STR_HAS_NO_COLON -1
+#define STR_HAS_DIGIT_BROKEN -2
+#define STR_HAS_DIGIT_TOO_LARGE -3
+
+static int try_as_name_colon_digit(const char *s, const char **dst_s,
+		unsigned long *dst_ul)
+{
+	int r;
+	const char *s2;
+
+	s2 = skip_leading_colon(s);
+	if (s2 == s)
+		return STR_HAS_NO_COLON;
+
+	r = try_as_simple_digit(s2, dst_ul);
+	switch (r) {
+	case STR_IS_DIGIT_OK:
+		*dst_s = xstrndup(s, s2 - s - 1);
+		return STR_IS_NAME_COLON_DIGIT;
+	case STR_IS_DIGIT_TOO_LARGE:
+		return STR_HAS_DIGIT_TOO_LARGE;
+	default:
+		return STR_HAS_DIGIT_BROKEN;
+	}
+}
+
+#define NAME_ID_BOTH_GIVEN 0
+#define NAME_ID_ID_GUESSED 1
+#define NAME_ID_ID_UNTOUCHED 2
+#define NAME_ID_NAME_GUESSED 3
+#define NAME_ID_NAME_EMPTY 4
+#define NAME_ID_ERR_ID_TOO_LARGE -126
+#define NAME_ID_ERR_SYNTAX -127
+#define NAME_ID_ERR_PARAMS -128
+
+static int set_args_uname_uid(struct archiver_args *args,
+		const char *tar_owner)
+{
+	int r;
+	struct passwd *pw = NULL;
+
+	if (!args || !tar_owner)
+		return NAME_ID_ERR_PARAMS;
+
+	r = try_as_name_colon_digit(tar_owner, &(args->uname),
+				    &(args->uid));
+	switch (r) {
+	case STR_IS_NAME_COLON_DIGIT:
+		return NAME_ID_BOTH_GIVEN;
+	case STR_HAS_DIGIT_TOO_LARGE:
+		return NAME_ID_ERR_ID_TOO_LARGE;
+	case STR_HAS_DIGIT_BROKEN:
+		return NAME_ID_ERR_SYNTAX;
+	}
+
+	/* the operand is known to be single token */
+
+	r = try_as_simple_digit(tar_owner, &(args->uid));
+	switch (r) {
+	case STR_IS_DIGIT_TOO_LARGE:
+		return NAME_ID_ERR_ID_TOO_LARGE;
+	case STR_IS_DIGIT_OK:
+		pw = getpwuid(args->uid);
+		if (!pw) {
+			args->uname = xstrdup("");
+			return NAME_ID_NAME_EMPTY;
+		}
+		args->uname = xstrdup(pw->pw_name);
+		return NAME_ID_NAME_GUESSED;
+	}
+
+	/* the operand is known to be non-digit */
+
+	args->uname = xstrdup(tar_owner);
+	pw = getpwnam(tar_owner);
+	if (!pw)
+		return NAME_ID_ID_UNTOUCHED;
+	args->uid = pw->pw_uid;
+	return NAME_ID_ID_GUESSED;
+}
+
+static int set_args_gname_gid(struct archiver_args *args,
+		const char *tar_group)
+{
+	int r;
+	struct group *gr = NULL;
+
+	if (!args || !tar_group)
+		return NAME_ID_ERR_PARAMS;
+
+	r = try_as_name_colon_digit(tar_group, &(args->gname),
+				    &(args->gid));
+	switch (r) {
+	case STR_IS_NAME_COLON_DIGIT:
+		return NAME_ID_BOTH_GIVEN;
+	case STR_HAS_DIGIT_TOO_LARGE:
+		return NAME_ID_ERR_ID_TOO_LARGE;
+	case STR_HAS_DIGIT_BROKEN:
+		return NAME_ID_ERR_SYNTAX;
+	}
+
+	/* the operand is known to be single token */
+
+	r = try_as_simple_digit(tar_group, &(args->gid));
+	switch (r) {
+	case STR_IS_DIGIT_TOO_LARGE:
+		return NAME_ID_ERR_ID_TOO_LARGE;
+	case STR_IS_DIGIT_OK:
+		gr = getgrgid(args->gid);
+		if (!gr) {
+			args->gname = xstrdup("");
+			return NAME_ID_NAME_EMPTY;
+		}
+		args->gname = xstrdup(gr->gr_name);
+		return NAME_ID_NAME_GUESSED;
+	}
+
+	/* the operand is known to be non-digit */
+
+	args->gname = xstrdup(tar_group);
+	gr = getgrnam(tar_group);
+	if (!gr)
+		return NAME_ID_ID_UNTOUCHED;
+	args->gid = gr->gr_gid;
+	return NAME_ID_ID_GUESSED;
+}
+
+static void set_args_tar_owner_group(struct archiver_args *args,
+		const char *tar_owner, const char *tar_group)
+{
+	int r;
+
+	/* initialize by default values */
+	args->uname = xstrdup("root");
+	args->gname = xstrdup("root");
+	args->uid = 0;
+	args->gid = 0;
+
+	/*
+	 * GNU tar --format=ustar checks if uid is in 0..209751.
+	 * Too long digit string could not be dealt as numeric,
+	 * it is rejected as a syntax error before range check.
+	 */
+	r = set_args_uname_uid(args, tar_owner);
+	switch (r) {
+	case NAME_ID_ERR_ID_TOO_LARGE:
+	case NAME_ID_ERR_SYNTAX:
+		die("'%s': Invalid owner ID",
+		    skip_leading_colon(tar_owner));
+	}
+	if (args->uid > MAX_ID_IN_TAR_US)
+		die("value %ld out of uid_t range 0..%ld", args->uid,
+		     MAX_ID_IN_TAR_US);
+
+	r = set_args_gname_gid(args, tar_group);
+	switch (r) {
+	case NAME_ID_ERR_ID_TOO_LARGE:
+	case NAME_ID_ERR_SYNTAX:
+		die("'%s': Invalid group ID",
+		    skip_leading_colon(tar_group));
+	}
+	if (args->gid > MAX_ID_IN_TAR_US)
+		die("value %ld out of gid_t range 0..%ld", args->gid,
+		    MAX_ID_IN_TAR_US);
+}
+
 static int parse_archive_args(int argc, const char **argv,
 		const struct archiver **ar, struct archiver_args *args,
 		const char *name_hint, int is_remote)
@@ -431,6 +649,8 @@ static int parse_archive_args(int argc, const char **argv,
 	int i;
 	int list = 0;
 	int worktree_attributes = 0;
+	const char *tar_owner = NULL;
+	const char *tar_group = NULL;
 	struct option opts[] = {
 		OPT_GROUP(""),
 		OPT_STRING(0, "format", &format, N_("fmt"), N_("archive format")),
@@ -459,6 +679,8 @@ static int parse_archive_args(int argc, const char **argv,
 			N_("retrieve the archive from remote repository <repo>")),
 		OPT_STRING(0, "exec", &exec, N_("command"),
 			N_("path to the remote git-upload-archive command")),
+		OPT_STRING(0, "owner", &tar_owner, N_("owner"), N_("<name[:uid]> in tar")),
+		OPT_STRING(0, "group", &tar_group, N_("group"), N_("<name[:gid]> in tar")),
 		OPT_END()
 	};
 
@@ -507,6 +729,8 @@ static int parse_archive_args(int argc, const char **argv,
 	args->baselen = strlen(base);
 	args->worktree_attributes = worktree_attributes;
 
+	set_args_tar_owner_group(args, tar_owner, tar_group);
+
 	return argc;
 }
 
diff --git a/archive.h b/archive.h
index 62d1d82c1..b2cfb1e4e 100644
--- a/archive.h
+++ b/archive.h
@@ -15,6 +15,10 @@ struct archiver_args {
 	unsigned int worktree_attributes : 1;
 	unsigned int convert : 1;
 	int compression_level;
+	unsigned long uid;
+	unsigned long gid;
+	const char *uname;
+	const char *gname;
 };
 
 #define ARCHIVER_WANT_COMPRESSION_LEVELS 1
diff --git a/t/t5005-archive-uid-gid.sh b/t/t5005-archive-uid-gid.sh
new file mode 100755
index 000000000..c5e08d890
--- /dev/null
+++ b/t/t5005-archive-uid-gid.sh
@@ -0,0 +1,140 @@
+#!/bin/sh
+
+test_description='test --owner --group options for git-archive'
+. ./test-lib.sh
+
+test_expect_success 'create commit with a few empty files' '
+	git init . 1>/dev/null 2>/dev/null &&
+	touch uid-gid-test.001 &&
+	mkdir uid-gid-test.002 &&
+	mkdir uid-gid-test.002/uid-gid-test.003 &&
+	git add uid-gid-test.001 &&
+	git add uid-gid-test.002 &&
+	git add uid-gid-test.002/uid-gid-test.003 &&
+	git commit -m "uid-gid-test" 2>/dev/null 1>/dev/null
+'
+
+check_uid_gid_uname_gname_in_tar() {
+	# $1 tar pathname
+	# $2 uid (digit in string)
+	# $3 gid (digit in string)
+	# $4 uname (string)
+	# $5 gname (string)
+	uid=`python "$TEST_DIRECTORY"/t5005/parse-tar-file.py --print=uid --fail-if-multi $1`
+	if test $? != 0 -o x"${uid}" != "x"$2
+	then
+		echo "(some) uid differs from the specified value"
+		return $?
+	fi
+
+	gid=`python "$TEST_DIRECTORY"/t5005/parse-tar-file.py --print=gid --fail-if-multi $1`
+	if test $? != 0 -o x"${gid}" != "x"$3
+	then
+		echo "(some) gid differs from the specified value"
+		return $?
+	fi
+
+	uname=`python "$TEST_DIRECTORY"/t5005/parse-tar-file.py --print=uname --fail-if-multi $1`
+	if test $? != 0 -o x"${uname}" != "x"$4
+	then
+		echo "(some) uname differs from the specified value"
+		return $?
+	fi
+
+	gname=`python "$TEST_DIRECTORY"/t5005/parse-tar-file.py --print=gname --fail-if-multi $1`
+	if test $? != 0 -o x"${gname}" != "x"$5
+	then
+		echo "(some) gname differs from the specified value"
+		return $?
+	fi
+
+	return 0
+}
+
+test_expect_success 'test a case with explicitly specified name/id, owner=nobody:1234 group=nogroup:5678' '
+	git archive --format=tar --owner nobody:1234 --group nogroup:5678 HEAD > uid-gid-test1.tar &&
+	check_uid_gid_uname_gname_in_tar uid-gid-test1.tar 1234 5678 nobody nogroup &&
+	return $?
+'
+
+test_expect_success 'test a case with only string is given, owner=(current my name) group=(current my group)' '
+	my_uid=`id -u` &&
+	my_gid=`id -g` &&
+	my_uname=`id -u -n` &&
+	my_gname=`id -g -n` &&
+	git archive --format=tar --owner ${my_uname} --group ${my_gname} HEAD > uid-gid-test2.tar &&
+	check_uid_gid_uname_gname_in_tar uid-gid-test2.tar ${my_uid} ${my_gid} ${my_uname} ${my_gname} &&
+	return $?
+'
+
+test_expect_success 'test a case with only number is given, owner=(current my uid) group=(current my gid)' '
+	my_uid=`id -u` &&
+	my_gid=`id -g` &&
+	my_uname=`id -u -n` &&
+	my_gname=`id -g -n` &&
+	git archive --format=tar --owner ${my_uid} --group ${my_gid} HEAD > uid-gid-test3.tar &&
+	check_uid_gid_uname_gname_in_tar uid-gid-test3.tar ${my_uid} ${my_gid} ${my_uname} ${my_gname} &&
+	return $?
+'
+
+test_expect_success 'test a case with only uid is given, owner=(current my uid)' '
+	my_uid=`id -u` &&
+	my_gid=`id -g` &&
+	my_uname=`id -u -n` &&
+	my_gname=`id -g -n` &&
+	git archive --format=tar --owner ${my_uid} HEAD > uid-gid-test4.tar &&
+	check_uid_gid_uname_gname_in_tar uid-gid-test4.tar ${my_uid} ${my_gid} ${my_uname} ${my_gname} &&
+	return $?
+'
+
+test_expect_success 'test a case with no owner/group are given' '
+	git archive --format=tar HEAD > uid-gid-test5.tar &&
+	check_uid_gid_uname_gname_in_tar uid-gid-test5.tar 0 0 root root &&
+	return $?
+'
+
+test_expect_success 'test a case with max uid for ustar' '
+	git archive --format=tar --owner nobody:209751 --group nogroup:1234 HEAD > uid-gid-test6.tar &&
+	check_uid_gid_uname_gname_in_tar uid-gid-test6.tar 209751 1234 nobody nogroup &&
+	return $?
+'
+
+test_expect_success 'test a case with max gid for ustar' '
+	git archive --format=tar --group nogroup:209751 --owner nobody:1234 HEAD > uid-gid-test7.tar &&
+	check_uid_gid_uname_gname_in_tar uid-gid-test7.tar 1234 209751 nobody nogroup &&
+	return $?
+'
+
+test_expect_success 'test a case with uid greater than 32-bit (must fail)' '
+	test_must_fail git archive --format=tar --owner 4294967296 --group 1234 HEAD >/dev/null
+'
+
+test_expect_success 'test a case with gid greater than 32-bit (must fail)' '
+	test_must_fail git archive --format=tar --group 4294967296 --owner 1234 HEAD >/dev/null
+'
+
+test_expect_success 'test a case with uid greater than ustar limit (must fail)' '
+	test_must_fail git archive --format=tar --owner 2097152 --group 1234 HEAD >/dev/null
+'
+
+test_expect_success 'test a case with gid greater than ustar limit (must fail)' '
+	test_must_fail git archive --format=tar --group 2097152 --owner 1234 HEAD >/dev/null
+'
+
+test_expect_success 'test a case with valid username plus uid greater than 32-bit (must fail)' '
+	test_must_fail git archive --format=tar --owner nobody:4294967296 HEAD >/dev/null
+'
+
+test_expect_success 'test a case with valid groupname plus gid greater than 32-bit (must fail)' '
+	test_must_fail git archive --format=tar --group nogroup:4294967296 HEAD >/dev/null
+'
+
+test_expect_success 'test a case with valid username plus uid greater than ustar limit (must fail)' '
+	test_must_fail git archive --format=tar --owner nobody:2097152 HEAD >/dev/null
+'
+
+test_expect_success 'test a case with valid groupname plus gid greater than ustar limit (must fail)' '
+	test_must_fail git archive --format=tar --group nogroup:2097152 HEAD >/dev/null
+'
+
+test_done
diff --git a/t/t5005/parse-tar-file.py b/t/t5005/parse-tar-file.py
new file mode 100755
index 000000000..7d03b55d4
--- /dev/null
+++ b/t/t5005/parse-tar-file.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+import sys
+import getopt
+import tarfile
+
+optlist, args = getopt.getopt( sys.argv[1:], "", [
+                               "print=", "show=",
+                               "uniq",
+                               "fail-if-multi",
+                ] )
+
+infos_to_print = []
+uniq = False
+fail_if_multi = False
+
+for opt in optlist:
+    if opt[0] == "--print":
+        infos_to_print.append(opt[1])
+    elif opt[0] == "--show":
+        infos_to_print.append(opt[1])
+    elif opt[0] == "--uniq":
+        uniq = True
+    elif opt[0] == "--fail-if-multi":
+        uniq = True
+        fail_if_multi = True
+
+if len(infos_to_print) == 0:
+    infos_to_print = ["uid", "gid", "uname", "gname", "name"]
+
+if len(args) > 0:
+    tar = tarfile.open( path=args[0], mode="r|" )
+else:
+    tar = tarfile.open( mode="r|", fileobj=sys.stdin )
+
+out_lines = []
+for tarinfo in tar:
+    infos = []
+    for info_tag in infos_to_print:
+        if info_tag == "uid":
+            infos.append( str(tarinfo.uid) )
+        elif info_tag == "gid":
+            infos.append( str(tarinfo.gid) )
+        elif info_tag == "uname" or info_tag == "owner":
+            infos.append( tarinfo.uname )
+        elif info_tag == "gname" or info_tag == "group":
+            infos.append( tarinfo.gname )
+        elif info_tag == "name" or info_tag == "pathname":
+            infos.append( tarinfo.name )
+        out_lines.append( "\t".join(infos) )
+tar.close()
+
+if uniq:
+    out_lines = list(set(out_lines))
+    if fail_if_multi and (len(out_lines) > 1):
+        sys.stderr.write("*** not unique value, " + str(len(out_lines)) + " values found\n")
+        sys.exit(len(out_lines))
+
+for line in out_lines:
+    print line
diff --git a/tar.h b/tar.h
index 3467705e9..1e6b57470 100644
--- a/tar.h
+++ b/tar.h
@@ -5,6 +5,8 @@
 #define TYPEFLAG_GLOBAL_HEADER	'g'
 #define TYPEFLAG_EXT_HEADER	'x'
 
+#define MAX_ID_IN_TAR_US	0x1FFFFFUL
+
 struct ustar_header {
 	char name[100];		/*   0 */
 	char mode[8];		/* 100 */
-- 
2.11.0



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-archive: accept --owner and --group like GNU tar
  2017-12-29 14:05 [PATCH] git-archive: accept --owner and --group like GNU tar suzuki toshiya
@ 2018-01-01 18:29 ` René Scharfe
  2018-01-02  0:32   ` Perry Hutchison
       [not found] ` <df39f62558314cf6a9d9df3e23f31dd8@OS2PR01MB1147.jpnprd01.prod.outlook.com>
  1 sibling, 1 reply; 13+ messages in thread
From: René Scharfe @ 2018-01-01 18:29 UTC (permalink / raw)
  To: suzuki toshiya, git

Am 29.12.2017 um 15:05 schrieb suzuki toshiya:
> The ownership of files created by git-archive is always
> root:root. Add --owner and --group options which work
> like the GNU tar equivalent to allow overriding these
> defaults.

In which situations do you use the new options?

(The sender would need to know the names and/or IDs on the receiving
end.  And the receiver would need to be root to set both IDs, or be a
group member to set the group ID; I guess the latter is more common.)

> Signed-off-by: suzuki toshiya <mpsuzuki@hiroshima-u.ac.jp>
> ---
>   Documentation/git-archive.txt |  13 +++
>   archive-tar.c                 |   8 +-
>   archive.c                     | 224 ++++++++++++++++++++++++++++++++++++++++++
>   archive.h                     |   4 +
>   t/t5005-archive-uid-gid.sh    | 140 ++++++++++++++++++++++++++
>   t/t5005/parse-tar-file.py     |  60 +++++++++++
>   tar.h                         |   2 +
>   7 files changed, 447 insertions(+), 4 deletions(-)
>   create mode 100755 t/t5005-archive-uid-gid.sh
>   create mode 100755 t/t5005/parse-tar-file.py

Would it make sense to support the new options for ZIP files as well?

> diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
> index cfa1e4ebe..0d156f6c1 100644
> --- a/Documentation/git-archive.txt
> +++ b/Documentation/git-archive.txt
> @@ -11,6 +11,7 @@ SYNOPSIS
>   [verse]
>   'git archive' [--format=<fmt>] [--list] [--prefix=<prefix>/] [<extra>]
>   	      [-o <file> | --output=<file>] [--worktree-attributes]
> +	      [--owner [username[:uid]] [--group [groupname[:gid]]
>   	      [--remote=<repo> [--exec=<git-upload-archive>]] <tree-ish>
>   	      [<path>...]
>   
> @@ -63,6 +64,18 @@ OPTIONS
>   	This can be any options that the archiver backend understands.
>   	See next section.
>   
> +--owner=<name>[:<uid>]::
> +	Force <name> as owner and <uid> as uid for the files in the tar
> +	archive.  If <uid> is not supplied, <name> can be either a user
> +	name or numeric UID.  In this case the missing part (UID or
> +	name) will be inferred from the current host's user database.
> +
> +--group=<name>[:<gid>]::
> +	Force <name> as group and <gid> as gid for the files in the tar
> +	archive.  If <gid> is not supplied, <name> can be either a group
> +	name or numeric GID.  In this case the missing part (GID or
> +	name) will be inferred from the current host's group database.
> +

IIUC the default behavior is kept, i.e. without these options the
archive entries appear to be owned by root:root.  I think it's a good
idea to mention this here.

bsdtar has --uname, --uid, --gname, and -gid, which seem simpler.  At
least you could use OPT_STRING and OPT_INTEGER with them (plus a range
check).  And they should be easier to explain.

> diff --git a/archive.c b/archive.c
> index 0b7b62af0..aa4b16b75 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -8,6 +8,7 @@
>   #include "parse-options.h"
>   #include "unpack-trees.h"
>   #include "dir.h"
> +#include "tar.h"
>   
>   static char const * const archive_usage[] = {
>   	N_("git archive [<options>] <tree-ish> [<path>...]"),
> @@ -417,6 +418,223 @@ static void parse_treeish_arg(const char **argv,
>   	{ OPTION_SET_INT, (s), NULL, (v), NULL, "", \
>   	  PARSE_OPT_NOARG | PARSE_OPT_NONEG | PARSE_OPT_HIDDEN, NULL, (p) }
>   
> +/*
> + * GNU tar --owner, --group options reject hexdigit, signed int values.
> + * strtol(), atoi() are too permissive to simulate the behaviour.
> + */
> +#define STR_IS_DIGIT_OK 0
> +#define STR_IS_NOT_DIGIT -1
> +#define STR_IS_DIGIT_TOO_LARGE -2
> +
> +static int try_as_simple_digit(const char *s, unsigned long *dst)
> +{
> +	unsigned long ul;
> +	char *endptr;
> +
> +	if (strlen(s) != strspn(s, "0123456789"))
> +		return STR_IS_NOT_DIGIT;
> +
> +	errno = 0;
> +	ul = strtoul(s, &endptr, 10);
> +
> +	/* catch ERANGE */
> +	if (errno) {
> +		errno = 0;
> +		return STR_IS_DIGIT_TOO_LARGE;
> +	}
> +
> +#if ULONG_MAX > 0xFFFFFFFFUL
> +	/*
> +	 * --owner, --group rejects uid/gid greater than 32-bit
> +	 * limits, even on 64-bit platforms.
> +	 */
> +	if (ul > 0xFFFFFFFFUL)
> +		return STR_IS_DIGIT_TOO_LARGE;
> +#endif

The #if is not really necessary, is it?  Compilers should be able to
optimize the conditional out on 32-bit platforms.

> +static int set_args_uname_uid(struct archiver_args *args,
> +		const char *tar_owner)
> +{
> +	int r;
> +	struct passwd *pw = NULL;
> +
> +	if (!args || !tar_owner)
> +		return NAME_ID_ERR_PARAMS;
> +
> +	r = try_as_name_colon_digit(tar_owner, &(args->uname),
> +				    &(args->uid));
> +	switch (r) {
> +	case STR_IS_NAME_COLON_DIGIT:
> +		return NAME_ID_BOTH_GIVEN;
> +	case STR_HAS_DIGIT_TOO_LARGE:
> +		return NAME_ID_ERR_ID_TOO_LARGE;
> +	case STR_HAS_DIGIT_BROKEN:
> +		return NAME_ID_ERR_SYNTAX;
> +	}
> +
> +	/* the operand is known to be single token */
> +
> +	r = try_as_simple_digit(tar_owner, &(args->uid));
> +	switch (r) {
> +	case STR_IS_DIGIT_TOO_LARGE:
> +		return NAME_ID_ERR_ID_TOO_LARGE;
> +	case STR_IS_DIGIT_OK:
> +		pw = getpwuid(args->uid);
> +		if (!pw) {
> +			args->uname = xstrdup("");
> +			return NAME_ID_NAME_EMPTY;
> +		}
> +		args->uname = xstrdup(pw->pw_name);
> +		return NAME_ID_NAME_GUESSED;
> +	}
> +
> +	/* the operand is known to be non-digit */
> +
> +	args->uname = xstrdup(tar_owner);
> +	pw = getpwnam(tar_owner);
> +	if (!pw)
> +		return NAME_ID_ID_UNTOUCHED;
> +	args->uid = pw->pw_uid;
> +	return NAME_ID_ID_GUESSED;
> +}

How well does this work on Windows?  In daemon.c we avoid calling
getpwnam(3), getgrnam(3) etc. if NO_POSIX_GOODIES is not defined.

> diff --git a/t/t5005-archive-uid-gid.sh b/t/t5005-archive-uid-gid.sh
> new file mode 100755
> index 000000000..c5e08d890
> --- /dev/null
> +++ b/t/t5005-archive-uid-gid.sh
> @@ -0,0 +1,140 @@
> +#!/bin/sh
> +
> +test_description='test --owner --group options for git-archive'
> +. ./test-lib.sh
> +
> +test_expect_success 'create commit with a few empty files' '
> +	git init . 1>/dev/null 2>/dev/null &&
> +	touch uid-gid-test.001 &&
> +	mkdir uid-gid-test.002 &&
> +	mkdir uid-gid-test.002/uid-gid-test.003 &&
> +	git add uid-gid-test.001 &&
> +	git add uid-gid-test.002 &&
> +	git add uid-gid-test.002/uid-gid-test.003 &&
> +	git commit -m "uid-gid-test" 2>/dev/null 1>/dev/null
> +'
> +
> +check_uid_gid_uname_gname_in_tar() {
> +	# $1 tar pathname
> +	# $2 uid (digit in string)
> +	# $3 gid (digit in string)
> +	# $4 uname (string)
> +	# $5 gname (string)
> +	uid=`python "$TEST_DIRECTORY"/t5005/parse-tar-file.py --print=uid --fail-if-multi $1`
> +	if test $? != 0 -o x"${uid}" != "x"$2
> +	then
> +		echo "(some) uid differs from the specified value"
> +		return $?
> +	fi
> +
> +	gid=`python "$TEST_DIRECTORY"/t5005/parse-tar-file.py --print=gid --fail-if-multi $1`
> +	if test $? != 0 -o x"${gid}" != "x"$3
> +	then
> +		echo "(some) gid differs from the specified value"
> +		return $?
> +	fi
> +
> +	uname=`python "$TEST_DIRECTORY"/t5005/parse-tar-file.py --print=uname --fail-if-multi $1`
> +	if test $? != 0 -o x"${uname}" != "x"$4
> +	then
> +		echo "(some) uname differs from the specified value"
> +		return $?
> +	fi
> +
> +	gname=`python "$TEST_DIRECTORY"/t5005/parse-tar-file.py --print=gname --fail-if-multi $1`
> +	if test $? != 0 -o x"${gname}" != "x"$5
> +	then
> +		echo "(some) gname differs from the specified value"
> +		return $?
> +	fi
> +
> +	return 0
> +}

GNU tar and bsdtar show the names of owner and group with -t -v at
least, albeit in slightly different formats.  Can this help avoid
parsing the archive on our own?

But getting a short program like zipdetails for tar would be nice as
well of course. :)

René

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-archive: accept --owner and --group like GNU tar
  2018-01-01 18:29 ` René Scharfe
@ 2018-01-02  0:32   ` Perry Hutchison
  2018-01-04  0:43     ` René Scharfe
  0 siblings, 1 reply; 13+ messages in thread
From: Perry Hutchison @ 2018-01-02  0:32 UTC (permalink / raw)
  To: l.s.r; +Cc: git, mpsuzuki

Ren?? Scharfe <l.s.r@web.de> wrote:
> Am 29.12.2017 um 15:05 schrieb suzuki toshiya:
> > The ownership of files created by git-archive is always
> > root:root. Add --owner and --group options which work
> > like the GNU tar equivalent to allow overriding these
> > defaults.
> ... the receiver would need to be root to set both IDs, or be a
> group member to set the group ID; I guess the latter is more common.

If the received files are owned by root:root as stated, I guess the
receiver must be running as root, no?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-archive: accept --owner and --group like GNU tar
       [not found] ` <df39f62558314cf6a9d9df3e23f31dd8@OS2PR01MB1147.jpnprd01.prod.outlook.com>
@ 2018-01-02  6:58   ` suzuki toshiya
  2018-01-02 21:36     ` René Scharfe
       [not found]     ` <59a1fc058278463996ed68c970a5e08a@OS2PR01MB1147.jpnprd01.prod.outlook.com>
  0 siblings, 2 replies; 13+ messages in thread
From: suzuki toshiya @ 2018-01-02  6:58 UTC (permalink / raw)
  To: René Scharfe; +Cc: git@vger.kernel.org

Dear René ,

René Scharfe wrote:
> Am 29.12.2017 um 15:05 schrieb suzuki toshiya:
>> The ownership of files created by git-archive is always
>> root:root. Add --owner and --group options which work
>> like the GNU tar equivalent to allow overriding these
>> defaults.
> 
> In which situations do you use the new options?
> 
> (The sender would need to know the names and/or IDs on the receiving
> end.  And the receiver would need to be root to set both IDs, or be a
> group member to set the group ID; I guess the latter is more common.)

Thank you for asking the background.

In the case that additional contents are appended to the tar file
generated by git-archive, the part by git-archive and the part
appended by common tar would have different UID/GID, because common
tar preserves the UID/GID of the original files.

Of cource, both of GNU tar and bsdtar have the options to set
UID/GID manually, but their syntax are different.

In the recent source package of poppler (poppler.freedesktop.org),
there are 2 sets of UID/GIDs are found:
https://poppler.freedesktop.org/poppler-0.62.0.tar.xz

I've discussed with the maintainers of poppler, and there was a
suggestion to propose a feature to git.
https://lists.freedesktop.org/archives/poppler/2017-December/012739.html

So now I'm trying.

> Would it make sense to support the new options for ZIP files as well?

I was not aware of the availability of UID/GID in pkzip file format...
Oh, checking APPNOTE.TXT ( 
https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT ),
there is a storage! (see 4.5.7-Unix Extra Field). But it seems
that current git-archive emits pkzip without the field.

The background why I propose the options for tar format was described
in above. Similar things are hoped by pkzip users? If it's required,
I will try.

>> +--owner=<name>[:<uid>]::
>> +	Force <name> as owner and <uid> as uid for the files in the tar
>> +	archive.  If <uid> is not supplied, <name> can be either a user
>> +	name or numeric UID.  In this case the missing part (UID or
>> +	name) will be inferred from the current host's user database.
>> +
>> +--group=<name>[:<gid>]::
>> +	Force <name> as group and <gid> as gid for the files in the tar
>> +	archive.  If <gid> is not supplied, <name> can be either a group
>> +	name or numeric GID.  In this case the missing part (GID or
>> +	name) will be inferred from the current host's group database.
>> +
> 
> IIUC the default behavior is kept, i.e. without these options the
> archive entries appear to be owned by root:root.  I think it's a good
> idea to mention this here.

Indeed. The default behaviour of git-archive without these options
(root:root) would be different from that of (common) tar (preserving
uid/gid of the files to be archived), it should be clarified.

> bsdtar has --uname, --uid, --gname, and -gid, which seem simpler.  At
> least you could use OPT_STRING and OPT_INTEGER with them (plus a range
> check).  And they should be easier to explain.

Thank you very much for proposing good alternative. Indeed, such well-
separated options make the code simple & stable. However, according
to the manual search systems of FreeBSD ( https://www.freebsd.org/cgi/man.cgi ),
the options for such functionalities are not always same.

FreeBSD 8.2 and earlier: --uname, --gname, --uid, --gid are unavailable.
it seems that using "mtree" was the preferred way to specify such).

FreeBSD 8.3 and later: --uname, --gname, --uid, --gid are available.
the manual says follows:

      --uid id
	     Use the provided user id number and ignore	the user name from the
	     archive.  On create, if --uname is	not also specified, the	user
	     name will be set to match the user	id.

      --uname name
	     Use the provided user name.  On extract, this overrides the user
	     name in the archive; if the provided user name does not exist on
	     the system, it will be ignored and	the user id (from the archive
	     or	from the --uid option) will be used instead.  On create, this
	     sets the user name	that will be stored in the archive; the	name
	     is	not verified against the system	user database.

Thus, to emulate (post 2012-) bsdtar perfectly, getpwnam(), getpwuid() etc
would be still needed to implement "--uid" X-(.

Tracking the history of bsdtar, maybe I should track the history of GNU
tar. According to ChangeLog, even --owner --group are rather newer option
since 1.13.18 (released on 2000-10-29). The original syntax was like this.

`--owner=USER'
      Specifies that `tar' should use USER as the owner of members when
      creating archives, instead of the user associated with the source
      file.  USER is first decoded as a user symbolic name, but if this
      interpretation fails, it has to be a decimal numeric user ID.

      There is no value indicating a missing number, and `0' usually
      means `root'.  Some people like to force `0' as the value to offer
      in their distributions for the owner of files, because the `root'
      user is anonymous anyway, so that might as well be the owner of
      anonymous archives.

      This option does not affect extraction from archives.

Oh, there is no colon separated syntax! According to ChangeLog, the
introduction of colon separated syntax was on 2011-08-13 and
released as GNU tar-1.27 (2013-10-06).

`--owner=USER'
      Specifies that `tar' should use USER as the owner of members when
      creating archives, instead of the user associated with the source
      file.  USER can specify a symbolic name, or a numeric ID, or both
      as NAME:ID.  *Note override::.

      This option does not affect extraction from archives.

Comparing the original --owner and current --owner description, a
strange point is that the original description says "USER is first
decoded as a user symbolic name, but if this interpretation fails,
it has to be a decimal numeric user ID." What? It seems that
"checking whether the specified username is known by the host system
and its numerical uid is resolvable - if unresolvable, try to
parse as decimal value - if failed, take it as fatal error". Here
I quote the related part.

tar-1.14/src/names.c

     119 /* Given UNAME, set the corresponding UID and return 1, or else, return 
0.  */
     120 int
     121 uname_to_uid (char const *uname, uid_t *uidp)
     122 {
     123   struct passwd *passwd;
     124
     125   if (cached_no_such_uname
     126       && strcmp (uname, cached_no_such_uname) == 0)
     127     return 0;
     128
     129   if (!cached_uname
     130       || uname[0] != cached_uname[0]
     131       || strcmp (uname, cached_uname) != 0)
     132     {
     133       passwd = getpwnam (uname);
     134       if (passwd)
     135         {
     136           cached_uid = passwd->pw_uid;
     137           assign_string (&cached_uname, passwd->pw_name);
     138         }
     139       else
     140         {
     141           assign_string (&cached_no_such_uname, uname);
     142           return 0;
     143         }
     144     }
     145   *uidp = cached_uid;
     146   return 1;
     147 }   1087       case OWNER_OPTION:

tar-1.14/src/tar.c
    1088         if (! (strlen (optarg) < UNAME_FIELD_SIZE
    1089                && uname_to_uid (optarg, &owner_option)))
    1090           {
    1091             uintmax_t u;
    1092             if (xstrtoumax (optarg, 0, 10, &u, "") == LONGINT_OK
    1093                 && u == (uid_t) u)
    1094               owner_option = u;
    1095             else
    1096               FATAL_ERROR ((0, 0, "%s: %s", quotearg_colon (optarg),
    1097                             _("Invalid owner")));
    1098           }
    1099         break;

In summary, there are following types.

a) older GNU tar
--owner must match with the host database, no option to set
uname & uid separately.

b) newer GNU tar
--owner accepts unknown username and/or uid.
if only one part is given and known by the host system,
the missing part is deduced by it.
if only one part is given and unknown by the host system,
the missing part is unchanged from the file to be archived.

c) newer bsd tar
--uname/--uid accept unknown username and/or uid.
username is just used to override uname entry of the archive,
but uid is used to override both of uid and uname entries,
if uid is known and username is not specified.
If uid is unknown, uid is overriden, but the username entry
is unchanged from the file to be archived.

which behaviour is to be simulated? I want to propose
yet another one, similar to c) but incompatble.

d) --uname, --uid, --gname, --gid check only the syntax
(to kick the username starting with digit, non-digit
uid, etc) and no check for known/unknown.

>> +#if ULONG_MAX > 0xFFFFFFFFUL
>> +	/*
>> +	 * --owner, --group rejects uid/gid greater than 32-bit
>> +	 * limits, even on 64-bit platforms.
>> +	 */
>> +	if (ul > 0xFFFFFFFFUL)
>> +		return STR_IS_DIGIT_TOO_LARGE;
>> +#endif
> 
> The #if is not really necessary, is it?  Compilers should be able to
> optimize the conditional out on 32-bit platforms.

Thanks for finding this, I'm glad to have a chance to ask a
question; git is not needed to care for 16-bit platforms?

>> +	/* the operand is known to be non-digit */
>> +
>> +	args->uname = xstrdup(tar_owner);
>> +	pw = getpwnam(tar_owner);
> 
> How well does this work on Windows?  In daemon.c we avoid calling
> getpwnam(3), getgrnam(3) etc. if NO_POSIX_GOODIES is not defined.

OK, I can enclose them by ifdefs of NO_POSIX_GOODIES. But,
maybe the design the options would be discussed for first.

Both of latest GNU and BSD tar call getpwnam() or getpwuid(),
but designing as all of --uname --uid --gname --gid as "only syntax
is checked (non-digit UID/GID should be refused), but known/unknown
is not checked" would be the most portable.

> GNU tar and bsdtar show the names of owner and group with -t -v at
> least, albeit in slightly different formats.  Can this help avoid
> parsing the archive on our own?

Yeah, writing yet another tar archive parser in C, to avoid the additional
dependency to Python or newer Perl (Archive::Tar since perl-5.10), is
painful, I feel (not only for me but also for the maintainers).
If tar command itself works well, it would be the best.

But, I'm not sure whether the format of "tar tv" output is stably
standardized. It's the reason why I wrote Python tool. If I execute
git-archive with sufficently long randomized username & uid in
several times, it would be good test?

> But getting a short program like zipdetails for tar would be nice as
> well of course. :)

I wrote something in C:
https://github.com/mpsuzuki/git/blob/pullreq-20171227-c/t/helper/test-parse-tar-file.c

but if somebody wants the support of other tar variants,
he/she would have some headache :-)

Regards,
mpsuzuki


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-archive: accept --owner and --group like GNU tar
  2018-01-02  6:58   ` suzuki toshiya
@ 2018-01-02 21:36     ` René Scharfe
       [not found]     ` <59a1fc058278463996ed68c970a5e08a@OS2PR01MB1147.jpnprd01.prod.outlook.com>
  1 sibling, 0 replies; 13+ messages in thread
From: René Scharfe @ 2018-01-02 21:36 UTC (permalink / raw)
  To: suzuki toshiya; +Cc: git@vger.kernel.org

Am 02.01.2018 um 07:58 schrieb suzuki toshiya:
> Dear René ,
> 
> René Scharfe wrote:
>> Am 29.12.2017 um 15:05 schrieb suzuki toshiya:
>>> The ownership of files created by git-archive is always
>>> root:root. Add --owner and --group options which work
>>> like the GNU tar equivalent to allow overriding these
>>> defaults.
>>
>> In which situations do you use the new options?
>>
>> (The sender would need to know the names and/or IDs on the receiving
>> end.  And the receiver would need to be root to set both IDs, or be a
>> group member to set the group ID; I guess the latter is more common.)
> 
> Thank you for asking the background.
> 
> In the case that additional contents are appended to the tar file
> generated by git-archive, the part by git-archive and the part
> appended by common tar would have different UID/GID, because common
> tar preserves the UID/GID of the original files.
> 
> Of cource, both of GNU tar and bsdtar have the options to set
> UID/GID manually, but their syntax are different.
> 
> In the recent source package of poppler (poppler.freedesktop.org),
> there are 2 sets of UID/GIDs are found:
> https://poppler.freedesktop.org/poppler-0.62.0.tar.xz
> 
> I've discussed with the maintainers of poppler, and there was a
> suggestion to propose a feature to git.
> https://lists.freedesktop.org/archives/poppler/2017-December/012739.html

So this is in the context of generating release tarballs that contain
untracked files as well.  That's done in Git's own Makefile, too:

  dist: git-archive$(X) configure
          ./git-archive --format=tar \
                  --prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar
          @mkdir -p $(GIT_TARNAME)
          @cp configure $(GIT_TARNAME)
          @echo $(GIT_VERSION) > $(GIT_TARNAME)/version
          @$(MAKE) -C git-gui TARDIR=../$(GIT_TARNAME)/git-gui dist-version
          $(TAR) rf $(GIT_TARNAME).tar \
                  $(GIT_TARNAME)/configure \
                  $(GIT_TARNAME)/version \
                  $(GIT_TARNAME)/git-gui/version
          @$(RM) -r $(GIT_TARNAME)
          gzip -f -9 $(GIT_TARNAME).tar

Having files with different owners and groups is a non-issue when
extracting with --no-same-owner, which is the default for regular users.
I assume this covers most use cases in the wild.

The generated archive leaks the IDs of the user preparing the archive in
the appended entries for untracked files.  I think that's more of a
concern.  Publishing a valid non-root username on your build system may
invite attackers.

Changing the build procedure to set owner and group to root as well as
UID and GID to zero seems like a better idea.  This is complicated by
the inconsistent command line options for GNU tar and bsdtar, as you
mentioned.

So how about making it possible to append untracked files using git
archive?  This could simplify the dist target for Git as well.  It's
orthogonal to adding the ability to explicitly specify owner and group,
but might suffice in most (all?) cases.

Not sure what kind of file name transformation abilities would be
needed and how to package them nicely.  The --transform option of GNU
tar with its sed replace expressions seems quite heavy for me.  With
poppler it's only used to add the --prefix string; I'd expect that to
be done for all appended files anyway.

Perhaps something like --append-file=<FILE> with no transformation
feature is already enough for most cases?

>> Would it make sense to support the new options for ZIP files as well?
> 
> I was not aware of the availability of UID/GID in pkzip file format...
> Oh, checking APPNOTE.TXT ( https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT ),
> there is a storage! (see 4.5.7-Unix Extra Field). But it seems
> that current git-archive emits pkzip without the field.

Indeed.  Git doesn't track owners and groups, so it doesn't make
sense to emit that kind of information so far.  If git archive grows
options to specify such meta-information then it should be supported
by all archive formats (or documented to be tar-specific).

However, the UNIX Extra Field in ZIP archives only allows to store
UID and GID (no names), which is useless unless the sender knows the
ID range of the receiver -- which is unlikely when distributing
software on the Internet.  And even then it won't work with Windows,
which has long Security Identifiers (SIDs) instead.

So these are more advantages for letting git archive append untracked
files: It's format-agnostic and more portable.


[snipped interesting history of security-related tar options]

Btw. I like how bsdtar has --insecure as a synonym for -p (preserve
file permissions when extracting).  It's a bit sad that this is still
the default for root, though.  OpenBSD cut that behavior out of their
tar almost 20 years ago.  (An evil tar archive could be used to fill
the quota of unsuspecting users, or add setuid executables.)

>>> +#if ULONG_MAX > 0xFFFFFFFFUL
>>> +    /*
>>> +     * --owner, --group rejects uid/gid greater than 32-bit
>>> +     * limits, even on 64-bit platforms.
>>> +     */
>>> +    if (ul > 0xFFFFFFFFUL)
>>> +        return STR_IS_DIGIT_TOO_LARGE;
>>> +#endif
>>
>> The #if is not really necessary, is it?  Compilers should be able to
>> optimize the conditional out on 32-bit platforms.
> 
> Thanks for finding this, I'm glad to have a chance to ask a
> question; git is not needed to care for 16-bit platforms?

I'm not aware of attempts to compile Git on 16-bit systems, but I
doubt it would be much fun to use the result anyway.  It couldn't
handle files bigger than 64KB for a start.

>> GNU tar and bsdtar show the names of owner and group with -t -v at
>> least, albeit in slightly different formats.  Can this help avoid
>> parsing the archive on our own?
> 
> Yeah, writing yet another tar archive parser in C, to avoid the additional
> dependency to Python or newer Perl (Archive::Tar since perl-5.10), is
> painful, I feel (not only for me but also for the maintainers).
> If tar command itself works well, it would be the best.
> 
> But, I'm not sure whether the format of "tar tv" output is stably
> standardized. It's the reason why I wrote Python tool. If I execute
> git-archive with sufficently long randomized username & uid in
> several times, it would be good test?

The append-untracked feature would avoid that issue as well.

A unique username doesn't have to be long or random if you control all
other strings that "tar tv" would show (in particular file names).  You
could e.g. use "UserName" and then simply grep for it.

Checking that git archive doesn't add unwanted characters is harder if
the format isn't strictly known -- how to reliably recognize it sneaking
in an extra space or slash?  Not sure if such a test is really needed,
though.

>> But getting a short program like zipdetails for tar would be nice as
>> well of course. :)
> 
> I wrote something in C:
> https://github.com/mpsuzuki/git/blob/pullreq-20171227-c/t/helper/test-parse-tar-file.c

OK.  (But the append-untracked feature could be tested without such a
helper. :)

Just some loose comments instead of a real review: You could get the
line count down by including Git's tar.h and parse-options.h
(Documentation/technical/api-parse-options.txt).

> but if somebody wants the support of other tar variants,
> he/she would have some headache :-)

Yeah, the history of tar and its various platform-specific formats
fills volumes, no doubt.

René

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-archive: accept --owner and --group like GNU tar
  2018-01-02  0:32   ` Perry Hutchison
@ 2018-01-04  0:43     ` René Scharfe
  0 siblings, 0 replies; 13+ messages in thread
From: René Scharfe @ 2018-01-04  0:43 UTC (permalink / raw)
  Cc: git, mpsuzuki

[replying only to the list because emails to perryh@pluto.rain.com
 are rejected by my mail server with the following error message:
 "Requested action not taken: mailbox unavailable
  invalid DNS MX or A/AAAA resource record."]

Am 02.01.2018 um 01:32 schrieb Perry Hutchison:
> Ren?? Scharfe <l.s.r@web.de> wrote:
>> Am 29.12.2017 um 15:05 schrieb suzuki toshiya:
>>> The ownership of files created by git-archive is always
>>> root:root. Add --owner and --group options which work
>>> like the GNU tar equivalent to allow overriding these
>>> defaults.
>> ... the receiver would need to be root to set both IDs, or be a
>> group member to set the group ID; I guess the latter is more common.
> 
> If the received files are owned by root:root as stated, I guess the
> receiver must be running as root, no?

That depends on what you mean with "must".  Users who want the files
they extract to be owned by root need root permissions on Unix and
Linux.  If they are OK with owning the files themselves then regular
user accounts suffice.  I assume the latter is much more common.

René

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-archive: accept --owner and --group like GNU tar
       [not found]     ` <59a1fc058278463996ed68c970a5e08a@OS2PR01MB1147.jpnprd01.prod.outlook.com>
@ 2018-01-04  1:29       ` suzuki toshiya
       [not found]       ` <955dae095d504b00b3e1c8a956ba852a@OS2PR01MB1147.jpnprd01.prod.outlook.com>
  1 sibling, 0 replies; 13+ messages in thread
From: suzuki toshiya @ 2018-01-04  1:29 UTC (permalink / raw)
  To: René Scharfe; +Cc: git@vger.kernel.org

Dear René ,

By overlooking your response, I was writing a patch to add
uid/gid into zip archive X-D (not finished yet)
https://github.com/mpsuzuki/git/tree/add-zip-uid-gid
However, I found that most unix platforms use infozip's
extension to store uid/gid instead of pkzip's extension...

> So this is in the context of generating release tarballs that contain
> untracked files as well.  That's done in Git's own Makefile, too:

Oh, I should check other software's tarball :-)

> The generated archive leaks the IDs of the user preparing the archive in
> the appended entries for untracked files.  I think that's more of a
> concern.  Publishing a valid non-root username on your build system may
> invite attackers.

Hmm, I was not aware of such security concern about the
tarball including the developers username.

> So how about making it possible to append untracked files using git
> archive?  This could simplify the dist target for Git as well.  It's
> orthogonal to adding the ability to explicitly specify owner and group,
> but might suffice in most (all?) cases.

Hmm, it could be reasonable to assume that --append-file
would serve more cases than --uid --gid options. There
might be many people who don't care multiple UID/GID in
the source tarball, but want to append some files to the
archive generated by git-archive. I would take a look how
to do that. A point I'm afraid is that some people may
request to pass the file listing the pathnames instead of
giving many --append-file options (and a few people could
want to have a built-in default list specified by GNU
convention :-)).

I want to hear other experts' comment; no need for me to
work "--uid" "--gid" anymore, and should I switch to
"--append-file" options?

Regards,
mpsuzuki

René Scharfe wrote:
> Am 02.01.2018 um 07:58 schrieb suzuki toshiya:
>> Dear René ,
>>
>> René Scharfe wrote:
>>> Am 29.12.2017 um 15:05 schrieb suzuki toshiya:
>>>> The ownership of files created by git-archive is always
>>>> root:root. Add --owner and --group options which work
>>>> like the GNU tar equivalent to allow overriding these
>>>> defaults.
>>> In which situations do you use the new options?
>>>
>>> (The sender would need to know the names and/or IDs on the receiving
>>> end.  And the receiver would need to be root to set both IDs, or be a
>>> group member to set the group ID; I guess the latter is more common.)
>> Thank you for asking the background.
>>
>> In the case that additional contents are appended to the tar file
>> generated by git-archive, the part by git-archive and the part
>> appended by common tar would have different UID/GID, because common
>> tar preserves the UID/GID of the original files.
>>
>> Of cource, both of GNU tar and bsdtar have the options to set
>> UID/GID manually, but their syntax are different.
>>
>> In the recent source package of poppler (poppler.freedesktop.org),
>> there are 2 sets of UID/GIDs are found:
>> https://poppler.freedesktop.org/poppler-0.62.0.tar.xz
>>
>> I've discussed with the maintainers of poppler, and there was a
>> suggestion to propose a feature to git.
>> https://lists.freedesktop.org/archives/poppler/2017-December/012739.html
> 
> So this is in the context of generating release tarballs that contain
> untracked files as well.  That's done in Git's own Makefile, too:
> 
>   dist: git-archive$(X) configure
>           ./git-archive --format=tar \
>                   --prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar
>           @mkdir -p $(GIT_TARNAME)
>           @cp configure $(GIT_TARNAME)
>           @echo $(GIT_VERSION) > $(GIT_TARNAME)/version
>           @$(MAKE) -C git-gui TARDIR=../$(GIT_TARNAME)/git-gui dist-version
>           $(TAR) rf $(GIT_TARNAME).tar \
>                   $(GIT_TARNAME)/configure \
>                   $(GIT_TARNAME)/version \
>                   $(GIT_TARNAME)/git-gui/version
>           @$(RM) -r $(GIT_TARNAME)
>           gzip -f -9 $(GIT_TARNAME).tar
> 
> Having files with different owners and groups is a non-issue when
> extracting with --no-same-owner, which is the default for regular users.
> I assume this covers most use cases in the wild.
> 
> The generated archive leaks the IDs of the user preparing the archive in
> the appended entries for untracked files.  I think that's more of a
> concern.  Publishing a valid non-root username on your build system may
> invite attackers.
> 
> Changing the build procedure to set owner and group to root as well as
> UID and GID to zero seems like a better idea.  This is complicated by
> the inconsistent command line options for GNU tar and bsdtar, as you
> mentioned.
> 
> So how about making it possible to append untracked files using git
> archive?  This could simplify the dist target for Git as well.  It's
> orthogonal to adding the ability to explicitly specify owner and group,
> but might suffice in most (all?) cases.
> 
> Not sure what kind of file name transformation abilities would be
> needed and how to package them nicely.  The --transform option of GNU
> tar with its sed replace expressions seems quite heavy for me.  With
> poppler it's only used to add the --prefix string; I'd expect that to
> be done for all appended files anyway.
> 
> Perhaps something like --append-file=<FILE> with no transformation
> feature is already enough for most cases?
> 
>>> Would it make sense to support the new options for ZIP files as well?
>> I was not aware of the availability of UID/GID in pkzip file format...
>> Oh, checking APPNOTE.TXT ( https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT ),
>> there is a storage! (see 4.5.7-Unix Extra Field). But it seems
>> that current git-archive emits pkzip without the field.
> 
> Indeed.  Git doesn't track owners and groups, so it doesn't make
> sense to emit that kind of information so far.  If git archive grows
> options to specify such meta-information then it should be supported
> by all archive formats (or documented to be tar-specific).
> 
> However, the UNIX Extra Field in ZIP archives only allows to store
> UID and GID (no names), which is useless unless the sender knows the
> ID range of the receiver -- which is unlikely when distributing
> software on the Internet.  And even then it won't work with Windows,
> which has long Security Identifiers (SIDs) instead.
> 
> So these are more advantages for letting git archive append untracked
> files: It's format-agnostic and more portable.
> 
> 
> [snipped interesting history of security-related tar options]
> 
> Btw. I like how bsdtar has --insecure as a synonym for -p (preserve
> file permissions when extracting).  It's a bit sad that this is still
> the default for root, though.  OpenBSD cut that behavior out of their
> tar almost 20 years ago.  (An evil tar archive could be used to fill
> the quota of unsuspecting users, or add setuid executables.)
> 
>>>> +#if ULONG_MAX > 0xFFFFFFFFUL
>>>> +    /*
>>>> +     * --owner, --group rejects uid/gid greater than 32-bit
>>>> +     * limits, even on 64-bit platforms.
>>>> +     */
>>>> +    if (ul > 0xFFFFFFFFUL)
>>>> +        return STR_IS_DIGIT_TOO_LARGE;
>>>> +#endif
>>> The #if is not really necessary, is it?  Compilers should be able to
>>> optimize the conditional out on 32-bit platforms.
>> Thanks for finding this, I'm glad to have a chance to ask a
>> question; git is not needed to care for 16-bit platforms?
> 
> I'm not aware of attempts to compile Git on 16-bit systems, but I
> doubt it would be much fun to use the result anyway.  It couldn't
> handle files bigger than 64KB for a start.
> 
>>> GNU tar and bsdtar show the names of owner and group with -t -v at
>>> least, albeit in slightly different formats.  Can this help avoid
>>> parsing the archive on our own?
>> Yeah, writing yet another tar archive parser in C, to avoid the additional
>> dependency to Python or newer Perl (Archive::Tar since perl-5.10), is
>> painful, I feel (not only for me but also for the maintainers).
>> If tar command itself works well, it would be the best.
>>
>> But, I'm not sure whether the format of "tar tv" output is stably
>> standardized. It's the reason why I wrote Python tool. If I execute
>> git-archive with sufficently long randomized username & uid in
>> several times, it would be good test?
> 
> The append-untracked feature would avoid that issue as well.
> 
> A unique username doesn't have to be long or random if you control all
> other strings that "tar tv" would show (in particular file names).  You
> could e.g. use "UserName" and then simply grep for it.
> 
> Checking that git archive doesn't add unwanted characters is harder if
> the format isn't strictly known -- how to reliably recognize it sneaking
> in an extra space or slash?  Not sure if such a test is really needed,
> though.
> 
>>> But getting a short program like zipdetails for tar would be nice as
>>> well of course. :)
>> I wrote something in C:
>> https://github.com/mpsuzuki/git/blob/pullreq-20171227-c/t/helper/test-parse-tar-file.c
> 
> OK.  (But the append-untracked feature could be tested without such a
> helper. :)
> 
> Just some loose comments instead of a real review: You could get the
> line count down by including Git's tar.h and parse-options.h
> (Documentation/technical/api-parse-options.txt).
> 
>> but if somebody wants the support of other tar variants,
>> he/she would have some headache :-)
> 
> Yeah, the history of tar and its various platform-specific formats
> fills volumes, no doubt.
> 
> René
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-archive: accept --owner and --group like GNU tar
       [not found]       ` <955dae095d504b00b3e1c8a956ba852a@OS2PR01MB1147.jpnprd01.prod.outlook.com>
@ 2018-01-04  2:25         ` suzuki toshiya
  2018-01-04 16:59           ` René Scharfe
  0 siblings, 1 reply; 13+ messages in thread
From: suzuki toshiya @ 2018-01-04  2:25 UTC (permalink / raw)
  To: René Scharfe; +Cc: git@vger.kernel.org

Hi,

> Hmm, it could be reasonable to assume that --append-file
> would serve more cases than --uid --gid options. There
> might be many people who don't care multiple UID/GID in
> the source tarball, but want to append some files to the
> archive generated by git-archive. I would take a look how
> to do that. A point I'm afraid is that some people may
> request to pass the file listing the pathnames instead of
> giving many --append-file options (and a few people could
> want to have a built-in default list specified by GNU
> convention :-)).

Taking a glance on parse-options.h, I could not find the
existing class collecting the operands as an array (or
linked list) from multiple "--xxx=yyy" options. Similar
things might be the collecting the pathnames to pathspec
structure. Should I write something with OPTION_CALLBACK?

Regards,
mpsuzuki

suzuki toshiya wrote:
> Dear René ,
> 
> By overlooking your response, I was writing a patch to add
> uid/gid into zip archive X-D (not finished yet)
> https://github.com/mpsuzuki/git/tree/add-zip-uid-gid
> However, I found that most unix platforms use infozip's
> extension to store uid/gid instead of pkzip's extension...
> 
>> So this is in the context of generating release tarballs that contain
>> untracked files as well.  That's done in Git's own Makefile, too:
> 
> Oh, I should check other software's tarball :-)
> 
>> The generated archive leaks the IDs of the user preparing the archive in
>> the appended entries for untracked files.  I think that's more of a
>> concern.  Publishing a valid non-root username on your build system may
>> invite attackers.
> 
> Hmm, I was not aware of such security concern about the
> tarball including the developers username.
> 
>> So how about making it possible to append untracked files using git
>> archive?  This could simplify the dist target for Git as well.  It's
>> orthogonal to adding the ability to explicitly specify owner and group,
>> but might suffice in most (all?) cases.
> 
> Hmm, it could be reasonable to assume that --append-file
> would serve more cases than --uid --gid options. There
> might be many people who don't care multiple UID/GID in
> the source tarball, but want to append some files to the
> archive generated by git-archive. I would take a look how
> to do that. A point I'm afraid is that some people may
> request to pass the file listing the pathnames instead of
> giving many --append-file options (and a few people could
> want to have a built-in default list specified by GNU
> convention :-)).
> 
> I want to hear other experts' comment; no need for me to
> work "--uid" "--gid" anymore, and should I switch to
> "--append-file" options?
> 
> Regards,
> mpsuzuki
> 
> René Scharfe wrote:
>> Am 02.01.2018 um 07:58 schrieb suzuki toshiya:
>>> Dear René ,
>>>
>>> René Scharfe wrote:
>>>> Am 29.12.2017 um 15:05 schrieb suzuki toshiya:
>>>>> The ownership of files created by git-archive is always
>>>>> root:root. Add --owner and --group options which work
>>>>> like the GNU tar equivalent to allow overriding these
>>>>> defaults.
>>>> In which situations do you use the new options?
>>>>
>>>> (The sender would need to know the names and/or IDs on the receiving
>>>> end.  And the receiver would need to be root to set both IDs, or be a
>>>> group member to set the group ID; I guess the latter is more common.)
>>> Thank you for asking the background.
>>>
>>> In the case that additional contents are appended to the tar file
>>> generated by git-archive, the part by git-archive and the part
>>> appended by common tar would have different UID/GID, because common
>>> tar preserves the UID/GID of the original files.
>>>
>>> Of cource, both of GNU tar and bsdtar have the options to set
>>> UID/GID manually, but their syntax are different.
>>>
>>> In the recent source package of poppler (poppler.freedesktop.org),
>>> there are 2 sets of UID/GIDs are found:
>>> https://poppler.freedesktop.org/poppler-0.62.0.tar.xz
>>>
>>> I've discussed with the maintainers of poppler, and there was a
>>> suggestion to propose a feature to git.
>>> https://lists.freedesktop.org/archives/poppler/2017-December/012739.html
>> So this is in the context of generating release tarballs that contain
>> untracked files as well.  That's done in Git's own Makefile, too:
>>
>>   dist: git-archive$(X) configure
>>           ./git-archive --format=tar \
>>                   --prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar
>>           @mkdir -p $(GIT_TARNAME)
>>           @cp configure $(GIT_TARNAME)
>>           @echo $(GIT_VERSION) > $(GIT_TARNAME)/version
>>           @$(MAKE) -C git-gui TARDIR=../$(GIT_TARNAME)/git-gui dist-version
>>           $(TAR) rf $(GIT_TARNAME).tar \
>>                   $(GIT_TARNAME)/configure \
>>                   $(GIT_TARNAME)/version \
>>                   $(GIT_TARNAME)/git-gui/version
>>           @$(RM) -r $(GIT_TARNAME)
>>           gzip -f -9 $(GIT_TARNAME).tar
>>
>> Having files with different owners and groups is a non-issue when
>> extracting with --no-same-owner, which is the default for regular users.
>> I assume this covers most use cases in the wild.
>>
>> The generated archive leaks the IDs of the user preparing the archive in
>> the appended entries for untracked files.  I think that's more of a
>> concern.  Publishing a valid non-root username on your build system may
>> invite attackers.
>>
>> Changing the build procedure to set owner and group to root as well as
>> UID and GID to zero seems like a better idea.  This is complicated by
>> the inconsistent command line options for GNU tar and bsdtar, as you
>> mentioned.
>>
>> So how about making it possible to append untracked files using git
>> archive?  This could simplify the dist target for Git as well.  It's
>> orthogonal to adding the ability to explicitly specify owner and group,
>> but might suffice in most (all?) cases.
>>
>> Not sure what kind of file name transformation abilities would be
>> needed and how to package them nicely.  The --transform option of GNU
>> tar with its sed replace expressions seems quite heavy for me.  With
>> poppler it's only used to add the --prefix string; I'd expect that to
>> be done for all appended files anyway.
>>
>> Perhaps something like --append-file=<FILE> with no transformation
>> feature is already enough for most cases?
>>
>>>> Would it make sense to support the new options for ZIP files as well?
>>> I was not aware of the availability of UID/GID in pkzip file format...
>>> Oh, checking APPNOTE.TXT ( https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT ),
>>> there is a storage! (see 4.5.7-Unix Extra Field). But it seems
>>> that current git-archive emits pkzip without the field.
>> Indeed.  Git doesn't track owners and groups, so it doesn't make
>> sense to emit that kind of information so far.  If git archive grows
>> options to specify such meta-information then it should be supported
>> by all archive formats (or documented to be tar-specific).
>>
>> However, the UNIX Extra Field in ZIP archives only allows to store
>> UID and GID (no names), which is useless unless the sender knows the
>> ID range of the receiver -- which is unlikely when distributing
>> software on the Internet.  And even then it won't work with Windows,
>> which has long Security Identifiers (SIDs) instead.
>>
>> So these are more advantages for letting git archive append untracked
>> files: It's format-agnostic and more portable.
>>
>>
>> [snipped interesting history of security-related tar options]
>>
>> Btw. I like how bsdtar has --insecure as a synonym for -p (preserve
>> file permissions when extracting).  It's a bit sad that this is still
>> the default for root, though.  OpenBSD cut that behavior out of their
>> tar almost 20 years ago.  (An evil tar archive could be used to fill
>> the quota of unsuspecting users, or add setuid executables.)
>>
>>>>> +#if ULONG_MAX > 0xFFFFFFFFUL
>>>>> +    /*
>>>>> +     * --owner, --group rejects uid/gid greater than 32-bit
>>>>> +     * limits, even on 64-bit platforms.
>>>>> +     */
>>>>> +    if (ul > 0xFFFFFFFFUL)
>>>>> +        return STR_IS_DIGIT_TOO_LARGE;
>>>>> +#endif
>>>> The #if is not really necessary, is it?  Compilers should be able to
>>>> optimize the conditional out on 32-bit platforms.
>>> Thanks for finding this, I'm glad to have a chance to ask a
>>> question; git is not needed to care for 16-bit platforms?
>> I'm not aware of attempts to compile Git on 16-bit systems, but I
>> doubt it would be much fun to use the result anyway.  It couldn't
>> handle files bigger than 64KB for a start.
>>
>>>> GNU tar and bsdtar show the names of owner and group with -t -v at
>>>> least, albeit in slightly different formats.  Can this help avoid
>>>> parsing the archive on our own?
>>> Yeah, writing yet another tar archive parser in C, to avoid the additional
>>> dependency to Python or newer Perl (Archive::Tar since perl-5.10), is
>>> painful, I feel (not only for me but also for the maintainers).
>>> If tar command itself works well, it would be the best.
>>>
>>> But, I'm not sure whether the format of "tar tv" output is stably
>>> standardized. It's the reason why I wrote Python tool. If I execute
>>> git-archive with sufficently long randomized username & uid in
>>> several times, it would be good test?
>> The append-untracked feature would avoid that issue as well.
>>
>> A unique username doesn't have to be long or random if you control all
>> other strings that "tar tv" would show (in particular file names).  You
>> could e.g. use "UserName" and then simply grep for it.
>>
>> Checking that git archive doesn't add unwanted characters is harder if
>> the format isn't strictly known -- how to reliably recognize it sneaking
>> in an extra space or slash?  Not sure if such a test is really needed,
>> though.
>>
>>>> But getting a short program like zipdetails for tar would be nice as
>>>> well of course. :)
>>> I wrote something in C:
>>> https://github.com/mpsuzuki/git/blob/pullreq-20171227-c/t/helper/test-parse-tar-file.c
>> OK.  (But the append-untracked feature could be tested without such a
>> helper. :)
>>
>> Just some loose comments instead of a real review: You could get the
>> line count down by including Git's tar.h and parse-options.h
>> (Documentation/technical/api-parse-options.txt).
>>
>>> but if somebody wants the support of other tar variants,
>>> he/she would have some headache :-)
>> Yeah, the history of tar and its various platform-specific formats
>> fills volumes, no doubt.
>>
>> René
>>
> 
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-archive: accept --owner and --group like GNU tar
  2018-01-04  2:25         ` suzuki toshiya
@ 2018-01-04 16:59           ` René Scharfe
  2018-01-04 18:22             ` Junio C Hamano
       [not found]             ` <2112f9c245d64f4e89361df7e9de9374@OS2PR01MB1147.jpnprd01.prod.outlook.com>
  0 siblings, 2 replies; 13+ messages in thread
From: René Scharfe @ 2018-01-04 16:59 UTC (permalink / raw)
  To: suzuki toshiya; +Cc: git@vger.kernel.org

Am 04.01.2018 um 03:25 schrieb suzuki toshiya:
> Taking a glance on parse-options.h, I could not find the
> existing class collecting the operands as an array (or
> linked list) from multiple "--xxx=yyy" options. Similar
> things might be the collecting the pathnames to pathspec
> structure. Should I write something with OPTION_CALLBACK?

There is OPT_STRING_LIST; Documentation/technical/api-parse-options.txt
says:

  `OPT_STRING_LIST(short, long, &struct string_list, arg_str, description)`::
      Introduce an option with string argument.
      The string argument is stored as an element in `string_list`.
      Use of `--no-option` will clear the list of preceding values.

I don't know if it's a good idea, but perhaps we don't even need a new
option.  We could change how pathspecs of untracked files are handled:
Instead of aborting we could include them in the archive.  (Sounds like
the simplest possible interface, but may have practical problems.)

René

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-archive: accept --owner and --group like GNU tar
  2018-01-04 16:59           ` René Scharfe
@ 2018-01-04 18:22             ` Junio C Hamano
  2018-01-05 13:54               ` René Scharfe
       [not found]             ` <2112f9c245d64f4e89361df7e9de9374@OS2PR01MB1147.jpnprd01.prod.outlook.com>
  1 sibling, 1 reply; 13+ messages in thread
From: Junio C Hamano @ 2018-01-04 18:22 UTC (permalink / raw)
  To: René Scharfe; +Cc: suzuki toshiya, git@vger.kernel.org

René Scharfe <l.s.r@web.de> writes:

> I don't know if it's a good idea, but perhaps we don't even need a new
> option.  We could change how pathspecs of untracked files are handled:
> Instead of aborting we could include them in the archive.  (Sounds like
> the simplest possible interface, but may have practical problems.)

One practical problem is that users who do this

    $ git archive HEAD Documentation/ | tar tf -

would be expecting (at least) two different things, depending on the
situation they are in.

So at least you'd need an "--include-untracked" option, I guess.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-archive: accept --owner and --group like GNU tar
       [not found]             ` <2112f9c245d64f4e89361df7e9de9374@OS2PR01MB1147.jpnprd01.prod.outlook.com>
@ 2018-01-05  4:23               ` suzuki toshiya
  0 siblings, 0 replies; 13+ messages in thread
From: suzuki toshiya @ 2018-01-05  4:23 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: René Scharfe, git@vger.kernel.org

Dear Junio,

Could you tell me your thought about the way for me to go?
Do you agree with his suggestion; "--uid etc is not the right
solution, --include-untracked is better and generic" ? Or,
should I work "--uid etc" further?

Regards,
mpsuzuki

Junio C Hamano wrote:
> René Scharfe <l.s.r@web.de> writes:
> 
>> I don't know if it's a good idea, but perhaps we don't even need a new
>> option.  We could change how pathspecs of untracked files are handled:
>> Instead of aborting we could include them in the archive.  (Sounds like
>> the simplest possible interface, but may have practical problems.)
> 
> One practical problem is that users who do this
> 
>     $ git archive HEAD Documentation/ | tar tf -
> 
> would be expecting (at least) two different things, depending on the
> situation they are in.
> 
> So at least you'd need an "--include-untracked" option, I guess.
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-archive: accept --owner and --group like GNU tar
  2018-01-04 18:22             ` Junio C Hamano
@ 2018-01-05 13:54               ` René Scharfe
  2018-01-05 19:10                 ` Junio C Hamano
  0 siblings, 1 reply; 13+ messages in thread
From: René Scharfe @ 2018-01-05 13:54 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: suzuki toshiya, git@vger.kernel.org

Am 04.01.2018 um 19:22 schrieb Junio C Hamano:
> René Scharfe <l.s.r@web.de> writes:
> 
>> I don't know if it's a good idea, but perhaps we don't even need a new
>> option.  We could change how pathspecs of untracked files are handled:
>> Instead of aborting we could include them in the archive.  (Sounds like
>> the simplest possible interface, but may have practical problems.)
> 
> One practical problem is that users who do this
> 
>      $ git archive HEAD Documentation/ | tar tf -
> 
> would be expecting (at least) two different things, depending on the
> situation they are in.
> 
> So at least you'd need an "--include-untracked" option, I guess.

Right, this breaks down with directories -- most build artifacts (e.g.
.o files) are probably not meant to end up in archives.  We could still
do it for regular files and symlinks.  Perhaps that's too confusing,
though, and an --add-untracked-file parameter (or whatever we want to
call it) is the way to go.

René

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-archive: accept --owner and --group like GNU tar
  2018-01-05 13:54               ` René Scharfe
@ 2018-01-05 19:10                 ` Junio C Hamano
  0 siblings, 0 replies; 13+ messages in thread
From: Junio C Hamano @ 2018-01-05 19:10 UTC (permalink / raw)
  To: René Scharfe; +Cc: suzuki toshiya, git@vger.kernel.org

René Scharfe <l.s.r@web.de> writes:

>> One practical problem is that users who do this
>> 
>>      $ git archive HEAD Documentation/ | tar tf -
>> 
>> would be expecting (at least) two different things, depending on the
>> situation they are in.
>> 
>> So at least you'd need an "--include-untracked" option, I guess.
>
> Right, this breaks down with directories -- most build artifacts (e.g.
> .o files) are probably not meant to end up in archives.

I agree that it is unwise to overload the pathspec for this purpose.
Perhaps bulk of the documentation of a project is in javadoc in its
source code and extracted into some directory, where the user would
want to include untracked things as well as tracked ones, while
untracked contents of other directories are all not meant to be
packaged.  As "git archive" is primarily about freezing the contents
of a set of paths in a single revision into an archive, and
including untracked things is secondary, perhaps the right way to do
so would be to:

 (1) leave pathspec as-is---they mean "only this area of the named
     revision goes into the resulting archive", and 

 (2) introduce a new "--add-untracked=<wildmatch>" option, that can
     be multiply given, is cumulative, and is used to specify which
     untracked paths to be included in the result from the working
     tree contents.

So

	git archive \
		--add-untracked=./configure \
		--add-untracked='Documentation/**/*.html' \
		--add-untracked='Documentation/*.[1-9]' \
		HEAD -- . ':!contrib/' ':t/'

might be a way to package up sources we use without tests but
include the built documentation files.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2018-01-05 19:10 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-29 14:05 [PATCH] git-archive: accept --owner and --group like GNU tar suzuki toshiya
2018-01-01 18:29 ` René Scharfe
2018-01-02  0:32   ` Perry Hutchison
2018-01-04  0:43     ` René Scharfe
     [not found] ` <df39f62558314cf6a9d9df3e23f31dd8@OS2PR01MB1147.jpnprd01.prod.outlook.com>
2018-01-02  6:58   ` suzuki toshiya
2018-01-02 21:36     ` René Scharfe
     [not found]     ` <59a1fc058278463996ed68c970a5e08a@OS2PR01MB1147.jpnprd01.prod.outlook.com>
2018-01-04  1:29       ` suzuki toshiya
     [not found]       ` <955dae095d504b00b3e1c8a956ba852a@OS2PR01MB1147.jpnprd01.prod.outlook.com>
2018-01-04  2:25         ` suzuki toshiya
2018-01-04 16:59           ` René Scharfe
2018-01-04 18:22             ` Junio C Hamano
2018-01-05 13:54               ` René Scharfe
2018-01-05 19:10                 ` Junio C Hamano
     [not found]             ` <2112f9c245d64f4e89361df7e9de9374@OS2PR01MB1147.jpnprd01.prod.outlook.com>
2018-01-05  4:23               ` suzuki toshiya

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).