git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "René Scharfe" <l.s.r@web.de>
To: Jeff King <peff@peff.net>
Cc: Keegan Carruthers-Smith <keegan.csmith@gmail.com>, git@vger.kernel.org
Subject: Re: git archive generates tar with malformed pax extended attribute
Date: Wed, 29 May 2019 01:34:32 +0200	[thread overview]
Message-ID: <5f9ac691-4cb1-a15d-ca44-63fb64994d59@web.de> (raw)
In-Reply-To: <20190528190815.GA20499@sigill.intra.peff.net>

Am 28.05.19 um 21:08 schrieb Jeff King:
> On Tue, May 28, 2019 at 08:01:43PM +0200, René Scharfe wrote:
>
>> Am 28.05.19 um 07:58 schrieb Jeff King:
>>> On Sat, May 25, 2019 at 03:26:53PM +0200, René Scharfe wrote:
>>>
>>>> We could
>>>> make git fsck report such symlinks.
>>>
>>> This is a little tricky, because fsck generally looks at individual
>>> objects, and the bad pattern is a combination of a tree and a blob
>>> together. I think you could make it work by reusing some of the code and
>>> patterns from 9e84a6d758 (Merge branch 'jk/submodule-fsck-loose' into
>>> maint, 2018-05-22).
>>
>> Actually it's super easy, barely an inconvenience (SCNR, watched a lot
>> of those rants recently)..  Did I miss something?
>
> Yes. You cannot rely on calling read_object_file() in real-time when the
> fsck is being done by index-pack. The blob in question may be in the
> pack you are indexing.

It figures.

So something like the patch below?

Parsing trees with symlinks twice is not ideal, but keeps the set
structure simple -- a standard oidset suffices.

The global variables are ugly.  Moving them into struct fsck_option
would be possible, but not much better, as they aren't really
options.

FSCK_MSG_MISSING_TREE_OBJECT has never been used before, it seems.

---
 fsck.c          | 58 +++++++++++++++++++++++++++++++++++++++++++++++++
 t/t1450-fsck.sh | 13 +++++++++++
 2 files changed, 71 insertions(+)

diff --git a/fsck.c b/fsck.c
index 4703f55561..a6e7d0b03f 100644
--- a/fsck.c
+++ b/fsck.c
@@ -19,6 +19,7 @@

 static struct oidset gitmodules_found = OIDSET_INIT;
 static struct oidset gitmodules_done = OIDSET_INIT;
+static struct oidset trees_with_symlinks = OIDSET_INIT;

 #define FSCK_FATAL -1
 #define FSCK_INFO -2
@@ -49,6 +50,7 @@ static struct oidset gitmodules_done = OIDSET_INIT;
 	FUNC(MISSING_PARENT, ERROR) \
 	FUNC(MISSING_SPACE_BEFORE_DATE, ERROR) \
 	FUNC(MISSING_SPACE_BEFORE_EMAIL, ERROR) \
+	FUNC(MISSING_SYMLINK_OBJECT, ERROR) \
 	FUNC(MISSING_TAG, ERROR) \
 	FUNC(MISSING_TAG_ENTRY, ERROR) \
 	FUNC(MISSING_TAG_OBJECT, ERROR) \
@@ -58,6 +60,7 @@ static struct oidset gitmodules_done = OIDSET_INIT;
 	FUNC(MISSING_TYPE_ENTRY, ERROR) \
 	FUNC(MULTIPLE_AUTHORS, ERROR) \
 	FUNC(TAG_OBJECT_NOT_TAG, ERROR) \
+	FUNC(SYMLINK_OBJECT_NOT_BLOB, ERROR) \
 	FUNC(TREE_NOT_SORTED, ERROR) \
 	FUNC(UNKNOWN_TYPE, ERROR) \
 	FUNC(ZERO_PADDED_DATE, ERROR) \
@@ -78,6 +81,7 @@ static struct oidset gitmodules_done = OIDSET_INIT;
 	FUNC(NULL_SHA1, WARN) \
 	FUNC(ZERO_PADDED_FILEMODE, WARN) \
 	FUNC(NUL_IN_COMMIT, WARN) \
+	FUNC(NUL_IN_SYMLINK_TARGET, WARN) \
 	/* infos (reported as warnings, but ignored by default) */ \
 	FUNC(GITMODULES_PARSE, INFO) \
 	FUNC(BAD_TAG_NAME, INFO) \
@@ -578,6 +582,33 @@ static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, con
 	return c1 < c2 ? 0 : TREE_UNORDERED;
 }

+static int fsck_symlink(struct tree *tree, const char *name,
+			const struct object_id *oid,
+			struct fsck_options *options)
+{
+	int ret = 0;
+	enum object_type type;
+	unsigned long size;
+	void *buffer = read_object_file(oid, &type, &size);
+
+	if (!buffer)
+		ret = report(options, &tree->object,
+			     FSCK_MSG_MISSING_SYMLINK_OBJECT,
+			     "cannot read blob object for symlink %s", name);
+	else if (type != OBJ_BLOB)
+		ret = report(options, &tree->object,
+			     FSCK_MSG_SYMLINK_OBJECT_NOT_BLOB,
+			     "expected blob got %s for symlink %s",
+			     type_name(type), name);
+	else if (memchr(buffer, '\0', size))
+		ret = report(options, &tree->object,
+			     FSCK_MSG_NUL_IN_SYMLINK_TARGET,
+			     "NUL in target of symlink %s", name);
+
+	free(buffer);
+	return ret;
+}
+
 static int fsck_tree(struct tree *item, struct fsck_options *options)
 {
 	int retval = 0;
@@ -626,6 +657,8 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 						 FSCK_MSG_GITMODULES_SYMLINK,
 						 ".gitmodules is a symbolic link");
 		}
+		if (S_ISLNK(mode))
+			oidset_insert(&trees_with_symlinks, &item->object.oid);

 		if (update_tree_entry_gently(&desc)) {
 			retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
@@ -1118,8 +1151,33 @@ int fsck_finish(struct fsck_options *options)
 		free(buf);
 	}

+	oidset_iter_init(&trees_with_symlinks, &iter);
+	while ((oid = oidset_iter_next(&iter))) {
+		struct tree *tree;
+		struct tree_desc desc;
+		struct name_entry entry;
+
+		tree = lookup_tree(the_repository, oid);
+		if (!tree) {
+			struct object *obj = lookup_unknown_object(oid->hash);
+			ret |= report(options, obj,
+				      FSCK_MSG_MISSING_TREE_OBJECT,
+				      "tree %s not found", oid_to_hex(oid));
+			continue;
+		}
+		if (parse_tree(tree))
+			continue;
+		if (init_tree_desc_gently(&desc, tree->buffer, tree->size))
+			continue;
+		while (tree_entry_gently(&desc, &entry)) {
+			if (S_ISLNK(entry.mode))
+				ret |= fsck_symlink(tree, entry.path,
+						    &entry.oid, options);
+		}
+	}

 	oidset_clear(&gitmodules_found);
 	oidset_clear(&gitmodules_done);
+	oidset_clear(&trees_with_symlinks);
 	return ret;
 }
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 0f268a3664..ce9501d063 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -822,4 +822,17 @@ test_expect_success 'detect corrupt index file in fsck' '
 	test_i18ngrep "bad index file" errors
 '

+test_expect_success 'detect NUL in symlink target' '
+	test_when_finished "git update-ref -d refs/heads/nul_in_symlink" &&
+	test_when_finished "remove_object \$commit" &&
+	test_when_finished "remove_object \$tree" &&
+	test_when_finished "remove_object \$blob" &&
+	blob=$(echo fooQbar | q_to_nul | git hash-object -w --stdin) &&
+	tree=$(echo "120000 blob $blob	symlink" | git mktree) &&
+	commit=$(git commit-tree $tree) &&
+	git update-ref refs/heads/nul_in_symlink $commit &&
+	git fsck 2>out &&
+	test_i18ngrep "NUL in target of symlink" out
+'
+
 test_done
--
2.21.0

  reply	other threads:[~2019-05-28 23:34 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-24  6:45 git archive generates tar with malformed pax extended attribute Keegan Carruthers-Smith
2019-05-24  7:06 ` Jeff King
2019-05-24  7:35   ` Keegan Carruthers-Smith
2019-05-24  8:13     ` Jeff King
2019-05-25 13:26       ` René Scharfe
2019-05-25 13:46         ` Andreas Schwab
2019-05-25 21:07         ` Ævar Arnfjörð Bjarmason
2019-05-26 21:33           ` René Scharfe
2019-05-28  5:44             ` Jeff King
2019-05-28  5:58         ` Jeff King
2019-05-28 18:01           ` René Scharfe
2019-05-28 19:08             ` Jeff King
2019-05-28 23:34               ` René Scharfe [this message]
2019-05-29  1:17                 ` Jeff King
2019-05-29 17:54                   ` René Scharfe
2019-05-30 11:55                     ` Jeff King
2019-06-02 16:58                       ` René Scharfe
2019-06-04 20:53                         ` Jeff King
2019-05-27  5:11       ` Keegan Carruthers-Smith
2019-05-25 20:46   ` Ævar Arnfjörð Bjarmason
2019-05-25 21:19     ` brian m. carlson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5f9ac691-4cb1-a15d-ca44-63fb64994d59@web.de \
    --to=l.s.r@web.de \
    --cc=git@vger.kernel.org \
    --cc=keegan.csmith@gmail.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).