git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / code / Atom feed
From: "René Scharfe" <l.s.r@web.de>
To: Junio C Hamano <gitster@pobox.com>, git@vger.kernel.org
Subject: [PATCH 1/3] archive: read short blobs in archive.c::write_archive_entry()
Date: Sat, 19 Sep 2020 23:23:32 +0200	[thread overview]
Message-ID: <0951939b-7521-cf9b-1459-26d6d6774589@web.de> (raw)
In-Reply-To: <b7ef3952-872a-40af-0f2e-c392b031a280@web.de>

Centralize reading of symlink destinations and the contents of regular
files that are too small to be streamed.  This reduces code duplication
and allows future patches to add support for adding non-tracked files to
archives.  The backends are expected to stream blobs if buffer is NULL.

object_file_to_archive() is only called from archive.c and thus no
longer exported.

Signed-off-by: René Scharfe <l.s.r@web.de>
---
 archive-tar.c | 22 +++-------------------
 archive-zip.c | 22 +++++-----------------
 archive.c     | 31 ++++++++++++++++++++++++-------
 archive.h     |  7 ++-----
 4 files changed, 34 insertions(+), 48 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index 5ceec3684b..f1a1447ebd 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -242,13 +242,12 @@ static void write_extended_header(struct archiver_args *args,
 static int write_tar_entry(struct archiver_args *args,
 			   const struct object_id *oid,
 			   const char *path, size_t pathlen,
-			   unsigned int mode)
+			   unsigned int mode,
+			   void *buffer, unsigned long size)
 {
 	struct ustar_header header;
 	struct strbuf ext_header = STRBUF_INIT;
-	unsigned int old_mode = mode;
-	unsigned long size, size_in_header;
-	void *buffer;
+	unsigned long size_in_header;
 	int err = 0;

 	memset(&header, 0, sizeof(header));
@@ -282,20 +281,6 @@ static int write_tar_entry(struct archiver_args *args,
 	} else
 		memcpy(header.name, path, pathlen);

-	if (S_ISREG(mode) && !args->convert &&
-	    oid_object_info(args->repo, oid, &size) == OBJ_BLOB &&
-	    size > big_file_threshold)
-		buffer = NULL;
-	else if (S_ISLNK(mode) || S_ISREG(mode)) {
-		enum object_type type;
-		buffer = object_file_to_archive(args, path, oid, old_mode, &type, &size);
-		if (!buffer)
-			return error(_("cannot read %s"), oid_to_hex(oid));
-	} else {
-		buffer = NULL;
-		size = 0;
-	}
-
 	if (S_ISLNK(mode)) {
 		if (size > sizeof(header.linkname)) {
 			xsnprintf(header.linkname, sizeof(header.linkname),
@@ -326,7 +311,6 @@ static int write_tar_entry(struct archiver_args *args,
 		else
 			err = stream_blocked(args->repo, oid);
 	}
-	free(buffer);
 	return err;
 }

diff --git a/archive-zip.c b/archive-zip.c
index e9f426298b..2961e01c75 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -285,7 +285,8 @@ static int entry_is_binary(struct index_state *istate, const char *path,
 static int write_zip_entry(struct archiver_args *args,
 			   const struct object_id *oid,
 			   const char *path, size_t pathlen,
-			   unsigned int mode)
+			   unsigned int mode,
+			   void *buffer, unsigned long size)
 {
 	struct zip_local_header header;
 	uintmax_t offset = zip_offset;
@@ -299,10 +300,8 @@ static int write_zip_entry(struct archiver_args *args,
 	enum zip_method method;
 	unsigned char *out;
 	void *deflated = NULL;
-	void *buffer;
 	struct git_istream *stream = NULL;
 	unsigned long flags = 0;
-	unsigned long size;
 	int is_binary = -1;
 	const char *path_without_prefix = path + args->baselen;
 	unsigned int creator_version = 0;
@@ -328,13 +327,8 @@ static int write_zip_entry(struct archiver_args *args,
 		method = ZIP_METHOD_STORE;
 		attr2 = 16;
 		out = NULL;
-		size = 0;
 		compressed_size = 0;
-		buffer = NULL;
 	} else if (S_ISREG(mode) || S_ISLNK(mode)) {
-		enum object_type type = oid_object_info(args->repo, oid,
-							&size);
-
 		method = ZIP_METHOD_STORE;
 		attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) :
 			(mode & 0111) ? ((mode) << 16) : 0;
@@ -343,21 +337,16 @@ static int write_zip_entry(struct archiver_args *args,
 		if (S_ISREG(mode) && args->compression_level != 0 && size > 0)
 			method = ZIP_METHOD_DEFLATE;

-		if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert &&
-		    size > big_file_threshold) {
+		if (!buffer) {
+			enum object_type type;
 			stream = open_istream(args->repo, oid, &type, &size,
 					      NULL);
 			if (!stream)
 				return error(_("cannot stream blob %s"),
 					     oid_to_hex(oid));
 			flags |= ZIP_STREAM;
-			out = buffer = NULL;
+			out = NULL;
 		} else {
-			buffer = object_file_to_archive(args, path, oid, mode,
-							&type, &size);
-			if (!buffer)
-				return error(_("cannot read %s"),
-					     oid_to_hex(oid));
 			crc = crc32(crc, buffer, size);
 			is_binary = entry_is_binary(args->repo->index,
 						    path_without_prefix,
@@ -511,7 +500,6 @@ static int write_zip_entry(struct archiver_args *args,
 	}

 	free(deflated);
-	free(buffer);

 	if (compressed_size > 0xffffffff || size > 0xffffffff ||
 	    offset > 0xffffffff) {
diff --git a/archive.c b/archive.c
index 0de6048bfc..4fbe5329c5 100644
--- a/archive.c
+++ b/archive.c
@@ -70,10 +70,12 @@ static void format_subst(const struct commit *commit,
 	free(to_free);
 }

-void *object_file_to_archive(const struct archiver_args *args,
-			     const char *path, const struct object_id *oid,
-			     unsigned int mode, enum object_type *type,
-			     unsigned long *sizep)
+static void *object_file_to_archive(const struct archiver_args *args,
+				    const char *path,
+				    const struct object_id *oid,
+				    unsigned int mode,
+				    enum object_type *type,
+				    unsigned long *sizep)
 {
 	void *buffer;
 	const struct commit *commit = args->convert ? args->commit : NULL;
@@ -145,6 +147,9 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 	write_archive_entry_fn_t write_entry = c->write_entry;
 	int err;
 	const char *path_without_prefix;
+	unsigned long size;
+	void *buffer;
+	enum object_type type;

 	args->convert = 0;
 	strbuf_reset(&path);
@@ -167,7 +172,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 	if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
 		if (args->verbose)
 			fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
-		err = write_entry(args, oid, path.buf, path.len, mode);
+		err = write_entry(args, oid, path.buf, path.len, mode, NULL, 0);
 		if (err)
 			return err;
 		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
@@ -175,7 +180,19 @@ static int write_archive_entry(const struct object_id *oid, const char *base,

 	if (args->verbose)
 		fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
-	return write_entry(args, oid, path.buf, path.len, mode);
+
+	/* Stream it? */
+	if (S_ISREG(mode) && !args->convert &&
+	    oid_object_info(args->repo, oid, &size) == OBJ_BLOB &&
+	    size > big_file_threshold)
+		return write_entry(args, oid, path.buf, path.len, mode, NULL, size);
+
+	buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
+	if (!buffer)
+		return error(_("cannot read %s"), oid_to_hex(oid));
+	err = write_entry(args, oid, path.buf, path.len, mode, buffer, size);
+	free(buffer);
+	return err;
 }

 static void queue_directory(const unsigned char *sha1,
@@ -265,7 +282,7 @@ int write_archive_entries(struct archiver_args *args,
 		if (args->verbose)
 			fprintf(stderr, "%.*s\n", (int)len, args->base);
 		err = write_entry(args, &args->tree->object.oid, args->base,
-				  len, 040777);
+				  len, 040777, NULL, 0);
 		if (err)
 			return err;
 	}
diff --git a/archive.h b/archive.h
index 3bd96bf6bb..d83b41a01f 100644
--- a/archive.h
+++ b/archive.h
@@ -49,12 +49,9 @@ void init_archivers(void);
 typedef int (*write_archive_entry_fn_t)(struct archiver_args *args,
 					const struct object_id *oid,
 					const char *path, size_t pathlen,
-					unsigned int mode);
+					unsigned int mode,
+					void *buffer, unsigned long size);

 int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry);
-void *object_file_to_archive(const struct archiver_args *args,
-			     const char *path, const struct object_id *oid,
-			     unsigned int mode, enum object_type *type,
-			     unsigned long *sizep);

 #endif	/* ARCHIVE_H */
--
2.28.0


  reply	other threads:[~2020-09-19 21:23 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-19  1:41 What's cooking in git.git (Sep 2020, #05; Fri, 18) Junio C Hamano
2020-09-19 21:13 ` René Scharfe
2020-09-19 21:23   ` René Scharfe [this message]
2020-09-19 21:23   ` [PATCH 2/3] archive: add --add-file René Scharfe
2020-09-19 21:23   ` [PATCH 3/3] Makefile: use git-archive --add-file René Scharfe
2020-09-19 22:35   ` What's cooking in git.git (Sep 2020, #05; Fri, 18) Junio C Hamano
2020-09-20  7:58     ` René Scharfe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0951939b-7521-cf9b-1459-26d6d6774589@web.de \
    --to=l.s.r@web.de \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --subject='Re: [PATCH 1/3] archive: read short blobs in archive.c::write_archive_entry()' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Code repositories for project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).