git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH WIP 1/4] convert.c: refactor in order to skip conversion early without looking into file content
Date: Thu, 28 May 2009 15:29:07 +1000	[thread overview]
Message-ID: <1243488550-15357-2-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1243488550-15357-1-git-send-email-pclouds@gmail.com>

convert_to_{git,working_tree} require the entire blob content to be
in memory. This is impossible for large files (especially files that
cannot be mapped into memory at all). Such files are unlikely to need
conversion.

This patch factors out the condition checks that do not require file
content, so that large-file routines can check early whether conversion
can be skipped. If it cannot, they follow the common code path.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h   |    2 +
 convert.c |   86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 79 insertions(+), 9 deletions(-)

diff --git a/cache.h b/cache.h
index b8503ad..f3fc822 100644
--- a/cache.h
+++ b/cache.h
@@ -933,6 +933,8 @@ extern void trace_argv_printf(const char **argv, const char *format, ...);
 extern int convert_to_git(const char *path, const char *src, size_t len,
                           struct strbuf *dst, enum safe_crlf checksafe);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
+extern int convert_to_git_needed(const char *path, size_t len);
+extern int convert_to_working_tree_needed(const char *path, size_t len);
 
 /* add */
 /*
diff --git a/convert.c b/convert.c
index 1816e97..809c3e8 100644
--- a/convert.c
+++ b/convert.c
@@ -120,13 +120,18 @@ static void check_safe_crlf(const char *path, int action,
 	}
 }
 
+static int crlf_to_git_noneed(const char *path, size_t len, int action)
+{
+	return (action == CRLF_BINARY) || !auto_crlf || !len;
+}
+
 static int crlf_to_git(const char *path, const char *src, size_t len,
                        struct strbuf *buf, int action, enum safe_crlf checksafe)
 {
 	struct text_stat stats;
 	char *dst;
 
-	if ((action == CRLF_BINARY) || !auto_crlf || !len)
+	if (crlf_to_git_noneed(path, len, action))
 		return 0;
 
 	gather_stats(src, len, &stats);
@@ -179,17 +184,19 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 	return 1;
 }
 
+static int crlf_to_worktree_noneed(const char *path, size_t len, int action)
+{
+	return 	(action == CRLF_BINARY) || (action == CRLF_INPUT) ||
+		auto_crlf <= 0 || !len;
+}
+
 static int crlf_to_worktree(const char *path, const char *src, size_t len,
                             struct strbuf *buf, int action)
 {
 	char *to_free = NULL;
 	struct text_stat stats;
 
-	if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
-	    auto_crlf <= 0)
-		return 0;
-
-	if (!len)
+	if (crlf_to_worktree_noneed(path, len, action))
 		return 0;
 
 	gather_stats(src, len, &stats);
@@ -271,6 +278,11 @@ static int filter_buffer(int fd, void *data)
 	return (write_err || status);
 }
 
+static int apply_filter_noneed(const char *path, const char *cmd)
+{
+	return cmd == NULL;
+}
+
 static int apply_filter(const char *path, const char *src, size_t len,
                         struct strbuf *dst, const char *cmd)
 {
@@ -285,7 +297,7 @@ static int apply_filter(const char *path, const char *src, size_t len,
 	struct async async;
 	struct filter_params params;
 
-	if (!cmd)
+	if (apply_filter_noneed(path, cmd))
 		return 0;
 
 	memset(&async, 0, sizeof(async));
@@ -428,12 +440,20 @@ static int count_ident(const char *cp, unsigned long size)
 	return cnt;
 }
 
+static int ident_conversion_noneed(const char *path, int ident)
+{
+	return !ident;
+}
+
 static int ident_to_git(const char *path, const char *src, size_t len,
                         struct strbuf *buf, int ident)
 {
 	char *dst, *dollar;
 
-	if (!ident || !count_ident(src, len))
+	if (ident_conversion_noneed(path, ident))
+		return 0;
+
+	if (!count_ident(src, len))
 		return 0;
 
 	/* only grow if not in place */
@@ -471,7 +491,7 @@ static int ident_to_worktree(const char *path, const char *src, size_t len,
 	char *to_free = NULL, *dollar;
 	int cnt;
 
-	if (!ident)
+	if (ident_conversion_noneed(path, ident))
 		return 0;
 
 	cnt = count_ident(src, len);
@@ -597,6 +617,28 @@ int convert_to_git(const char *path, const char *src, size_t len,
 	return ret | ident_to_git(path, src, len, dst, ident);
 }
 
+int convert_to_git_needed(const char *path, size_t len)
+{
+	struct git_attr_check check[3];
+	int crlf = CRLF_GUESS;
+	int ident = 0;
+	const char *filter = NULL;
+
+	setup_convert_check(check);
+	if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
+		struct convert_driver *drv;
+		crlf = git_path_check_crlf(path, check + 0);
+		ident = git_path_check_ident(path, check + 1);
+		drv = git_path_check_convert(path, check + 2);
+		if (drv && drv->clean)
+			filter = drv->clean;
+	}
+
+	return !apply_filter_noneed(path, filter) ||
+		!crlf_to_git_noneed(path, len, crlf) ||
+		!ident_conversion_noneed(path, ident);
+}
+
 int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
 {
 	struct git_attr_check check[3];
@@ -626,3 +668,29 @@ int convert_to_working_tree(const char *path, const char *src, size_t len, struc
 	}
 	return ret | apply_filter(path, src, len, dst, filter);
 }
+
+int convert_to_working_tree_needed(const char *path, size_t len)
+{
+	struct git_attr_check check[3];
+	int crlf = CRLF_GUESS;
+	int ident = 0;
+	const char *filter = NULL;
+
+	/*
+	 * any conversion added to convert_to_working_tree()
+	 * must be mirrored in this function as well
+	 */
+	setup_convert_check(check);
+	if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
+		struct convert_driver *drv;
+		crlf = git_path_check_crlf(path, check + 0);
+		ident = git_path_check_ident(path, check + 1);
+		drv = git_path_check_convert(path, check + 2);
+		if (drv && drv->smudge)
+			filter = drv->smudge;
+	}
+
+	return !ident_conversion_noneed(path, ident) ||
+		!crlf_to_worktree_noneed(path, len, crlf) ||
+		!apply_filter_noneed(path, filter);
+}
-- 
1.6.3.1.257.gbd13

  reply	other threads:[~2009-05-28  5:29 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-28  5:29 [PATCH WIP 0/4] Special code path for large blobs Nguyễn Thái Ngọc Duy
2009-05-28  5:29 ` Nguyễn Thái Ngọc Duy [this message]
2009-05-28  5:29   ` [PATCH WIP 2/4] sha1_file.c: add streaming interface for reading blobs Nguyễn Thái Ngọc Duy
2009-05-28  5:29     ` [PATCH WIP 3/4] write_entry: use streaming interface for checkout large files Nguyễn Thái Ngọc Duy
2009-05-28  5:29       ` [PATCH WIP 4/4] index_fd: support indexing " Nguyễn Thái Ngọc Duy
2009-05-28 18:03 ` [PATCH WIP 0/4] Special code path for large blobs Nicolas Pitre
2009-06-02  4:46   ` Nguyen Thai Ngoc Duy
2009-06-02 14:45     ` Shawn O. Pearce
2009-06-02 17:22       ` Nicolas Pitre

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1243488550-15357-2-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).