git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "René Scharfe" <l.s.r@web.de>
To: Jonathan Nieder <jrnieder@gmail.com>,
	David Turner <dturner@twosigma.com>
Cc: git@vger.kernel.org, Jeff King <peff@peff.net>,
	Duy Nguyen <pclouds@gmail.com>
Subject: Re: [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2)
Date: Wed, 19 Apr 2017 19:28:29 +0200	[thread overview]
Message-ID: <c0333c81-d3b2-ca2d-a553-75642d8fb949@web.de> (raw)
In-Reply-To: <20170419012824.GA28740@aiede.svl.corp.google.com>

Am 19.04.2017 um 03:28 schrieb Jonathan Nieder:
> David Turner wrote:
>> @@ -274,7 +278,7 @@ static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
>>   			 * running.
>>   			 */
>>   			time(NULL) - st.st_mtime <= 12 * 3600 &&
>> -			fscanf(fp, "%"SCNuMAX" %127c", &pid, locking_host) == 2 &&
>> +			fscanf(fp, scan_fmt, &pid, locking_host) == 2 &&
> 
> I hoped this could be simplified since HOST_NAME_MAX is a numeric literal,
> using the double-expansion trick:
> 
> #define STR_(s) # s
> #define STR(s) STR_(s)
> 
> 			fscanf(fp, "%" SCNuMAX " %" STR(HOST_NAME_MAX) "c",
> 			       &pid, locking_host);
> 
> Unfortunately, I don't think there's anything stopping a platform from
> defining
> 
> 	#define HOST_NAME_MAX 0x100
> 
> which would break that.
> 
> So this run-time calculation appears to be necessary.

I had another look at this last night and cooked up the following
patch.  Might have gone overboard with it..

-- >8 --
Subject: [PATCH] gc: support arbitrary hostnames and pids in lock_repo_for_gc()

git gc writes its pid and hostname into a pidfile to prevent concurrent
garbage collection.  Repositories may be shared between systems with
different limits for host name length and different pid ranges.  Use a
strbuf to store the file contents to allow for arbitrarily long
hostnames and pids to be shown to the user on early abort.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 builtin/gc.c | 151 +++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 111 insertions(+), 40 deletions(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index 2daede7820..4c1c01e87d 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -228,21 +228,99 @@ static int need_to_gc(void)
 	return 1;
 }
 
+struct pidfile {
+	struct strbuf buf;
+	char *hostname;
+};
+
+#define PIDFILE_INIT { STRBUF_INIT }
+
+static void pidfile_release(struct pidfile *pf)
+{
+	pf->hostname = NULL;
+	strbuf_release(&pf->buf);
+}
+
+static int pidfile_read(struct pidfile *pf, const char *path,
+			unsigned int max_age_seconds)
+{
+	int fd;
+	struct stat st;
+	ssize_t len;
+	char *space;
+	int rc = -1;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return rc;
+
+	if (fstat(fd, &st))
+		goto out;
+	if (time(NULL) - st.st_mtime > max_age_seconds)
+		goto out;
+	if (st.st_size > (size_t)st.st_size)
+		goto out;
+
+	len = strbuf_read(&pf->buf, fd, st.st_size);
+	if (len < 0)
+		goto out;
+
+	space = strchr(pf->buf.buf, ' ');
+	if (!space) {
+		pidfile_release(pf);
+		goto out;
+	}
+	pf->hostname = space + 1;
+	*space = '\0';
+
+	rc = 0;
+out:
+	close(fd);
+	return rc;
+}
+
+static int parse_pid(const char *value, pid_t *ret)
+{
+	if (value && *value) {
+		char *end;
+		intmax_t val;
+
+		errno = 0;
+		val = strtoimax(value, &end, 0);
+		if (errno == ERANGE)
+			return 0;
+		if (*end)
+			return 0;
+		if (labs(val) > maximum_signed_value_of_type(pid_t)) {
+			errno = ERANGE;
+			return 0;
+		}
+		*ret = val;
+		return 1;
+	}
+	errno = EINVAL;
+	return 0;
+}
+
+static int pidfile_process_exists(const struct pidfile *pf)
+{
+	pid_t pid;
+	return parse_pid(pf->buf.buf, &pid) &&
+		(!kill(pid, 0) || errno == EPERM);
+}
+
 /* return NULL on success, else hostname running the gc */
-static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
+static int lock_repo_for_gc(int force, struct pidfile *pf)
 {
 	static struct lock_file lock;
 	char my_host[128];
 	struct strbuf sb = STRBUF_INIT;
-	struct stat st;
-	uintmax_t pid;
-	FILE *fp;
 	int fd;
 	char *pidfile_path;
 
 	if (is_tempfile_active(&pidfile))
 		/* already locked */
-		return NULL;
+		return 0;
 
 	if (gethostname(my_host, sizeof(my_host)))
 		xsnprintf(my_host, sizeof(my_host), "unknown");
@@ -251,34 +329,27 @@ static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
 	fd = hold_lock_file_for_update(&lock, pidfile_path,
 				       LOCK_DIE_ON_ERROR);
 	if (!force) {
-		static char locking_host[128];
-		int should_exit;
-		fp = fopen(pidfile_path, "r");
-		memset(locking_host, 0, sizeof(locking_host));
-		should_exit =
-			fp != NULL &&
-			!fstat(fileno(fp), &st) &&
-			/*
-			 * 12 hour limit is very generous as gc should
-			 * never take that long. On the other hand we
-			 * don't really need a strict limit here,
-			 * running gc --auto one day late is not a big
-			 * problem. --force can be used in manual gc
-			 * after the user verifies that no gc is
-			 * running.
-			 */
-			time(NULL) - st.st_mtime <= 12 * 3600 &&
-			fscanf(fp, "%"SCNuMAX" %127c", &pid, locking_host) == 2 &&
+		/*
+		 * 12 hour limit is very generous as gc should
+		 * never take that long. On the other hand we
+		 * don't really need a strict limit here,
+		 * running gc --auto one day late is not a big
+		 * problem. --force can be used in manual gc
+		 * after the user verifies that no gc is
+		 * running.
+		 */
+		const unsigned max_age_seconds = 12 * 3600;
+
+		if (!pidfile_read(pf, pidfile_path, max_age_seconds)) {
 			/* be gentle to concurrent "gc" on remote hosts */
-			(strcmp(locking_host, my_host) || !kill(pid, 0) || errno == EPERM);
-		if (fp != NULL)
-			fclose(fp);
-		if (should_exit) {
-			if (fd >= 0)
-				rollback_lock_file(&lock);
-			*ret_pid = pid;
-			free(pidfile_path);
-			return locking_host;
+			if (strcmp(pf->hostname, my_host) ||
+			    pidfile_process_exists(pf)) {
+				if (fd >= 0)
+					rollback_lock_file(&lock);
+				free(pidfile_path);
+				return -1;
+			}
+			pidfile_release(pf);
 		}
 	}
 
@@ -289,7 +360,7 @@ static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
 	commit_lock_file(&lock);
 	register_tempfile(&pidfile, pidfile_path);
 	free(pidfile_path);
-	return NULL;
+	return 0;
 }
 
 static int report_last_gc_error(void)
@@ -344,8 +415,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
 	int auto_gc = 0;
 	int quiet = 0;
 	int force = 0;
-	const char *name;
-	pid_t pid;
+	struct pidfile pf = PIDFILE_INIT;
 	int daemonized = 0;
 
 	struct option builtin_gc_options[] = {
@@ -420,12 +490,13 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
 	} else
 		add_repack_all_option();
 
-	name = lock_repo_for_gc(force, &pid);
-	if (name) {
-		if (auto_gc)
+	if (lock_repo_for_gc(force, &pf)) {
+		if (auto_gc) {
+			pidfile_release(&pf);
 			return 0; /* be quiet on --auto */
-		die(_("gc is already running on machine '%s' pid %"PRIuMAX" (use --force if not)"),
-		    name, (uintmax_t)pid);
+		}
+		die(_("gc is already running on machine '%s' pid %s (use --force if not)"),
+		    pf.hostname, pf.buf.buf);
 	}
 
 	if (daemonized) {
-- 
2.12.2


  parent reply	other threads:[~2017-04-19 17:28 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-18 21:57 [PATCH v3 0/2] gethostbyname fixes David Turner
2017-04-18 21:57 ` [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2) David Turner
2017-04-19  1:28   ` Jonathan Nieder
2017-04-19  2:57     ` Junio C Hamano
2017-04-19 14:03     ` René Scharfe
2017-04-19 17:28     ` René Scharfe [this message]
2017-04-19 19:08       ` David Turner
2017-04-19 19:09       ` Torsten Bögershausen
2017-04-19 20:02         ` René Scharfe
2017-04-20 18:37           ` Torsten Bögershausen
2017-04-20 19:28             ` René Scharfe
2017-04-21  4:18               ` Torsten Bögershausen
2017-04-18 21:57 ` [PATCH v3 2/2] xgethostname: handle long hostnames David Turner
2017-04-19  1:35   ` Jonathan Nieder
2017-04-19  2:51     ` Junio C Hamano
2017-04-19 15:50       ` David Turner
2017-04-19 16:43         ` René Scharfe
2017-04-19  2:48   ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c0333c81-d3b2-ca2d-a553-75642d8fb949@web.de \
    --to=l.s.r@web.de \
    --cc=dturner@twosigma.com \
    --cc=git@vger.kernel.org \
    --cc=jrnieder@gmail.com \
    --cc=pclouds@gmail.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).