git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Takashi Iwai <tiwai@suse.de>
To: git@vger.kernel.org
Subject: glibc mutex deadlock in signal handler
Date: Thu, 03 Sep 2015 13:00:53 +0200	[thread overview]
Message-ID: <s5hfv2vn4wq.wl-tiwai@suse.de> (raw)

Hi,

we've got a bug report of git-log stall in below:
  https://bugzilla.opensuse.org/show_bug.cgi?id=942297

In short, git-log is left unterminated sometimes when pager is
aborted.  When this happens, git process can't be killed by SIGTERM,
but only by SIGKILL.  And, further investigation revealed the possible
mutex deadlock used in glibc *alloc()/free().

I tried to reproduce this by adding a fault injection in glibc code,
and actually got the similar stack trace as reported.  The problem is
that glibc malloc (in this case realloc() and free()) takes a private
mutex.  Thus calling any function does *alloc() or free() in a signal
handler may deadlock.  In the case of git, it was free() calls in
various cleanup codes and the printf() / strerror() invocations.

Also, basically it's not safe to call fflush() or raise(), either.
But they seem to work practically on the current systems.

Below is a band-aid patch I tested and confirmed to work in the
fault-injection case.  But, some unsafe calls mentioned in the above
are left.  If we want to be in a safer side, we should really limit
the things to do in a signal handler, e.g. only calling close() and
doing waitpid(), I suppose.

Any better ideas?


thanks,

Takashi

---
diff --git a/pager.c b/pager.c
index 27d4c8a17aa1..57dda0142fa9 100644
--- a/pager.c
+++ b/pager.c
@@ -14,19 +14,25 @@
 static const char *pager_argv[] = { NULL, NULL };
 static struct child_process pager_process = CHILD_PROCESS_INIT;
 
-static void wait_for_pager(void)
+static void flush_pager(void)
 {
 	fflush(stdout);
 	fflush(stderr);
 	/* signal EOF to pager */
 	close(1);
 	close(2);
+}
+
+static void wait_for_pager(void)
+{
+	flush_pager();
 	finish_command(&pager_process);
 }
 
 static void wait_for_pager_signal(int signo)
 {
-	wait_for_pager();
+	flush_pager();
+	finish_command_in_signal(&pager_process);
 	sigchain_pop(signo);
 	raise(signo);
 }
diff --git a/run-command.c b/run-command.c
index 3277cf797ed4..a8cdc8f32944 100644
--- a/run-command.c
+++ b/run-command.c
@@ -18,26 +18,27 @@ struct child_to_clean {
 static struct child_to_clean *children_to_clean;
 static int installed_child_cleanup_handler;
 
-static void cleanup_children(int sig)
+static void cleanup_children(int sig, int in_signal)
 {
 	while (children_to_clean) {
 		struct child_to_clean *p = children_to_clean;
 		children_to_clean = p->next;
 		kill(p->pid, sig);
-		free(p);
+		if (!in_signal)
+			free(p);
 	}
 }
 
 static void cleanup_children_on_signal(int sig)
 {
-	cleanup_children(sig);
+	cleanup_children(sig, 1);
 	sigchain_pop(sig);
 	raise(sig);
 }
 
 static void cleanup_children_on_exit(void)
 {
-	cleanup_children(SIGTERM);
+	cleanup_children(SIGTERM, 0);
 }
 
 static void mark_child_for_cleanup(pid_t pid)
@@ -220,7 +221,7 @@ static inline void set_cloexec(int fd)
 		fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
 }
 
-static int wait_or_whine(pid_t pid, const char *argv0)
+static int wait_or_whine(pid_t pid, const char *argv0, int in_signal)
 {
 	int status, code = -1;
 	pid_t waiting;
@@ -231,13 +232,17 @@ static int wait_or_whine(pid_t pid, const char *argv0)
 
 	if (waiting < 0) {
 		failed_errno = errno;
-		error("waitpid for %s failed: %s", argv0, strerror(errno));
+		if (!in_signal)
+			error("waitpid for %s failed: %s", argv0,
+			      strerror(errno));
 	} else if (waiting != pid) {
-		error("waitpid is confused (%s)", argv0);
+		if (!in_signal)
+			error("waitpid is confused (%s)", argv0);
 	} else if (WIFSIGNALED(status)) {
 		code = WTERMSIG(status);
 		if (code != SIGINT && code != SIGQUIT)
-			error("%s died of signal %d", argv0, code);
+			if (!in_signal)
+				error("%s died of signal %d", argv0, code);
 		/*
 		 * This return value is chosen so that code & 0xff
 		 * mimics the exit code that a POSIX shell would report for
@@ -254,10 +259,12 @@ static int wait_or_whine(pid_t pid, const char *argv0)
 			failed_errno = ENOENT;
 		}
 	} else {
-		error("waitpid is confused (%s)", argv0);
+		if (!in_signal)
+			error("waitpid is confused (%s)", argv0);
 	}
 
-	clear_child_for_cleanup(pid);
+	if (!in_signal)
+		clear_child_for_cleanup(pid);
 
 	errno = failed_errno;
 	return code;
@@ -437,7 +444,7 @@ fail_pipe:
 		 * At this point we know that fork() succeeded, but execvp()
 		 * failed. Errors have been reported to our stderr.
 		 */
-		wait_or_whine(cmd->pid, cmd->argv[0]);
+		wait_or_whine(cmd->pid, cmd->argv[0], 0);
 		failed_errno = errno;
 		cmd->pid = -1;
 	}
@@ -536,12 +543,18 @@ fail_pipe:
 
 int finish_command(struct child_process *cmd)
 {
-	int ret = wait_or_whine(cmd->pid, cmd->argv[0]);
+	int ret = wait_or_whine(cmd->pid, cmd->argv[0], 0);
 	argv_array_clear(&cmd->args);
 	argv_array_clear(&cmd->env_array);
 	return ret;
 }
 
+int finish_command_in_signal(struct child_process *cmd)
+{
+	return wait_or_whine(cmd->pid, cmd->argv[0], 1);
+}
+
+
 int run_command(struct child_process *cmd)
 {
 	int code;
@@ -772,7 +785,7 @@ error:
 int finish_async(struct async *async)
 {
 #ifdef NO_PTHREADS
-	return wait_or_whine(async->pid, "child process");
+	return wait_or_whine(async->pid, "child process", 0);
 #else
 	void *ret = (void *)(intptr_t)(-1);
 
diff --git a/run-command.h b/run-command.h
index 5b4425a3cbe1..275d35c442ac 100644
--- a/run-command.h
+++ b/run-command.h
@@ -50,6 +50,7 @@ void child_process_init(struct child_process *);
 
 int start_command(struct child_process *);
 int finish_command(struct child_process *);
+int finish_command_in_signal(struct child_process *);
 int run_command(struct child_process *);
 
 /*

             reply	other threads:[~2015-09-03 11:01 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-03 11:00 Takashi Iwai [this message]
2015-09-03 18:12 ` glibc mutex deadlock in signal handler Junio C Hamano
2015-09-03 19:34   ` Takashi Iwai
2015-09-03 20:55     ` Andreas Schwab
2015-09-04  5:52       ` Takashi Iwai
2015-09-04  9:23         ` Jeff King
2015-09-04  9:35           ` Takashi Iwai
2015-09-04 13:04             ` Jeff King
2015-09-04 13:40               ` Takashi Iwai
2015-09-04 21:56           ` Junio C Hamano
2015-09-05  8:59             ` Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=s5hfv2vn4wq.wl-tiwai@suse.de \
    --to=tiwai@suse.de \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).