git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: git@jeffhostetler.com
To: git@vger.kernel.org
Cc: gitster@pobox.com, peff@peff.net,
	Jeff Hostetler <jeffhost@microsoft.com>
Subject: [PATCH v1 03/25] structured-logging: add structured logging framework
Date: Fri, 13 Jul 2018 16:55:59 +0000	[thread overview]
Message-ID: <20180713165621.52017-4-git@jeffhostetler.com> (raw)
In-Reply-To: <20180713165621.52017-1-git@jeffhostetler.com>

From: Jeff Hostetler <jeffhost@microsoft.com>

Teach git to optionally generate structured logging data in JSON using
the json-writer API.  "cmd_start" and "cmd_end" events are generated.

Structured logging is only available when git is built with
STRUCTURED_LOGGING=1.

Structured logging is only enabled when the config setting "slog.path"
is set to an absolute pathname.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 Documentation/config.txt |   8 ++
 compat/mingw.h           |   7 +
 config.c                 |   3 +
 git-compat-util.h        |   9 ++
 git.c                    |   8 +-
 structured-logging.c     | 366 +++++++++++++++++++++++++++++++++++++++++++++++
 structured-logging.h     |  82 +++++++++++
 usage.c                  |   4 +
 8 files changed, 486 insertions(+), 1 deletion(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index ab641bf..c79f2bf 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3168,6 +3168,14 @@ showbranch.default::
 	The default set of branches for linkgit:git-show-branch[1].
 	See linkgit:git-show-branch[1].
 
+slog.path::
+	(EXPERIMENTAL) Enable structured logging to a file.  This must be
+	an absolute path.  (Git must be compiled with STRUCTURED_LOGGING=1.)
+
+slog.pretty::
+	(EXPERIMENTAL) Pretty-print structured log data when true.
+	(Git must be compiled with STRUCTURED_LOGGING=1.)
+
 splitIndex.maxPercentChange::
 	When the split index feature is used, this specifies the
 	percent of entries the split index can contain compared to the
diff --git a/compat/mingw.h b/compat/mingw.h
index 571019d..d8d8cd3 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -144,8 +144,15 @@ static inline int fcntl(int fd, int cmd, ...)
 	errno = EINVAL;
 	return -1;
 }
+
 /* bash cannot reliably detect negative return codes as failure */
+#if defined(STRUCTURED_LOGGING)
+#include "structured-logging.h"
+#define exit(code) exit(strlog_exit_code((code) & 0xff))
+#else
 #define exit(code) exit((code) & 0xff)
+#endif
+
 #define sigemptyset(x) (void)0
 static inline int sigaddset(sigset_t *set, int signum)
 { return 0; }
diff --git a/config.c b/config.c
index fbbf0f8..b27b024 100644
--- a/config.c
+++ b/config.c
@@ -1476,6 +1476,9 @@ int git_default_config(const char *var, const char *value, void *dummy)
 		return 0;
 	}
 
+	if (starts_with(var, "slog."))
+		return slog_default_config(var, value);
+
 	/* Add other config variables here and to Documentation/config.txt. */
 	return 0;
 }
diff --git a/git-compat-util.h b/git-compat-util.h
index 9a64998..f5352fd 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -1239,4 +1239,13 @@ extern void unleak_memory(const void *ptr, size_t len);
 #define UNLEAK(var) do {} while (0)
 #endif
 
+#include "structured-logging.h"
+#if defined(STRUCTURED_LOGGING) && !defined(exit)
+/*
+ * Intercept all calls to exit() so that exit-code can be included
+ * in the "cmd_exit" message written by the at-exit routine.
+ */
+#define exit(code) exit(slog_exit_code(code))
+#endif
+
 #endif
diff --git a/git.c b/git.c
index c2f48d5..024a40d 100644
--- a/git.c
+++ b/git.c
@@ -413,6 +413,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 		setup_work_tree();
 
 	trace_argv_printf(argv, "trace: built-in: git");
+	slog_set_command_name(p->cmd);
 
 	status = p->fn(argc, argv, prefix);
 	if (status)
@@ -700,7 +701,7 @@ static int run_argv(int *argcp, const char ***argv)
 	return done_alias;
 }
 
-int cmd_main(int argc, const char **argv)
+static int real_cmd_main(int argc, const char **argv)
 {
 	const char *cmd;
 	int done_help = 0;
@@ -779,3 +780,8 @@ int cmd_main(int argc, const char **argv)
 
 	return 1;
 }
+
+int cmd_main(int argc, const char **argv)
+{
+	return slog_wrap_main(real_cmd_main, argc, argv);
+}
diff --git a/structured-logging.c b/structured-logging.c
index 702fd84..afa2224 100644
--- a/structured-logging.c
+++ b/structured-logging.c
@@ -1,3 +1,10 @@
+#include "cache.h"
+#include "config.h"
+#include "version.h"
+#include "json-writer.h"
+#include "sigchain.h"
+#include "argv-array.h"
+
 #if !defined(STRUCTURED_LOGGING)
 /*
  * Structured logging is not available.
@@ -6,4 +13,363 @@
 
 #else
 
+#define SLOG_VERSION 0
+
+static uint64_t my__start_time;
+static uint64_t my__exit_time;
+static int my__is_config_loaded;
+static int my__is_enabled;
+static int my__is_pretty;
+static int my__signal;
+static int my__exit_code;
+static int my__pid;
+static int my__wrote_start_event;
+static int my__log_fd = -1;
+
+static char *my__log_path;
+static char *my__command_name;
+static char *my__sub_command_name;
+
+static struct argv_array my__argv = ARGV_ARRAY_INIT;
+static struct json_writer my__errors = JSON_WRITER_INIT;
+
+/*
+ * Write a single event to the structured log file.
+ */
+static void emit_event(struct json_writer *jw, const char *event_name)
+{
+	if (my__log_fd == -1) {
+		my__log_fd = open(my__log_path,
+				  O_WRONLY | O_APPEND | O_CREAT,
+				  0644);
+		if (my__log_fd == -1) {
+			warning("slog: could not open '%s' for logging: %s",
+				my__log_path, strerror(errno));
+			my__is_enabled = 0;
+			return;
+		}
+	}
+
+	/*
+	 * A properly authored JSON string does not have a final NL
+	 * (even when pretty-printing is enabled).  Structured logging
+	 * output should look like a series of terminated forms one
+	 * per line.  Temporarily append a NL to the buffer so that
+	 * the disk write happens atomically.
+	 */
+	strbuf_addch(&jw->json, '\n');
+	if (write(my__log_fd, jw->json.buf, jw->json.len) != jw->json.len)
+		warning("slog: could not write event '%s': %s",
+			event_name, strerror(errno));
+
+	strbuf_setlen(&jw->json, jw->json.len - 1);
+}
+
+static void emit_start_event(void)
+{
+	struct json_writer jw = JSON_WRITER_INIT;
+
+	/* build "cmd_start" event message */
+	jw_object_begin(&jw, my__is_pretty);
+	{
+		jw_object_string(&jw, "event", "cmd_start");
+		jw_object_intmax(&jw, "clock_us", (intmax_t)my__start_time);
+		jw_object_intmax(&jw, "pid", (intmax_t)my__pid);
+
+		if (my__command_name && *my__command_name)
+			jw_object_string(&jw, "command", my__command_name);
+		if (my__sub_command_name && *my__sub_command_name)
+			jw_object_string(&jw, "sub_command", my__sub_command_name);
+
+		jw_object_inline_begin_array(&jw, "argv");
+		{
+			int k;
+			for (k = 0; k < my__argv.argc; k++)
+				jw_array_string(&jw, my__argv.argv[k]);
+		}
+		jw_end(&jw);
+	}
+	jw_end(&jw);
+
+	emit_event(&jw, "cmd_start");
+	jw_release(&jw);
+
+	my__wrote_start_event = 1;
+}
+
+static void emit_exit_event(void)
+{
+	struct json_writer jw = JSON_WRITER_INIT;
+	uint64_t atexit_time = getnanotime() / 1000;
+
+	/* close unterminated forms */
+	if (my__errors.json.len)
+		jw_end(&my__errors);
+
+	/* build "cmd_exit" event message */
+	jw_object_begin(&jw, my__is_pretty);
+	{
+		jw_object_string(&jw, "event", "cmd_exit");
+		jw_object_intmax(&jw, "clock_us", (intmax_t)atexit_time);
+		jw_object_intmax(&jw, "pid", (intmax_t)my__pid);
+
+		if (my__command_name && *my__command_name)
+			jw_object_string(&jw, "command", my__command_name);
+		if (my__sub_command_name && *my__sub_command_name)
+			jw_object_string(&jw, "sub_command", my__sub_command_name);
+
+		jw_object_inline_begin_array(&jw, "argv");
+		{
+			int k;
+			for (k = 0; k < my__argv.argc; k++)
+				jw_array_string(&jw, my__argv.argv[k]);
+		}
+		jw_end(&jw);
+
+		jw_object_inline_begin_object(&jw, "result");
+		{
+			jw_object_intmax(&jw, "exit_code", my__exit_code);
+			if (my__errors.json.len)
+				jw_object_sub_jw(&jw, "errors", &my__errors);
+
+			if (my__signal)
+				jw_object_intmax(&jw, "signal", my__signal);
+
+			if (my__exit_time > 0)
+				jw_object_intmax(&jw, "elapsed_core_us",
+						 my__exit_time - my__start_time);
+
+			jw_object_intmax(&jw, "elapsed_total_us",
+					 atexit_time - my__start_time);
+		}
+		jw_end(&jw);
+
+		jw_object_inline_begin_object(&jw, "version");
+		{
+			jw_object_string(&jw, "git", git_version_string);
+			jw_object_intmax(&jw, "slog", SLOG_VERSION);
+		}
+		jw_end(&jw);
+	}
+	jw_end(&jw);
+
+	emit_event(&jw, "cmd_exit");
+	jw_release(&jw);
+}
+
+static int cfg_path(const char *key, const char *value)
+{
+	if (is_absolute_path(value)) {
+		my__log_path = xstrdup(value);
+		my__is_enabled = 1;
+	} else {
+		warning("'%s' must be an absolute path: '%s'",
+			key, value);
+	}
+
+	return 0;
+}
+
+static int cfg_pretty(const char *key, const char *value)
+{
+	my__is_pretty = git_config_bool(key, value);
+	return 0;
+}
+
+int slog_default_config(const char *key, const char *value)
+{
+	const char *sub;
+
+	/*
+	 * git_default_config() calls slog_default_config() with "slog.*"
+	 * k/v pairs.  git_default_config() MAY or MAY NOT be called when
+	 * cmd_<command>() calls git_config().
+	 *
+	 * Remember if we've ever been called.
+	 */
+	my__is_config_loaded = 1;
+
+	if (skip_prefix(key, "slog.", &sub)) {
+		if (!strcmp(sub, "path"))
+			return cfg_path(key, value);
+		if (!strcmp(sub, "pretty"))
+			return cfg_pretty(key, value);
+	}
+
+	return 0;
+}
+
+static int lazy_load_config_cb(const char *key, const char * value, void *data)
+{
+	return slog_default_config(key, value);
+}
+
+/*
+ * If cmd_<command>() did not cause slog_default_config() to be called
+ * during git_config(), we try to lookup our config settings the first
+ * time we actually need them.
+ *
+ * (We do this rather than using read_early_config() at initialization
+ * because we want any "-c key=value" arguments to be included.)
+ */
+static inline void lazy_load_config(void)
+{
+	if (my__is_config_loaded)
+		return;
+	my__is_config_loaded = 1;
+
+	read_early_config(lazy_load_config_cb, NULL);
+}
+
+int slog_is_enabled(void)
+{
+	lazy_load_config();
+
+	return my__is_enabled;
+}
+
+static void do_final_steps(int in_signal)
+{
+	static int completed = 0;
+
+	if (completed)
+		return;
+	completed = 1;
+
+	if (slog_is_enabled()) {
+		if (!my__wrote_start_event)
+			emit_start_event();
+		emit_exit_event();
+		my__is_enabled = 0;
+	}
+
+	if (my__log_fd != -1)
+		close(my__log_fd);
+	free(my__log_path);
+	free(my__command_name);
+	free(my__sub_command_name);
+	argv_array_clear(&my__argv);
+	jw_release(&my__errors);
+}
+
+static void slog_atexit(void)
+{
+	do_final_steps(0);
+}
+
+static void slog_signal(int signo)
+{
+	my__signal = signo;
+
+	do_final_steps(1);
+
+	sigchain_pop(signo);
+	raise(signo);
+}
+
+static void intern_argv(int argc, const char **argv)
+{
+	int k;
+
+	for (k = 0; k < argc; k++)
+		argv_array_push(&my__argv, argv[k]);
+}
+
+/*
+ * Collect basic startup information before cmd_main() has a chance
+ * to alter the command line and before we have seen the config (to
+ * know if logging is enabled).  And since the config isn't loaded
+ * until cmd_main() dispatches to cmd_<command>(), we have to wait
+ * and lazy-write the "cmd_start" event.
+ *
+ * This also implies that commands such as "help" and "version" that
+ * don't need load the config won't generate any log data.
+ */
+static void initialize(int argc, const char **argv)
+{
+	my__start_time = getnanotime() / 1000;
+	my__pid = getpid();
+
+	intern_argv(argc, argv);
+
+	atexit(slog_atexit);
+
+	/*
+	 * Put up backstop signal handler to ensure we get the "cmd_exit"
+	 * event.  This is primarily for when the pager throws SIGPIPE
+	 * when the user quits.
+	 */
+	sigchain_push(SIGPIPE, slog_signal);
+}
+
+int slog_wrap_main(slog_fn_main_t fn_main, int argc, const char **argv)
+{
+	int result;
+
+	initialize(argc, argv);
+	result = fn_main(argc, argv);
+	slog_exit_code(result);
+
+	return result;
+}
+
+void slog_set_command_name(const char *command_name)
+{
+	/*
+	 * Capture the command name even if logging is not enabled
+	 * because we don't know if the config has been loaded yet by
+	 * the cmd_<command>() and/or it may be too early to force a
+	 * lazy load.
+	 */
+	if (my__command_name)
+		free(my__command_name);
+	my__command_name = xstrdup(command_name);
+}
+
+void slog_set_sub_command_name(const char *sub_command_name)
+{
+	/*
+	 * Capture the sub-command name even if logging is not enabled
+	 * because we don't know if the config has been loaded yet by
+	 * the cmd_<command>() and/or it may be too early to force a
+	 * lazy load.
+	 */
+	if (my__sub_command_name)
+		free(my__sub_command_name);
+	my__sub_command_name = xstrdup(sub_command_name);
+}
+
+int slog_is_pretty(void)
+{
+	return my__is_pretty;
+}
+
+int slog_exit_code(int exit_code)
+{
+	my__exit_time = getnanotime() / 1000;
+	my__exit_code = exit_code;
+
+	return exit_code;
+}
+
+void slog_error_message(const char *prefix, const char *fmt, va_list params)
+{
+	struct strbuf em = STRBUF_INIT;
+	va_list copy_params;
+
+	if (prefix && *prefix)
+		strbuf_addstr(&em, prefix);
+
+	va_copy(copy_params, params);
+	strbuf_vaddf(&em, fmt, copy_params);
+	va_end(copy_params);
+
+	if (!my__errors.json.len)
+		jw_array_begin(&my__errors, my__is_pretty);
+	jw_array_string(&my__errors, em.buf);
+	/* leave my__errors array unterminated for now */
+
+	strbuf_release(&em);
+}
+
 #endif
diff --git a/structured-logging.h b/structured-logging.h
index c9e8c1d..61e98e6 100644
--- a/structured-logging.h
+++ b/structured-logging.h
@@ -1,13 +1,95 @@
 #ifndef STRUCTURED_LOGGING_H
 #define STRUCTURED_LOGGING_H
 
+typedef int (*slog_fn_main_t)(int, const char **);
+
 #if !defined(STRUCTURED_LOGGING)
 /*
  * Structured logging is not available.
  * Stub out all API routines.
  */
+#define slog_is_available() (0)
+#define slog_default_config(k, v) (0)
+#define slog_wrap_main(real_cmd_main, argc, argv) ((real_cmd_main)((argc), (argv)))
+#define slog_set_command_name(n) do { } while (0)
+#define slog_set_sub_command_name(n) do { } while (0)
+#define slog_is_enabled() (0)
+#define slog_is_pretty() (0)
+#define slog_exit_code(exit_code) (exit_code)
+#define slog_error_message(prefix, fmt, params) do { } while (0)
 
 #else
 
+/*
+ * Is structured logging available (compiled-in)?
+ */
+#define slog_is_available() (1)
+
+/*
+ * Process "slog.*" config settings.
+ */
+int slog_default_config(const char *key, const char *value);
+
+/*
+ * Wrapper for the "real" cmd_main().  Initialize structured logging if
+ * enabled, run the given real_cmd_main(), and capture the return value.
+ *
+ * Note:  common-main.c is shared by many top-level commands.
+ * common-main.c:main() does common process setup before calling
+ * the version of cmd_main() found in the executable.  Some commands
+ * SHOULD NOT do logging (such as t/helper/test-tool).  Ones that do
+ * need some common initialization/teardown.
+ *
+ * Use this function for any top-level command that should do logging.
+ *
+ * Usage:
+ *
+ * static int real_cmd_main(int argc, const char **argv)
+ * {
+ *     ....the actual code for the command....
+ * }
+ *
+ * int cmd_main(int argc, const char **argv)
+ * {
+ *     return slog_wrap_main(real_cmd_main, argc, argv);
+ * }
+ * 
+ *
+ * See git.c for an example.
+ */ 
+int slog_wrap_main(slog_fn_main_t real_cmd_main, int argc, const char **argv);
+
+/*
+ * Record a canonical command name and optional sub-command name for the
+ * current process.  For example, "checkout" and "switch-branch".
+ */
+void slog_set_command_name(const char *name);
+void slog_set_sub_command_name(const char *name);
+
+/*
+ * Is structured logging enabled?
+ */
+int slog_is_enabled(void);
+
+/*
+ * Is JSON pretty-printing enabled?
+ */
+int slog_is_pretty(void);
+
+/*
+ * Register the process exit code with the structured logging layer
+ * and return it.  This value will appear in the final "cmd_exit" event.
+ *
+ * Use this to wrap all calls to exit().
+ * Use this before returning in main().
+ */
+int slog_exit_code(int exit_code);
+
+/*
+ * Append formatted error message to the structured log result.
+ * Messages from this will appear in the final "cmd_exit" event.
+ */
+void slog_error_message(const char *prefix, const char *fmt, va_list params);
+
 #endif /* STRUCTURED_LOGGING */
 #endif /* STRUCTURED_LOGGING_H */
diff --git a/usage.c b/usage.c
index cc80333..5d48f6b 100644
--- a/usage.c
+++ b/usage.c
@@ -27,12 +27,16 @@ static NORETURN void usage_builtin(const char *err, va_list params)
 
 static NORETURN void die_builtin(const char *err, va_list params)
 {
+	slog_error_message("fatal: ", err, params);
+
 	vreportf("fatal: ", err, params);
 	exit(128);
 }
 
 static void error_builtin(const char *err, va_list params)
 {
+	slog_error_message("error: ", err, params);
+
 	vreportf("error: ", err, params);
 }
 
-- 
2.9.3


  parent reply	other threads:[~2018-07-13 16:57 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-13 16:55 [PATCH v1 00/25] RFC: structured logging git
2018-07-13 16:55 ` [PATCH v1 01/25] structured-logging: design document git
2018-07-14  8:34   ` Simon Ruderich
2018-08-03 15:26   ` Ben Peart
2018-08-09 14:30     ` Jeff Hostetler
2018-08-21  4:47   ` Jonathan Nieder
2018-07-13 16:55 ` [PATCH v1 02/25] structured-logging: add STRUCTURED_LOGGING=1 to Makefile git
2018-08-21  4:49   ` Jonathan Nieder
2018-07-13 16:55 ` git [this message]
2018-07-26  9:09   ` [PATCH v1 03/25] structured-logging: add structured logging framework SZEDER Gábor
2018-07-27 12:45     ` Jeff Hostetler
2018-08-21  5:05   ` Jonathan Nieder
2018-07-13 16:56 ` [PATCH v1 04/25] structured-logging: add session-id to log events git
2018-07-13 16:56 ` [PATCH v1 05/25] structured-logging: set sub_command field for branch command git
2018-07-13 16:56 ` [PATCH v1 06/25] structured-logging: set sub_command field for checkout command git
2018-07-13 16:56 ` [PATCH v1 07/25] structured-logging: t0420 basic tests git
2018-07-13 16:56 ` [PATCH v1 08/25] structured-logging: add detail-event facility git
2018-07-13 16:56 ` [PATCH v1 09/25] structured-logging: add detail-event for lazy_init_name_hash git
2018-07-13 16:56 ` [PATCH v1 10/25] structured-logging: add timer facility git
2018-07-13 16:56 ` [PATCH v1 11/25] structured-logging: add timer around do_read_index git
2018-07-13 16:56 ` [PATCH v1 12/25] structured-logging: add timer around do_write_index git
2018-07-13 16:56 ` [PATCH v1 13/25] structured-logging: add timer around wt-status functions git
2018-07-13 16:56 ` [PATCH v1 14/25] structured-logging: add timer around preload_index git
2018-07-13 16:56 ` [PATCH v1 15/25] structured-logging: t0420 tests for timers git
2018-07-13 16:56 ` [PATCH v1 16/25] structured-logging: add aux-data facility git
2018-07-13 16:56 ` [PATCH v1 17/25] structured-logging: add aux-data for index size git
2018-07-13 16:56 ` [PATCH v1 18/25] structured-logging: add aux-data for size of sparse-checkout file git
2018-07-13 16:56 ` [PATCH v1 19/25] structured-logging: t0420 tests for aux-data git
2018-07-13 16:56 ` [PATCH v1 20/25] structured-logging: add structured logging to remote-curl git
2018-07-13 16:56 ` [PATCH v1 21/25] structured-logging: add detail-events for child processes git
2018-07-13 16:56 ` [PATCH v1 22/25] structured-logging: add child process classification git
2018-07-13 16:56 ` [PATCH v1 23/25] structured-logging: t0420 tests for child process detail events git
2018-07-13 16:56 ` [PATCH v1 24/25] structured-logging: t0420 tests for interacitve child_summary git
2018-07-13 16:56 ` [PATCH v1 25/25] structured-logging: add config data facility git
2018-07-13 18:51 ` [PATCH v1 00/25] RFC: structured logging David Lang
2018-07-16 13:29   ` Jeff Hostetler
2018-08-28 17:38 ` Junio C Hamano
2018-08-28 18:47   ` Jeff Hostetler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180713165621.52017-4-git@jeffhostetler.com \
    --to=git@jeffhostetler.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jeffhost@microsoft.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).