From: git@jeffhostetler.com
To: git@vger.kernel.org
Cc: gitster@pobox.com, peff@peff.net,
Jeff Hostetler <jeffhost@microsoft.com>
Subject: [PATCH v1 03/25] structured-logging: add structured logging framework
Date: Fri, 13 Jul 2018 16:55:59 +0000 [thread overview]
Message-ID: <20180713165621.52017-4-git@jeffhostetler.com> (raw)
In-Reply-To: <20180713165621.52017-1-git@jeffhostetler.com>
From: Jeff Hostetler <jeffhost@microsoft.com>
Teach git to optionally generate structured logging data in JSON using
the json-writer API. "cmd_start" and "cmd_end" events are generated.
Structured logging is only available when git is built with
STRUCTURED_LOGGING=1.
Structured logging is only enabled when the config setting "slog.path"
is set to an absolute pathname.
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
Documentation/config.txt | 8 ++
compat/mingw.h | 7 +
config.c | 3 +
git-compat-util.h | 9 ++
git.c | 8 +-
structured-logging.c | 366 +++++++++++++++++++++++++++++++++++++++++++++++
structured-logging.h | 82 +++++++++++
usage.c | 4 +
8 files changed, 486 insertions(+), 1 deletion(-)
diff --git a/Documentation/config.txt b/Documentation/config.txt
index ab641bf..c79f2bf 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3168,6 +3168,14 @@ showbranch.default::
The default set of branches for linkgit:git-show-branch[1].
See linkgit:git-show-branch[1].
+slog.path::
+ (EXPERIMENTAL) Enable structured logging to a file. This must be
+ an absolute path. (Git must be compiled with STRUCTURED_LOGGING=1.)
+
+slog.pretty::
+ (EXPERIMENTAL) Pretty-print structured log data when true.
+ (Git must be compiled with STRUCTURED_LOGGING=1.)
+
splitIndex.maxPercentChange::
When the split index feature is used, this specifies the
percent of entries the split index can contain compared to the
diff --git a/compat/mingw.h b/compat/mingw.h
index 571019d..d8d8cd3 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -144,8 +144,15 @@ static inline int fcntl(int fd, int cmd, ...)
errno = EINVAL;
return -1;
}
+
/* bash cannot reliably detect negative return codes as failure */
+#if defined(STRUCTURED_LOGGING)
+#include "structured-logging.h"
+#define exit(code) exit(strlog_exit_code((code) & 0xff))
+#else
#define exit(code) exit((code) & 0xff)
+#endif
+
#define sigemptyset(x) (void)0
static inline int sigaddset(sigset_t *set, int signum)
{ return 0; }
diff --git a/config.c b/config.c
index fbbf0f8..b27b024 100644
--- a/config.c
+++ b/config.c
@@ -1476,6 +1476,9 @@ int git_default_config(const char *var, const char *value, void *dummy)
return 0;
}
+ if (starts_with(var, "slog."))
+ return slog_default_config(var, value);
+
/* Add other config variables here and to Documentation/config.txt. */
return 0;
}
diff --git a/git-compat-util.h b/git-compat-util.h
index 9a64998..f5352fd 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -1239,4 +1239,13 @@ extern void unleak_memory(const void *ptr, size_t len);
#define UNLEAK(var) do {} while (0)
#endif
+#include "structured-logging.h"
+#if defined(STRUCTURED_LOGGING) && !defined(exit)
+/*
+ * Intercept all calls to exit() so that exit-code can be included
+ * in the "cmd_exit" message written by the at-exit routine.
+ */
+#define exit(code) exit(slog_exit_code(code))
+#endif
+
#endif
diff --git a/git.c b/git.c
index c2f48d5..024a40d 100644
--- a/git.c
+++ b/git.c
@@ -413,6 +413,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
setup_work_tree();
trace_argv_printf(argv, "trace: built-in: git");
+ slog_set_command_name(p->cmd);
status = p->fn(argc, argv, prefix);
if (status)
@@ -700,7 +701,7 @@ static int run_argv(int *argcp, const char ***argv)
return done_alias;
}
-int cmd_main(int argc, const char **argv)
+static int real_cmd_main(int argc, const char **argv)
{
const char *cmd;
int done_help = 0;
@@ -779,3 +780,8 @@ int cmd_main(int argc, const char **argv)
return 1;
}
+
+int cmd_main(int argc, const char **argv)
+{
+ return slog_wrap_main(real_cmd_main, argc, argv);
+}
diff --git a/structured-logging.c b/structured-logging.c
index 702fd84..afa2224 100644
--- a/structured-logging.c
+++ b/structured-logging.c
@@ -1,3 +1,10 @@
+#include "cache.h"
+#include "config.h"
+#include "version.h"
+#include "json-writer.h"
+#include "sigchain.h"
+#include "argv-array.h"
+
#if !defined(STRUCTURED_LOGGING)
/*
* Structured logging is not available.
@@ -6,4 +13,363 @@
#else
+#define SLOG_VERSION 0
+
+static uint64_t my__start_time;
+static uint64_t my__exit_time;
+static int my__is_config_loaded;
+static int my__is_enabled;
+static int my__is_pretty;
+static int my__signal;
+static int my__exit_code;
+static int my__pid;
+static int my__wrote_start_event;
+static int my__log_fd = -1;
+
+static char *my__log_path;
+static char *my__command_name;
+static char *my__sub_command_name;
+
+static struct argv_array my__argv = ARGV_ARRAY_INIT;
+static struct json_writer my__errors = JSON_WRITER_INIT;
+
+/*
+ * Write a single event to the structured log file.
+ */
+static void emit_event(struct json_writer *jw, const char *event_name)
+{
+ if (my__log_fd == -1) {
+ my__log_fd = open(my__log_path,
+ O_WRONLY | O_APPEND | O_CREAT,
+ 0644);
+ if (my__log_fd == -1) {
+ warning("slog: could not open '%s' for logging: %s",
+ my__log_path, strerror(errno));
+ my__is_enabled = 0;
+ return;
+ }
+ }
+
+ /*
+ * A properly authored JSON string does not have a final NL
+ * (even when pretty-printing is enabled). Structured logging
+ * output should look like a series of terminated forms one
+ * per line. Temporarily append a NL to the buffer so that
+ * the disk write happens atomically.
+ */
+ strbuf_addch(&jw->json, '\n');
+ if (write(my__log_fd, jw->json.buf, jw->json.len) != jw->json.len)
+ warning("slog: could not write event '%s': %s",
+ event_name, strerror(errno));
+
+ strbuf_setlen(&jw->json, jw->json.len - 1);
+}
+
+static void emit_start_event(void)
+{
+ struct json_writer jw = JSON_WRITER_INIT;
+
+ /* build "cmd_start" event message */
+ jw_object_begin(&jw, my__is_pretty);
+ {
+ jw_object_string(&jw, "event", "cmd_start");
+ jw_object_intmax(&jw, "clock_us", (intmax_t)my__start_time);
+ jw_object_intmax(&jw, "pid", (intmax_t)my__pid);
+
+ if (my__command_name && *my__command_name)
+ jw_object_string(&jw, "command", my__command_name);
+ if (my__sub_command_name && *my__sub_command_name)
+ jw_object_string(&jw, "sub_command", my__sub_command_name);
+
+ jw_object_inline_begin_array(&jw, "argv");
+ {
+ int k;
+ for (k = 0; k < my__argv.argc; k++)
+ jw_array_string(&jw, my__argv.argv[k]);
+ }
+ jw_end(&jw);
+ }
+ jw_end(&jw);
+
+ emit_event(&jw, "cmd_start");
+ jw_release(&jw);
+
+ my__wrote_start_event = 1;
+}
+
+static void emit_exit_event(void)
+{
+ struct json_writer jw = JSON_WRITER_INIT;
+ uint64_t atexit_time = getnanotime() / 1000;
+
+ /* close unterminated forms */
+ if (my__errors.json.len)
+ jw_end(&my__errors);
+
+ /* build "cmd_exit" event message */
+ jw_object_begin(&jw, my__is_pretty);
+ {
+ jw_object_string(&jw, "event", "cmd_exit");
+ jw_object_intmax(&jw, "clock_us", (intmax_t)atexit_time);
+ jw_object_intmax(&jw, "pid", (intmax_t)my__pid);
+
+ if (my__command_name && *my__command_name)
+ jw_object_string(&jw, "command", my__command_name);
+ if (my__sub_command_name && *my__sub_command_name)
+ jw_object_string(&jw, "sub_command", my__sub_command_name);
+
+ jw_object_inline_begin_array(&jw, "argv");
+ {
+ int k;
+ for (k = 0; k < my__argv.argc; k++)
+ jw_array_string(&jw, my__argv.argv[k]);
+ }
+ jw_end(&jw);
+
+ jw_object_inline_begin_object(&jw, "result");
+ {
+ jw_object_intmax(&jw, "exit_code", my__exit_code);
+ if (my__errors.json.len)
+ jw_object_sub_jw(&jw, "errors", &my__errors);
+
+ if (my__signal)
+ jw_object_intmax(&jw, "signal", my__signal);
+
+ if (my__exit_time > 0)
+ jw_object_intmax(&jw, "elapsed_core_us",
+ my__exit_time - my__start_time);
+
+ jw_object_intmax(&jw, "elapsed_total_us",
+ atexit_time - my__start_time);
+ }
+ jw_end(&jw);
+
+ jw_object_inline_begin_object(&jw, "version");
+ {
+ jw_object_string(&jw, "git", git_version_string);
+ jw_object_intmax(&jw, "slog", SLOG_VERSION);
+ }
+ jw_end(&jw);
+ }
+ jw_end(&jw);
+
+ emit_event(&jw, "cmd_exit");
+ jw_release(&jw);
+}
+
+static int cfg_path(const char *key, const char *value)
+{
+ if (is_absolute_path(value)) {
+ my__log_path = xstrdup(value);
+ my__is_enabled = 1;
+ } else {
+ warning("'%s' must be an absolute path: '%s'",
+ key, value);
+ }
+
+ return 0;
+}
+
+static int cfg_pretty(const char *key, const char *value)
+{
+ my__is_pretty = git_config_bool(key, value);
+ return 0;
+}
+
+int slog_default_config(const char *key, const char *value)
+{
+ const char *sub;
+
+ /*
+ * git_default_config() calls slog_default_config() with "slog.*"
+ * k/v pairs. git_default_config() MAY or MAY NOT be called when
+ * cmd_<command>() calls git_config().
+ *
+ * Remember if we've ever been called.
+ */
+ my__is_config_loaded = 1;
+
+ if (skip_prefix(key, "slog.", &sub)) {
+ if (!strcmp(sub, "path"))
+ return cfg_path(key, value);
+ if (!strcmp(sub, "pretty"))
+ return cfg_pretty(key, value);
+ }
+
+ return 0;
+}
+
+static int lazy_load_config_cb(const char *key, const char * value, void *data)
+{
+ return slog_default_config(key, value);
+}
+
+/*
+ * If cmd_<command>() did not cause slog_default_config() to be called
+ * during git_config(), we try to lookup our config settings the first
+ * time we actually need them.
+ *
+ * (We do this rather than using read_early_config() at initialization
+ * because we want any "-c key=value" arguments to be included.)
+ */
+static inline void lazy_load_config(void)
+{
+ if (my__is_config_loaded)
+ return;
+ my__is_config_loaded = 1;
+
+ read_early_config(lazy_load_config_cb, NULL);
+}
+
+int slog_is_enabled(void)
+{
+ lazy_load_config();
+
+ return my__is_enabled;
+}
+
+static void do_final_steps(int in_signal)
+{
+ static int completed = 0;
+
+ if (completed)
+ return;
+ completed = 1;
+
+ if (slog_is_enabled()) {
+ if (!my__wrote_start_event)
+ emit_start_event();
+ emit_exit_event();
+ my__is_enabled = 0;
+ }
+
+ if (my__log_fd != -1)
+ close(my__log_fd);
+ free(my__log_path);
+ free(my__command_name);
+ free(my__sub_command_name);
+ argv_array_clear(&my__argv);
+ jw_release(&my__errors);
+}
+
+static void slog_atexit(void)
+{
+ do_final_steps(0);
+}
+
+static void slog_signal(int signo)
+{
+ my__signal = signo;
+
+ do_final_steps(1);
+
+ sigchain_pop(signo);
+ raise(signo);
+}
+
+static void intern_argv(int argc, const char **argv)
+{
+ int k;
+
+ for (k = 0; k < argc; k++)
+ argv_array_push(&my__argv, argv[k]);
+}
+
+/*
+ * Collect basic startup information before cmd_main() has a chance
+ * to alter the command line and before we have seen the config (to
+ * know if logging is enabled). And since the config isn't loaded
+ * until cmd_main() dispatches to cmd_<command>(), we have to wait
+ * and lazy-write the "cmd_start" event.
+ *
+ * This also implies that commands such as "help" and "version" that
+ * don't need load the config won't generate any log data.
+ */
+static void initialize(int argc, const char **argv)
+{
+ my__start_time = getnanotime() / 1000;
+ my__pid = getpid();
+
+ intern_argv(argc, argv);
+
+ atexit(slog_atexit);
+
+ /*
+ * Put up backstop signal handler to ensure we get the "cmd_exit"
+ * event. This is primarily for when the pager throws SIGPIPE
+ * when the user quits.
+ */
+ sigchain_push(SIGPIPE, slog_signal);
+}
+
+int slog_wrap_main(slog_fn_main_t fn_main, int argc, const char **argv)
+{
+ int result;
+
+ initialize(argc, argv);
+ result = fn_main(argc, argv);
+ slog_exit_code(result);
+
+ return result;
+}
+
+void slog_set_command_name(const char *command_name)
+{
+ /*
+ * Capture the command name even if logging is not enabled
+ * because we don't know if the config has been loaded yet by
+ * the cmd_<command>() and/or it may be too early to force a
+ * lazy load.
+ */
+ if (my__command_name)
+ free(my__command_name);
+ my__command_name = xstrdup(command_name);
+}
+
+void slog_set_sub_command_name(const char *sub_command_name)
+{
+ /*
+ * Capture the sub-command name even if logging is not enabled
+ * because we don't know if the config has been loaded yet by
+ * the cmd_<command>() and/or it may be too early to force a
+ * lazy load.
+ */
+ if (my__sub_command_name)
+ free(my__sub_command_name);
+ my__sub_command_name = xstrdup(sub_command_name);
+}
+
+int slog_is_pretty(void)
+{
+ return my__is_pretty;
+}
+
+int slog_exit_code(int exit_code)
+{
+ my__exit_time = getnanotime() / 1000;
+ my__exit_code = exit_code;
+
+ return exit_code;
+}
+
+void slog_error_message(const char *prefix, const char *fmt, va_list params)
+{
+ struct strbuf em = STRBUF_INIT;
+ va_list copy_params;
+
+ if (prefix && *prefix)
+ strbuf_addstr(&em, prefix);
+
+ va_copy(copy_params, params);
+ strbuf_vaddf(&em, fmt, copy_params);
+ va_end(copy_params);
+
+ if (!my__errors.json.len)
+ jw_array_begin(&my__errors, my__is_pretty);
+ jw_array_string(&my__errors, em.buf);
+ /* leave my__errors array unterminated for now */
+
+ strbuf_release(&em);
+}
+
#endif
diff --git a/structured-logging.h b/structured-logging.h
index c9e8c1d..61e98e6 100644
--- a/structured-logging.h
+++ b/structured-logging.h
@@ -1,13 +1,95 @@
#ifndef STRUCTURED_LOGGING_H
#define STRUCTURED_LOGGING_H
+typedef int (*slog_fn_main_t)(int, const char **);
+
#if !defined(STRUCTURED_LOGGING)
/*
* Structured logging is not available.
* Stub out all API routines.
*/
+#define slog_is_available() (0)
+#define slog_default_config(k, v) (0)
+#define slog_wrap_main(real_cmd_main, argc, argv) ((real_cmd_main)((argc), (argv)))
+#define slog_set_command_name(n) do { } while (0)
+#define slog_set_sub_command_name(n) do { } while (0)
+#define slog_is_enabled() (0)
+#define slog_is_pretty() (0)
+#define slog_exit_code(exit_code) (exit_code)
+#define slog_error_message(prefix, fmt, params) do { } while (0)
#else
+/*
+ * Is structured logging available (compiled-in)?
+ */
+#define slog_is_available() (1)
+
+/*
+ * Process "slog.*" config settings.
+ */
+int slog_default_config(const char *key, const char *value);
+
+/*
+ * Wrapper for the "real" cmd_main(). Initialize structured logging if
+ * enabled, run the given real_cmd_main(), and capture the return value.
+ *
+ * Note: common-main.c is shared by many top-level commands.
+ * common-main.c:main() does common process setup before calling
+ * the version of cmd_main() found in the executable. Some commands
+ * SHOULD NOT do logging (such as t/helper/test-tool). Ones that do
+ * need some common initialization/teardown.
+ *
+ * Use this function for any top-level command that should do logging.
+ *
+ * Usage:
+ *
+ * static int real_cmd_main(int argc, const char **argv)
+ * {
+ * ....the actual code for the command....
+ * }
+ *
+ * int cmd_main(int argc, const char **argv)
+ * {
+ * return slog_wrap_main(real_cmd_main, argc, argv);
+ * }
+ *
+ *
+ * See git.c for an example.
+ */
+int slog_wrap_main(slog_fn_main_t real_cmd_main, int argc, const char **argv);
+
+/*
+ * Record a canonical command name and optional sub-command name for the
+ * current process. For example, "checkout" and "switch-branch".
+ */
+void slog_set_command_name(const char *name);
+void slog_set_sub_command_name(const char *name);
+
+/*
+ * Is structured logging enabled?
+ */
+int slog_is_enabled(void);
+
+/*
+ * Is JSON pretty-printing enabled?
+ */
+int slog_is_pretty(void);
+
+/*
+ * Register the process exit code with the structured logging layer
+ * and return it. This value will appear in the final "cmd_exit" event.
+ *
+ * Use this to wrap all calls to exit().
+ * Use this before returning in main().
+ */
+int slog_exit_code(int exit_code);
+
+/*
+ * Append formatted error message to the structured log result.
+ * Messages from this will appear in the final "cmd_exit" event.
+ */
+void slog_error_message(const char *prefix, const char *fmt, va_list params);
+
#endif /* STRUCTURED_LOGGING */
#endif /* STRUCTURED_LOGGING_H */
diff --git a/usage.c b/usage.c
index cc80333..5d48f6b 100644
--- a/usage.c
+++ b/usage.c
@@ -27,12 +27,16 @@ static NORETURN void usage_builtin(const char *err, va_list params)
static NORETURN void die_builtin(const char *err, va_list params)
{
+ slog_error_message("fatal: ", err, params);
+
vreportf("fatal: ", err, params);
exit(128);
}
static void error_builtin(const char *err, va_list params)
{
+ slog_error_message("error: ", err, params);
+
vreportf("error: ", err, params);
}
--
2.9.3
next prev parent reply other threads:[~2018-07-13 16:57 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-07-13 16:55 [PATCH v1 00/25] RFC: structured logging git
2018-07-13 16:55 ` [PATCH v1 01/25] structured-logging: design document git
2018-07-14 8:34 ` Simon Ruderich
2018-08-03 15:26 ` Ben Peart
2018-08-09 14:30 ` Jeff Hostetler
2018-08-21 4:47 ` Jonathan Nieder
2018-07-13 16:55 ` [PATCH v1 02/25] structured-logging: add STRUCTURED_LOGGING=1 to Makefile git
2018-08-21 4:49 ` Jonathan Nieder
2018-07-13 16:55 ` git [this message]
2018-07-26 9:09 ` [PATCH v1 03/25] structured-logging: add structured logging framework SZEDER Gábor
2018-07-27 12:45 ` Jeff Hostetler
2018-08-21 5:05 ` Jonathan Nieder
2018-07-13 16:56 ` [PATCH v1 04/25] structured-logging: add session-id to log events git
2018-07-13 16:56 ` [PATCH v1 05/25] structured-logging: set sub_command field for branch command git
2018-07-13 16:56 ` [PATCH v1 06/25] structured-logging: set sub_command field for checkout command git
2018-07-13 16:56 ` [PATCH v1 07/25] structured-logging: t0420 basic tests git
2018-07-13 16:56 ` [PATCH v1 08/25] structured-logging: add detail-event facility git
2018-07-13 16:56 ` [PATCH v1 09/25] structured-logging: add detail-event for lazy_init_name_hash git
2018-07-13 16:56 ` [PATCH v1 10/25] structured-logging: add timer facility git
2018-07-13 16:56 ` [PATCH v1 11/25] structured-logging: add timer around do_read_index git
2018-07-13 16:56 ` [PATCH v1 12/25] structured-logging: add timer around do_write_index git
2018-07-13 16:56 ` [PATCH v1 13/25] structured-logging: add timer around wt-status functions git
2018-07-13 16:56 ` [PATCH v1 14/25] structured-logging: add timer around preload_index git
2018-07-13 16:56 ` [PATCH v1 15/25] structured-logging: t0420 tests for timers git
2018-07-13 16:56 ` [PATCH v1 16/25] structured-logging: add aux-data facility git
2018-07-13 16:56 ` [PATCH v1 17/25] structured-logging: add aux-data for index size git
2018-07-13 16:56 ` [PATCH v1 18/25] structured-logging: add aux-data for size of sparse-checkout file git
2018-07-13 16:56 ` [PATCH v1 19/25] structured-logging: t0420 tests for aux-data git
2018-07-13 16:56 ` [PATCH v1 20/25] structured-logging: add structured logging to remote-curl git
2018-07-13 16:56 ` [PATCH v1 21/25] structured-logging: add detail-events for child processes git
2018-07-13 16:56 ` [PATCH v1 22/25] structured-logging: add child process classification git
2018-07-13 16:56 ` [PATCH v1 23/25] structured-logging: t0420 tests for child process detail events git
2018-07-13 16:56 ` [PATCH v1 24/25] structured-logging: t0420 tests for interacitve child_summary git
2018-07-13 16:56 ` [PATCH v1 25/25] structured-logging: add config data facility git
2018-07-13 18:51 ` [PATCH v1 00/25] RFC: structured logging David Lang
2018-07-16 13:29 ` Jeff Hostetler
2018-08-28 17:38 ` Junio C Hamano
2018-08-28 18:47 ` Jeff Hostetler
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180713165621.52017-4-git@jeffhostetler.com \
--to=git@jeffhostetler.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=jeffhost@microsoft.com \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).