From: Kevin Willford <kewillf@microsoft.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, peff@peff.net, Kevin Willford <kewillf@microsoft.com>
Subject: [PATCH 3/3] merge-recursive: change current file dir string_lists to hashmap
Date: Mon, 28 Aug 2017 14:28:29 -0600 [thread overview]
Message-ID: <20170828202829.3056-4-kewillf@microsoft.com> (raw)
In-Reply-To: <20170828202829.3056-1-kewillf@microsoft.com>
The code was using two string_lists, one for the directories and
one for the files. The code never checks the lists independently
so we should be able to only use one list. The string_list also
is a O(log n) for lookup and insertion. Switching this to use a
hashmap will give O(1) which will save some time when there are
millions of paths that will be checked.
Signed-off-by: Kevin Willford <kewillf@microsoft.com>
---
merge-recursive.c | 48 +++++++++++++++++++++++++++++++++++++-----------
merge-recursive.h | 3 +--
2 files changed, 38 insertions(+), 13 deletions(-)
diff --git a/merge-recursive.c b/merge-recursive.c
index d47f40ea81..ef4fe5f09f 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -24,6 +24,26 @@
#include "dir.h"
#include "submodule.h"
+struct path_hashmap_entry {
+ struct hashmap_entry;
+ char path[FLEX_ARRAY];
+};
+
+static unsigned int (*pathhash)(const char *path);
+static int (*pathcmp)(const char *a, const char *b);
+
+static int path_hashmap_cmp(const void *cmp_data,
+ const void *entry,
+ const void *entry_or_key,
+ const void *keydata)
+{
+ const struct path_hashmap_entry *a = entry;
+ const struct path_hashmap_entry *b = entry_or_key;
+ const char *key = keydata;
+
+ return pathcmp(a->path, key ? key : b->path);
+}
+
static void flush_output(struct merge_options *o)
{
if (o->buffer_output < 2 && o->obuf.len) {
@@ -314,15 +334,15 @@ static int save_files_dirs(const unsigned char *sha1,
struct strbuf *base, const char *path,
unsigned int mode, int stage, void *context)
{
+ struct path_hashmap_entry *entry;
int baselen = base->len;
struct merge_options *o = context;
strbuf_addstr(base, path);
- if (S_ISDIR(mode))
- string_list_insert(&o->current_directory_set, base->buf);
- else
- string_list_insert(&o->current_file_set, base->buf);
+ FLEX_ALLOC_MEM(entry, path, base->buf, base->len);
+ hashmap_entry_init(entry, pathhash(entry->path));
+ hashmap_add(&o->current_file_dir_set, entry);
strbuf_setlen(base, baselen);
return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
@@ -642,6 +662,8 @@ static void add_flattened_path(struct strbuf *out, const char *s)
static char *unique_path(struct merge_options *o, const char *path, const char *branch)
{
+ struct path_hashmap_entry dummy;
+ struct path_hashmap_entry *entry;
struct strbuf newpath = STRBUF_INIT;
int suffix = 0;
size_t base_len;
@@ -650,14 +672,17 @@ static char *unique_path(struct merge_options *o, const char *path, const char *
add_flattened_path(&newpath, branch);
base_len = newpath.len;
- while (string_list_has_string(&o->current_file_set, newpath.buf) ||
- string_list_has_string(&o->current_directory_set, newpath.buf) ||
+ hashmap_entry_init(&dummy, pathhash(newpath.buf));
+ while (hashmap_get(&o->current_file_dir_set, &dummy, newpath.buf) ||
(!o->call_depth && file_exists(newpath.buf))) {
strbuf_setlen(&newpath, base_len);
strbuf_addf(&newpath, "_%d", suffix++);
+ hashmap_entry_init(&dummy, pathhash(newpath.buf));
}
- string_list_insert(&o->current_file_set, newpath.buf);
+ FLEX_ALLOC_MEM(entry, path, newpath.buf, newpath.len);
+ hashmap_entry_init(entry, pathhash(entry->path));
+ hashmap_add(&o->current_file_dir_set, entry);
return strbuf_detach(&newpath, NULL);
}
@@ -1941,8 +1966,7 @@ int merge_trees(struct merge_options *o,
if (unmerged_cache()) {
struct string_list *entries, *re_head, *re_merge;
int i;
- string_list_clear(&o->current_file_set, 1);
- string_list_clear(&o->current_directory_set, 1);
+ hashmap_init(&o->current_file_dir_set, path_hashmap_cmp, NULL, 512);
get_files_dirs(o, head);
get_files_dirs(o, merge);
@@ -1978,6 +2002,8 @@ int merge_trees(struct merge_options *o,
string_list_clear(re_head, 0);
string_list_clear(entries, 1);
+ hashmap_free(&o->current_file_dir_set, 1);
+
free(re_merge);
free(re_head);
free(entries);
@@ -2179,8 +2205,8 @@ void init_merge_options(struct merge_options *o)
if (o->verbosity >= 5)
o->buffer_output = 0;
strbuf_init(&o->obuf, 0);
- string_list_init(&o->current_file_set, 1);
- string_list_init(&o->current_directory_set, 1);
+ pathhash = ignore_case ? strihash : strhash;
+ pathcmp = ignore_case ? strcasecmp : strcmp;
string_list_init(&o->df_conflict_file_set, 1);
}
diff --git a/merge-recursive.h b/merge-recursive.h
index 735343b413..80d69d1401 100644
--- a/merge-recursive.h
+++ b/merge-recursive.h
@@ -25,8 +25,7 @@ struct merge_options {
int show_rename_progress;
int call_depth;
struct strbuf obuf;
- struct string_list current_file_set;
- struct string_list current_directory_set;
+ struct hashmap current_file_dir_set;
struct string_list df_conflict_file_set;
};
--
2.14.1.329.g6edf0add19
next prev parent reply other threads:[~2017-08-28 20:29 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-28 20:28 [PATCH 0/3] merge-recursive: replace string_list with hashmap Kevin Willford
2017-08-28 20:28 ` [PATCH 1/3] merge-recursive: fix memory leak Kevin Willford
2017-08-28 22:42 ` Ben Peart
2017-08-29 8:12 ` Jeff King
2017-08-28 20:28 ` [PATCH 2/3] merge-recursive: remove return value from get_files_dirs Kevin Willford
2017-08-28 22:45 ` Ben Peart
2017-08-29 8:19 ` Jeff King
2017-08-29 8:17 ` Jeff King
2017-08-29 15:58 ` Kevin Willford
2017-08-29 16:50 ` Jeff King
2017-08-31 18:12 ` Stefan Beller
2017-08-28 20:28 ` Kevin Willford [this message]
2017-08-28 23:06 ` [PATCH 3/3] merge-recursive: change current file dir string_lists to hashmap Ben Peart
2017-08-29 8:41 ` Jeff King
2017-09-06 3:35 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170828202829.3056-4-kewillf@microsoft.com \
--to=kewillf@microsoft.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).