From: "René Scharfe" <l.s.r@web.de>
To: Jeff King <peff@peff.net>, Duy Nguyen <pclouds@gmail.com>
Cc: Junio C Hamano <gitster@pobox.com>,
Git List <git@vger.kernel.org>,
Johannes Schindelin <johannes.schindelin@gmx.de>
Subject: Re: [PATCH] fast-export: avoid NULL pointer arithmetic
Date: Sat, 12 May 2018 10:45:16 +0200 [thread overview]
Message-ID: <80397e16-8667-e0cd-4049-aad453d35e6f@web.de> (raw)
In-Reply-To: <20180511174237.GA19670@sigill.intra.peff.net>
Am 11.05.2018 um 19:42 schrieb Jeff King:
> On Fri, May 11, 2018 at 03:34:19PM +0200, Duy Nguyen wrote:
>
>> On Fri, May 11, 2018 at 03:11:46PM +0200, Duy Nguyen wrote:
>>> Back to fast-export, can we just allocate a new int on heap and point
>>> it there? Allocating small pieces becomes quite cheap and fast with
>>> mem-pool.h and we can avoid this storing integer in pointer business.
>>
>> Something like this seems to work, but we use 4-ish more bytes per
>> object, or 100MB overhead on a repo with 25M objects. I think it's a
>> reasonable trade off.
>
> I'm not sure I agree. 4 bytes per object certainly isn't the end of the
> world, but what was the problem we were solving in the first place? Just
> that we weren't comfortable with the round-trip from uintptr_t to void
> and back? Is this actually a problem on real platforms? If not, it seems
> silly to incur a run-time cost.
Storing integer values in pointers is a trick that seems to have worked
so far for fast-export. A portable way to avoid that trick without
requiring more memory would be to use a union.
Or we could roll our own custom hash map, as I mused in an earlier post.
That would duplicate quite a bit of code; are there reusable pieces
hidden within that could be extracted into common functions?
---
builtin/fast-export.c | 105 ++++++++++++++++++++++++++++++++----------
1 file changed, 81 insertions(+), 24 deletions(-)
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 530df12f05..627b0032f3 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -14,7 +14,6 @@
#include "diffcore.h"
#include "log-tree.h"
#include "revision.h"
-#include "decorate.h"
#include "string-list.h"
#include "utf8.h"
#include "parse-options.h"
@@ -71,9 +70,65 @@ static int parse_opt_tag_of_filtered_mode(const struct option *opt,
return 0;
}
-static struct decoration idnums;
+struct object_mark_entry {
+ const struct object *base;
+ uint32_t mark;
+};
+
+struct object_marks {
+ unsigned int size;
+ unsigned int nr;
+ struct object_mark_entry *entries;
+};
+
+static struct object_marks idnums;
static uint32_t last_idnum;
+static unsigned int hash_obj(const struct object *obj, unsigned int n)
+{
+ return sha1hash(obj->oid.hash) % n;
+}
+
+static void set_object_mark(struct object_marks *n, const struct object *base,
+ uint32_t mark)
+{
+ unsigned int size = n->size;
+ struct object_mark_entry *entries = n->entries;
+ unsigned int j = hash_obj(base, size);
+
+ while (entries[j].base) {
+ if (entries[j].base == base) {
+ entries[j].mark = mark;
+ return;
+ }
+ if (++j >= size)
+ j = 0;
+ }
+ entries[j].base = base;
+ entries[j].mark = mark;
+ n->nr++;
+}
+
+static void grow_object_marks(struct object_marks *n)
+{
+ unsigned int i;
+ unsigned int old_size = n->size;
+ struct object_mark_entry *old_entries = n->entries;
+
+ n->size = (old_size + 1000) * 3 / 2;
+ n->entries = xcalloc(n->size, sizeof(n->entries[0]));
+ n->nr = 0;
+
+ for (i = 0; i < old_size; i++) {
+ const struct object *base = old_entries[i].base;
+ uint32_t mark = old_entries[i].mark;
+
+ if (mark)
+ set_object_mark(n, base, mark);
+ }
+ free(old_entries);
+}
+
static int has_unshown_parent(struct commit *commit)
{
struct commit_list *parent;
@@ -156,20 +211,13 @@ static void anonymize_path(struct strbuf *out, const char *path,
}
}
-/* Since intptr_t is C99, we do not use it here */
-static inline uint32_t *mark_to_ptr(uint32_t mark)
-{
- return ((uint32_t *)NULL) + mark;
-}
-
-static inline uint32_t ptr_to_mark(void * mark)
-{
- return (uint32_t *)mark - (uint32_t *)NULL;
-}
-
static inline void mark_object(struct object *object, uint32_t mark)
{
- add_decoration(&idnums, object, mark_to_ptr(mark));
+ unsigned int nr = idnums.nr + 1;
+
+ if (nr > idnums.size * 2 / 3)
+ grow_object_marks(&idnums);
+ return set_object_mark(&idnums, object, mark);
}
static inline void mark_next_object(struct object *object)
@@ -179,10 +227,21 @@ static inline void mark_next_object(struct object *object)
static int get_object_mark(struct object *object)
{
- void *decoration = lookup_decoration(&idnums, object);
- if (!decoration)
+ unsigned int j;
+
+ /* nothing to lookup */
+ if (!idnums.size)
return 0;
- return ptr_to_mark(decoration);
+ j = hash_obj(object, idnums.size);
+ for (;;) {
+ struct object_mark_entry *ref = idnums.entries + j;
+ if (ref->base == object)
+ return ref->mark;
+ if (!ref->base)
+ return 0;
+ if (++j == idnums.size)
+ j = 0;
+ }
}
static void show_progress(void)
@@ -897,8 +956,7 @@ static void handle_tags_and_duplicates(void)
static void export_marks(char *file)
{
unsigned int i;
- uint32_t mark;
- struct decoration_entry *deco = idnums.entries;
+ struct object_mark_entry *entry = idnums.entries;
FILE *f;
int e = 0;
@@ -907,15 +965,14 @@ static void export_marks(char *file)
die_errno("Unable to open marks file %s for writing.", file);
for (i = 0; i < idnums.size; i++) {
- if (deco->base && deco->base->type == 1) {
- mark = ptr_to_mark(deco->decoration);
- if (fprintf(f, ":%"PRIu32" %s\n", mark,
- oid_to_hex(&deco->base->oid)) < 0) {
+ if (entry->base && entry->base->type == 1) {
+ if (fprintf(f, ":%"PRIu32" %s\n", entry->mark,
+ oid_to_hex(&entry->base->oid)) < 0) {
e = 1;
break;
}
}
- deco++;
+ entry++;
}
e |= ferror(f);
--
2.17.0
next prev parent reply other threads:[~2018-05-12 8:45 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-05-09 21:06 [PATCH] fast-export: avoid NULL pointer arithmetic René Scharfe
2018-05-09 21:43 ` Johannes Schindelin
2018-05-10 9:24 ` René Scharfe
2018-05-10 10:51 ` Junio C Hamano
2018-05-10 19:47 ` René Scharfe
2018-05-11 2:16 ` Junio C Hamano
2018-05-11 4:49 ` Junio C Hamano
2018-05-11 6:19 ` Duy Nguyen
2018-05-11 8:56 ` Jeff King
2018-05-11 13:11 ` Duy Nguyen
2018-05-11 13:34 ` Duy Nguyen
2018-05-11 17:42 ` Jeff King
2018-05-12 8:45 ` René Scharfe [this message]
2018-05-12 8:49 ` René Scharfe
2018-05-14 1:37 ` Junio C Hamano
2018-05-15 19:36 ` René Scharfe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=80397e16-8667-e0cd-4049-aad453d35e6f@web.de \
--to=l.s.r@web.de \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=johannes.schindelin@gmx.de \
--cc=pclouds@gmail.com \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).