git@vger.kernel.org mailing list mirror (one of many)
 help / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: René Scharfe <l.s.r@web.de>
Cc: Jeff King <peff@peff.net>, Duy Nguyen <pclouds@gmail.com>,
	Git List <git@vger.kernel.org>,
	Johannes Schindelin <johannes.schindelin@gmx.de>
Subject: Re: [PATCH] fast-export: avoid NULL pointer arithmetic
Date: Mon, 14 May 2018 10:37:38 +0900
Message-ID: <xmqqwow7c90d.fsf@gitster-ct.c.googlers.com> (raw)
In-Reply-To: <80397e16-8667-e0cd-4049-aad453d35e6f@web.de>

René Scharfe <l.s.r@web.de> writes:

> Storing integer values in pointers is a trick that seems to have worked
> so far for fast-export.  A portable way to avoid that trick without
> requiring more memory would be to use a union.
>
> Or we could roll our own custom hash map, as I mused in an earlier post.
> That would duplicate quite a bit of code; are there reusable pieces
> hidden within that could be extracted into common functions?

Hmm, this together with your follow-up does not look too bad, but it
does introduce quite a lot of code that could be refactored, so I am
not sure if I really like it or not.

>
> ---
>  builtin/fast-export.c | 105 ++++++++++++++++++++++++++++++++----------
>  1 file changed, 81 insertions(+), 24 deletions(-)
>
> diff --git a/builtin/fast-export.c b/builtin/fast-export.c
> index 530df12f05..627b0032f3 100644
> --- a/builtin/fast-export.c
> +++ b/builtin/fast-export.c
> @@ -14,7 +14,6 @@
>  #include "diffcore.h"
>  #include "log-tree.h"
>  #include "revision.h"
> -#include "decorate.h"
>  #include "string-list.h"
>  #include "utf8.h"
>  #include "parse-options.h"
> @@ -71,9 +70,65 @@ static int parse_opt_tag_of_filtered_mode(const struct option *opt,
>  	return 0;
>  }
>  
> -static struct decoration idnums;
> +struct object_mark_entry {
> +	const struct object *base;
> +	uint32_t mark;
> +};
> +
> +struct object_marks {
> +	unsigned int size;
> +	unsigned int nr;
> +	struct object_mark_entry *entries;
> +};
> +
> +static struct object_marks idnums;
>  static uint32_t last_idnum;
>  
> +static unsigned int hash_obj(const struct object *obj, unsigned int n)
> +{
> +	return sha1hash(obj->oid.hash) % n;
> +}
> +
> +static void set_object_mark(struct object_marks *n, const struct object *base,
> +			    uint32_t mark)
> +{
> +	unsigned int size = n->size;
> +	struct object_mark_entry *entries = n->entries;
> +	unsigned int j = hash_obj(base, size);
> +
> +	while (entries[j].base) {
> +		if (entries[j].base == base) {
> +			entries[j].mark = mark;
> +			return;
> +		}
> +		if (++j >= size)
> +			j = 0;
> +	}
> +	entries[j].base = base;
> +	entries[j].mark = mark;
> +	n->nr++;
> +}
> +
> +static void grow_object_marks(struct object_marks *n)
> +{
> +	unsigned int i;
> +	unsigned int old_size = n->size;
> +	struct object_mark_entry *old_entries = n->entries;
> +
> +	n->size = (old_size + 1000) * 3 / 2;
> +	n->entries = xcalloc(n->size, sizeof(n->entries[0]));
> +	n->nr = 0;
> +
> +	for (i = 0; i < old_size; i++) {
> +		const struct object *base = old_entries[i].base;
> +		uint32_t mark = old_entries[i].mark;
> +
> +		if (mark)
> +			set_object_mark(n, base, mark);
> +	}
> +	free(old_entries);
> +}
> +
>  static int has_unshown_parent(struct commit *commit)
>  {
>  	struct commit_list *parent;
> @@ -156,20 +211,13 @@ static void anonymize_path(struct strbuf *out, const char *path,
>  	}
>  }
>  
> -/* Since intptr_t is C99, we do not use it here */
> -static inline uint32_t *mark_to_ptr(uint32_t mark)
> -{
> -	return ((uint32_t *)NULL) + mark;
> -}
> -
> -static inline uint32_t ptr_to_mark(void * mark)
> -{
> -	return (uint32_t *)mark - (uint32_t *)NULL;
> -}
> -
>  static inline void mark_object(struct object *object, uint32_t mark)
>  {
> -	add_decoration(&idnums, object, mark_to_ptr(mark));
> +	unsigned int nr = idnums.nr + 1;
> +
> +	if (nr > idnums.size * 2 / 3)
> +		grow_object_marks(&idnums);
> +	return set_object_mark(&idnums, object, mark);
>  }
>  
>  static inline void mark_next_object(struct object *object)
> @@ -179,10 +227,21 @@ static inline void mark_next_object(struct object *object)
>  
>  static int get_object_mark(struct object *object)
>  {
> -	void *decoration = lookup_decoration(&idnums, object);
> -	if (!decoration)
> +	unsigned int j;
> +
> +	/* nothing to lookup */
> +	if (!idnums.size)
>  		return 0;
> -	return ptr_to_mark(decoration);
> +	j = hash_obj(object, idnums.size);
> +	for (;;) {
> +		struct object_mark_entry *ref = idnums.entries + j;
> +		if (ref->base == object)
> +			return ref->mark;
> +		if (!ref->base)
> +			return 0;
> +		if (++j == idnums.size)
> +			j = 0;
> +	}
>  }
>  
>  static void show_progress(void)
> @@ -897,8 +956,7 @@ static void handle_tags_and_duplicates(void)
>  static void export_marks(char *file)
>  {
>  	unsigned int i;
> -	uint32_t mark;
> -	struct decoration_entry *deco = idnums.entries;
> +	struct object_mark_entry *entry = idnums.entries;
>  	FILE *f;
>  	int e = 0;
>  
> @@ -907,15 +965,14 @@ static void export_marks(char *file)
>  		die_errno("Unable to open marks file %s for writing.", file);
>  
>  	for (i = 0; i < idnums.size; i++) {
> -		if (deco->base && deco->base->type == 1) {
> -			mark = ptr_to_mark(deco->decoration);
> -			if (fprintf(f, ":%"PRIu32" %s\n", mark,
> -				oid_to_hex(&deco->base->oid)) < 0) {
> +		if (entry->base && entry->base->type == 1) {
> +			if (fprintf(f, ":%"PRIu32" %s\n", entry->mark,
> +				    oid_to_hex(&entry->base->oid)) < 0) {
>  			    e = 1;
>  			    break;
>  			}
>  		}
> -		deco++;
> +		entry++;
>  	}
>  
>  	e |= ferror(f);

  parent reply index

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-09 21:06 René Scharfe
2018-05-09 21:43 ` Johannes Schindelin
2018-05-10  9:24 ` René Scharfe
2018-05-10 10:51   ` Junio C Hamano
2018-05-10 19:47     ` René Scharfe
2018-05-11  2:16       ` Junio C Hamano
2018-05-11  4:49         ` Junio C Hamano
2018-05-11  6:19           ` Duy Nguyen
2018-05-11  8:56             ` Jeff King
2018-05-11 13:11               ` Duy Nguyen
2018-05-11 13:34                 ` Duy Nguyen
2018-05-11 17:42                   ` Jeff King
2018-05-12  8:45                     ` René Scharfe
2018-05-12  8:49                       ` René Scharfe
2018-05-14  1:37                       ` Junio C Hamano [this message]
2018-05-15 19:36                         ` René Scharfe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=xmqqwow7c90d.fsf@gitster-ct.c.googlers.com \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    --cc=johannes.schindelin@gmx.de \
    --cc=l.s.r@web.de \
    --cc=pclouds@gmail.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

git@vger.kernel.org mailing list mirror (one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox