git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / Atom feed
* [PATCH v2 0/9] Filter combination
@ 2019-06-01  0:35 Matthew DeVore
  2019-06-01  0:35 ` [PATCH v2 1/9] list-objects-filter: make API easier to use Matthew DeVore
                   ` (11 more replies)
  0 siblings, 12 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-01  0:35 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	emilyshaffer
  Cc: Matthew DeVore, matvore

Here is a roll-up in which, hopefully, all review comments have been applied or
responded to. Notable changes since v1 include:

 - Added an ALLOC_GROW_BY which is used twice by this patchset to make growing
   arrays safer and cleaner
 - Cleaned up the URL-encoding by (1) using hex_to_bytes rather than rolling my
   own helpers and (2) making error-string generation unconditional
 - Switched to an array-based data structure rather than a linked list for both
   LOFC_COMBINE filter spec objects and the filter object itself
 - Changed the list_objects_filter API to be cleaner to use
 - Changed test cases to use sparse:oid= rather than sparse:path= since the
   latter is being disabled.

Thank you,

Matthew DeVore (9):
  list-objects-filter: make API easier to use
  list-objects-filter: put omits set in filter struct
  list-objects-filter-options: always supply *errbuf
  list-objects-filter: implement composite filters
  list-objects-filter-options: move error check up
  list-objects-filter-options: make filter_spec a strbuf
  list-objects-filter-options: allow mult. --filter
  list-objects-filter-options: clean up use of ALLOC_GROW
  list-objects-filter-options: make parser void

 Documentation/rev-list-options.txt  |  16 ++
 builtin/rev-list.c                  |   2 +-
 cache.h                             |  22 ++
 list-objects-filter-options.c       | 264 ++++++++++++++++++---
 list-objects-filter-options.h       |  32 ++-
 list-objects-filter.c               | 345 +++++++++++++++++++++-------
 list-objects-filter.h               |  35 ++-
 list-objects.c                      |  55 ++---
 t/t5616-partial-clone.sh            |  19 ++
 t/t6112-rev-list-filters-objects.sh | 197 +++++++++++++++-
 transport.c                         |   1 +
 upload-pack.c                       |   4 +-
 12 files changed, 816 insertions(+), 176 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2 1/9] list-objects-filter: make API easier to use
  2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
@ 2019-06-01  0:35 ` Matthew DeVore
  2019-06-01  0:35 ` [PATCH v2 2/9] list-objects-filter: put omits set in filter struct Matthew DeVore
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-01  0:35 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	emilyshaffer
  Cc: Matthew DeVore, matvore, Jeff Hostetler, Junio C Hamano

Make the list-objects-filter.h API more opaque and easier to use. This
prepares for combined filter support, where filters will be created and
used in a new context.

Helped-by: Jeff Hostetler <git@jeffhostetler.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter.c | 122 +++++++++++++++++++++++++++---------------
 list-objects-filter.h |  35 ++++++------
 list-objects.c        |  55 ++++++++-----------
 3 files changed, 117 insertions(+), 95 deletions(-)

diff --git a/list-objects-filter.c b/list-objects-filter.c
index ee449de3f7..35e0bbe123 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -19,20 +19,34 @@
  * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
  * that have been shown, but should be revisited if they appear
  * in the traversal (until we mark it SEEN).  This is a way to
  * let us silently de-dup calls to show() in the caller.  This
  * is subtly different from the "revision.h:SHOWN" and the
  * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
  * the non-de-dup usage in pack-bitmap.c
  */
 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
 
+struct filter {
+	enum list_objects_filter_result (*filter_object_fn)(
+		struct repository *r,
+		enum list_objects_filter_situation filter_situation,
+		struct object *obj,
+		const char *pathname,
+		const char *filename,
+		void *filter_data);
+
+	void (*free_fn)(void *filter_data);
+
+	void *filter_data;
+};
+
 /*
  * A filter for list-objects to omit ALL blobs from the traversal.
  * And to OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_blobs_none_data {
 	struct oidset *omits;
 };
 
 static enum list_objects_filter_result filter_blobs_none(
 	struct repository *r,
@@ -60,32 +74,31 @@ static enum list_objects_filter_result filter_blobs_none(
 	case LOFS_BLOB:
 		assert(obj->type == OBJ_BLOB);
 		assert((obj->flags & SEEN) == 0);
 
 		if (filter_data->omits)
 			oidset_insert(filter_data->omits, &obj->oid);
 		return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 	}
 }
 
-static void *filter_blobs_none__init(
+static void filter_blobs_none__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 
-	*filter_fn = filter_blobs_none;
-	*filter_free_fn = free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_blobs_none;
+	filter->free_fn = free;
 }
 
 /*
  * A filter for list-objects to omit ALL trees and blobs from the traversal.
  * Can OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_trees_depth_data {
 	struct oidset *omits;
 
 	/*
@@ -194,35 +207,34 @@ static enum list_objects_filter_result filter_trees_depth(
 }
 
 static void filter_trees_free(void *filter_data) {
 	struct filter_trees_depth_data *d = filter_data;
 	if (!d)
 		return;
 	oidmap_free(&d->seen_at_depth, 1);
 	free(d);
 }
 
-static void *filter_trees_depth__init(
+static void filter_trees_depth__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 	oidmap_init(&d->seen_at_depth, 0);
 	d->exclude_depth = filter_options->tree_exclude_depth;
 	d->current_depth = 0;
 
-	*filter_fn = filter_trees_depth;
-	*filter_free_fn = filter_trees_free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_trees_depth;
+	filter->free_fn = filter_trees_free;
 }
 
 /*
  * A filter for list-objects to omit large blobs.
  * And to OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_blobs_limit_data {
 	struct oidset *omits;
 	unsigned long max_bytes;
 };
@@ -274,33 +286,32 @@ static enum list_objects_filter_result filter_blobs_limit(
 			oidset_insert(filter_data->omits, &obj->oid);
 		return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 	}
 
 include_it:
 	if (filter_data->omits)
 		oidset_remove(filter_data->omits, &obj->oid);
 	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
-static void *filter_blobs_limit__init(
+static void filter_blobs_limit__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 	d->max_bytes = filter_options->blob_limit_value;
 
-	*filter_fn = filter_blobs_limit;
-	*filter_free_fn = free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_blobs_limit;
+	filter->free_fn = free;
 }
 
 /*
  * A filter driven by a sparse-checkout specification to only
  * include blobs that a sparse checkout would populate.
  *
  * The sparse-checkout spec can be loaded from a blob with the
  * given OID or from a local pathname.  We allow an OID because
  * the repo may be bare or we may be doing the filtering on the
  * server.
@@ -450,92 +461,117 @@ static enum list_objects_filter_result filter_sparse(
 }
 
 
 static void filter_sparse_free(void *filter_data)
 {
 	struct filter_sparse_data *d = filter_data;
 	/* TODO free contents of 'd' */
 	free(d);
 }
 
-static void *filter_sparse_oid__init(
+static void filter_sparse_oid__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 	if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 					   NULL, 0, &d->el) < 0)
 		die("could not load filter specification");
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
-	*filter_fn = filter_sparse;
-	*filter_free_fn = filter_sparse_free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_sparse;
+	filter->free_fn = filter_sparse_free;
 }
 
-static void *filter_sparse_path__init(
+static void filter_sparse_path__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 	if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
 					   NULL, 0, &d->el, NULL) < 0)
 		die("could not load filter specification");
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
-	*filter_fn = filter_sparse;
-	*filter_free_fn = filter_sparse_free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_sparse;
+	filter->free_fn = filter_sparse_free;
 }
 
-typedef void *(*filter_init_fn)(
+typedef void (*filter_init_fn)(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn);
+	struct filter *filter);
 
 /*
  * Must match "enum list_objects_filter_choice".
  */
 static filter_init_fn s_filters[] = {
 	NULL,
 	filter_blobs_none__init,
 	filter_blobs_limit__init,
 	filter_trees_depth__init,
 	filter_sparse_oid__init,
 	filter_sparse_path__init,
 };
 
-void *list_objects_filter__init(
+struct filter *list_objects_filter__init(
 	struct oidset *omitted,
-	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct list_objects_filter_options *filter_options)
 {
+	struct filter *filter;
 	filter_init_fn init_fn;
 
 	assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
 
 	if (filter_options->choice >= LOFC__COUNT)
 		BUG("invalid list-objects filter choice: %d",
 		    filter_options->choice);
 
 	init_fn = s_filters[filter_options->choice];
-	if (init_fn)
-		return init_fn(omitted, filter_options,
-			       filter_fn, filter_free_fn);
-	*filter_fn = NULL;
-	*filter_free_fn = NULL;
-	return NULL;
+	if (!init_fn)
+		return NULL;
+
+	filter = xcalloc(1, sizeof(*filter));
+	init_fn(omitted, filter_options, filter);
+	return filter;
+}
+
+enum list_objects_filter_result list_objects_filter__filter_object(
+	struct repository *r,
+	enum list_objects_filter_situation filter_situation,
+	struct object *obj,
+	const char *pathname,
+	const char *filename,
+	struct filter *filter)
+{
+	if (filter && (obj->flags & NOT_USER_GIVEN))
+		return filter->filter_object_fn(r, filter_situation, obj,
+						pathname, filename,
+						filter->filter_data);
+	/*
+	 * No filter is active or user gave object explicitly. Choose default
+	 * behavior based on filter situation.
+	 */
+	if (filter_situation == LOFS_END_TREE)
+		return 0;
+	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+}
+
+void list_objects_filter__free(struct filter *filter)
+{
+	if (!filter)
+		return;
+	filter->free_fn(filter->filter_data);
+	free(filter);
 }
diff --git a/list-objects-filter.h b/list-objects-filter.h
index 1d45a4ad57..6908954266 100644
--- a/list-objects-filter.h
+++ b/list-objects-filter.h
@@ -53,37 +53,34 @@ enum list_objects_filter_result {
 	LOFR_DO_SHOW   = 1<<1,
 	LOFR_SKIP_TREE = 1<<2,
 };
 
 enum list_objects_filter_situation {
 	LOFS_BEGIN_TREE,
 	LOFS_END_TREE,
 	LOFS_BLOB
 };
 
-typedef enum list_objects_filter_result (*filter_object_fn)(
+struct filter;
+
+/* Constructor for the set of defined list-objects filters. */
+struct filter *list_objects_filter__init(
+	struct oidset *omitted,
+	struct list_objects_filter_options *filter_options);
+
+/*
+ * Lets `filter` decide how to handle the `obj`. If `filter` is NULL, this
+ * function behaves as expected if no filter is configured: all objects are
+ * included.
+ */
+enum list_objects_filter_result list_objects_filter__filter_object(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
-	void *filter_data);
-
-typedef void (*filter_free_fn)(void *filter_data);
+	struct filter *filter);
 
-/*
- * Constructor for the set of defined list-objects filters.
- * Returns a generic "void *filter_data".
- *
- * The returned "filter_fn" will be used by traverse_commit_list()
- * to filter the results.
- *
- * The returned "filter_free_fn" is a destructor for the
- * filter_data.
- */
-void *list_objects_filter__init(
-	struct oidset *omitted,
-	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn);
+/* Destroys `filter`. Does nothing if `filter` is null. */
+void list_objects_filter__free(struct filter *filter);
 
 #endif /* LIST_OBJECTS_FILTER_H */
diff --git a/list-objects.c b/list-objects.c
index b5651ddd5b..9307d91fb3 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -11,32 +11,31 @@
 #include "list-objects-filter-options.h"
 #include "packfile.h"
 #include "object-store.h"
 #include "trace.h"
 
 struct traversal_context {
 	struct rev_info *revs;
 	show_object_fn show_object;
 	show_commit_fn show_commit;
 	void *show_data;
-	filter_object_fn filter_fn;
-	void *filter_data;
+	struct filter *filter;
 };
 
 static void process_blob(struct traversal_context *ctx,
 			 struct blob *blob,
 			 struct strbuf *path,
 			 const char *name)
 {
 	struct object *obj = &blob->object;
 	size_t pathlen;
-	enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW;
+	enum list_objects_filter_result r;
 
 	if (!ctx->revs->blob_objects)
 		return;
 	if (!obj)
 		die("bad blob object");
 	if (obj->flags & (UNINTERESTING | SEEN))
 		return;
 
 	/*
 	 * Pre-filter known-missing objects when explicitly requested.
@@ -47,25 +46,24 @@ static void process_blob(struct traversal_context *ctx,
 	 * may cause the actual filter to report an incomplete list
 	 * of missing objects.
 	 */
 	if (ctx->revs->exclude_promisor_objects &&
 	    !has_object_file(&obj->oid) &&
 	    is_promisor_object(&obj->oid))
 		return;
 
 	pathlen = path->len;
 	strbuf_addstr(path, name);
-	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
-		r = ctx->filter_fn(ctx->revs->repo,
-				   LOFS_BLOB, obj,
-				   path->buf, &path->buf[pathlen],
-				   ctx->filter_data);
+	r = list_objects_filter__filter_object(ctx->revs->repo,
+					       LOFS_BLOB, obj,
+					       path->buf, &path->buf[pathlen],
+					       ctx->filter);
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
 		ctx->show_object(obj, path->buf, ctx->show_data);
 	strbuf_setlen(path, pathlen);
 }
 
 /*
  * Processing a gitlink entry currently does nothing, since
  * we do not recurse into the subproject.
@@ -150,21 +148,21 @@ static void process_tree_contents(struct traversal_context *ctx,
 }
 
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
 			 const char *name)
 {
 	struct object *obj = &tree->object;
 	struct rev_info *revs = ctx->revs;
 	int baselen = base->len;
-	enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW;
+	enum list_objects_filter_result r;
 	int failed_parse;
 
 	if (!revs->tree_objects)
 		return;
 	if (!obj)
 		die("bad tree object");
 	if (obj->flags & (UNINTERESTING | SEEN))
 		return;
 
 	failed_parse = parse_tree_gently(tree, 1);
@@ -179,47 +177,44 @@ static void process_tree(struct traversal_context *ctx,
 		 */
 		if (revs->exclude_promisor_objects &&
 		    is_promisor_object(&obj->oid))
 			return;
 
 		if (!revs->do_not_die_on_missing_tree)
 			die("bad tree object %s", oid_to_hex(&obj->oid));
 	}
 
 	strbuf_addstr(base, name);
-	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
-		r = ctx->filter_fn(ctx->revs->repo,
-				   LOFS_BEGIN_TREE, obj,
-				   base->buf, &base->buf[baselen],
-				   ctx->filter_data);
+	r = list_objects_filter__filter_object(ctx->revs->repo,
+					       LOFS_BEGIN_TREE, obj,
+					       base->buf, &base->buf[baselen],
+					       ctx->filter);
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
 		ctx->show_object(obj, base->buf, ctx->show_data);
 	if (base->len)
 		strbuf_addch(base, '/');
 
 	if (r & LOFR_SKIP_TREE)
 		trace_printf("Skipping contents of tree %s...\n", base->buf);
 	else if (!failed_parse)
 		process_tree_contents(ctx, tree, base);
 
-	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) {
-		r = ctx->filter_fn(ctx->revs->repo,
-				   LOFS_END_TREE, obj,
-				   base->buf, &base->buf[baselen],
-				   ctx->filter_data);
-		if (r & LOFR_MARK_SEEN)
-			obj->flags |= SEEN;
-		if (r & LOFR_DO_SHOW)
-			ctx->show_object(obj, base->buf, ctx->show_data);
-	}
+	r = list_objects_filter__filter_object(ctx->revs->repo,
+					       LOFS_END_TREE, obj,
+					       base->buf, &base->buf[baselen],
+					       ctx->filter);
+	if (r & LOFR_MARK_SEEN)
+		obj->flags |= SEEN;
+	if (r & LOFR_DO_SHOW)
+		ctx->show_object(obj, base->buf, ctx->show_data);
 
 	strbuf_setlen(base, baselen);
 	free_tree_buffer(tree);
 }
 
 static void mark_edge_parents_uninteresting(struct commit *commit,
 					    struct rev_info *revs,
 					    show_edge_fn show_edge)
 {
 	struct commit_list *parents;
@@ -395,38 +390,32 @@ static void do_traverse(struct traversal_context *ctx)
 void traverse_commit_list(struct rev_info *revs,
 			  show_commit_fn show_commit,
 			  show_object_fn show_object,
 			  void *show_data)
 {
 	struct traversal_context ctx;
 	ctx.revs = revs;
 	ctx.show_commit = show_commit;
 	ctx.show_object = show_object;
 	ctx.show_data = show_data;
-	ctx.filter_fn = NULL;
-	ctx.filter_data = NULL;
+	ctx.filter = NULL;
 	do_traverse(&ctx);
 }
 
 void traverse_commit_list_filtered(
 	struct list_objects_filter_options *filter_options,
 	struct rev_info *revs,
 	show_commit_fn show_commit,
 	show_object_fn show_object,
 	void *show_data,
 	struct oidset *omitted)
 {
 	struct traversal_context ctx;
-	filter_free_fn filter_free_fn = NULL;
 
 	ctx.revs = revs;
 	ctx.show_object = show_object;
 	ctx.show_commit = show_commit;
 	ctx.show_data = show_data;
-	ctx.filter_fn = NULL;
-
-	ctx.filter_data = list_objects_filter__init(omitted, filter_options,
-						    &ctx.filter_fn, &filter_free_fn);
+	ctx.filter = list_objects_filter__init(omitted, filter_options);
 	do_traverse(&ctx);
-	if (ctx.filter_data && filter_free_fn)
-		filter_free_fn(ctx.filter_data);
+	list_objects_filter__free(ctx.filter);
 }
-- 
2.17.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2 2/9] list-objects-filter: put omits set in filter struct
  2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
  2019-06-01  0:35 ` [PATCH v2 1/9] list-objects-filter: make API easier to use Matthew DeVore
@ 2019-06-01  0:35 ` Matthew DeVore
  2019-06-01  0:35 ` [PATCH v2 3/9] list-objects-filter-options: always supply *errbuf Matthew DeVore
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-01  0:35 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	emilyshaffer
  Cc: Matthew DeVore, matvore

The oidset *omits pointer must be accessed by the combine filter in a
type-agnostic way once the graph traversal is over. Store that pointer
in the general `filter` struct. This will be used in a follow-up patch
to implement the combine filter.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter.c | 70 ++++++++++++++++---------------------------
 1 file changed, 26 insertions(+), 44 deletions(-)

diff --git a/list-objects-filter.c b/list-objects-filter.c
index 35e0bbe123..57bbf6ec1c 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -26,88 +26,76 @@
  */
 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
 
 struct filter {
 	enum list_objects_filter_result (*filter_object_fn)(
 		struct repository *r,
 		enum list_objects_filter_situation filter_situation,
 		struct object *obj,
 		const char *pathname,
 		const char *filename,
+		struct oidset *omits,
 		void *filter_data);
 
 	void (*free_fn)(void *filter_data);
 
 	void *filter_data;
-};
 
-/*
- * A filter for list-objects to omit ALL blobs from the traversal.
- * And to OPTIONALLY collect a list of the omitted OIDs.
- */
-struct filter_blobs_none_data {
+	/* If non-NULL, the filter collects a list of the omitted OIDs here. */
 	struct oidset *omits;
 };
 
 static enum list_objects_filter_result filter_blobs_none(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
+	struct oidset *omits,
 	void *filter_data_)
 {
-	struct filter_blobs_none_data *filter_data = filter_data_;
-
 	switch (filter_situation) {
 	default:
 		BUG("unknown filter_situation: %d", filter_situation);
 
 	case LOFS_BEGIN_TREE:
 		assert(obj->type == OBJ_TREE);
 		/* always include all tree objects */
 		return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 
 	case LOFS_END_TREE:
 		assert(obj->type == OBJ_TREE);
 		return LOFR_ZERO;
 
 	case LOFS_BLOB:
 		assert(obj->type == OBJ_BLOB);
 		assert((obj->flags & SEEN) == 0);
 
-		if (filter_data->omits)
-			oidset_insert(filter_data->omits, &obj->oid);
+		if (omits)
+			oidset_insert(omits, &obj->oid);
 		return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 	}
 }
 
 static void filter_blobs_none__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
-	struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
-
-	filter->filter_data = d;
 	filter->filter_object_fn = filter_blobs_none;
 	filter->free_fn = free;
 }
 
 /*
  * A filter for list-objects to omit ALL trees and blobs from the traversal.
  * Can OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_trees_depth_data {
-	struct oidset *omits;
-
 	/*
 	 * Maps trees to the minimum depth at which they were seen. It is not
 	 * necessary to re-traverse a tree at deeper or equal depths than it has
 	 * already been traversed.
 	 *
 	 * We can't use LOFR_MARK_SEEN for tree objects since this will prevent
 	 * it from being traversed at shallower depths.
 	 */
 	struct oidmap seen_at_depth;
 
@@ -116,38 +104,39 @@ struct filter_trees_depth_data {
 };
 
 struct seen_map_entry {
 	struct oidmap_entry base;
 	size_t depth;
 };
 
 /* Returns 1 if the oid was in the omits set before it was invoked. */
 static int filter_trees_update_omits(
 	struct object *obj,
-	struct filter_trees_depth_data *filter_data,
+	struct oidset *omits,
 	int include_it)
 {
-	if (!filter_data->omits)
+	if (!omits)
 		return 0;
 
 	if (include_it)
-		return oidset_remove(filter_data->omits, &obj->oid);
+		return oidset_remove(omits, &obj->oid);
 	else
-		return oidset_insert(filter_data->omits, &obj->oid);
+		return oidset_insert(omits, &obj->oid);
 }
 
 static enum list_objects_filter_result filter_trees_depth(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
+	struct oidset *omits,
 	void *filter_data_)
 {
 	struct filter_trees_depth_data *filter_data = filter_data_;
 	struct seen_map_entry *seen_info;
 	int include_it = filter_data->current_depth <
 		filter_data->exclude_depth;
 	int filter_res;
 	int already_seen;
 
 	/*
@@ -158,47 +147,47 @@ static enum list_objects_filter_result filter_trees_depth(
 	switch (filter_situation) {
 	default:
 		BUG("unknown filter_situation: %d", filter_situation);
 
 	case LOFS_END_TREE:
 		assert(obj->type == OBJ_TREE);
 		filter_data->current_depth--;
 		return LOFR_ZERO;
 
 	case LOFS_BLOB:
-		filter_trees_update_omits(obj, filter_data, include_it);
+		filter_trees_update_omits(obj, omits, include_it);
 		return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO;
 
 	case LOFS_BEGIN_TREE:
 		seen_info = oidmap_get(
 			&filter_data->seen_at_depth, &obj->oid);
 		if (!seen_info) {
 			seen_info = xcalloc(1, sizeof(*seen_info));
 			oidcpy(&seen_info->base.oid, &obj->oid);
 			seen_info->depth = filter_data->current_depth;
 			oidmap_put(&filter_data->seen_at_depth, seen_info);
 			already_seen = 0;
 		} else {
 			already_seen =
 				filter_data->current_depth >= seen_info->depth;
 		}
 
 		if (already_seen) {
 			filter_res = LOFR_SKIP_TREE;
 		} else {
 			int been_omitted = filter_trees_update_omits(
-				obj, filter_data, include_it);
+				obj, omits, include_it);
 			seen_info->depth = filter_data->current_depth;
 
 			if (include_it)
 				filter_res = LOFR_DO_SHOW;
-			else if (filter_data->omits && !been_omitted)
+			else if (omits && !been_omitted)
 				/*
 				 * Must update omit information of children
 				 * recursively; they have not been omitted yet.
 				 */
 				filter_res = LOFR_ZERO;
 			else
 				filter_res = LOFR_SKIP_TREE;
 		}
 
 		filter_data->current_depth++;
@@ -208,50 +197,48 @@ static enum list_objects_filter_result filter_trees_depth(
 
 static void filter_trees_free(void *filter_data) {
 	struct filter_trees_depth_data *d = filter_data;
 	if (!d)
 		return;
 	oidmap_free(&d->seen_at_depth, 1);
 	free(d);
 }
 
 static void filter_trees_depth__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
 	struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
 	oidmap_init(&d->seen_at_depth, 0);
 	d->exclude_depth = filter_options->tree_exclude_depth;
 	d->current_depth = 0;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_trees_depth;
 	filter->free_fn = filter_trees_free;
 }
 
 /*
  * A filter for list-objects to omit large blobs.
  * And to OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_blobs_limit_data {
-	struct oidset *omits;
 	unsigned long max_bytes;
 };
 
 static enum list_objects_filter_result filter_blobs_limit(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
+	struct oidset *omits,
 	void *filter_data_)
 {
 	struct filter_blobs_limit_data *filter_data = filter_data_;
 	unsigned long object_length;
 	enum object_type t;
 
 	switch (filter_situation) {
 	default:
 		BUG("unknown filter_situation: %d", filter_situation);
 
@@ -275,38 +262,36 @@ static enum list_objects_filter_result filter_blobs_limit(
 			 * apply the size filter criteria.  Be conservative
 			 * and force show it (and let the caller deal with
 			 * the ambiguity).
 			 */
 			goto include_it;
 		}
 
 		if (object_length < filter_data->max_bytes)
 			goto include_it;
 
-		if (filter_data->omits)
-			oidset_insert(filter_data->omits, &obj->oid);
+		if (omits)
+			oidset_insert(omits, &obj->oid);
 		return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 	}
 
 include_it:
-	if (filter_data->omits)
-		oidset_remove(filter_data->omits, &obj->oid);
+	if (omits)
+		oidset_remove(omits, &obj->oid);
 	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
 static void filter_blobs_limit__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
 	struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
 	d->max_bytes = filter_options->blob_limit_value;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_blobs_limit;
 	filter->free_fn = free;
 }
 
 /*
  * A filter driven by a sparse-checkout specification to only
  * include blobs that a sparse checkout would populate.
@@ -330,33 +315,33 @@ struct frame {
 	 * omitted objects.
 	 *
 	 * 0 if everything (recursively) contained in this directory
 	 * has been explicitly included (SHOWN) in the result and
 	 * the directory may be short-cut later in the traversal.
 	 */
 	unsigned child_prov_omit : 1;
 };
 
 struct filter_sparse_data {
-	struct oidset *omits;
 	struct exclude_list el;
 
 	size_t nr, alloc;
 	struct frame *array_frame;
 };
 
 static enum list_objects_filter_result filter_sparse(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
+	struct oidset *omits,
 	void *filter_data_)
 {
 	struct filter_sparse_data *filter_data = filter_data_;
 	int val, dtype;
 	struct frame *frame;
 
 	switch (filter_situation) {
 	default:
 		BUG("unknown filter_situation: %d", filter_situation);
 
@@ -425,98 +410,93 @@ static enum list_objects_filter_result filter_sparse(
 
 		frame = &filter_data->array_frame[filter_data->nr];
 
 		dtype = DT_REG;
 		val = is_excluded_from_list(pathname, strlen(pathname),
 					    filename, &dtype, &filter_data->el,
 					    r->index);
 		if (val < 0)
 			val = frame->defval;
 		if (val > 0) {
-			if (filter_data->omits)
-				oidset_remove(filter_data->omits, &obj->oid);
+			if (omits)
+				oidset_remove(omits, &obj->oid);
 			return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 		}
 
 		/*
 		 * Provisionally omit it.  We've already established that
 		 * this pathname is not in the sparse-checkout specification
 		 * with the CURRENT pathname, so we *WANT* to omit this blob.
 		 *
 		 * However, a pathname elsewhere in the tree may also
 		 * reference this same blob, so we cannot reject it yet.
 		 * Leave the LOFR_ bits unset so that if the blob appears
 		 * again in the traversal, we will be asked again.
 		 */
-		if (filter_data->omits)
-			oidset_insert(filter_data->omits, &obj->oid);
+		if (omits)
+			oidset_insert(omits, &obj->oid);
 
 		/*
 		 * Remember that at least 1 blob in this tree was
 		 * provisionally omitted.  This prevents us from short
 		 * cutting the tree in future iterations.
 		 */
 		frame->child_prov_omit = 1;
 		return LOFR_ZERO;
 	}
 }
 
 
 static void filter_sparse_free(void *filter_data)
 {
 	struct filter_sparse_data *d = filter_data;
 	/* TODO free contents of 'd' */
 	free(d);
 }
 
 static void filter_sparse_oid__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
 	struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
 	if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 					   NULL, 0, &d->el) < 0)
 		die("could not load filter specification");
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_sparse;
 	filter->free_fn = filter_sparse_free;
 }
 
 static void filter_sparse_path__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
 	struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
 	if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
 					   NULL, 0, &d->el, NULL) < 0)
 		die("could not load filter specification");
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_sparse;
 	filter->free_fn = filter_sparse_free;
 }
 
 typedef void (*filter_init_fn)(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter);
 
 /*
  * Must match "enum list_objects_filter_choice".
  */
 static filter_init_fn s_filters[] = {
 	NULL,
 	filter_blobs_none__init,
 	filter_blobs_limit__init,
@@ -536,35 +516,37 @@ struct filter *list_objects_filter__init(
 
 	if (filter_options->choice >= LOFC__COUNT)
 		BUG("invalid list-objects filter choice: %d",
 		    filter_options->choice);
 
 	init_fn = s_filters[filter_options->choice];
 	if (!init_fn)
 		return NULL;
 
 	filter = xcalloc(1, sizeof(*filter));
-	init_fn(omitted, filter_options, filter);
+	filter->omits = omitted;
+	init_fn(filter_options, filter);
 	return filter;
 }
 
 enum list_objects_filter_result list_objects_filter__filter_object(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
 	struct filter *filter)
 {
 	if (filter && (obj->flags & NOT_USER_GIVEN))
 		return filter->filter_object_fn(r, filter_situation, obj,
 						pathname, filename,
+						filter->omits,
 						filter->filter_data);
 	/*
 	 * No filter is active or user gave object explicitly. Choose default
 	 * behavior based on filter situation.
 	 */
 	if (filter_situation == LOFS_END_TREE)
 		return 0;
 	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2 3/9] list-objects-filter-options: always supply *errbuf
  2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
  2019-06-01  0:35 ` [PATCH v2 1/9] list-objects-filter: make API easier to use Matthew DeVore
  2019-06-01  0:35 ` [PATCH v2 2/9] list-objects-filter: put omits set in filter struct Matthew DeVore
@ 2019-06-01  0:35 ` Matthew DeVore
  2019-06-01  0:35 ` [PATCH v2 4/9] list-objects-filter: implement composite filters Matthew DeVore
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-01  0:35 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	emilyshaffer
  Cc: Matthew DeVore, matvore

Making errbuf an optional argument complicates error reporting. Fix this
by making all callers supply an errbuf, even if they may ignore it. This
will be important in follow-up patches where the filter-spec parsing has
more pitfalls and possible errors.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter-options.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index c0036f7378..aef24ddae3 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -23,47 +23,40 @@
  * convenience of the current command.
  */
 static int gently_parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
 	struct strbuf *errbuf)
 {
 	const char *v0;
 
 	if (filter_options->choice) {
-		if (errbuf) {
-			strbuf_addstr(
-				errbuf,
-				_("multiple filter-specs cannot be combined"));
-		}
+		strbuf_addstr(
+			errbuf, _("multiple filter-specs cannot be combined"));
 		return 1;
 	}
 
 	filter_options->filter_spec = strdup(arg);
 
 	if (!strcmp(arg, "blob:none")) {
 		filter_options->choice = LOFC_BLOB_NONE;
 		return 0;
 
 	} else if (skip_prefix(arg, "blob:limit=", &v0)) {
 		if (git_parse_ulong(v0, &filter_options->blob_limit_value)) {
 			filter_options->choice = LOFC_BLOB_LIMIT;
 			return 0;
 		}
 
 	} else if (skip_prefix(arg, "tree:", &v0)) {
 		if (!git_parse_ulong(v0, &filter_options->tree_exclude_depth)) {
-			if (errbuf) {
-				strbuf_addstr(
-					errbuf,
-					_("expected 'tree:<depth>'"));
-			}
+			strbuf_addstr(errbuf, _("expected 'tree:<depth>'"));
 			return 1;
 		}
 		filter_options->choice = LOFC_TREE_DEPTH;
 		return 0;
 
 	} else if (skip_prefix(arg, "sparse:oid=", &v0)) {
 		struct object_context oc;
 		struct object_id sparse_oid;
 
 		/*
@@ -80,22 +73,21 @@ static int gently_parse_list_objects_filter(
 	} else if (skip_prefix(arg, "sparse:path=", &v0)) {
 		filter_options->choice = LOFC_SPARSE_PATH;
 		filter_options->sparse_path_value = strdup(v0);
 		return 0;
 	}
 	/*
 	 * Please update _git_fetch() in git-completion.bash when you
 	 * add new filters
 	 */
 
-	if (errbuf)
-		strbuf_addf(errbuf, "invalid filter-spec '%s'", arg);
+	strbuf_addf(errbuf, "invalid filter-spec '%s'", arg);
 
 	memset(filter_options, 0, sizeof(*filter_options));
 	return 1;
 }
 
 int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 			      const char *arg)
 {
 	struct strbuf buf = STRBUF_INIT;
 	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
@@ -166,19 +158,22 @@ void partial_clone_register(
 	 */
 	core_partial_clone_filter_default =
 		xstrdup(filter_options->filter_spec);
 	git_config_set("core.partialclonefilter",
 		       core_partial_clone_filter_default);
 }
 
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options)
 {
+	struct strbuf errbuf = STRBUF_INIT;
+
 	/*
 	 * Parse default value, but silently ignore it if it is invalid.
 	 */
 	if (!core_partial_clone_filter_default)
 		return;
 	gently_parse_list_objects_filter(filter_options,
 					 core_partial_clone_filter_default,
-					 NULL);
+					 &errbuf);
+	strbuf_release(&errbuf);
 }
-- 
2.17.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2 4/9] list-objects-filter: implement composite filters
  2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
                   ` (2 preceding siblings ...)
  2019-06-01  0:35 ` [PATCH v2 3/9] list-objects-filter-options: always supply *errbuf Matthew DeVore
@ 2019-06-01  0:35 ` Matthew DeVore
  2019-06-03 21:51   ` Jeff Hostetler
  2019-06-01  0:35 ` [PATCH v2 5/9] list-objects-filter-options: move error check up Matthew DeVore
                   ` (7 subsequent siblings)
  11 siblings, 1 reply; 57+ messages in thread
From: Matthew DeVore @ 2019-06-01  0:35 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	emilyshaffer
  Cc: Matthew DeVore, matvore, Jeff Hostetler, Junio C Hamano

Allow combining filters such that only objects accepted by all filters
are shown. The motivation for this is to allow getting directory
listings without also fetching blobs. This can be done by combining
blob:none with tree:<depth>. There are massive repositories that have
larger-than-expected trees - even if you include only a single commit.

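The intended usage is to pass multiple filters to rev-list in the
following form:

	--filter=<FILTER1> --filter=<FILTER2> ...

Such usage is currently an error, so giving it a meaning is
backwards-compatible. This patch itself only implements the equivalent
single-argument form, in which the sub-filters are URL-encoded and
joined with "+":

	--filter=combine:<FILTER1>+<FILTER2>+...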

The URL-encoding method is being implemented before the repeated flag
logic, and the user-facing documentation for URL-encoding is being
withheld until the repeated flag feature is implemented. The
URL-encoding is in general not meant to be used directly by the user,
and it is better to describe the URL-encoding feature in terms of the
repeated flag.

Helped-by: Emily Shaffer <emilyshaffer@google.com>
Helped-by: Jeff Hostetler <git@jeffhostetler.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter-options.c       | 135 ++++++++++++++++++++++-
 list-objects-filter-options.h       |  17 ++-
 list-objects-filter.c               | 159 +++++++++++++++++++++++++++
 t/t6112-rev-list-filters-objects.sh | 163 +++++++++++++++++++++++++++-
 4 files changed, 468 insertions(+), 6 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index aef24ddae3..0f1d4181cb 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -1,19 +1,24 @@
 #include "cache.h"
 #include "commit.h"
 #include "config.h"
 #include "revision.h"
 #include "argv-array.h"
 #include "list-objects.h"
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
 
+static int parse_combine_filter(
+	struct list_objects_filter_options *filter_options,
+	const char *arg,
+	struct strbuf *errbuf);
+
 /*
  * Parse value of the argument to the "filter" keyword.
  * On the command line this looks like:
  *       --filter=<arg>
  * and in the pack protocol as:
  *       "filter" SP <arg>
  *
  * The filter keyword will be used by many commands.
  * See Documentation/rev-list-options.txt for allowed values for <arg>.
  *
@@ -28,22 +33,20 @@ static int gently_parse_list_objects_filter(
 	struct strbuf *errbuf)
 {
 	const char *v0;
 
 	if (filter_options->choice) {
 		strbuf_addstr(
 			errbuf, _("multiple filter-specs cannot be combined"));
 		return 1;
 	}
 
-	filter_options->filter_spec = strdup(arg);
-
 	if (!strcmp(arg, "blob:none")) {
 		filter_options->choice = LOFC_BLOB_NONE;
 		return 0;
 
 	} else if (skip_prefix(arg, "blob:limit=", &v0)) {
 		if (git_parse_ulong(v0, &filter_options->blob_limit_value)) {
 			filter_options->choice = LOFC_BLOB_LIMIT;
 			return 0;
 		}
 
@@ -67,36 +70,155 @@ static int gently_parse_list_objects_filter(
 		if (!get_oid_with_context(the_repository, v0, GET_OID_BLOB,
 					  &sparse_oid, &oc))
 			filter_options->sparse_oid_value = oiddup(&sparse_oid);
 		filter_options->choice = LOFC_SPARSE_OID;
 		return 0;
 
 	} else if (skip_prefix(arg, "sparse:path=", &v0)) {
 		filter_options->choice = LOFC_SPARSE_PATH;
 		filter_options->sparse_path_value = strdup(v0);
 		return 0;
+
+	} else if (skip_prefix(arg, "combine:", &v0)) {
+		return parse_combine_filter(filter_options, v0, errbuf);
+
 	}
 	/*
 	 * Please update _git_fetch() in git-completion.bash when you
 	 * add new filters
 	 */
 
 	strbuf_addf(errbuf, "invalid filter-spec '%s'", arg);
 
 	memset(filter_options, 0, sizeof(*filter_options));
 	return 1;
 }
 
+static int url_decode(struct strbuf *s, struct strbuf *errbuf)
+{
+	char *dest = s->buf;
+	char *src = s->buf;
+	size_t new_len;
+
+	while (*src) {
+		if (src[0] != '%') {
+			*dest++ = *src++;
+			continue;
+		}
+
+		if (hex_to_bytes((unsigned char *)dest, src + 1, 1)) {
+			strbuf_addstr(errbuf,
+				      "error in filter-spec - "
+				      "invalid hex sequence after %");
+			return 1;
+		}
+
+		if (!*dest) {
+			strbuf_addstr(errbuf,
+				      "error in filter-spec - unexpected %00");
+			return 1;
+		}
+
+		src += 3;
+		dest++;
+	}
+	new_len = dest - s->buf;
+	strbuf_remove(s, new_len, s->len - new_len);
+
+	return 0;
+}
+
+static const char *RESERVED_NON_WS = "~`!@#$^&*()[]{}\\;'\",<>?";
+
+static int has_reserved_character(
+	struct strbuf *sub_spec, struct strbuf *errbuf)
+{
+	const char *c = sub_spec->buf;
+	while (*c) {
+		if (*c <= ' ' || strchr(RESERVED_NON_WS, *c)) {
+			strbuf_addf(errbuf,
+				    "must escape char in sub-filter-spec: '%c'",
+				    *c);
+			return 1;
+		}
+		c++;
+	}
+
+	return 0;
+}
+
+static int parse_combine_subfilter(
+	struct list_objects_filter_options *filter_options,
+	struct strbuf *subspec,
+	struct strbuf *errbuf)
+{
+	size_t new_index = filter_options->sub_nr++;
+
+	ALLOC_GROW(filter_options->sub, filter_options->sub_nr,
+		   filter_options->sub_alloc);
+	memset(&filter_options->sub[new_index], 0,
+	       sizeof(*filter_options->sub));
+
+	return has_reserved_character(subspec, errbuf) ||
+		url_decode(subspec, errbuf) ||
+		gently_parse_list_objects_filter(
+			&filter_options->sub[new_index], subspec->buf, errbuf);
+}
+
+static int parse_combine_filter(
+	struct list_objects_filter_options *filter_options,
+	const char *arg,
+	struct strbuf *errbuf)
+{
+	struct strbuf **subspecs = strbuf_split_str(arg, '+', 0);
+	size_t sub;
+	int result;
+
+	if (!subspecs[0]) {
+		strbuf_addf(errbuf,
+			    _("expected something after combine:"));
+		result = 1;
+		goto cleanup;
+	}
+
+	for (sub = 0; subspecs[sub]; sub++) {
+		if (subspecs[sub + 1]) {
+			/*
+			 * This is not the last subspec. Remove trailing "+" so
+			 * we can parse it.
+			 */
+			size_t last = subspecs[sub]->len - 1;
+			assert(subspecs[sub]->buf[last] == '+');
+			strbuf_remove(subspecs[sub], last, 1);
+		}
+		result = parse_combine_subfilter(
+			filter_options, subspecs[sub], errbuf);
+		if (result)
+			goto cleanup;
+	}
+
+	filter_options->choice = LOFC_COMBINE;
+
+cleanup:
+	strbuf_list_free(subspecs);
+	if (result) {
+		list_objects_filter_release(filter_options);
+		memset(filter_options, 0, sizeof(*filter_options));
+	}
+	return result;
+}
+
 int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 			      const char *arg)
 {
 	struct strbuf buf = STRBUF_INIT;
+	filter_options->filter_spec = strdup(arg);
 	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
 		die("%s", buf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
@@ -119,23 +241,30 @@ void expand_list_objects_filter_spec(
 	else if (filter->choice == LOFC_TREE_DEPTH)
 		strbuf_addf(expanded_spec, "tree:%lu",
 			    filter->tree_exclude_depth);
 	else
 		strbuf_addstr(expanded_spec, filter->filter_spec);
 }
 
 void list_objects_filter_release(
 	struct list_objects_filter_options *filter_options)
 {
+	size_t sub;
+
+	if (!filter_options)
+		return;
 	free(filter_options->filter_spec);
 	free(filter_options->sparse_oid_value);
 	free(filter_options->sparse_path_value);
+	for (sub = 0; sub < filter_options->sub_nr; sub++)
+		list_objects_filter_release(&filter_options->sub[sub]);
+	free(filter_options->sub);
 	memset(filter_options, 0, sizeof(*filter_options));
 }
 
 void partial_clone_register(
 	const char *remote,
 	const struct list_objects_filter_options *filter_options)
 {
 	/*
 	 * Record the name of the partial clone remote in the
 	 * config and in the global variable -- the latter is
@@ -165,15 +294,17 @@ void partial_clone_register(
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 
 	/*
 	 * Parse default value, but silently ignore it if it is invalid.
 	 */
 	if (!core_partial_clone_filter_default)
 		return;
+
+	filter_options->filter_spec = strdup(core_partial_clone_filter_default);
 	gently_parse_list_objects_filter(filter_options,
 					 core_partial_clone_filter_default,
 					 &errbuf);
 	strbuf_release(&errbuf);
 }
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index e3adc78ebf..8f08ed74a1 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -7,20 +7,21 @@
 /*
  * The list of defined filters for list-objects.
  */
 enum list_objects_filter_choice {
 	LOFC_DISABLED = 0,
 	LOFC_BLOB_NONE,
 	LOFC_BLOB_LIMIT,
 	LOFC_TREE_DEPTH,
 	LOFC_SPARSE_OID,
 	LOFC_SPARSE_PATH,
+	LOFC_COMBINE,
 	LOFC__COUNT /* must be last */
 };
 
 struct list_objects_filter_options {
 	/*
 	 * 'filter_spec' is the raw argument value given on the command line
 	 * or protocol request.  (The part after the "--keyword=".)  For
 	 * commands that launch filtering sub-processes, or for communication
 	 * over the network, don't use this value; use the result of
 	 * expand_list_objects_filter_spec() instead.
@@ -32,28 +33,38 @@ struct list_objects_filter_options {
 	 * the filtering algorithm to use.
 	 */
 	enum list_objects_filter_choice choice;
 
 	/*
 	 * Choice is LOFC_DISABLED because "--no-filter" was requested.
 	 */
 	unsigned int no_filter : 1;
 
 	/*
-	 * Parsed values (fields) from within the filter-spec.  These are
-	 * choice-specific; not all values will be defined for any given
-	 * choice.
+	 * BEGIN choice-specific parsed values from within the filter-spec. Only
+	 * some values will be defined for any given choice.
 	 */
+
 	struct object_id *sparse_oid_value;
 	char *sparse_path_value;
 	unsigned long blob_limit_value;
 	unsigned long tree_exclude_depth;
+
+	/* LOFC_COMBINE values */
+
+	/* This array contains all the subfilters which this filter combines. */
+	size_t sub_nr, sub_alloc;
+	struct list_objects_filter_options *sub;
+
+	/*
+	 * END choice-specific parsed values.
+	 */
 };
 
 /* Normalized command line arguments */
 #define CL_ARG__FILTER "filter"
 
 int parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg);
 
 int opt_parse_list_objects_filter(const struct option *opt,
diff --git a/list-objects-filter.c b/list-objects-filter.c
index 57bbf6ec1c..c8a006edf9 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -19,30 +19,45 @@
  * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
  * that have been shown, but should be revisited if they appear
  * in the traversal (until we mark it SEEN).  This is a way to
  * let us silently de-dup calls to show() in the caller.  This
  * is subtly different from the "revision.h:SHOWN" and the
  * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
  * the non-de-dup usage in pack-bitmap.c
  */
 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
 
+struct subfilter {
+	struct filter *filter;
+	struct oidset seen;
+	struct oidset omits;
+	struct object_id skip_tree;
+	unsigned is_skipping_tree : 1;
+};
+
 struct filter {
 	enum list_objects_filter_result (*filter_object_fn)(
 		struct repository *r,
 		enum list_objects_filter_situation filter_situation,
 		struct object *obj,
 		const char *pathname,
 		const char *filename,
 		struct oidset *omits,
 		void *filter_data);
 
+	/*
+	 * Optional. If this function is supplied and the filter needs to
+	 * collect omits, then this function is called once before free_fn is
+	 * called.
+	 */
+	void (*finalize_omits_fn)(struct oidset *omits, void *filter_data);
+
 	void (*free_fn)(void *filter_data);
 
 	void *filter_data;
 
 	/* If non-NULL, the filter collects a list of the omitted OIDs here. */
 	struct oidset *omits;
 };
 
 static enum list_objects_filter_result filter_blobs_none(
 	struct repository *r,
@@ -482,34 +497,176 @@ static void filter_sparse_path__init(
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_sparse;
 	filter->free_fn = filter_sparse_free;
 }
 
+/* A filter which only shows objects shown by all sub-filters. */
+struct combine_filter_data {
+	struct subfilter *sub;
+	size_t nr;
+};
+
+static int should_delegate(enum list_objects_filter_situation filter_situation,
+			   struct object *obj,
+			   struct subfilter *sub)
+{
+	if (!sub->is_skipping_tree)
+		return 1;
+	if (filter_situation == LOFS_END_TREE &&
+		oideq(&obj->oid, &sub->skip_tree)) {
+		sub->is_skipping_tree = 0;
+		return 1;
+	}
+	return 0;
+}
+
+static enum list_objects_filter_result process_subfilter(
+	struct repository *r,
+	enum list_objects_filter_situation filter_situation,
+	struct object *obj,
+	const char *pathname,
+	const char *filename,
+	struct subfilter *sub)
+{
+	enum list_objects_filter_result result;
+
+	/*
+	 * Check should_delegate before oidset_contains so that
+	 * is_skipping_tree gets unset even when the object is marked as seen.
+	 * As of this writing, no filter uses LOFR_MARK_SEEN on trees that also
+	 * uses LOFR_SKIP_TREE, so the ordering is only theoretically
+	 * important. Be cautious if you change the order of the below checks
+	 * and more filters have been added!
+	 */
+	if (!should_delegate(filter_situation, obj, sub))
+		return LOFR_ZERO;
+	if (oidset_contains(&sub->seen, &obj->oid))
+		return LOFR_ZERO;
+
+	result = list_objects_filter__filter_object(
+		r, filter_situation, obj, pathname, filename, sub->filter);
+
+	if (result & LOFR_MARK_SEEN)
+		oidset_insert(&sub->seen, &obj->oid);
+
+	if (result & LOFR_SKIP_TREE) {
+		sub->is_skipping_tree = 1;
+		sub->skip_tree = obj->oid;
+	}
+
+	return result;
+}
+
+static enum list_objects_filter_result filter_combine(
+	struct repository *r,
+	enum list_objects_filter_situation filter_situation,
+	struct object *obj,
+	const char *pathname,
+	const char *filename,
+	struct oidset *omits,
+	void *filter_data)
+{
+	struct combine_filter_data *d = filter_data;
+	enum list_objects_filter_result combined_result =
+		LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE;
+	size_t sub;
+
+	for (sub = 0; sub < d->nr; sub++) {
+		enum list_objects_filter_result sub_result = process_subfilter(
+			r, filter_situation, obj, pathname, filename,
+			&d->sub[sub]);
+		if (!(sub_result & LOFR_DO_SHOW))
+			combined_result &= ~LOFR_DO_SHOW;
+		if (!(sub_result & LOFR_MARK_SEEN))
+			combined_result &= ~LOFR_MARK_SEEN;
+		if (!d->sub[sub].is_skipping_tree)
+			combined_result &= ~LOFR_SKIP_TREE;
+	}
+
+	return combined_result;
+}
+
+static void filter_combine__free(void *filter_data)
+{
+	struct combine_filter_data *d = filter_data;
+	size_t sub;
+	for (sub = 0; sub < d->nr; sub++) {
+		list_objects_filter__free(d->sub[sub].filter);
+		oidset_clear(&d->sub[sub].seen);
+		if (d->sub[sub].omits.set.size)
+			BUG("expected oidset to be cleared already");
+	}
+	free(d->sub);
+}
+
+static void add_all(struct oidset *dest, struct oidset *src) {
+	struct oidset_iter iter;
+	struct object_id *src_oid;
+
+	oidset_iter_init(src, &iter);
+	while ((src_oid = oidset_iter_next(&iter)) != NULL)
+		oidset_insert(dest, src_oid);
+}
+
+static void filter_combine__finalize_omits(
+	struct oidset *omits,
+	void *filter_data)
+{
+	struct combine_filter_data *d = filter_data;
+	size_t sub;
+
+	for (sub = 0; sub < d->nr; sub++) {
+		add_all(omits, &d->sub[sub].omits);
+		oidset_clear(&d->sub[sub].omits);
+	}
+}
+
+static void filter_combine__init(
+	struct list_objects_filter_options *filter_options,
+	struct filter* filter)
+{
+	struct combine_filter_data *d = xcalloc(1, sizeof(*d));
+	size_t sub;
+
+	d->nr = filter_options->sub_nr;
+	d->sub = xcalloc(d->nr, sizeof(*d->sub));
+	for (sub = 0; sub < d->nr; sub++)
+		d->sub[sub].filter = list_objects_filter__init(
+			filter->omits ? &d->sub[sub].omits : NULL,
+			&filter_options->sub[sub]);
+
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_combine;
+	filter->free_fn = filter_combine__free;
+	filter->finalize_omits_fn = filter_combine__finalize_omits;
+}
+
 typedef void (*filter_init_fn)(
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter);
 
 /*
  * Must match "enum list_objects_filter_choice".
  */
 static filter_init_fn s_filters[] = {
 	NULL,
 	filter_blobs_none__init,
 	filter_blobs_limit__init,
 	filter_trees_depth__init,
 	filter_sparse_oid__init,
 	filter_sparse_path__init,
+	filter_combine__init,
 };
 
 struct filter *list_objects_filter__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options)
 {
 	struct filter *filter;
 	filter_init_fn init_fn;
 
 	assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
@@ -547,13 +704,15 @@ enum list_objects_filter_result list_objects_filter__filter_object(
 	 */
 	if (filter_situation == LOFS_END_TREE)
 		return 0;
 	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
 void list_objects_filter__free(struct filter *filter)
 {
 	if (!filter)
 		return;
+	if (filter->finalize_omits_fn && filter->omits)
+		filter->finalize_omits_fn(filter->omits, filter->filter_data);
 	filter->free_fn(filter->filter_data);
 	free(filter);
 }
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index 9c11427719..c36199457d 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -284,21 +284,33 @@ test_expect_success 'verify tree:0 includes trees in "filtered" output' '
 # Make sure tree:0 does not iterate through any trees.
 
 test_expect_success 'verify skipping tree iteration when not collecting omits' '
 	GIT_TRACE=1 git -C r3 rev-list \
 		--objects --filter=tree:0 HEAD 2>filter_trace &&
 	grep "Skipping contents of tree [.][.][.]" filter_trace >actual &&
 	# One line for each commit traversed.
 	test_line_count = 2 actual &&
 
 	# Make sure no other trees were considered besides the root.
-	! grep "Skipping contents of tree [^.]" filter_trace
+	! grep "Skipping contents of tree [^.]" filter_trace &&
+
+	# Try this again with "combine:". If both sub-filters are skipping
+	# trees, the composite filter should also skip trees. This is not
+	# important unless the user does combine:tree:X+tree:Y or another filter
+	# besides "tree:" is implemented in the future which can skip trees.
+	GIT_TRACE=1 git -C r3 rev-list \
+		--objects --filter=combine:tree:1+tree:3 HEAD 2>filter_trace &&
+
+	# Only skip the dir1/ tree, which is shared between the two commits.
+	grep "Skipping contents of tree " filter_trace >actual &&
+	test_write_lines "Skipping contents of tree dir1/..." >expected &&
+	test_cmp expected actual
 '
 
 # Test tree:# filters.
 
 expect_has () {
 	commit=$1 &&
 	name=$2 &&
 
 	hash=$(git -C r3 rev-parse $commit:$name) &&
 	grep "^$hash $name$" actual
@@ -336,20 +348,138 @@ test_expect_success 'verify tree:3 includes everything expected' '
 	expect_has HEAD dir1/sparse1 &&
 	expect_has HEAD dir1/sparse2 &&
 	expect_has HEAD pattern &&
 	expect_has HEAD sparse1 &&
 	expect_has HEAD sparse2 &&
 
 	# There are also 2 commit objects
 	test_line_count = 10 actual
 '
 
+test_expect_success 'combine:... for a simple combination' '
+	git -C r3 rev-list --objects --filter=combine:tree:2+blob:none HEAD \
+		>actual &&
+
+	expect_has HEAD "" &&
+	expect_has HEAD~1 "" &&
+	expect_has HEAD dir1 &&
+
+	# There are also 2 commit objects
+	test_line_count = 5 actual
+'
+
+test_expect_success 'combine:... with URL encoding' '
+	git -C r3 rev-list --objects \
+		--filter=combine:tree%3a2+blob:%6Eon%65 HEAD >actual &&
+
+	expect_has HEAD "" &&
+	expect_has HEAD~1 "" &&
+	expect_has HEAD dir1 &&
+
+	# There are also 2 commit objects
+	test_line_count = 5 actual
+'
+
+expect_invalid_filter_spec () {
+	spec="$1" &&
+	err="$2" &&
+
+	test_must_fail git -C r3 rev-list --objects --filter="$spec" HEAD \
+		>actual 2>actual_stderr &&
+	test_must_be_empty actual &&
+	test_i18ngrep "$err" actual_stderr
+}
+
+test_expect_success 'combine:... while URL-encoding things that should not be' '
+	expect_invalid_filter_spec combine%3Atree:2+blob:none \
+		"invalid filter-spec"
+'
+
+test_expect_success 'combine: with nothing after the :' '
+	expect_invalid_filter_spec combine: "expected something after combine:"
+'
+
+test_expect_success 'parse error in first sub-filter in combine:' '
+	expect_invalid_filter_spec combine:tree:asdf+blob:none \
+		"expected .tree:<depth>."
+'
+
+test_expect_success 'combine:... with invalid URL-encoded sequences' '
+	# Not enough hex chars
+	expect_invalid_filter_spec combine:tree:2+blob:non%a \
+		"error in filter-spec - invalid hex sequence after %" &&
+	# Non-hex digit after %
+	expect_invalid_filter_spec combine:tree:2+blob%G5none \
+		"error in filter-spec - invalid hex sequence after %" &&
+	# Null byte encoded by %
+	expect_invalid_filter_spec combine:tree:2+blob%00none \
+		"error in filter-spec - unexpected %00"
+'
+
+test_expect_success 'combine:... with non-encoded reserved chars' '
+	expect_invalid_filter_spec combine:tree:2+sparse:@xyz \
+		"must escape char in sub-filter-spec: .@." &&
+	expect_invalid_filter_spec combine:tree:2+sparse:\` \
+		"must escape char in sub-filter-spec: .\`." &&
+	expect_invalid_filter_spec combine:tree:2+sparse:~abc \
+		"must escape char in sub-filter-spec: .\~."
+'
+
+test_expect_success 'validate err msg for "combine:<valid-filter>+"' '
+	expect_invalid_filter_spec combine:tree:2+ "expected .tree:<depth>."
+'
+
+test_expect_success 'combine:... with edge-case hex digits: Ff Aa 0 9' '
+	git -C r3 rev-list --objects --filter="combine:tree:2+bl%6Fb:n%6fne" \
+		HEAD >actual &&
+	test_line_count = 5 actual &&
+	git -C r3 rev-list --objects --filter="combine:tree%3A2+blob%3anone" \
+		HEAD >actual &&
+	test_line_count = 5 actual &&
+	git -C r3 rev-list --objects --filter="combine:tree:%30" HEAD >actual &&
+	test_line_count = 2 actual &&
+	git -C r3 rev-list --objects --filter="combine:tree:%39+blob:none" \
+		HEAD >actual &&
+	test_line_count = 5 actual
+'
+
+test_expect_success 'add a sparse pattern blob whose path has reserved chars' '
+	cp r3/pattern r3/pattern1+renamed% &&
+	git -C r3 add pattern1+renamed% &&
+	git -C r3 commit -m "add sparse pattern file with reserved chars"
+'
+
+test_expect_success 'combine:... with more than two sub-filters' '
+	git -C r3 rev-list --objects \
+		--filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern \
+		HEAD >actual &&
+
+	expect_has HEAD "" &&
+	expect_has HEAD~1 "" &&
+	expect_has HEAD~2 "" &&
+	expect_has HEAD dir1 &&
+	expect_has HEAD dir1/sparse1 &&
+	expect_has HEAD dir1/sparse2 &&
+
+	# Should also have 3 commits
+	test_line_count = 9 actual &&
+
+	# Try again, this time making sure the last sub-filter is only
+	# URL-decoded once.
+	cp actual expect &&
+
+	git -C r3 rev-list --objects \
+		--filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern1%2brenamed%25 \
+		HEAD >actual &&
+	test_cmp expect actual
+'
+
 # Test provisional omit collection logic with a repo that has objects appearing
 # at multiple depths - first deeper than the filter's threshold, then shallow.
 
 test_expect_success 'setup r4' '
 	git init r4 &&
 
 	echo foo > r4/foo &&
 	mkdir r4/subdir &&
 	echo bar > r4/subdir/bar &&
 
@@ -379,20 +509,51 @@ test_expect_success 'test tree:# filter provisional omit for blob and tree' '
 
 test_expect_success 'verify skipping tree iteration when collecting omits' '
 	GIT_TRACE=1 git -C r4 rev-list --filter-print-omitted \
 		--objects --filter=tree:0 HEAD 2>filter_trace &&
 	grep "^Skipping contents of tree " filter_trace >actual &&
 
 	echo "Skipping contents of tree subdir/..." >expect &&
 	test_cmp expect actual
 '
 
+test_expect_success 'setup r5' '
+	git init r5 &&
+	mkdir -p r5/subdir &&
+
+	echo 1     >r5/short-root          &&
+	echo 12345 >r5/long-root           &&
+	echo a     >r5/subdir/short-subdir &&
+	echo abcde >r5/subdir/long-subdir  &&
+
+	git -C r5 add short-root long-root subdir &&
+	git -C r5 commit -m "commit msg"
+'
+
+test_expect_success 'verify collecting omits in combined: filter' '
+	# Note that this test guards against the naive implementation of simply
+	# giving both filters the same "omits" set and expecting it to
+	# automatically merge them.
+	git -C r5 rev-list --objects --quiet --filter-print-omitted \
+		--filter=combine:tree:2+blob:limit=3 HEAD >actual &&
+
+	# Expect 0 trees/commits, 3 blobs omitted (all blobs except short-root)
+	omitted_1=$(echo 12345 | git hash-object --stdin) &&
+	omitted_2=$(echo a     | git hash-object --stdin) &&
+	omitted_3=$(echo abcde | git hash-object --stdin) &&
+
+	grep ~$omitted_1 actual &&
+	grep ~$omitted_2 actual &&
+	grep ~$omitted_3 actual &&
+	test_line_count = 3 actual
+'
+
 # Test tree:<depth> where a tree is iterated to twice - once where a subentry is
 # too deep to be included, and again where the blob inside it is shallow enough
 # to be included. This makes sure we don't use LOFR_MARK_SEEN incorrectly (we
 # can't use it because a tree can be iterated over again at a lower depth).
 
 test_expect_success 'tree:<depth> where we iterate over tree at two levels' '
 	git init r5 &&
 
 	mkdir -p r5/a/subdir/b &&
 	echo foo > r5/a/subdir/b/foo &&
-- 
2.17.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2 5/9] list-objects-filter-options: move error check up
  2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
                   ` (3 preceding siblings ...)
  2019-06-01  0:35 ` [PATCH v2 4/9] list-objects-filter: implement composite filters Matthew DeVore
@ 2019-06-01  0:35 ` Matthew DeVore
  2019-06-01  0:36 ` [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf Matthew DeVore
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-01  0:35 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	emilyshaffer
  Cc: Matthew DeVore, matvore

Move the check for whether filter_options->choice is already set to a
point higher in the call stack. The field can only be set already when
the gentle parse function is called from one of the two call sites.

This is important because in an upcoming patch this may or may not be an
error, and whether it is an error is only known to the
parse_list_objects_filter function.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter-options.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 0f1d4181cb..e8132b811e 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -27,25 +27,22 @@ static int parse_combine_filter(
  * expand_list_objects_filter_spec() first).  We also "intern" the arg for the
  * convenience of the current command.
  */
 static int gently_parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
 	struct strbuf *errbuf)
 {
 	const char *v0;
 
-	if (filter_options->choice) {
-		strbuf_addstr(
-			errbuf, _("multiple filter-specs cannot be combined"));
-		return 1;
-	}
+	if (filter_options->choice)
+		BUG("filter_options already populated");
 
 	if (!strcmp(arg, "blob:none")) {
 		filter_options->choice = LOFC_BLOB_NONE;
 		return 0;
 
 	} else if (skip_prefix(arg, "blob:limit=", &v0)) {
 		if (git_parse_ulong(v0, &filter_options->blob_limit_value)) {
 			filter_options->choice = LOFC_BLOB_LIMIT;
 			return 0;
 		}
@@ -204,20 +201,22 @@ cleanup:
 		list_objects_filter_release(filter_options);
 		memset(filter_options, 0, sizeof(*filter_options));
 	}
 	return result;
 }
 
 int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 			      const char *arg)
 {
 	struct strbuf buf = STRBUF_INIT;
+	if (filter_options->choice)
+		die(_("multiple filter-specs cannot be combined"));
 	filter_options->filter_spec = strdup(arg);
 	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
 		die("%s", buf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf
  2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
                   ` (4 preceding siblings ...)
  2019-06-01  0:35 ` [PATCH v2 5/9] list-objects-filter-options: move error check up Matthew DeVore
@ 2019-06-01  0:36 ` Matthew DeVore
  2019-06-10 20:13   ` Junio C Hamano
  2019-06-01  0:36 ` [PATCH v2 7/9] list-objects-filter-options: allow mult. --filter Matthew DeVore
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 57+ messages in thread
From: Matthew DeVore @ 2019-06-01  0:36 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	emilyshaffer
  Cc: Matthew DeVore, matvore

Make the filter_spec string a strbuf rather than a raw C string. A
future patch will need to grow this string dynamically.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 builtin/rev-list.c            |  2 +-
 list-objects-filter-options.c | 16 ++++++++++------
 list-objects-filter-options.h |  2 +-
 upload-pack.c                 |  2 +-
 4 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 9f31837d30..7137f13a74 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -460,21 +460,21 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 			continue;
 		}
 
 		if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) {
 			parse_list_objects_filter(&filter_options, arg);
 			if (filter_options.choice && !revs.blob_objects)
 				die(_("object filtering requires --objects"));
 			if (filter_options.choice == LOFC_SPARSE_OID &&
 			    !filter_options.sparse_oid_value)
 				die(_("invalid sparse value '%s'"),
-				    filter_options.filter_spec);
+				    filter_options.filter_spec.buf);
 			continue;
 		}
 		if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) {
 			list_objects_filter_set_no_filter(&filter_options);
 			continue;
 		}
 		if (!strcmp(arg, "--filter-print-omitted")) {
 			arg_print_omitted = 1;
 			continue;
 		}
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index e8132b811e..5687425847 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -203,21 +203,22 @@ cleanup:
 	}
 	return result;
 }
 
 int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 			      const char *arg)
 {
 	struct strbuf buf = STRBUF_INIT;
 	if (filter_options->choice)
 		die(_("multiple filter-specs cannot be combined"));
-	filter_options->filter_spec = strdup(arg);
+	strbuf_init(&filter_options->filter_spec, 0);
+	strbuf_addstr(&filter_options->filter_spec, arg);
 	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
 		die("%s", buf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
@@ -226,39 +227,39 @@ int opt_parse_list_objects_filter(const struct option *opt,
 		return 0;
 	}
 
 	return parse_list_objects_filter(filter_options, arg);
 }
 
 void expand_list_objects_filter_spec(
 	const struct list_objects_filter_options *filter,
 	struct strbuf *expanded_spec)
 {
-	strbuf_init(expanded_spec, strlen(filter->filter_spec));
+	strbuf_init(expanded_spec, 0);
 	if (filter->choice == LOFC_BLOB_LIMIT)
 		strbuf_addf(expanded_spec, "blob:limit=%lu",
 			    filter->blob_limit_value);
 	else if (filter->choice == LOFC_TREE_DEPTH)
 		strbuf_addf(expanded_spec, "tree:%lu",
 			    filter->tree_exclude_depth);
 	else
-		strbuf_addstr(expanded_spec, filter->filter_spec);
+		strbuf_addstr(expanded_spec, filter->filter_spec.buf);
 }
 
 void list_objects_filter_release(
 	struct list_objects_filter_options *filter_options)
 {
 	size_t sub;
 
 	if (!filter_options)
 		return;
-	free(filter_options->filter_spec);
+	strbuf_release(&filter_options->filter_spec);
 	free(filter_options->sparse_oid_value);
 	free(filter_options->sparse_path_value);
 	for (sub = 0; sub < filter_options->sub_nr; sub++)
 		list_objects_filter_release(&filter_options->sub[sub]);
 	free(filter_options->sub);
 	memset(filter_options, 0, sizeof(*filter_options));
 }
 
 void partial_clone_register(
 	const char *remote,
@@ -278,32 +279,35 @@ void partial_clone_register(
 	git_config_set("core.repositoryformatversion", "1");
 	git_config_set("extensions.partialclone", remote);
 
 	repository_format_partial_clone = xstrdup(remote);
 
 	/*
 	 * Record the initial filter-spec in the config as
 	 * the default for subsequent fetches from this remote.
 	 */
 	core_partial_clone_filter_default =
-		xstrdup(filter_options->filter_spec);
+		xstrdup(filter_options->filter_spec.buf);
 	git_config_set("core.partialclonefilter",
 		       core_partial_clone_filter_default);
 }
 
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 
 	/*
 	 * Parse default value, but silently ignore it if it is invalid.
 	 */
 	if (!core_partial_clone_filter_default)
 		return;
 
-	filter_options->filter_spec = strdup(core_partial_clone_filter_default);
+	if (!filter_options->filter_spec.buf)
+		strbuf_init(&filter_options->filter_spec, 0);
+	strbuf_addstr(&filter_options->filter_spec,
+		      core_partial_clone_filter_default);
 	gently_parse_list_objects_filter(filter_options,
 					 core_partial_clone_filter_default,
 					 &errbuf);
 	strbuf_release(&errbuf);
 }
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index 8f08ed74a1..e1e23fd191 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -19,21 +19,21 @@ enum list_objects_filter_choice {
 };
 
 struct list_objects_filter_options {
 	/*
 	 * 'filter_spec' is the raw argument value given on the command line
 	 * or protocol request.  (The part after the "--keyword=".)  For
 	 * commands that launch filtering sub-processes, or for communication
 	 * over the network, don't use this value; use the result of
 	 * expand_list_objects_filter_spec() instead.
 	 */
-	char *filter_spec;
+	struct strbuf filter_spec;
 
 	/*
 	 * 'choice' is determined by parsing the filter-spec.  This indicates
 	 * the filtering algorithm to use.
 	 */
 	enum list_objects_filter_choice choice;
 
 	/*
 	 * Choice is LOFC_DISABLED because "--no-filter" was requested.
 	 */
diff --git a/upload-pack.c b/upload-pack.c
index d2ea5eb20d..2cdd499f28 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -133,21 +133,21 @@ static void create_pack_file(const struct object_array *have_obj,
 
 	argv_array_push(&pack_objects.args, "--stdout");
 	if (shallow_nr)
 		argv_array_push(&pack_objects.args, "--shallow");
 	if (!no_progress)
 		argv_array_push(&pack_objects.args, "--progress");
 	if (use_ofs_delta)
 		argv_array_push(&pack_objects.args, "--delta-base-offset");
 	if (use_include_tag)
 		argv_array_push(&pack_objects.args, "--include-tag");
-	if (filter_options.filter_spec) {
+	if (filter_options.filter_spec.len) {
 		struct strbuf expanded_filter_spec = STRBUF_INIT;
 		expand_list_objects_filter_spec(&filter_options,
 						&expanded_filter_spec);
 		if (pack_objects.use_shell) {
 			struct strbuf buf = STRBUF_INIT;
 			sq_quote_buf(&buf, expanded_filter_spec.buf);
 			argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf);
 			strbuf_release(&buf);
 		} else {
 			argv_array_pushf(&pack_objects.args, "--filter=%s",
-- 
2.17.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2 7/9] list-objects-filter-options: allow mult. --filter
  2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
                   ` (5 preceding siblings ...)
  2019-06-01  0:36 ` [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf Matthew DeVore
@ 2019-06-01  0:36 ` Matthew DeVore
  2019-06-01  0:36 ` [PATCH v2 8/9] list-objects-filter-options: clean up use of ALLOC_GROW Matthew DeVore
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-01  0:36 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	emilyshaffer
  Cc: Matthew DeVore, matvore, Jeff Hostetler, Junio C Hamano

Allow combining of multiple filters by simply repeating the --filter
flag. Before this patch, the user had to combine them in a single flag
somewhat awkwardly (e.g. --filter=combine:FOO+BAR), including
URL-encoding the individual filters.

To make this work, in the --filter flag parsing callback, rather than
error out when we detect that the filter_options struct is already
populated, we modify it in-place to contain the added sub-filter. The
existing sub-filter becomes the LHS of the combined filter, and the
next sub-filter becomes the RHS. We also have to URL-encode the LHS and
RHS sub-filters.

We can simplify the operation if the LHS is already a combine: filter.
In that case, we just append the URL-encoded RHS sub-filter to the LHS
spec to get the new spec.

Helped-by: Emily Shaffer <emilyshaffer@google.com>
Helped-by: Jeff Hostetler <git@jeffhostetler.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 Documentation/rev-list-options.txt  | 16 +++++
 list-objects-filter-options.c       | 90 ++++++++++++++++++++++++++---
 list-objects-filter-options.h       | 11 ++++
 t/t5616-partial-clone.sh            | 19 ++++++
 t/t6112-rev-list-filters-objects.sh | 44 ++++++++++++--
 transport.c                         |  1 +
 upload-pack.c                       |  2 +
 7 files changed, 171 insertions(+), 12 deletions(-)

diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index ddbc1de43f..7b4116f279 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -730,20 +730,36 @@ specification contained in <path>.
 +
 The form '--filter=tree:<depth>' omits all blobs and trees whose depth
 from the root tree is >= <depth> (minimum depth if an object is located
 at multiple depths in the commits traversed). <depth>=0 will not include
 any trees or blobs unless included explicitly in the command-line (or
 standard input when --stdin is used). <depth>=1 will include only the
 tree and blobs which are referenced directly by a commit reachable from
 <commit> or an explicitly-given object. <depth>=2 is like <depth>=1
 while also including trees and blobs one more level removed from an
 explicitly-given commit or tree.
++
+Multiple '--filter=' flags can be specified to combine filters. Only
+objects which are accepted by every filter are included.
++
+The form '--filter=combine:<filter1>+<filter2>+...<filterN>' can also be
+used to combine several filters, but this is harder than just repeating
+the '--filter' flag and is usually not necessary. Filters are joined by
+'{plus}' and individual filters are %-encoded (i.e. URL-encoded).
+Besides the '{plus}' and '%' characters, the following characters are
+reserved and also must be encoded: `~!@#$^&*()[]{}\;",<>?`+&#39;&#96;+
+as well as all characters with ASCII code &lt;= `0x20`, which includes
+space and newline.
++
+Other arbitrary characters can also be encoded. For instance,
+'combine:tree:3+blob:none' and 'combine:tree%3A3+blob%3Anone' are
+equivalent.
 
 --no-filter::
 	Turn off any previous `--filter=` argument.
 
 --filter-print-omitted::
 	Only useful with `--filter=`; prints a list of the objects omitted
 	by the filter.  Object IDs are prefixed with a ``~'' character.
 
 --missing=<missing-action>::
 	A debug option to help with future "partial clone" development.
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 5687425847..5e98e4a309 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -1,18 +1,19 @@
 #include "cache.h"
 #include "commit.h"
 #include "config.h"
 #include "revision.h"
 #include "argv-array.h"
 #include "list-objects.h"
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
+#include "trace.h"
 
 static int parse_combine_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
 	struct strbuf *errbuf);
 
 /*
  * Parse value of the argument to the "filter" keyword.
  * On the command line this looks like:
  *       --filter=<arg>
@@ -197,30 +198,105 @@ static int parse_combine_filter(
 
 cleanup:
 	strbuf_list_free(subspecs);
 	if (result) {
 		list_objects_filter_release(filter_options);
 		memset(filter_options, 0, sizeof(*filter_options));
 	}
 	return result;
 }
 
-int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
-			      const char *arg)
+static void add_url_encoded(struct strbuf *dest, const char *s)
+{
+	while (*s) {
+		if (*s <= ' ' || strchr(RESERVED_NON_WS, *s) ||
+			*s == '%' || *s == '+')
+			strbuf_addf(dest, "%%%02X", (int)*s);
+		else
+			strbuf_addf(dest, "%c", *s);
+		s++;
+	}
+}
+
+/*
+ * Changes filter_options into an equivalent LOFC_COMBINE filter options
+ * instance. Does not do anything if filter_options is already LOFC_COMBINE.
+ */
+static void transform_to_combine_type(
+	struct list_objects_filter_options *filter_options)
+{
+	assert(filter_options->choice);
+	if (filter_options->choice == LOFC_COMBINE)
+		return;
+	{
+		const int initial_sub_alloc = 2;
+		struct list_objects_filter_options *sub_array =
+			xcalloc(initial_sub_alloc, sizeof(*sub_array));
+		sub_array[0] = *filter_options;
+		memset(filter_options, 0, sizeof(*filter_options));
+		filter_options->sub = sub_array;
+		filter_options->sub_alloc = initial_sub_alloc;
+	}
+	filter_options->sub_nr = 1;
+	filter_options->choice = LOFC_COMBINE;
+	strbuf_init(&filter_options->filter_spec, 0);
+	strbuf_addstr(&filter_options->filter_spec, "combine:");
+	add_url_encoded(&filter_options->filter_spec,
+			filter_options->sub[0].filter_spec.buf);
+	/*
+	 * We don't need the filter_spec strings for subfilter specs, only the
+	 * top level.
+	 */
+	strbuf_release(&filter_options->sub[0].filter_spec);
+}
+
+void list_objects_filter_die_if_populated(
+	struct list_objects_filter_options *filter_options)
 {
-	struct strbuf buf = STRBUF_INIT;
 	if (filter_options->choice)
 		die(_("multiple filter-specs cannot be combined"));
-	strbuf_init(&filter_options->filter_spec, 0);
-	strbuf_addstr(&filter_options->filter_spec, arg);
-	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
-		die("%s", buf.buf);
+}
+
+int parse_list_objects_filter(
+	struct list_objects_filter_options *filter_options,
+	const char *arg)
+{
+	struct strbuf errbuf = STRBUF_INIT;
+	int parse_error;
+
+	if (!filter_options->choice) {
+		strbuf_init(&filter_options->filter_spec, 0);
+		strbuf_addstr(&filter_options->filter_spec, arg);
+
+		parse_error = gently_parse_list_objects_filter(
+			filter_options, arg, &errbuf);
+	} else {
+		/*
+		 * Make filter_options an LOFC_COMBINE spec so we can trivially
+		 * add subspecs to it.
+		 */
+		transform_to_combine_type(filter_options);
+
+		strbuf_addstr(&filter_options->filter_spec, "+");
+		add_url_encoded(&filter_options->filter_spec, arg);
+		trace_printf("Generated composite filter-spec: %s\n",
+			     filter_options->filter_spec.buf);
+		ALLOC_GROW(filter_options->sub, filter_options->sub_nr + 1,
+			   filter_options->sub_alloc);
+		filter_options = &filter_options->sub[filter_options->sub_nr++];
+		memset(filter_options, 0, sizeof(*filter_options));
+
+		parse_error = gently_parse_list_objects_filter(
+			filter_options, arg, &errbuf);
+	}
+	if (parse_error)
+		die("%s", errbuf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
 	if (unset || !arg) {
 		list_objects_filter_set_no_filter(filter_options);
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index e1e23fd191..f8c8a624e4 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -56,20 +56,31 @@ struct list_objects_filter_options {
 	struct list_objects_filter_options *sub;
 
 	/*
 	 * END choice-specific parsed values.
 	 */
 };
 
 /* Normalized command line arguments */
 #define CL_ARG__FILTER "filter"
 
+void list_objects_filter_die_if_populated(
+	struct list_objects_filter_options *filter_options);
+
+/*
+ * Parses the filter spec string given by arg and either (1) simply places the
+ * result in filter_options if it is not yet populated or (2) combines it with
+ * the filter already in filter_options if it is already populated. In the case
+ * of (2), the filter specs are combined as if specified with 'combine:'.
+ *
+ * Dies and prints a user-facing message if an error occurs.
+ */
 int parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg);
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset);
 
 #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
 	{ OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \
 	  N_("object filtering"), 0, \
diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh
index 9a8f9886b3..11536f4028 100755
--- a/t/t5616-partial-clone.sh
+++ b/t/t5616-partial-clone.sh
@@ -201,20 +201,39 @@ test_expect_success 'use fsck before and after manually fetching a missing subtr
 	test_line_count = 70 fetched_objects &&
 
 	awk -f print_1.awk fetched_objects |
 	xargs -n1 git -C dst cat-file -t >fetched_types &&
 
 	sort -u fetched_types >unique_types.observed &&
 	test_write_lines blob commit tree >unique_types.expected &&
 	test_cmp unique_types.expected unique_types.observed
 '
 
+test_expect_success 'implicitly construct combine: filter with repeated flags' '
+	GIT_TRACE=$(pwd)/trace git clone --bare \
+		--filter=blob:none --filter=tree:1 \
+		"file://$(pwd)/srv.bare" pc2 &&
+	grep "trace:.* git pack-objects .*--filter=combine:blob:none+tree:1" \
+		trace &&
+	git -C pc2 rev-list --objects --missing=allow-any HEAD >objects &&
+
+	# We should have gotten some root trees.
+	grep " $" objects &&
+	# Should not have gotten any non-root trees or blobs.
+	! grep " ." objects &&
+
+	xargs -n 1 git -C pc2 cat-file -t <objects >types &&
+	sort -u types >unique_types.actual &&
+	test_write_lines commit tree >unique_types.expected &&
+	test_cmp unique_types.expected unique_types.actual
+'
+
 test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' '
 	rm -rf src dst &&
 	git init src &&
 	test_commit -C src x &&
 	test_config -C src uploadpack.allowfilter 1 &&
 	test_config -C src uploadpack.allowanysha1inwant 1 &&
 
 	# Create a tag pointing to a blob.
 	BLOB=$(echo blob-contents | git -C src hash-object --stdin -w) &&
 	git -C src tag myblob "$BLOB" &&
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index c36199457d..7fb5e50cde 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -357,21 +357,30 @@ test_expect_success 'verify tree:3 includes everything expected' '
 
 test_expect_success 'combine:... for a simple combination' '
 	git -C r3 rev-list --objects --filter=combine:tree:2+blob:none HEAD \
 		>actual &&
 
 	expect_has HEAD "" &&
 	expect_has HEAD~1 "" &&
 	expect_has HEAD dir1 &&
 
 	# There are also 2 commit objects
-	test_line_count = 5 actual
+	test_line_count = 5 actual &&
+
+	cp actual expected &&
+
+	# Try again using repeated --filter - this is equivalent to a manual
+	# combine with "combine:...+..."
+	git -C r3 rev-list --objects --filter=combine:tree:2 \
+		--filter=blob:none HEAD >actual &&
+
+	test_cmp expected actual
 '
 
 test_expect_success 'combine:... with URL encoding' '
 	git -C r3 rev-list --objects \
 		--filter=combine:tree%3a2+blob:%6Eon%65 HEAD >actual &&
 
 	expect_has HEAD "" &&
 	expect_has HEAD~1 "" &&
 	expect_has HEAD dir1 &&
 
@@ -435,24 +444,26 @@ test_expect_success 'combine:... with edge-case hex digits: Ff Aa 0 9' '
 	git -C r3 rev-list --objects --filter="combine:tree%3A2+blob%3anone" \
 		HEAD >actual &&
 	test_line_count = 5 actual &&
 	git -C r3 rev-list --objects --filter="combine:tree:%30" HEAD >actual &&
 	test_line_count = 2 actual &&
 	git -C r3 rev-list --objects --filter="combine:tree:%39+blob:none" \
 		HEAD >actual &&
 	test_line_count = 5 actual
 '
 
-test_expect_success 'add a sparse pattern blob whose path has reserved chars' '
+test_expect_success 'add sparse pattern blobs whose paths have reserved chars' '
 	cp r3/pattern r3/pattern1+renamed% &&
-	git -C r3 add pattern1+renamed% &&
-	git -C r3 commit -m "add sparse pattern file with reserved chars"
+	cp r3/pattern "r3/p;at%ter+n" &&
+	cp r3/pattern r3/^~pattern &&
+	git -C r3 add pattern1+renamed% "p;at%ter+n" ^~pattern &&
+	git -C r3 commit -m "add sparse pattern files with reserved chars"
 '
 
 test_expect_success 'combine:... with more than two sub-filters' '
 	git -C r3 rev-list --objects \
 		--filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern \
 		HEAD >actual &&
 
 	expect_has HEAD "" &&
 	expect_has HEAD~1 "" &&
 	expect_has HEAD~2 "" &&
@@ -463,21 +474,44 @@ test_expect_success 'combine:... with more than two sub-filters' '
 	# Should also have 3 commits
 	test_line_count = 9 actual &&
 
 	# Try again, this time making sure the last sub-filter is only
 	# URL-decoded once.
 	cp actual expect &&
 
 	git -C r3 rev-list --objects \
 		--filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern1%2brenamed%25 \
 		HEAD >actual &&
-	test_cmp expect actual
+	test_cmp expect actual &&
+
+	# Use the same composite filter again, but with a pattern file name that
+	# requires encoding multiple characters, and use implicit filter
+	# combining.
+	GIT_TRACE=$(pwd)/trace git -C r3 rev-list --objects \
+		--filter=tree:3 --filter=blob:limit=40 \
+		--filter=sparse:oid="master:p;at%ter+n" \
+		HEAD >actual &&
+
+	test_cmp expect actual &&
+	grep "Generated composite filter-spec: combine:tree:3+blob:limit=40+sparse:oid=master:p%3Bat%25ter%2B" \
+		trace &&
+
+	# Repeat the above test, but this time, the characters to encode are in
+	# the LHS of the combined filter.
+	GIT_TRACE=$(pwd)/trace git -C r3 rev-list --objects \
+		--filter=sparse:oid=master:^~pattern \
+		--filter=tree:3 --filter=blob:limit=40 \
+		HEAD >actual &&
+
+	test_cmp expect actual &&
+	grep "Generated composite filter-spec: combine:sparse:oid=master:%5E%7Epattern+tree:3+blob:limit=40" \
+		trace
 '
 
 # Test provisional omit collection logic with a repo that has objects appearing
 # at multiple depths - first deeper than the filter's threshold, then shallow.
 
 test_expect_success 'setup r4' '
 	git init r4 &&
 
 	echo foo > r4/foo &&
 	mkdir r4/subdir &&
diff --git a/transport.c b/transport.c
index f1fcd2c4b0..ee7dd1c062 100644
--- a/transport.c
+++ b/transport.c
@@ -217,20 +217,21 @@ static int set_git_option(struct git_transport_options *opts,
 	} else if (!strcmp(name, TRANS_OPT_DEEPEN_RELATIVE)) {
 		opts->deepen_relative = !!value;
 		return 0;
 	} else if (!strcmp(name, TRANS_OPT_FROM_PROMISOR)) {
 		opts->from_promisor = !!value;
 		return 0;
 	} else if (!strcmp(name, TRANS_OPT_NO_DEPENDENTS)) {
 		opts->no_dependents = !!value;
 		return 0;
 	} else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) {
+		list_objects_filter_die_if_populated(&opts->filter_options);
 		parse_list_objects_filter(&opts->filter_options, value);
 		return 0;
 	}
 	return 1;
 }
 
 static int connect_setup(struct transport *transport, int for_push)
 {
 	struct git_transport_data *data = transport->data;
 	int flags = transport->verbose > 0 ? CONNECT_VERBOSE : 0;
diff --git a/upload-pack.c b/upload-pack.c
index 2cdd499f28..16e748ba58 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -877,20 +877,21 @@ static void receive_needs(struct packet_reader *reader, struct object_array *wan
 		if (process_deepen(reader->line, &depth))
 			continue;
 		if (process_deepen_since(reader->line, &deepen_since, &deepen_rev_list))
 			continue;
 		if (process_deepen_not(reader->line, &deepen_not, &deepen_rev_list))
 			continue;
 
 		if (skip_prefix(reader->line, "filter ", &arg)) {
 			if (!filter_capability_requested)
 				die("git upload-pack: filtering capability not negotiated");
+			list_objects_filter_die_if_populated(&filter_options);
 			parse_list_objects_filter(&filter_options, arg);
 			continue;
 		}
 
 		if (!skip_prefix(reader->line, "want ", &arg) ||
 		    parse_oid_hex(arg, &oid_buf, &features))
 			die("git upload-pack: protocol error, "
 			    "expected to get object ID, not '%s'", reader->line);
 
 		if (parse_feature_request(features, "deepen-relative"))
@@ -1296,20 +1297,21 @@ static void process_args(struct packet_reader *request,
 			continue;
 		if (process_deepen_not(arg, &data->deepen_not,
 				       &data->deepen_rev_list))
 			continue;
 		if (!strcmp(arg, "deepen-relative")) {
 			data->deepen_relative = 1;
 			continue;
 		}
 
 		if (allow_filter && skip_prefix(arg, "filter ", &p)) {
+			list_objects_filter_die_if_populated(&filter_options);
 			parse_list_objects_filter(&filter_options, p);
 			continue;
 		}
 
 		if ((git_env_bool("GIT_TEST_SIDEBAND_ALL", 0) ||
 		     allow_sideband_all) &&
 		    !strcmp(arg, "sideband-all")) {
 			data->writer.use_sideband = 1;
 			continue;
 		}
-- 
2.17.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2 8/9] list-objects-filter-options: clean up use of ALLOC_GROW
  2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
                   ` (6 preceding siblings ...)
  2019-06-01  0:36 ` [PATCH v2 7/9] list-objects-filter-options: allow mult. --filter Matthew DeVore
@ 2019-06-01  0:36 ` Matthew DeVore
  2019-06-03 22:07   ` Jacob Keller
  2019-06-01  0:36 ` [PATCH v2 9/9] list-objects-filter-options: make parser void Matthew DeVore
                   ` (3 subsequent siblings)
  11 siblings, 1 reply; 57+ messages in thread
From: Matthew DeVore @ 2019-06-01  0:36 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	emilyshaffer
  Cc: Matthew DeVore, matvore

Introduce a new macro ALLOC_GROW_BY which automatically zeros the added
array elements and takes care of updating the nr value. Use the macro in
code introduced earlier in this patchset.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 cache.h                       | 22 ++++++++++++++++++++++
 list-objects-filter-options.c | 17 +++++++----------
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/cache.h b/cache.h
index fa8ede9a2d..847fbdeff0 100644
--- a/cache.h
+++ b/cache.h
@@ -652,33 +652,55 @@ int init_db(const char *git_dir, const char *real_git_dir,
 void sanitize_stdfds(void);
 int daemonize(void);
 
 #define alloc_nr(x) (((x)+16)*3/2)
 
 /*
  * Realloc the buffer pointed at by variable 'x' so that it can hold
  * at least 'nr' entries; the number of entries currently allocated
  * is 'alloc', using the standard growing factor alloc_nr() macro.
  *
+ * Consider using ALLOC_GROW_BY instead of ALLOC_GROW as it has some
+ * added niceties.
+ *
  * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
  */
 #define ALLOC_GROW(x, nr, alloc) \
 	do { \
 		if ((nr) > alloc) { \
 			if (alloc_nr(alloc) < (nr)) \
 				alloc = (nr); \
 			else \
 				alloc = alloc_nr(alloc); \
 			REALLOC_ARRAY(x, alloc); \
 		} \
 	} while (0)
 
+/*
+ * Similar to ALLOC_GROW but handles updating of the nr value and
+ * zeroing the bytes of the newly-grown array elements.
+ *
+ * DO NOT USE any expression with side-effect for any of the
+ * arguments.
+ */
+#define ALLOC_GROW_BY(x, nr, increase, alloc) \
+	do { \
+		if (increase) { \
+			size_t new_nr = nr + (increase); \
+			if (new_nr < nr) \
+				BUG("negative growth in ALLOC_GROW_BY"); \
+			ALLOC_GROW(x, new_nr, alloc); \
+			memset((x) + nr, 0, sizeof(*(x)) * (increase)); \
+			nr = new_nr; \
+		} \
+	} while (0)
+
 /* Initialize and use the cache information */
 struct lock_file;
 void preload_index(struct index_state *index,
 		   const struct pathspec *pathspec,
 		   unsigned int refresh_flags);
 int do_read_index(struct index_state *istate, const char *path,
 		  int must_exist); /* for testting only! */
 int read_index_from(struct index_state *, const char *path,
 		    const char *gitdir);
 int is_index_unborn(struct index_state *);
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 5e98e4a309..d8abe6cfcf 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -142,26 +142,24 @@ static int has_reserved_character(
 	}
 
 	return 0;
 }
 
 static int parse_combine_subfilter(
 	struct list_objects_filter_options *filter_options,
 	struct strbuf *subspec,
 	struct strbuf *errbuf)
 {
-	size_t new_index = filter_options->sub_nr++;
+	size_t new_index = filter_options->sub_nr;
 
-	ALLOC_GROW(filter_options->sub, filter_options->sub_nr,
-		   filter_options->sub_alloc);
-	memset(&filter_options->sub[new_index], 0,
-	       sizeof(*filter_options->sub));
+	ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
+		      filter_options->sub_alloc);
 
 	return has_reserved_character(subspec, errbuf) ||
 		url_decode(subspec, errbuf) ||
 		gently_parse_list_objects_filter(
 			&filter_options->sub[new_index], subspec->buf, errbuf);
 }
 
 static int parse_combine_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
@@ -273,27 +271,26 @@ int parse_list_objects_filter(
 		/*
 		 * Make filter_options an LOFC_COMBINE spec so we can trivially
 		 * add subspecs to it.
 		 */
 		transform_to_combine_type(filter_options);
 
 		strbuf_addstr(&filter_options->filter_spec, "+");
 		add_url_encoded(&filter_options->filter_spec, arg);
 		trace_printf("Generated composite filter-spec: %s\n",
 			     filter_options->filter_spec.buf);
-		ALLOC_GROW(filter_options->sub, filter_options->sub_nr + 1,
-			   filter_options->sub_alloc);
-		filter_options = &filter_options->sub[filter_options->sub_nr++];
-		memset(filter_options, 0, sizeof(*filter_options));
+		ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
+			      filter_options->sub_alloc);
 
 		parse_error = gently_parse_list_objects_filter(
-			filter_options, arg, &errbuf);
+			&filter_options->sub[filter_options->sub_nr - 1], arg,
+			&errbuf);
 	}
 	if (parse_error)
 		die("%s", errbuf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2 9/9] list-objects-filter-options: make parser void
  2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
                   ` (7 preceding siblings ...)
  2019-06-01  0:36 ` [PATCH v2 8/9] list-objects-filter-options: clean up use of ALLOC_GROW Matthew DeVore
@ 2019-06-01  0:36 ` Matthew DeVore
  2019-06-03 21:35 ` [PATCH v2 0/9] Filter combination Jeff Hostetler
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-01  0:36 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	emilyshaffer
  Cc: Matthew DeVore, matvore

This function always returns 0, so make it return void instead.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter-options.c | 12 +++++-------
 list-objects-filter-options.h |  2 +-
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index d8abe6cfcf..ed02c88eb6 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -247,21 +247,21 @@ static void transform_to_combine_type(
 	strbuf_release(&filter_options->sub[0].filter_spec);
 }
 
 void list_objects_filter_die_if_populated(
 	struct list_objects_filter_options *filter_options)
 {
 	if (filter_options->choice)
 		die(_("multiple filter-specs cannot be combined"));
 }
 
-int parse_list_objects_filter(
+void parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 	int parse_error;
 
 	if (!filter_options->choice) {
 		strbuf_init(&filter_options->filter_spec, 0);
 		strbuf_addstr(&filter_options->filter_spec, arg);
 
@@ -280,34 +280,32 @@ int parse_list_objects_filter(
 			     filter_options->filter_spec.buf);
 		ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
 			      filter_options->sub_alloc);
 
 		parse_error = gently_parse_list_objects_filter(
 			&filter_options->sub[filter_options->sub_nr - 1], arg,
 			&errbuf);
 	}
 	if (parse_error)
 		die("%s", errbuf.buf);
-	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
-	if (unset || !arg) {
+	if (unset || !arg)
 		list_objects_filter_set_no_filter(filter_options);
-		return 0;
-	}
-
-	return parse_list_objects_filter(filter_options, arg);
+	else
+		parse_list_objects_filter(filter_options, arg);
+	return 0;
 }
 
 void expand_list_objects_filter_spec(
 	const struct list_objects_filter_options *filter,
 	struct strbuf *expanded_spec)
 {
 	strbuf_init(expanded_spec, 0);
 	if (filter->choice == LOFC_BLOB_LIMIT)
 		strbuf_addf(expanded_spec, "blob:limit=%lu",
 			    filter->blob_limit_value);
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index f8c8a624e4..2c0ce6383a 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -67,21 +67,21 @@ void list_objects_filter_die_if_populated(
 	struct list_objects_filter_options *filter_options);
 
 /*
  * Parses the filter spec string given by arg and either (1) simply places the
  * result in filter_options if it is not yet populated or (2) combines it with
  * the filter already in filter_options if it is already populated. In the case
  * of (2), the filter specs are combined as if specified with 'combine:'.
  *
  * Dies and prints a user-facing message if an error occurs.
  */
-int parse_list_objects_filter(
+void parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg);
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset);
 
 #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
 	{ OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \
 	  N_("object filtering"), 0, \
 	  opt_parse_list_objects_filter }
-- 
2.17.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 0/9] Filter combination
  2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
                   ` (8 preceding siblings ...)
  2019-06-01  0:36 ` [PATCH v2 9/9] list-objects-filter-options: make parser void Matthew DeVore
@ 2019-06-03 21:35 ` Jeff Hostetler
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
  11 siblings, 0 replies; 57+ messages in thread
From: Jeff Hostetler @ 2019-06-03 21:35 UTC (permalink / raw)
  To: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, emilyshaffer
  Cc: matvore



On 5/31/2019 8:35 PM, Matthew DeVore wrote:
> Here is a roll-up with hopefully all comments applied or responded to. Notable
> changes since the last one include:
> 
>   - Added an ALLOC_GROW_BY which is used twice by this patchset to make growing
>     arrays safer and cleaner
>   - Cleaned up the URL-encoding by (1) using hex_to_bytes rather than rolling my
>     own helpers and (2) making error-string-generation non-conditional
>   - Switched to an array-based data structure rather than a linked list for both
>     LOFC_COMBINE filter spec objects and the filter object itself
>   - Changed the list_objects_filter API to be cleaner to use
>   - Changed test cases to use sparse:oid= rather than sparse:path= since the
>     latter is being disabled.
> 
> Thank you,
> 
> Matthew DeVore (9):
>    list-objects-filter: make API easier to use
>    list-objects-filter: put omits set in filter struct
>    list-objects-filter-options: always supply *errbuf
>    list-objects-filter: implement composite filters
>    list-objects-filter-options: move error check up
>    list-objects-filter-options: make filter_spec a strbuf
>    list-objects-filter-options: allow mult. --filter
>    list-objects-filter-options: clean up use of ALLOC_GROW
>    list-objects-filter-options: make parser void
> 
>   Documentation/rev-list-options.txt  |  16 ++
>   builtin/rev-list.c                  |   2 +-
>   cache.h                             |  22 ++
>   list-objects-filter-options.c       | 264 ++++++++++++++++++---
>   list-objects-filter-options.h       |  32 ++-
>   list-objects-filter.c               | 345 +++++++++++++++++++++-------
>   list-objects-filter.h               |  35 ++-
>   list-objects.c                      |  55 ++---
>   t/t5616-partial-clone.sh            |  19 ++
>   t/t6112-rev-list-filters-objects.sh | 197 +++++++++++++++-
>   transport.c                         |   1 +
>   upload-pack.c                       |   4 +-
>   12 files changed, 816 insertions(+), 176 deletions(-)
> 

This looks much nicer.
Thanks
Jeff

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 4/9] list-objects-filter: implement composite filters
  2019-06-01  0:35 ` [PATCH v2 4/9] list-objects-filter: implement composite filters Matthew DeVore
@ 2019-06-03 21:51   ` Jeff Hostetler
  2019-06-06 22:32     ` Matthew DeVore
  0 siblings, 1 reply; 57+ messages in thread
From: Jeff Hostetler @ 2019-06-03 21:51 UTC (permalink / raw)
  To: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, emilyshaffer
  Cc: matvore, Junio C Hamano



On 5/31/2019 8:35 PM, Matthew DeVore wrote:
> Allow combining filters such that only objects accepted by all filters
> are shown. The motivation for this is to allow getting directory
> listings without also fetching blobs. This can be done by combining
> blob:none with tree:<depth>. There are massive repositories that have
> larger-than-expected trees - even if you include only a single commit.
> 
> The current usage requires passing the filter to rev-list in the
> following form:
> 
> 	--filter=<FILTER1> --filter=<FILTER2> ...
> 
> Such usage is currently an error, so giving it a meaning is backwards-
> compatible.
> 
> The URL-encoding method is being implemented before the repeated flag
> logic, and the user-facing documentation for URL-encoding is being
> withheld until the repeated flag feature is implemented. The
> URL-encoding is in general not meant to be used directly by the user,
> and it is better to describe the URL-encoding feature in terms of the
> repeated flag.
> 
> Helped-by: Emily Shaffer <emilyshaffer@google.com>
> Helped-by: Jeff Hostetler <git@jeffhostetler.com>
> Helped-by: Junio C Hamano <gitster@pobox.com>
> Signed-off-by: Matthew DeVore <matvore@google.com>
> ---
>   list-objects-filter-options.c       | 135 ++++++++++++++++++++++-
>   list-objects-filter-options.h       |  17 ++-
>   list-objects-filter.c               | 159 +++++++++++++++++++++++++++
>   t/t6112-rev-list-filters-objects.sh | 163 +++++++++++++++++++++++++++-
>   4 files changed, 468 insertions(+), 6 deletions(-)
> 

[...]

> +static enum list_objects_filter_result filter_combine(
> +	struct repository *r,
> +	enum list_objects_filter_situation filter_situation,
> +	struct object *obj,
> +	const char *pathname,
> +	const char *filename,
> +	struct oidset *omits,
> +	void *filter_data)
> +{
> +	struct combine_filter_data *d = filter_data;
> +	enum list_objects_filter_result combined_result =
> +		LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE;
> +	size_t sub;
> +
> +	for (sub = 0; sub < d->nr; sub++) {
> +		enum list_objects_filter_result sub_result = process_subfilter(
> +			r, filter_situation, obj, pathname, filename,
> +			&d->sub[sub]);
> +		if (!(sub_result & LOFR_DO_SHOW))
> +			combined_result &= ~LOFR_DO_SHOW;
> +		if (!(sub_result & LOFR_MARK_SEEN))
> +			combined_result &= ~LOFR_MARK_SEEN;
> +		if (!d->sub[sub].is_skipping_tree)
> +			combined_result &= ~LOFR_SKIP_TREE;
> +	}
> +
> +	return combined_result;
> +}

This may be too subtle a point for this phase, so feel free to ignore
this.

Since we are assuming 'combine' is an AND operation, there may be an
opportunity to short-cut some of this loop for blobs.  That is, if the
object is a blob and any filter rejects it, it is omitted, so we don't
need to keep looping for that object.  (Tree objects cannot be short-cut
this way because a tree may appear at different depths or in different
sparse "cones" and may have to be reconsidered.)

So you could add an "affects blobs only" bit to the per-filter data
and try this out.  For example a "combine:blob:none+sparse:foo" should
perform better than "combine:sparse:foo+blob:none" but give the same
results.

Again, this might be premature, so feel free to disregard.
Jeff


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 8/9] list-objects-filter-options: clean up use of ALLOC_GROW
  2019-06-01  0:36 ` [PATCH v2 8/9] list-objects-filter-options: clean up use of ALLOC_GROW Matthew DeVore
@ 2019-06-03 22:07   ` Jacob Keller
  2019-06-03 22:39     ` Matthew DeVore
  0 siblings, 1 reply; 57+ messages in thread
From: Jacob Keller @ 2019-06-03 22:07 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: Git mailing list, Jonathan Tan, jrn, dstolee, Jeff Hostetler,
	Jonathan Nieder, Duy Nguyen, Emily Shaffer, matvore

On Fri, May 31, 2019 at 5:40 PM Matthew DeVore <matvore@google.com> wrote:
>
> Introduce a new macro ALLOC_GROW_BY which automatically zeros the added
> array elements and takes care of updating the nr value. Use the macro in
> code introduced earlier in this patchset.
>
> Signed-off-by: Matthew DeVore <matvore@google.com>
> ---
>  cache.h                       | 22 ++++++++++++++++++++++
>  list-objects-filter-options.c | 17 +++++++----------
>  2 files changed, 29 insertions(+), 10 deletions(-)
>
> diff --git a/cache.h b/cache.h
> index fa8ede9a2d..847fbdeff0 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -652,33 +652,55 @@ int init_db(const char *git_dir, const char *real_git_dir,
>  void sanitize_stdfds(void);
>  int daemonize(void);
>
>  #define alloc_nr(x) (((x)+16)*3/2)
>
>  /*
>   * Realloc the buffer pointed at by variable 'x' so that it can hold
>   * at least 'nr' entries; the number of entries currently allocated
>   * is 'alloc', using the standard growing factor alloc_nr() macro.
>   *
> + * Consider using ALLOC_GROW_BY instead of ALLOC_GROW as it has some
> + * added niceties.
> + *
>   * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
>   */
>  #define ALLOC_GROW(x, nr, alloc) \
>         do { \
>                 if ((nr) > alloc) { \
>                         if (alloc_nr(alloc) < (nr)) \
>                                 alloc = (nr); \
>                         else \
>                                 alloc = alloc_nr(alloc); \
>                         REALLOC_ARRAY(x, alloc); \
>                 } \
>         } while (0)
>
> +/*
> + * Similar to ALLOC_GROW but handles updating of the nr value and
> + * zeroing the bytes of the newly-grown array elements.
> + *
> + * DO NOT USE any expression with side-effect for any of the
> + * arguments.
> + */

Since ALLOC_GROW already doesn't handle this safely, there isn't
necessarily a reason to fix it, but you could read the macro values
into temporary variables inside the do { } while(0) loop in order to
avoid the multiple-expansion side effect issues...

Thanks,
Jake

> +#define ALLOC_GROW_BY(x, nr, increase, alloc) \
> +       do { \
> +               if (increase) { \
> +                       size_t new_nr = nr + (increase); \
> +                       if (new_nr < nr) \
> +                               BUG("negative growth in ALLOC_GROW_BY"); \
> +                       ALLOC_GROW(x, new_nr, alloc); \
> +                       memset((x) + nr, 0, sizeof(*(x)) * (increase)); \
> +                       nr = new_nr; \
> +               } \
> +       } while (0)
> +
>  /* Initialize and use the cache information */
>  struct lock_file;
>  void preload_index(struct index_state *index,
>                    const struct pathspec *pathspec,
>                    unsigned int refresh_flags);
>  int do_read_index(struct index_state *istate, const char *path,
>                   int must_exist); /* for testting only! */
>  int read_index_from(struct index_state *, const char *path,
>                     const char *gitdir);
>  int is_index_unborn(struct index_state *);
> diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
> index 5e98e4a309..d8abe6cfcf 100644
> --- a/list-objects-filter-options.c
> +++ b/list-objects-filter-options.c
> @@ -142,26 +142,24 @@ static int has_reserved_character(
>         }
>
>         return 0;
>  }
>
>  static int parse_combine_subfilter(
>         struct list_objects_filter_options *filter_options,
>         struct strbuf *subspec,
>         struct strbuf *errbuf)
>  {
> -       size_t new_index = filter_options->sub_nr++;
> +       size_t new_index = filter_options->sub_nr;
>
> -       ALLOC_GROW(filter_options->sub, filter_options->sub_nr,
> -                  filter_options->sub_alloc);
> -       memset(&filter_options->sub[new_index], 0,
> -              sizeof(*filter_options->sub));
> +       ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
> +                     filter_options->sub_alloc);
>
>         return has_reserved_character(subspec, errbuf) ||
>                 url_decode(subspec, errbuf) ||
>                 gently_parse_list_objects_filter(
>                         &filter_options->sub[new_index], subspec->buf, errbuf);
>  }
>
>  static int parse_combine_filter(
>         struct list_objects_filter_options *filter_options,
>         const char *arg,
> @@ -273,27 +271,26 @@ int parse_list_objects_filter(
>                 /*
>                  * Make filter_options an LOFC_COMBINE spec so we can trivially
>                  * add subspecs to it.
>                  */
>                 transform_to_combine_type(filter_options);
>
>                 strbuf_addstr(&filter_options->filter_spec, "+");
>                 add_url_encoded(&filter_options->filter_spec, arg);
>                 trace_printf("Generated composite filter-spec: %s\n",
>                              filter_options->filter_spec.buf);
> -               ALLOC_GROW(filter_options->sub, filter_options->sub_nr + 1,
> -                          filter_options->sub_alloc);
> -               filter_options = &filter_options->sub[filter_options->sub_nr++];
> -               memset(filter_options, 0, sizeof(*filter_options));
> +               ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
> +                             filter_options->sub_alloc);
>
>                 parse_error = gently_parse_list_objects_filter(
> -                       filter_options, arg, &errbuf);
> +                       &filter_options->sub[filter_options->sub_nr - 1], arg,
> +                       &errbuf);
>         }
>         if (parse_error)
>                 die("%s", errbuf.buf);
>         return 0;
>  }
>
>  int opt_parse_list_objects_filter(const struct option *opt,
>                                   const char *arg, int unset)
>  {
>         struct list_objects_filter_options *filter_options = opt->value;
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 8/9] list-objects-filter-options: clean up use of ALLOC_GROW
  2019-06-03 22:07   ` Jacob Keller
@ 2019-06-03 22:39     ` Matthew DeVore
  2019-06-04  3:16       ` Jacob Keller
  0 siblings, 1 reply; 57+ messages in thread
From: Matthew DeVore @ 2019-06-03 22:39 UTC (permalink / raw)
  To: Jacob Keller
  Cc: Matthew DeVore, Git mailing list, Jonathan Tan, jrn, dstolee,
	Jeff Hostetler, Jonathan Nieder, Duy Nguyen, Emily Shaffer

On Mon, Jun 03, 2019 at 03:07:40PM -0700, Jacob Keller wrote:
> > +/*
> > + * Similar to ALLOC_GROW but handles updating of the nr value and
> > + * zeroing the bytes of the newly-grown array elements.
> > + *
> > + * DO NOT USE any expression with side-effect for any of the
> > + * arguments.
> > + */
> 
> Since ALLOC_GROW already doesn't handle this safely, there isn't
> necessarily a reason to fix it, but you could read the macro values
> into temporary variables inside the do { } while(0) loop in order to
> avoid the multiple-expansion side effect issues...

For x I don't think that's possible since we don't know the pointer type. For
nr and alloc it doesn't make sense since they're being assigned to. For
`increase` I could try this:

	size_t ALLOC_GROW_BY__increase = (increase);

but I'm not sure how well this works when `increase` is a signed type. This
seemed sufficiently pitfall-y that I didn't attempt it. Relatedly, I was
thinking something like this would be nice, if anyone has time for such a
refactor:

struct growth_info {
	size_t nr, alloc;
};

And use that to replace individual "size_t foo_nr, foo_alloc"

And make ALLOC_GROW_BY use it. I think many, maybe even most, ALLOC_GROW
invocations can be changed to ALLOC_GROW_BY.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 8/9] list-objects-filter-options: clean up use of ALLOC_GROW
  2019-06-03 22:39     ` Matthew DeVore
@ 2019-06-04  3:16       ` Jacob Keller
  0 siblings, 0 replies; 57+ messages in thread
From: Jacob Keller @ 2019-06-04  3:16 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: Matthew DeVore, Git mailing list, Jonathan Tan, Jonathan Nieder,
	dstolee, Jeff Hostetler, Jonathan Nieder, Duy Nguyen,
	Emily Shaffer

On Mon, Jun 3, 2019 at 3:39 PM Matthew DeVore <matvore@comcast.net> wrote:
>
> On Mon, Jun 03, 2019 at 03:07:40PM -0700, Jacob Keller wrote:
> > > +/*
> > > + * Similar to ALLOC_GROW but handles updating of the nr value and
> > > + * zeroing the bytes of the newly-grown array elements.
> > > + *
> > > + * DO NOT USE any expression with side-effect for any of the
> > > + * arguments.
> > > + */
> >
> > Since ALLOC_GROW already doesn't handle this safely, there isn't
> > necessarily a reason to fix it, but you could read the macro values
> > into temporary variables inside the do { } while(0) loop in order to
> > avoid the multiple-expansion side effect issues...
>
> For x I don't think that's possible since we don't know the pointer type. For
> nr and alloc it doesn't make sense since they're being assigned to. For
> `increase` I could try this:
>

Ah.. you could do the compiler typeof extensions, but I guess we
probably don't wanna rely on that.

>         size_t ALLOC_GROW_BY__increase = (increase);
>
> but I'm not sure how well this works when `increase` is a signed type. This
> seemed sufficiently pitfall-y that I didn't attempt it.

Ok that makes sense.

Regards,
Jake

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 4/9] list-objects-filter: implement composite filters
  2019-06-03 21:51   ` Jeff Hostetler
@ 2019-06-06 22:32     ` Matthew DeVore
  2019-06-07 17:58       ` Jeff Hostetler
  0 siblings, 1 reply; 57+ messages in thread
From: Matthew DeVore @ 2019-06-06 22:32 UTC (permalink / raw)
  To: Jeff Hostetler
  Cc: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, emilyshaffer, Junio C Hamano

On Mon, Jun 03, 2019 at 05:51:28PM -0400, Jeff Hostetler wrote:
> Since we are assuming 'compose' is an AND operation, there may be an
> opportunity to short-cut some of this loop for blobs.  That is, if the
> object is a blob and any filter rejects it, it is omitted, so we don't
> need to keep looping for that object.  (Tree objects cannot be short-cut
> this way because a tree may appear at different depths or in different
> sparse "cones" and may have to be reconsidered.)

Blobs are also treated almost the same way as tree objects in tree:<depth>
filters - they can be included by tree:<depth> - so they also need to be
reconsidered when found at different depths.

But I agree it's always true that if some prior filter has excluded a blob, the
later filters don't even need to be *called at all* for that blob, unless
perhaps it's found under a different tree later. I also think it may be too
early to implement this optimization, since a filter in a later release may just
want to "know" about a blob even if it must be excluded in the final result.

Does the optimization apply to trees as well? Does a tree:<depth> filter still
want to consider children of tree X if tree X has already been excluded by
another filter? If it doesn't want to consider, we can short-circuit the checks
very aggressively. If it does want to consider, we want the short-circuiting to
be customizable at least for trees.

A minor point - I don't think that short-circuiting the for loop (breaking out
early) is important, since it will be very rare that a combine: filter has more
than 4 or so sub-filters anyway. Calling the filter_fn implementation and
letting that do internal short-circuiting (informed by the previous filters'
results) can, however, skip a lot of computation.

> So you could add an "affects blobs only" bit to the per-filter data
> and try this out.  For example a "compose:blob:none+sparse:foo" should
> perform better than "compose:sparse:foo+blob:none" but give the same
> results.

Does "affects blobs only" mean the filter includes all non-blob objects?

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 4/9] list-objects-filter: implement composite filters
  2019-06-06 22:32     ` Matthew DeVore
@ 2019-06-07 17:58       ` Jeff Hostetler
  0 siblings, 0 replies; 57+ messages in thread
From: Jeff Hostetler @ 2019-06-07 17:58 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, emilyshaffer, Junio C Hamano



On 6/6/2019 6:32 PM, Matthew DeVore wrote:
> On Mon, Jun 03, 2019 at 05:51:28PM -0400, Jeff Hostetler wrote:
>> Since we are assuming 'compose' is an AND operation, there may be an
>> opportunity to short-cut some of this loop for blobs.  That is, if the
>> object is a blob and any filter rejects it, it is omitted, so we don't
>> need to keep looping for that object.  (Tree objects cannot be short-cut
>> this way because a tree may appear at different depths or in different
>> sparse "cones" and may have to be reconsidered.)
> 
> Blobs are also treated almost the same way as tree objects in tree:<depth>
> filters - they can be included by tree:<depth> - so they also need to be
> reconsidered when found at different depths.
> 
> But I agree it's always true that if some prior filter has excluded a blob, the
> later filters don't even need to be *called at all* for that blob, unless
> perhaps it's found under a different tree later. I also think it may be too
> early to implement this optimization, since filter in a later release may just
> want to "know" about a blob even if it must be excluded in the final result.
> 
> Does the optimization apply to trees as well? Does a tree:<depth> filter still
> want to consider children of tree X if tree X has already been excluded by
> another filter? If it doesn't want to consider, we can short-circuit the checks
> very aggressively. If it does want to consider, we want the short-circuiting to
> be customizable at least for trees.
> 
> A minor point - I don't think that short-circuiting the for loop (breaking out
> early) is important, since it will be very rare that a combine: filter has more
> than 4 or so sub-filters anyway. Calling the filter_fn implementation and
> letting that do internal short-circuiting (informed by the previous filters'
> results) can, however, skip a lot of computation.
> 
>> So you could add an "affects blobs only" bit to the per-filter data
>> and try this out.  For example a "compose:blob:none+sparse:foo" should
>> perform better than "compose:sparse:foo+blob:none" but give the same
>> results.
> 
> Does "affects blobs only" mean the filter includes all non-blob objects?
> 

I just meant that the blob:none and blob:limit filters give you a hard
omit.  Other filters later in the chain cannot change or override that
answer (because of the AND assumption); it doesn't matter how deep or
shallow the blob is in the tree.

In the case of the tree:depth filter, a blob deep in the tree should
be provisionally omitted in case it appears later in a shallow tree
and should be included.  The tree filter can't do a hard omit on a blob
(just like it can't do a hard omit on a tree node).

WRT your question about a later filter "just wanting to know" about
a blob, I'm not sure.

So yeah, let's wait on this.  We can always add it later as an
optimization if/when it becomes a perf problem (and we have more
experience using them in practice).

Jeff



^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf
  2019-06-01  0:36 ` [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf Matthew DeVore
@ 2019-06-10 20:13   ` Junio C Hamano
  2019-06-11  0:34     ` Matthew DeVore
  0 siblings, 1 reply; 57+ messages in thread
From: Junio C Hamano @ 2019-06-10 20:13 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	emilyshaffer, matvore

Matthew DeVore <matvore@google.com> writes:

> -	filter_options->filter_spec = strdup(core_partial_clone_filter_default);
> +	if (!filter_options->filter_spec.buf)
> +		strbuf_init(&filter_options->filter_spec, 0);

This part made me go "Huh?" a bit.

Do we document that .buf==NULL means an uninitialized strbuf that is
safe to run strbuf_init() on?  I do not mind that as a general
convention, and it may even be a useful one (i.e. it allows you to
calloc() a structure with an embedded strbuf in it and the "if
.buf==NULL, call strbuf_init() lazily" can become an established
pattern), but at the same time it feels a bit brittle.  

Such a convention forces everybody who might want to use such an
embedded strbuf to first check .buf==NULL and lazily initialize
it---and at some point, when the embedded strbuf comes to be used by enough
codepaths, it would make the code more robust by giving up on the
lazy initialization (iow, when *filter_options is initialized, run
strbuf_init() on its .filter_spec field).

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf
  2019-06-10 20:13   ` Junio C Hamano
@ 2019-06-11  0:34     ` Matthew DeVore
  2019-06-11 17:33       ` Junio C Hamano
  0 siblings, 1 reply; 57+ messages in thread
From: Matthew DeVore @ 2019-06-11  0:34 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, emilyshaffer

On Mon, Jun 10, 2019 at 01:13:54PM -0700, Junio C Hamano wrote:
> Matthew DeVore <matvore@google.com> writes:
> 
> > -	filter_options->filter_spec = strdup(core_partial_clone_filter_default);
> > +	if (!filter_options->filter_spec.buf)
> > +		strbuf_init(&filter_options->filter_spec, 0);
> 
> This part made me go "Huh?" a bit.
> 
> Do we document that .buf==NULL means an uninitialized strbuf that is
> safe to run strbuf_init() on?  I do not mind that as a general

Kind of. The first bullet point in strbuf.h says:

 *  - The `buf` member is never NULL, so it can be used in any usual C
 *    string operations safely. strbuf's _have_ to be initialized either by
 *    `strbuf_init()` or by `= STRBUF_INIT` before the invariants, though.

So I extrapolated that if buf is NULL, it must be because it was just xcalloc'd
and not initialized. One possible improvement to the API would be to refactor
it such that there is no STRBUF_INIT, but a zero-initialized strbuf is valid.
If you expect to get a non-NULL buf, even for a zero-initialized strbuf, you
should call a function like strbuf_nonnull_buf(&buf), and that will return the
slop buf if buf is null, or the actual buf if it is non-null.

I don't understand why the API designer was so strict about requiring the
buffer to be set to non-null, since it's quite a burden for API users. If I
eagerly set all filter_options's strbuf's to STRBUF_INIT, it involves changing
a couple of global variables which currently do not need an initializer, and it
would make the code a bit messy. The structs which have a strbuf somewhere in
their nested fields would need to know that, and set up an initialization macro
to avoid the null buf.

I kind of suspect the right short-term fix is to avoid strbuf's and use a
string_list, which I join later to a full string when needed.

> convention, and it may even be a useful one (i.e. it allows you to
> calloc() a structure with an embedded strbuf in it and the "if
> .buf==NULL, call strbuf_init() lazily" can become an established
> pattern), but at the same time it feels a bit brittle.  

Is it brittle because a strbuf may be initialized to non-zero memory, and so
the "if (buf.buf == NULL)" may evaluate to false, and then go on treating
garbage like a valid buffer? I would think that's almost impossible because of
the use of xcalloc.

The only reason I realized the strbuf_init was necessary was not because I read
the documentation, but because I mistakenly called strbuf_reset, which calls
strbuf_setlen, which doesn't handle a null buf. Many other functions seem to
handle it well semi-accidentally. After I ran into the crash, I finally read
the documentation I cited above.

> 
> Such a convention forces everybody who might want to use such an
> embedded strbuf to first check .buf==NULL and lazily initialize
> it---and at some point when the embedded strbuf to be used by enough
> codepaths, it would make the code more robust by giving up on the
> lazy initialization (iow, when *filter_options is initialized, run
> strbuf_init() on its .filter_spec field).

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf
  2019-06-11  0:34     ` Matthew DeVore
@ 2019-06-11 17:33       ` Junio C Hamano
  2019-06-11 18:44         ` Matthew DeVore
  0 siblings, 1 reply; 57+ messages in thread
From: Junio C Hamano @ 2019-06-11 17:33 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, emilyshaffer

Matthew DeVore <matvore@comcast.net> writes:

>> convention, and it may even be a useful one (i.e. it allows you to
>> calloc() a structure with an embedded strbuf in it and the "if
>> .buf==NULL, call strbuf_init() lazily" can become an established
>> pattern), but at the same time it feels a bit brittle.  
>
> Is it brittle because a strbuf may be initialized to non-zero memory, and so
> the "if (buf.buf == NULL)" may evaluate to false, and then go on treating
> garbage like a valid buffer?

It is brittle because callers are bound to forget doing "if
(!x->buf.buf) lazy_init(&x->buf)" at some point, and blindly use an
uninitialized x->buf.  Making sure x->buf is always initialized
before any caller touches it is the only way to solve it, and as you
said, there are two possible ways to make that happen.  One way that
does not violate the current API contract is to make sure whoever
allocates and/or initializes the surrounding struct that embeds a
strbuf does strbuf_init(&x->buf) before any user sees the struct.

Another would be to update strbuf API so that strbuf_init() does not
even have to use slopbuf.  But that is a much larger change that
potentially breaks existing users of strbuf API.  When you have a
strbuf that has been prepared to be usable, the current API contract
allows its users to expect buf.buf is never NULL, so they assume
that they can safely write "if (!buf.buf)", so auditing strbuf.c and
making sure a strbuf with buf==NULL gets lazily initialized is not
enough.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf
  2019-06-11 17:33       ` Junio C Hamano
@ 2019-06-11 18:44         ` Matthew DeVore
  2019-06-11 21:34           ` Matthew DeVore
  2019-06-11 21:48           ` Junio C Hamano
  0 siblings, 2 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-11 18:44 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, emilyshaffer

On Tue, Jun 11, 2019 at 10:33:18AM -0700, Junio C Hamano wrote:
> Matthew DeVore <matvore@comcast.net> writes:
> 
> >> convention, and it may even be a useful one (i.e. it allows you to
> >> calloc() a structure with an embedded strbuf in it and the "if
> >> .buf==NULL, call strbuf_init() lazily" can become an established
> >> pattern), but at the same time it feels a bit brittle.  
> >
> > Is it brittle because a strbuf may be initialized to non-zero memory, and so
> > the "if (buf.buf == NULL)" may evaluate to false, and then go on treating
> > garbage like a valid buffer?
> 
> It is brittle because callers are bound to forget doing "if
> (!x->buf.buf) lazy_init(&x->buf)" at some point, and blindly use an
> uninitialized x->buf.  Making sure x->buf is always initialized

A corollary proposition would be to make this particular strbuf a "struct
strbuf *" rather than an inline strbuf. It should then be rather clear to users
that it may be null. Then whoever allocates the memory can also do the
strbuf_init one-liner. The free'ing logic of list_objects_filter_options then
only becomes trivially more complicated than it was before. Does that sound
like a good compromise to you?

> before any caller touches it is the only way to solve it, and as you
> said, there are two possible ways to make that happen.  One way that
> does not violate the current API contract is to make sure whoever
> allocates and/or initializes the surrounding struct that embeds a
> strbuf does strbuf_init(&x->buf) before any user sees the struct.

The thing I don't like about that is that the non-zeroness of its
initialization percolates upward to whatever the top-level struct is, which
means implementation details leak a lot. This seems quite brittle as well,
since anyone can forget to initialize some struct in the nested line.

> 
> Another would be to update strbuf API so that strbuf_init() does not
> even have to use slopbuf.  But that is a much larger change that
> potentially breaks existing users of strbuf API.  When you have a
> strbuf that has been prepared to be usable, the current API contract
> allows its users to expect buf.buf is never NULL, so they assume
> that they can safely write "if (!buf.buf)", so auditing strbuf.c and
> making sure a strbuf with buf==NULL gets lazily initialized is not
> enough.

That's true. I didn't think it matters in the case of filter_spec in
particular, since users of list_objects_filter_options are supposed to use an
accessor and not touch the strbuf directly, but looking at it like a more
general API change, it seems risky.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf
  2019-06-11 18:44         ` Matthew DeVore
@ 2019-06-11 21:34           ` Matthew DeVore
  2019-06-11 21:48           ` Junio C Hamano
  1 sibling, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-11 21:34 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, emilyshaffer

On Tue, Jun 11, 2019 at 11:44:27AM -0700, Matthew DeVore wrote:
> A corollary proposition would be to make this particular strbuf a "struct
> strbuf *" rather than an inline strbuf. It should then be rather clear to users
> that it may be null. Then whoever allocates the memory can also do the
> strbuf_init one-liner. The free'ing logic of list_objects_filter_options then
> only becomes trivially more complicated than it was before. Does that sound
> like a good compromise to you?
> 

This interdiff illustrates what I'm talking about. I don't think I like the
fact there are two strbuf's now, but I think you get the idea. This also fixes
a memory leak in upload-pack.c, and makes the API cleaner to use:

diff --git a/builtin/clone.c b/builtin/clone.c
index 85b0d3155d..81e6010779 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1135,27 +1135,25 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 		transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
 
 	if (option_upload_pack)
 		transport_set_option(transport, TRANS_OPT_UPLOADPACK,
 				     option_upload_pack);
 
 	if (server_options.nr)
 		transport->server_options = &server_options;
 
 	if (filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&filter_options,
-						&expanded_filter_spec);
+		const char *spec =
+			expand_list_objects_filter_spec(&filter_options);
 		transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
-				     expanded_filter_spec.buf);
+				     spec);
 		transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
-		strbuf_release(&expanded_filter_spec);
 	}
 
 	if (transport->smart_options && !deepen && !filter_options.choice)
 		transport->smart_options->check_self_contained_and_connected = 1;
 
 
 	argv_array_push(&ref_prefixes, "HEAD");
 	refspec_ref_prefixes(&remote->fetch, &ref_prefixes);
 	if (option_branch)
 		expand_ref_prefix(&ref_prefixes, option_branch);
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 4ba63d5ac6..dee89e1a19 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1181,27 +1181,24 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
 	if (deepen && deepen_since)
 		set_option(transport, TRANS_OPT_DEEPEN_SINCE, deepen_since);
 	if (deepen && deepen_not.nr)
 		set_option(transport, TRANS_OPT_DEEPEN_NOT,
 			   (const char *)&deepen_not);
 	if (deepen_relative)
 		set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes");
 	if (update_shallow)
 		set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
 	if (filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&filter_options,
-						&expanded_filter_spec);
-		set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
-			   expanded_filter_spec.buf);
+		const char *spec =
+			expand_list_objects_filter_spec(&filter_options);
+		set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, spec);
 		set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
-		strbuf_release(&expanded_filter_spec);
 	}
 	if (negotiation_tip.nr) {
 		if (transport->smart_options)
 			add_negotiation_tips(transport->smart_options);
 		else
 			warning("Ignoring --negotiation-tip because the protocol does not support it.");
 	}
 	return transport;
 }
 
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 7137f13a74..b194430217 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -458,23 +458,26 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 		if (skip_prefix(arg, "--progress=", &arg)) {
 			show_progress = arg;
 			continue;
 		}
 
 		if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) {
 			parse_list_objects_filter(&filter_options, arg);
 			if (filter_options.choice && !revs.blob_objects)
 				die(_("object filtering requires --objects"));
 			if (filter_options.choice == LOFC_SPARSE_OID &&
-			    !filter_options.sparse_oid_value)
-				die(_("invalid sparse value '%s'"),
-				    filter_options.filter_spec.buf);
+			    !filter_options.sparse_oid_value) {
+				const char *spec =
+					expand_list_objects_filter_spec(
+						&filter_options);
+				die(_("invalid sparse value '%s'"), spec);
+			}
 			continue;
 		}
 		if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) {
 			list_objects_filter_set_no_filter(&filter_options);
 			continue;
 		}
 		if (!strcmp(arg, "--filter-print-omitted")) {
 			arg_print_omitted = 1;
 			continue;
 		}
diff --git a/fetch-pack.c b/fetch-pack.c
index 1c10f54e78..72e13b0a1d 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -332,26 +332,23 @@ static int find_common(struct fetch_negotiator *negotiator,
 		packet_buf_write(&req_buf, "deepen-since %"PRItime, max_age);
 	}
 	if (args->deepen_not) {
 		int i;
 		for (i = 0; i < args->deepen_not->nr; i++) {
 			struct string_list_item *s = args->deepen_not->items + i;
 			packet_buf_write(&req_buf, "deepen-not %s", s->string);
 		}
 	}
 	if (server_supports_filtering && args->filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&args->filter_options,
-						&expanded_filter_spec);
-		packet_buf_write(&req_buf, "filter %s",
-				 expanded_filter_spec.buf);
-		strbuf_release(&expanded_filter_spec);
+		const char *spec =
+			expand_list_objects_filter_spec(&args->filter_options);
+		packet_buf_write(&req_buf, "filter %s", spec);
 	}
 	packet_buf_flush(&req_buf);
 	state_len = req_buf.len;
 
 	if (args->deepen) {
 		const char *arg;
 		struct object_id oid;
 
 		send_request(args, fd[1], &req_buf);
 		while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
@@ -1092,21 +1089,21 @@ static int add_haves(struct fetch_negotiator *negotiator,
 		ret = 1;
 	}
 
 	/* Increase haves to send on next round */
 	*haves_to_send = next_flush(1, *haves_to_send);
 
 	return ret;
 }
 
 static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
-			      const struct fetch_pack_args *args,
+			      struct fetch_pack_args *args,
 			      const struct ref *wants, struct oidset *common,
 			      int *haves_to_send, int *in_vain,
 			      int sideband_all)
 {
 	int ret = 0;
 	struct strbuf req_buf = STRBUF_INIT;
 
 	if (server_supports_v2("fetch", 1))
 		packet_buf_write(&req_buf, "command=fetch");
 	if (server_supports_v2("agent", 0))
@@ -1133,27 +1130,24 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 
 	/* Add shallow-info and deepen request */
 	if (server_supports_feature("fetch", "shallow", 0))
 		add_shallow_requests(&req_buf, args);
 	else if (is_repository_shallow(the_repository) || args->deepen)
 		die(_("Server does not support shallow requests"));
 
 	/* Add filter */
 	if (server_supports_feature("fetch", "filter", 0) &&
 	    args->filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
+		const char *spec =
+			expand_list_objects_filter_spec(&args->filter_options);
 		print_verbose(args, _("Server supports filter"));
-		expand_list_objects_filter_spec(&args->filter_options,
-						&expanded_filter_spec);
-		packet_buf_write(&req_buf, "filter %s",
-				 expanded_filter_spec.buf);
-		strbuf_release(&expanded_filter_spec);
+		packet_buf_write(&req_buf, "filter %s", spec);
 	} else if (args->filter_options.choice) {
 		warning("filtering not recognized by server, ignoring");
 	}
 
 	/* add wants */
 	add_wants(args->no_dependents, wants, &req_buf);
 
 	if (args->no_dependents) {
 		packet_buf_write(&req_buf, "done");
 		ret = 1;
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 9a5677c2c8..2523f96223 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -7,20 +7,35 @@
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
 #include "trace.h"
 #include "url.h"
 
 static int parse_combine_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
 	struct strbuf *errbuf);
 
+struct filter_spec {
+	struct strbuf raw;
+	struct strbuf expanded;
+};
+
+static void maybe_init_filter_spec(struct list_objects_filter_options *o)
+{
+	if (o->filter_spec)
+		return;
+
+	o->filter_spec = xcalloc(1, sizeof(*o->filter_spec));
+	strbuf_init(&o->filter_spec->raw, 0);
+	strbuf_init(&o->filter_spec->expanded, 0);
+}
+
 /*
  * Parse value of the argument to the "filter" keyword.
  * On the command line this looks like:
  *       --filter=<arg>
  * and in the pack protocol as:
  *       "filter" SP <arg>
  *
  * The filter keyword will be used by many commands.
  * See Documentation/rev-list-options.txt for allowed values for <arg>.
  *
@@ -182,77 +197,78 @@ static int allow_unencoded(char ch)
 }
 
 /*
  * Changes filter_options into an equivalent LOFC_COMBINE filter options
  * instance. Does not do anything if filter_options is already LOFC_COMBINE.
  */
 static void transform_to_combine_type(
 	struct list_objects_filter_options *filter_options)
 {
 	assert(filter_options->choice);
+	assert(filter_options->filter_spec);
 	if (filter_options->choice == LOFC_COMBINE)
 		return;
 	{
 		const int initial_sub_alloc = 2;
 		struct list_objects_filter_options *sub_array =
 			xcalloc(initial_sub_alloc, sizeof(*sub_array));
 		sub_array[0] = *filter_options;
 		memset(filter_options, 0, sizeof(*filter_options));
 		filter_options->sub = sub_array;
 		filter_options->sub_alloc = initial_sub_alloc;
 	}
 	filter_options->sub_nr = 1;
 	filter_options->choice = LOFC_COMBINE;
-	strbuf_init(&filter_options->filter_spec, 0);
-	strbuf_addstr(&filter_options->filter_spec, "combine:");
-	strbuf_addstr_urlencode(&filter_options->filter_spec,
-				filter_options->sub[0].filter_spec.buf,
+	strbuf_addstr(&filter_options->filter_spec->raw, "combine:");
+	strbuf_addstr_urlencode(&filter_options->filter_spec->raw,
+				filter_options->sub[0].filter_spec->raw.buf,
 				allow_unencoded);
 	/*
 	 * We don't need the filter_spec strings for subfilter specs, only the
 	 * top level.
 	 */
-	strbuf_release(&filter_options->sub[0].filter_spec);
+	strbuf_release(&filter_options->sub[0].filter_spec->raw);
 }
 
 void list_objects_filter_die_if_populated(
 	struct list_objects_filter_options *filter_options)
 {
 	if (filter_options->choice)
 		die(_("multiple filter-specs cannot be combined"));
 }
 
 void parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 	int parse_error;
 
+	maybe_init_filter_spec(filter_options);
+
 	if (!filter_options->choice) {
-		strbuf_init(&filter_options->filter_spec, 0);
-		strbuf_addstr(&filter_options->filter_spec, arg);
+		strbuf_addstr(&filter_options->filter_spec->raw, arg);
 
 		parse_error = gently_parse_list_objects_filter(
 			filter_options, arg, &errbuf);
 	} else {
 		/*
 		 * Make filter_options an LOFC_COMBINE spec so we can trivially
 		 * add subspecs to it.
 		 */
 		transform_to_combine_type(filter_options);
 
-		strbuf_addstr(&filter_options->filter_spec, "+");
-		strbuf_addstr_urlencode(&filter_options->filter_spec, arg,
+		strbuf_addstr(&filter_options->filter_spec->raw, "+");
+		strbuf_addstr_urlencode(&filter_options->filter_spec->raw, arg,
 					allow_unencoded);
 		trace_printf("Generated composite filter-spec: %s\n",
-			     filter_options->filter_spec.buf);
+			     filter_options->filter_spec->raw.buf);
 		ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
 			      filter_options->sub_alloc);
 
 		parse_error = gently_parse_list_objects_filter(
 			&filter_options->sub[filter_options->sub_nr - 1], arg,
 			&errbuf);
 	}
 	if (parse_error)
 		die("%s", errbuf.buf);
 }
@@ -262,54 +278,62 @@ int opt_parse_list_objects_filter(const struct option *opt,
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
 	if (unset || !arg)
 		list_objects_filter_set_no_filter(filter_options);
 	else
 		parse_list_objects_filter(filter_options, arg);
 	return 0;
 }
 
-void expand_list_objects_filter_spec(
-	const struct list_objects_filter_options *filter,
-	struct strbuf *expanded_spec)
+const char *expand_list_objects_filter_spec(
+	struct list_objects_filter_options *filter)
 {
-	strbuf_init(expanded_spec, 0);
+	struct strbuf *expanded_spec = &filter->filter_spec->expanded;
+	if (expanded_spec->len)
+		return expanded_spec->buf;
+
 	if (filter->choice == LOFC_BLOB_LIMIT)
 		strbuf_addf(expanded_spec, "blob:limit=%lu",
 			    filter->blob_limit_value);
 	else if (filter->choice == LOFC_TREE_DEPTH)
 		strbuf_addf(expanded_spec, "tree:%lu",
 			    filter->tree_exclude_depth);
 	else
-		strbuf_addstr(expanded_spec, filter->filter_spec.buf);
+		strbuf_addstr(expanded_spec, filter->filter_spec->raw.buf);
+
+	return expanded_spec->buf;
 }
 
 void list_objects_filter_release(
 	struct list_objects_filter_options *filter_options)
 {
 	size_t sub;
 
 	if (!filter_options)
 		return;
-	strbuf_release(&filter_options->filter_spec);
+	if (filter_options->filter_spec) {
+		strbuf_release(&filter_options->filter_spec->raw);
+		strbuf_release(&filter_options->filter_spec->expanded);
+		FREE_AND_NULL(filter_options->filter_spec);
+	}
 	free(filter_options->sparse_oid_value);
 	free(filter_options->sparse_path_value);
 	for (sub = 0; sub < filter_options->sub_nr; sub++)
 		list_objects_filter_release(&filter_options->sub[sub]);
 	free(filter_options->sub);
 	memset(filter_options, 0, sizeof(*filter_options));
 }
 
 void partial_clone_register(
 	const char *remote,
-	const struct list_objects_filter_options *filter_options)
+	struct list_objects_filter_options *filter_options)
 {
 	/*
 	 * Record the name of the partial clone remote in the
 	 * config and in the global variable -- the latter is
 	 * used throughout to indicate that partial clone is
 	 * enabled and to expect missing objects.
 	 */
 	if (repository_format_partial_clone &&
 	    *repository_format_partial_clone &&
 	    strcmp(remote, repository_format_partial_clone))
@@ -318,35 +342,34 @@ void partial_clone_register(
 	git_config_set("core.repositoryformatversion", "1");
 	git_config_set("extensions.partialclone", remote);
 
 	repository_format_partial_clone = xstrdup(remote);
 
 	/*
 	 * Record the initial filter-spec in the config as
 	 * the default for subsequent fetches from this remote.
 	 */
 	core_partial_clone_filter_default =
-		xstrdup(filter_options->filter_spec.buf);
+		xstrdup(expand_list_objects_filter_spec(filter_options));
 	git_config_set("core.partialclonefilter",
 		       core_partial_clone_filter_default);
 }
 
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 
 	/*
 	 * Parse default value, but silently ignore it if it is invalid.
 	 */
 	if (!core_partial_clone_filter_default)
 		return;
 
-	if (!filter_options->filter_spec.buf)
-		strbuf_init(&filter_options->filter_spec, 0);
-	strbuf_addstr(&filter_options->filter_spec,
+	maybe_init_filter_spec(filter_options);
+	strbuf_addstr(&filter_options->filter_spec->raw,
 		      core_partial_clone_filter_default);
 	gently_parse_list_objects_filter(filter_options,
 					 core_partial_clone_filter_default,
 					 &errbuf);
 	strbuf_release(&errbuf);
 }
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index 2c0ce6383a..07995449f1 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -11,29 +11,31 @@ enum list_objects_filter_choice {
 	LOFC_DISABLED = 0,
 	LOFC_BLOB_NONE,
 	LOFC_BLOB_LIMIT,
 	LOFC_TREE_DEPTH,
 	LOFC_SPARSE_OID,
 	LOFC_SPARSE_PATH,
 	LOFC_COMBINE,
 	LOFC__COUNT /* must be last */
 };
 
+struct filter_spec;
+
 struct list_objects_filter_options {
 	/*
-	 * 'filter_spec' is the raw argument value given on the command line
-	 * or protocol request.  (The part after the "--keyword=".)  For
+	 * 'filter_spec' contains the raw argument value given on the command
+	 * line or protocol request.  (The part after the "--keyword=".)  For
 	 * commands that launch filtering sub-processes, or for communication
 	 * over the network, don't use this value; use the result of
 	 * expand_list_objects_filter_spec() instead.
 	 */
-	struct strbuf filter_spec;
+	struct filter_spec *filter_spec;
 
 	/*
 	 * 'choice' is determined by parsing the filter-spec.  This indicates
 	 * the filtering algorithm to use.
 	 */
 	enum list_objects_filter_choice choice;
 
 	/*
 	 * Choice is LOFC_DISABLED because "--no-filter" was requested.
 	 */
@@ -86,31 +88,30 @@ int opt_parse_list_objects_filter(const struct option *opt,
 	  N_("object filtering"), 0, \
 	  opt_parse_list_objects_filter }
 
 /*
  * Translates abbreviated numbers in the filter's filter_spec into their
  * fully-expanded forms (e.g., "limit:blob=1k" becomes "limit:blob=1024").
  *
  * This form should be used instead of the raw filter_spec field when
  * communicating with a remote process or subprocess.
  */
-void expand_list_objects_filter_spec(
-	const struct list_objects_filter_options *filter,
-	struct strbuf *expanded_spec);
+const char *expand_list_objects_filter_spec(
+	struct list_objects_filter_options *filter);
 
 void list_objects_filter_release(
 	struct list_objects_filter_options *filter_options);
 
 static inline void list_objects_filter_set_no_filter(
 	struct list_objects_filter_options *filter_options)
 {
 	list_objects_filter_release(filter_options);
 	filter_options->no_filter = 1;
 }
 
 void partial_clone_register(
 	const char *remote,
-	const struct list_objects_filter_options *filter_options);
+	struct list_objects_filter_options *filter_options);
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options);
 
 #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */
diff --git a/transport-helper.c b/transport-helper.c
index cec83bd663..d6313ef9f5 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -675,27 +675,23 @@ static int fetch(struct transport *transport,
 	    data->transport_options.check_self_contained_and_connected)
 		set_helper_option(transport, "check-connectivity", "true");
 
 	if (transport->cloning)
 		set_helper_option(transport, "cloning", "true");
 
 	if (data->transport_options.update_shallow)
 		set_helper_option(transport, "update-shallow", "true");
 
 	if (data->transport_options.filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(
-			&data->transport_options.filter_options,
-			&expanded_filter_spec);
-		set_helper_option(transport, "filter",
-				  expanded_filter_spec.buf);
-		strbuf_release(&expanded_filter_spec);
+		const char *spec = expand_list_objects_filter_spec(
+			&data->transport_options.filter_options);
+		set_helper_option(transport, "filter", spec);
 	}
 
 	if (data->transport_options.negotiation_tips)
 		warning("Ignoring --negotiation-tip because the protocol does not support it.");
 
 	if (data->fetch)
 		return fetch_with_fetch(transport, nr_heads, to_fetch);
 
 	if (data->import)
 		return fetch_with_import(transport, nr_heads, to_fetch);
diff --git a/upload-pack.c b/upload-pack.c
index ba8c3a1f8e..dda2ac6f44 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -133,32 +133,31 @@ static void create_pack_file(const struct object_array *have_obj,
 
 	argv_array_push(&pack_objects.args, "--stdout");
 	if (shallow_nr)
 		argv_array_push(&pack_objects.args, "--shallow");
 	if (!no_progress)
 		argv_array_push(&pack_objects.args, "--progress");
 	if (use_ofs_delta)
 		argv_array_push(&pack_objects.args, "--delta-base-offset");
 	if (use_include_tag)
 		argv_array_push(&pack_objects.args, "--include-tag");
-	if (filter_options.filter_spec.len) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&filter_options,
-						&expanded_filter_spec);
+	if (filter_options.choice) {
+		const char *spec =
+			expand_list_objects_filter_spec(&filter_options);
 		if (pack_objects.use_shell) {
 			struct strbuf buf = STRBUF_INIT;
-			sq_quote_buf(&buf, expanded_filter_spec.buf);
+			sq_quote_buf(&buf, spec);
 			argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf);
 			strbuf_release(&buf);
 		} else {
 			argv_array_pushf(&pack_objects.args, "--filter=%s",
-					 expanded_filter_spec.buf);
+					 spec);
 		}
 	}
 
 	pack_objects.in = -1;
 	pack_objects.out = -1;
 	pack_objects.err = -1;
 
 	if (start_command(&pack_objects))
 		die("git upload-pack: unable to fork git-pack-objects");
 

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf
  2019-06-11 18:44         ` Matthew DeVore
  2019-06-11 21:34           ` Matthew DeVore
@ 2019-06-11 21:48           ` Junio C Hamano
  2019-06-12  0:37             ` Matthew DeVore
  1 sibling, 1 reply; 57+ messages in thread
From: Junio C Hamano @ 2019-06-11 21:48 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, emilyshaffer

Matthew DeVore <matvore@comcast.net> writes:

>> It is brittle because callers are bound to forget doing "if
>> (!x->buf.buf) lazy_init(&x->buf)" at some point, and blindly use an
>> uninitialized x->buf.  Making sure x->buf is always initialized
>
> A corollary proposition would be to make this particular strbuf a "struct
> strbuf *" rather than an inline strbuf. It should then be rather clear to users
> that it may be null.

Would that make it less likely for uses of an uninitialized strbuf to be
left undetected as errors?  I guess so, and if that is the case it
would definitely be an improvement.

But initializing the strbuf at the point where the enclosing
structure is initialized (or calloc()'ed) is also a viable option,
and between the two, I think that would be even more robust.

There may be reasons why it is cumbersome to arrange it that way,
though (e.g. if the code does not introduce a "new_stuff()"
allocator that also initializes, and instead uses xcalloc() from
many places, initializing the enclosing structure properly might
take a preliminary clean-up step before the main part of the patch
series can begin).

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf
  2019-06-11 21:48           ` Junio C Hamano
@ 2019-06-12  0:37             ` Matthew DeVore
  2019-06-12 14:55               ` Matthew DeVore
  0 siblings, 1 reply; 57+ messages in thread
From: Matthew DeVore @ 2019-06-12  0:37 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, emilyshaffer

On Tue, Jun 11, 2019 at 02:48:51PM -0700, Junio C Hamano wrote:
> Matthew DeVore <matvore@comcast.net> writes:
> 
> >> It is brittle because callers are bound to forget doing "if
> >> (!x->buf.buf) lazy_init(&x->buf)" at some point, and blindly use an
> >> uninitialized x->buf.  Making sure x->buf is always initialized
> >
> > > A corollary proposition would be to make this particular strbuf a "struct
> > strbuf *" rather than an inline strbuf. It should then be rather clear to users
> > that it may be null.
> 
> Would that make it less likely for uses of an uninitialized strbuf to be
> left undetected as errors?  I guess so, and if that is the case it
> would definitely be an improvement.
> 
> But initializing the strbuf at the point where the enclosing
> structure is initialized (or calloc()'ed) is also a viable option,
> and between the two, I think that would be even more robust.
> 
> There may be reasons why it is cumbersome to arrange it that way,
> though (e.g. if the code does not introduce a "new_stuff()"
> allocator that also initializes, and instead uses xcalloc() from
> many places, initializing the enclosing structure properly might
> take a preliminary clean-up step before the main part of the patch
> series can begin).

These are all the locations where a struct which ultimately contains a
list_objects_filter_options is instantiated:

GLOBAL VARIABLES:

builtin/clone.c:68:static struct list_objects_filter_options filter_options;
builtin/fetch.c:66:static struct list_objects_filter_options filter_options;
builtin/pack-objects.c:112:static struct list_objects_filter_options filter_options;
builtin/rev-list.c:65:static struct list_objects_filter_options filter_options;

LOCAL VARIABLES:

builtin/fetch-pack.c:54:        struct fetch_pack_args args;
transport.c:327:        struct fetch_pack_args args;

HEAP ALLOCATIONS:

transport-helper.c:1123:	struct helper_data *data = xcalloc(1, sizeof(*data));
transport.c:964:                struct git_transport_data *data = xcalloc(1, sizeof(*data));

git_transport_options is also not directly instantiated as a local or static
variable, but it would need to have a git_transport_options_init function
defined.

I didn't count exactly the number of _INIT macros and _init functions that
would need to be defined. It seems like a lot of work. It is hard to believe
that our ability to exhaustively pinpoint all these instantiations, and to
catch ALL future instantiations, is all that reliable. I think our ability to
find the places we need to lazily instantiate the strbuf-containing-struct
(struct filter_spec in the interdiff) is more reliable.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf
  2019-06-12  0:37             ` Matthew DeVore
@ 2019-06-12 14:55               ` Matthew DeVore
  0 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-12 14:55 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, emilyshaffer

On Tue, Jun 11, 2019 at 05:37:16PM -0700, Matthew DeVore wrote:
> On Tue, Jun 11, 2019 at 02:48:51PM -0700, Junio C Hamano wrote:
> > Matthew DeVore <matvore@comcast.net> writes:
> > 
> > >> It is brittle because callers are bound to forget doing "if
> > >> (!x->buf.buf) lazy_init(&x->buf)" at some point, and blindly use an
> > >> uninitialized x->buf.  Making sure x->buf is always initialized
> > >
> > > > A corollary proposition would be to make this particular strbuf a "struct
> > > strbuf *" rather than an inline strbuf. It should then be rather clear to users
> > > that it may be null.
> > 
> > > Would that make it less likely for uses of an uninitialized strbuf to be
> > left undetected as errors?  I guess so, and if that is the case it
> > would definitely be an improvement.
> > 
> > But initializing the strbuf at the point where the enclosing
> > > structure is initialized (or calloc()'ed) is also a viable option,
> > and between the two, I think that would be even more robust.
> > 
> > There may be reasons why it is cumbersome to arrange it that way,
> > though (e.g. if the code does not introduce a "new_stuff()"
> > allocator that also initializes, and instead uses xcalloc() from
> > many places, initializing the enclosing structure properly might
> > take a preliminary clean-up step before the main part of the patch
> > series can begin).

Here is an alternate interdiff where I use a string_list rather than a strbuf
for the filter_spec. This is actually slightly shorter code than the earlier
interdiff using a struct filter_spec type. (not by much, maybe half-dozen lines)
I think this is my favorite approach so far.

diff --git a/builtin/clone.c b/builtin/clone.c
index 85b0d3155d..81e6010779 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1135,27 +1135,25 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 		transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
 
 	if (option_upload_pack)
 		transport_set_option(transport, TRANS_OPT_UPLOADPACK,
 				     option_upload_pack);
 
 	if (server_options.nr)
 		transport->server_options = &server_options;
 
 	if (filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&filter_options,
-						&expanded_filter_spec);
+		const char *spec =
+			expand_list_objects_filter_spec(&filter_options);
 		transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
-				     expanded_filter_spec.buf);
+				     spec);
 		transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
-		strbuf_release(&expanded_filter_spec);
 	}
 
 	if (transport->smart_options && !deepen && !filter_options.choice)
 		transport->smart_options->check_self_contained_and_connected = 1;
 
 
 	argv_array_push(&ref_prefixes, "HEAD");
 	refspec_ref_prefixes(&remote->fetch, &ref_prefixes);
 	if (option_branch)
 		expand_ref_prefix(&ref_prefixes, option_branch);
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 4ba63d5ac6..dee89e1a19 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1181,27 +1181,24 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
 	if (deepen && deepen_since)
 		set_option(transport, TRANS_OPT_DEEPEN_SINCE, deepen_since);
 	if (deepen && deepen_not.nr)
 		set_option(transport, TRANS_OPT_DEEPEN_NOT,
 			   (const char *)&deepen_not);
 	if (deepen_relative)
 		set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes");
 	if (update_shallow)
 		set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
 	if (filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&filter_options,
-						&expanded_filter_spec);
-		set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
-			   expanded_filter_spec.buf);
+		const char *spec =
+			expand_list_objects_filter_spec(&filter_options);
+		set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, spec);
 		set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
-		strbuf_release(&expanded_filter_spec);
 	}
 	if (negotiation_tip.nr) {
 		if (transport->smart_options)
 			add_negotiation_tips(transport->smart_options);
 		else
 			warning("Ignoring --negotiation-tip because the protocol does not support it.");
 	}
 	return transport;
 }
 
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 7137f13a74..823e87c1c9 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -459,22 +459,24 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 			show_progress = arg;
 			continue;
 		}
 
 		if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) {
 			parse_list_objects_filter(&filter_options, arg);
 			if (filter_options.choice && !revs.blob_objects)
 				die(_("object filtering requires --objects"));
 			if (filter_options.choice == LOFC_SPARSE_OID &&
 			    !filter_options.sparse_oid_value)
-				die(_("invalid sparse value '%s'"),
-				    filter_options.filter_spec.buf);
+				die(
+					_("invalid sparse value '%s'"),
+					list_objects_filter_spec(
+						&filter_options));
 			continue;
 		}
 		if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) {
 			list_objects_filter_set_no_filter(&filter_options);
 			continue;
 		}
 		if (!strcmp(arg, "--filter-print-omitted")) {
 			arg_print_omitted = 1;
 			continue;
 		}
diff --git a/fetch-pack.c b/fetch-pack.c
index 1c10f54e78..72e13b0a1d 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -332,26 +332,23 @@ static int find_common(struct fetch_negotiator *negotiator,
 		packet_buf_write(&req_buf, "deepen-since %"PRItime, max_age);
 	}
 	if (args->deepen_not) {
 		int i;
 		for (i = 0; i < args->deepen_not->nr; i++) {
 			struct string_list_item *s = args->deepen_not->items + i;
 			packet_buf_write(&req_buf, "deepen-not %s", s->string);
 		}
 	}
 	if (server_supports_filtering && args->filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&args->filter_options,
-						&expanded_filter_spec);
-		packet_buf_write(&req_buf, "filter %s",
-				 expanded_filter_spec.buf);
-		strbuf_release(&expanded_filter_spec);
+		const char *spec =
+			expand_list_objects_filter_spec(&args->filter_options);
+		packet_buf_write(&req_buf, "filter %s", spec);
 	}
 	packet_buf_flush(&req_buf);
 	state_len = req_buf.len;
 
 	if (args->deepen) {
 		const char *arg;
 		struct object_id oid;
 
 		send_request(args, fd[1], &req_buf);
 		while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
@@ -1092,21 +1089,21 @@ static int add_haves(struct fetch_negotiator *negotiator,
 		ret = 1;
 	}
 
 	/* Increase haves to send on next round */
 	*haves_to_send = next_flush(1, *haves_to_send);
 
 	return ret;
 }
 
 static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
-			      const struct fetch_pack_args *args,
+			      struct fetch_pack_args *args,
 			      const struct ref *wants, struct oidset *common,
 			      int *haves_to_send, int *in_vain,
 			      int sideband_all)
 {
 	int ret = 0;
 	struct strbuf req_buf = STRBUF_INIT;
 
 	if (server_supports_v2("fetch", 1))
 		packet_buf_write(&req_buf, "command=fetch");
 	if (server_supports_v2("agent", 0))
@@ -1133,27 +1130,24 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 
 	/* Add shallow-info and deepen request */
 	if (server_supports_feature("fetch", "shallow", 0))
 		add_shallow_requests(&req_buf, args);
 	else if (is_repository_shallow(the_repository) || args->deepen)
 		die(_("Server does not support shallow requests"));
 
 	/* Add filter */
 	if (server_supports_feature("fetch", "filter", 0) &&
 	    args->filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
+		const char *spec =
+			expand_list_objects_filter_spec(&args->filter_options);
 		print_verbose(args, _("Server supports filter"));
-		expand_list_objects_filter_spec(&args->filter_options,
-						&expanded_filter_spec);
-		packet_buf_write(&req_buf, "filter %s",
-				 expanded_filter_spec.buf);
-		strbuf_release(&expanded_filter_spec);
+		packet_buf_write(&req_buf, "filter %s", spec);
 	} else if (args->filter_options.choice) {
 		warning("filtering not recognized by server, ignoring");
 	}
 
 	/* add wants */
 	add_wants(args->no_dependents, wants, &req_buf);
 
 	if (args->no_dependents) {
 		packet_buf_write(&req_buf, "done");
 		ret = 1;
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 9a5677c2c8..38729a7238 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -174,20 +174,29 @@ static int parse_combine_filter(
 	return result;
 }
 
 static int allow_unencoded(char ch)
 {
 	if (ch <= ' ' || ch == '%' || ch == '+')
 		return 0;
 	return !strchr(RESERVED_NON_WS, ch);
 }
 
+static void filter_spec_append_urlencode(
+	struct list_objects_filter_options *filter, const char *raw)
+{
+	struct strbuf buf = STRBUF_INIT;
+	strbuf_addstr_urlencode(&buf, raw, allow_unencoded);
+	trace_printf("Added to composite filter-spec: %s\n", buf.buf);
+	string_list_append(&filter->filter_spec, strbuf_detach(&buf, NULL));
+}
+
 /*
  * Changes filter_options into an equivalent LOFC_COMBINE filter options
  * instance. Does not do anything if filter_options is already LOFC_COMBINE.
  */
 static void transform_to_combine_type(
 	struct list_objects_filter_options *filter_options)
 {
 	assert(filter_options->choice);
 	if (filter_options->choice == LOFC_COMBINE)
 		return;
@@ -195,64 +204,59 @@ static void transform_to_combine_type(
 		const int initial_sub_alloc = 2;
 		struct list_objects_filter_options *sub_array =
 			xcalloc(initial_sub_alloc, sizeof(*sub_array));
 		sub_array[0] = *filter_options;
 		memset(filter_options, 0, sizeof(*filter_options));
 		filter_options->sub = sub_array;
 		filter_options->sub_alloc = initial_sub_alloc;
 	}
 	filter_options->sub_nr = 1;
 	filter_options->choice = LOFC_COMBINE;
-	strbuf_init(&filter_options->filter_spec, 0);
-	strbuf_addstr(&filter_options->filter_spec, "combine:");
-	strbuf_addstr_urlencode(&filter_options->filter_spec,
-				filter_options->sub[0].filter_spec.buf,
-				allow_unencoded);
+	string_list_append(&filter_options->filter_spec, xstrdup("combine:"));
+	filter_spec_append_urlencode(
+		filter_options,
+		list_objects_filter_spec(&filter_options->sub[0]));
 	/*
 	 * We don't need the filter_spec strings for subfilter specs, only the
 	 * top level.
 	 */
-	strbuf_release(&filter_options->sub[0].filter_spec);
+	string_list_clear(&filter_options->sub[0].filter_spec, /*free_util=*/0);
 }
 
 void list_objects_filter_die_if_populated(
 	struct list_objects_filter_options *filter_options)
 {
 	if (filter_options->choice)
 		die(_("multiple filter-specs cannot be combined"));
 }
 
 void parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 	int parse_error;
 
 	if (!filter_options->choice) {
-		strbuf_init(&filter_options->filter_spec, 0);
-		strbuf_addstr(&filter_options->filter_spec, arg);
+		string_list_append(&filter_options->filter_spec, xstrdup(arg));
 
 		parse_error = gently_parse_list_objects_filter(
 			filter_options, arg, &errbuf);
 	} else {
 		/*
 		 * Make filter_options an LOFC_COMBINE spec so we can trivially
 		 * add subspecs to it.
 		 */
 		transform_to_combine_type(filter_options);
 
-		strbuf_addstr(&filter_options->filter_spec, "+");
-		strbuf_addstr_urlencode(&filter_options->filter_spec, arg,
-					allow_unencoded);
-		trace_printf("Generated composite filter-spec: %s\n",
-			     filter_options->filter_spec.buf);
+		string_list_append(&filter_options->filter_spec, xstrdup("+"));
+		filter_spec_append_urlencode(filter_options, arg);
 		ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
 			      filter_options->sub_alloc);
 
 		parse_error = gently_parse_list_objects_filter(
 			&filter_options->sub[filter_options->sub_nr - 1], arg,
 			&errbuf);
 	}
 	if (parse_error)
 		die("%s", errbuf.buf);
 }
@@ -262,54 +266,71 @@ int opt_parse_list_objects_filter(const struct option *opt,
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
 	if (unset || !arg)
 		list_objects_filter_set_no_filter(filter_options);
 	else
 		parse_list_objects_filter(filter_options, arg);
 	return 0;
 }
 
-void expand_list_objects_filter_spec(
-	const struct list_objects_filter_options *filter,
-	struct strbuf *expanded_spec)
+const char *list_objects_filter_spec(struct list_objects_filter_options *filter)
+{
+	if (!filter->filter_spec.nr)
+		BUG("no filter_spec available for this filter");
+	if (filter->filter_spec.nr != 1) {
+		struct strbuf concatted = STRBUF_INIT;
+		strbuf_add_separated_string_list(
+			&concatted, "", &filter->filter_spec);
+		string_list_clear(&filter->filter_spec, /*free_util=*/0);
+		string_list_append(
+			&filter->filter_spec, strbuf_detach(&concatted, NULL));
+	}
+
+	return filter->filter_spec.items[0].string;
+}
+
+const char *expand_list_objects_filter_spec(
+	struct list_objects_filter_options *filter)
 {
-	strbuf_init(expanded_spec, 0);
-	if (filter->choice == LOFC_BLOB_LIMIT)
-		strbuf_addf(expanded_spec, "blob:limit=%lu",
+	if (filter->choice == LOFC_BLOB_LIMIT) {
+		struct strbuf expanded_spec;
+		strbuf_addf(&expanded_spec, "blob:limit=%lu",
 			    filter->blob_limit_value);
-	else if (filter->choice == LOFC_TREE_DEPTH)
-		strbuf_addf(expanded_spec, "tree:%lu",
-			    filter->tree_exclude_depth);
-	else
-		strbuf_addstr(expanded_spec, filter->filter_spec.buf);
+		string_list_clear(&filter->filter_spec, /*free_util=*/0);
+		string_list_append(
+			&filter->filter_spec,
+			strbuf_detach(&expanded_spec, NULL));
+	}
+
+	return list_objects_filter_spec(filter);
 }
 
 void list_objects_filter_release(
 	struct list_objects_filter_options *filter_options)
 {
 	size_t sub;
 
 	if (!filter_options)
 		return;
-	strbuf_release(&filter_options->filter_spec);
+	string_list_clear(&filter_options->filter_spec, /*free_util=*/0);
 	free(filter_options->sparse_oid_value);
 	free(filter_options->sparse_path_value);
 	for (sub = 0; sub < filter_options->sub_nr; sub++)
 		list_objects_filter_release(&filter_options->sub[sub]);
 	free(filter_options->sub);
 	memset(filter_options, 0, sizeof(*filter_options));
 }
 
 void partial_clone_register(
 	const char *remote,
-	const struct list_objects_filter_options *filter_options)
+	struct list_objects_filter_options *filter_options)
 {
 	/*
 	 * Record the name of the partial clone remote in the
 	 * config and in the global variable -- the latter is
 	 * used throughout to indicate that partial clone is
 	 * enabled and to expect missing objects.
 	 */
 	if (repository_format_partial_clone &&
 	    *repository_format_partial_clone &&
 	    strcmp(remote, repository_format_partial_clone))
@@ -318,35 +339,33 @@ void partial_clone_register(
 	git_config_set("core.repositoryformatversion", "1");
 	git_config_set("extensions.partialclone", remote);
 
 	repository_format_partial_clone = xstrdup(remote);
 
 	/*
 	 * Record the initial filter-spec in the config as
 	 * the default for subsequent fetches from this remote.
 	 */
 	core_partial_clone_filter_default =
-		xstrdup(filter_options->filter_spec.buf);
+		xstrdup(expand_list_objects_filter_spec(filter_options));
 	git_config_set("core.partialclonefilter",
 		       core_partial_clone_filter_default);
 }
 
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 
 	/*
 	 * Parse default value, but silently ignore it if it is invalid.
 	 */
 	if (!core_partial_clone_filter_default)
 		return;
 
-	if (!filter_options->filter_spec.buf)
-		strbuf_init(&filter_options->filter_spec, 0);
-	strbuf_addstr(&filter_options->filter_spec,
-		      core_partial_clone_filter_default);
+	string_list_append(&filter_options->filter_spec,
+			   core_partial_clone_filter_default);
 	gently_parse_list_objects_filter(filter_options,
 					 core_partial_clone_filter_default,
 					 &errbuf);
 	strbuf_release(&errbuf);
 }
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index 2c0ce6383a..9b31048ada 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -1,15 +1,15 @@
 #ifndef LIST_OBJECTS_FILTER_OPTIONS_H
 #define LIST_OBJECTS_FILTER_OPTIONS_H
 
 #include "parse-options.h"
-#include "strbuf.h"
+#include "string-list.h"
 
 /*
  * The list of defined filters for list-objects.
  */
 enum list_objects_filter_choice {
 	LOFC_DISABLED = 0,
 	LOFC_BLOB_NONE,
 	LOFC_BLOB_LIMIT,
 	LOFC_TREE_DEPTH,
 	LOFC_SPARSE_OID,
@@ -18,22 +18,24 @@ enum list_objects_filter_choice {
 	LOFC__COUNT /* must be last */
 };
 
 struct list_objects_filter_options {
 	/*
 	 * 'filter_spec' is the raw argument value given on the command line
 	 * or protocol request.  (The part after the "--keyword=".)  For
 	 * commands that launch filtering sub-processes, or for communication
 	 * over the network, don't use this value; use the result of
 	 * expand_list_objects_filter_spec() instead.
+	 * To get the raw filter spec given by the user, use the result of
+	 * list_objects_filter_spec().
 	 */
-	struct strbuf filter_spec;
+	struct string_list filter_spec;
 
 	/*
 	 * 'choice' is determined by parsing the filter-spec.  This indicates
 	 * the filtering algorithm to use.
 	 */
 	enum list_objects_filter_choice choice;
 
 	/*
 	 * Choice is LOFC_DISABLED because "--no-filter" was requested.
 	 */
@@ -86,31 +88,33 @@ int opt_parse_list_objects_filter(const struct option *opt,
 	  N_("object filtering"), 0, \
 	  opt_parse_list_objects_filter }
 
 /*
  * Translates abbreviated numbers in the filter's filter_spec into their
  * fully-expanded forms (e.g., "limit:blob=1k" becomes "limit:blob=1024").
  *
  * This form should be used instead of the raw filter_spec field when
  * communicating with a remote process or subprocess.
  */
-void expand_list_objects_filter_spec(
-	const struct list_objects_filter_options *filter,
-	struct strbuf *expanded_spec);
+const char *expand_list_objects_filter_spec(
+	struct list_objects_filter_options *filter);
+
+const char *list_objects_filter_spec(
+	struct list_objects_filter_options *filter);
 
 void list_objects_filter_release(
 	struct list_objects_filter_options *filter_options);
 
 static inline void list_objects_filter_set_no_filter(
 	struct list_objects_filter_options *filter_options)
 {
 	list_objects_filter_release(filter_options);
 	filter_options->no_filter = 1;
 }
 
 void partial_clone_register(
 	const char *remote,
-	const struct list_objects_filter_options *filter_options);
+	struct list_objects_filter_options *filter_options);
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options);
 
 #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */
diff --git a/transport-helper.c b/transport-helper.c
index cec83bd663..d6313ef9f5 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -675,27 +675,23 @@ static int fetch(struct transport *transport,
 	    data->transport_options.check_self_contained_and_connected)
 		set_helper_option(transport, "check-connectivity", "true");
 
 	if (transport->cloning)
 		set_helper_option(transport, "cloning", "true");
 
 	if (data->transport_options.update_shallow)
 		set_helper_option(transport, "update-shallow", "true");
 
 	if (data->transport_options.filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(
-			&data->transport_options.filter_options,
-			&expanded_filter_spec);
-		set_helper_option(transport, "filter",
-				  expanded_filter_spec.buf);
-		strbuf_release(&expanded_filter_spec);
+		const char *spec = expand_list_objects_filter_spec(
+			&data->transport_options.filter_options);
+		set_helper_option(transport, "filter", spec);
 	}
 
 	if (data->transport_options.negotiation_tips)
 		warning("Ignoring --negotiation-tip because the protocol does not support it.");
 
 	if (data->fetch)
 		return fetch_with_fetch(transport, nr_heads, to_fetch);
 
 	if (data->import)
 		return fetch_with_import(transport, nr_heads, to_fetch);
diff --git a/upload-pack.c b/upload-pack.c
index ba8c3a1f8e..dda2ac6f44 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -133,32 +133,31 @@ static void create_pack_file(const struct object_array *have_obj,
 
 	argv_array_push(&pack_objects.args, "--stdout");
 	if (shallow_nr)
 		argv_array_push(&pack_objects.args, "--shallow");
 	if (!no_progress)
 		argv_array_push(&pack_objects.args, "--progress");
 	if (use_ofs_delta)
 		argv_array_push(&pack_objects.args, "--delta-base-offset");
 	if (use_include_tag)
 		argv_array_push(&pack_objects.args, "--include-tag");
-	if (filter_options.filter_spec.len) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&filter_options,
-						&expanded_filter_spec);
+	if (filter_options.choice) {
+		const char *spec =
+			expand_list_objects_filter_spec(&filter_options);
 		if (pack_objects.use_shell) {
 			struct strbuf buf = STRBUF_INIT;
-			sq_quote_buf(&buf, expanded_filter_spec.buf);
+			sq_quote_buf(&buf, spec);
 			argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf);
 			strbuf_release(&buf);
 		} else {
 			argv_array_pushf(&pack_objects.args, "--filter=%s",
-					 expanded_filter_spec.buf);
+					 spec);
 		}
 	}
 
 	pack_objects.in = -1;
 	pack_objects.out = -1;
 	pack_objects.err = -1;
 
 	if (start_command(&pack_objects))
 		die("git upload-pack: unable to fork git-pack-objects");
 

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 00/10] Filter combination
  2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
                   ` (9 preceding siblings ...)
  2019-06-03 21:35 ` [PATCH v2 0/9] Filter combination Jeff Hostetler
@ 2019-06-13 21:51 ` " Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 01/10] list-objects-filter: make API easier to use Matthew DeVore
                     ` (10 more replies)
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
  11 siblings, 11 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-13 21:51 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

It has been a while since I sent a roll-up. Here are the changes since v2:

 - Re-use more URL-encoding logic in strbuf.c
   * This was partially achieved by changing the helper function to accept a
     function that will indicate whether some character must be escaped.
 - Re-use more URL-decoding logic in url.c
 - Change the filter_spec strbuf to a string_list to avoid explicit
   initialization
 - Remove logic to "expand" tree:#k and tree:#m filter specs since there is no
   server that supports tree:# but does not support tree:#k, as they were
   implemented at the same time.

Thanks,

Matthew DeVore (10):
  list-objects-filter: make API easier to use
  list-objects-filter: put omits set in filter struct
  list-objects-filter-options: always supply *errbuf
  list-objects-filter: implement composite filters
  list-objects-filter-options: move error check up
  list-objects-filter-options: make filter_spec a string_list
  strbuf: give URL-encoding API a char predicate fn
  list-objects-filter-options: allow mult. --filter
  list-objects-filter-options: clean up use of ALLOC_GROW
  list-objects-filter-options: make parser void

 Documentation/rev-list-options.txt  |  16 ++
 builtin/clone.c                     |   8 +-
 builtin/fetch.c                     |   9 +-
 builtin/rev-list.c                  |   6 +-
 cache.h                             |  22 ++
 credential-store.c                  |   9 +-
 fetch-pack.c                        |  20 +-
 http.c                              |   6 +-
 list-objects-filter-options.c       | 267 +++++++++++++++++----
 list-objects-filter-options.h       |  57 ++++-
 list-objects-filter.c               | 345 +++++++++++++++++++++-------
 list-objects-filter.h               |  35 ++-
 list-objects.c                      |  55 ++---
 strbuf.c                            |  15 +-
 strbuf.h                            |   7 +-
 t/t5616-partial-clone.sh            |  19 ++
 t/t6112-rev-list-filters-objects.sh | 194 +++++++++++++++-
 transport-helper.c                  |  10 +-
 transport.c                         |   1 +
 upload-pack.c                       |  13 +-
 url.c                               |   6 +
 url.h                               |   8 +
 22 files changed, 879 insertions(+), 249 deletions(-)

-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 01/10] list-objects-filter: make API easier to use
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
@ 2019-06-13 21:51   ` Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 02/10] list-objects-filter: put omits set in filter struct Matthew DeVore
                     ` (9 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-13 21:51 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore, Jeff Hostetler, Junio C Hamano

Make the list-objects-filter.h API more opaque and easier to use. This
prepares for combined filter support, where filters will be created and
used in a new context.

Helped-by: Jeff Hostetler <git@jeffhostetler.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter.c | 122 +++++++++++++++++++++++++++---------------
 list-objects-filter.h |  35 ++++++------
 list-objects.c        |  55 ++++++++-----------
 3 files changed, 117 insertions(+), 95 deletions(-)

diff --git a/list-objects-filter.c b/list-objects-filter.c
index ee449de3f7..35e0bbe123 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -19,20 +19,34 @@
  * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
  * that have been shown, but should be revisited if they appear
  * in the traversal (until we mark it SEEN).  This is a way to
  * let us silently de-dup calls to show() in the caller.  This
  * is subtly different from the "revision.h:SHOWN" and the
  * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
  * the non-de-dup usage in pack-bitmap.c
  */
 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
 
+struct filter {
+	enum list_objects_filter_result (*filter_object_fn)(
+		struct repository *r,
+		enum list_objects_filter_situation filter_situation,
+		struct object *obj,
+		const char *pathname,
+		const char *filename,
+		void *filter_data);
+
+	void (*free_fn)(void *filter_data);
+
+	void *filter_data;
+};
+
 /*
  * A filter for list-objects to omit ALL blobs from the traversal.
  * And to OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_blobs_none_data {
 	struct oidset *omits;
 };
 
 static enum list_objects_filter_result filter_blobs_none(
 	struct repository *r,
@@ -60,32 +74,31 @@ static enum list_objects_filter_result filter_blobs_none(
 	case LOFS_BLOB:
 		assert(obj->type == OBJ_BLOB);
 		assert((obj->flags & SEEN) == 0);
 
 		if (filter_data->omits)
 			oidset_insert(filter_data->omits, &obj->oid);
 		return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 	}
 }
 
-static void *filter_blobs_none__init(
+static void filter_blobs_none__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 
-	*filter_fn = filter_blobs_none;
-	*filter_free_fn = free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_blobs_none;
+	filter->free_fn = free;
 }
 
 /*
  * A filter for list-objects to omit ALL trees and blobs from the traversal.
  * Can OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_trees_depth_data {
 	struct oidset *omits;
 
 	/*
@@ -194,35 +207,34 @@ static enum list_objects_filter_result filter_trees_depth(
 }
 
 static void filter_trees_free(void *filter_data) {
 	struct filter_trees_depth_data *d = filter_data;
 	if (!d)
 		return;
 	oidmap_free(&d->seen_at_depth, 1);
 	free(d);
 }
 
-static void *filter_trees_depth__init(
+static void filter_trees_depth__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 	oidmap_init(&d->seen_at_depth, 0);
 	d->exclude_depth = filter_options->tree_exclude_depth;
 	d->current_depth = 0;
 
-	*filter_fn = filter_trees_depth;
-	*filter_free_fn = filter_trees_free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_trees_depth;
+	filter->free_fn = filter_trees_free;
 }
 
 /*
  * A filter for list-objects to omit large blobs.
  * And to OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_blobs_limit_data {
 	struct oidset *omits;
 	unsigned long max_bytes;
 };
@@ -274,33 +286,32 @@ static enum list_objects_filter_result filter_blobs_limit(
 			oidset_insert(filter_data->omits, &obj->oid);
 		return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 	}
 
 include_it:
 	if (filter_data->omits)
 		oidset_remove(filter_data->omits, &obj->oid);
 	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
-static void *filter_blobs_limit__init(
+static void filter_blobs_limit__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 	d->max_bytes = filter_options->blob_limit_value;
 
-	*filter_fn = filter_blobs_limit;
-	*filter_free_fn = free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_blobs_limit;
+	filter->free_fn = free;
 }
 
 /*
  * A filter driven by a sparse-checkout specification to only
  * include blobs that a sparse checkout would populate.
  *
  * The sparse-checkout spec can be loaded from a blob with the
  * given OID or from a local pathname.  We allow an OID because
  * the repo may be bare or we may be doing the filtering on the
  * server.
@@ -450,92 +461,117 @@ static enum list_objects_filter_result filter_sparse(
 }
 
 
 static void filter_sparse_free(void *filter_data)
 {
 	struct filter_sparse_data *d = filter_data;
 	/* TODO free contents of 'd' */
 	free(d);
 }
 
-static void *filter_sparse_oid__init(
+static void filter_sparse_oid__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 	if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 					   NULL, 0, &d->el) < 0)
 		die("could not load filter specification");
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
-	*filter_fn = filter_sparse;
-	*filter_free_fn = filter_sparse_free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_sparse;
+	filter->free_fn = filter_sparse_free;
 }
 
-static void *filter_sparse_path__init(
+static void filter_sparse_path__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 	if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
 					   NULL, 0, &d->el, NULL) < 0)
 		die("could not load filter specification");
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
-	*filter_fn = filter_sparse;
-	*filter_free_fn = filter_sparse_free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_sparse;
+	filter->free_fn = filter_sparse_free;
 }
 
-typedef void *(*filter_init_fn)(
+typedef void (*filter_init_fn)(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn);
+	struct filter *filter);
 
 /*
  * Must match "enum list_objects_filter_choice".
  */
 static filter_init_fn s_filters[] = {
 	NULL,
 	filter_blobs_none__init,
 	filter_blobs_limit__init,
 	filter_trees_depth__init,
 	filter_sparse_oid__init,
 	filter_sparse_path__init,
 };
 
-void *list_objects_filter__init(
+struct filter *list_objects_filter__init(
 	struct oidset *omitted,
-	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct list_objects_filter_options *filter_options)
 {
+	struct filter *filter;
 	filter_init_fn init_fn;
 
 	assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
 
 	if (filter_options->choice >= LOFC__COUNT)
 		BUG("invalid list-objects filter choice: %d",
 		    filter_options->choice);
 
 	init_fn = s_filters[filter_options->choice];
-	if (init_fn)
-		return init_fn(omitted, filter_options,
-			       filter_fn, filter_free_fn);
-	*filter_fn = NULL;
-	*filter_free_fn = NULL;
-	return NULL;
+	if (!init_fn)
+		return NULL;
+
+	filter = xcalloc(1, sizeof(*filter));
+	init_fn(omitted, filter_options, filter);
+	return filter;
+}
+
+enum list_objects_filter_result list_objects_filter__filter_object(
+	struct repository *r,
+	enum list_objects_filter_situation filter_situation,
+	struct object *obj,
+	const char *pathname,
+	const char *filename,
+	struct filter *filter)
+{
+	if (filter && (obj->flags & NOT_USER_GIVEN))
+		return filter->filter_object_fn(r, filter_situation, obj,
+						pathname, filename,
+						filter->filter_data);
+	/*
+	 * No filter is active or user gave object explicitly. Choose default
+	 * behavior based on filter situation.
+	 */
+	if (filter_situation == LOFS_END_TREE)
+		return 0;
+	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+}
+
+void list_objects_filter__free(struct filter *filter)
+{
+	if (!filter)
+		return;
+	filter->free_fn(filter->filter_data);
+	free(filter);
 }
diff --git a/list-objects-filter.h b/list-objects-filter.h
index 1d45a4ad57..6908954266 100644
--- a/list-objects-filter.h
+++ b/list-objects-filter.h
@@ -53,37 +53,34 @@ enum list_objects_filter_result {
 	LOFR_DO_SHOW   = 1<<1,
 	LOFR_SKIP_TREE = 1<<2,
 };
 
 enum list_objects_filter_situation {
 	LOFS_BEGIN_TREE,
 	LOFS_END_TREE,
 	LOFS_BLOB
 };
 
-typedef enum list_objects_filter_result (*filter_object_fn)(
+struct filter;
+
+/* Constructor for the set of defined list-objects filters. */
+struct filter *list_objects_filter__init(
+	struct oidset *omitted,
+	struct list_objects_filter_options *filter_options);
+
+/*
+ * Lets `filter` decide how to handle the `obj`. If `filter` is NULL, or if
+ * `obj` was given explicitly by the user, no filtering is applied and the
+ * object is included.
+ */
+enum list_objects_filter_result list_objects_filter__filter_object(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
-	void *filter_data);
-
-typedef void (*filter_free_fn)(void *filter_data);
+	struct filter *filter);
 
-/*
- * Constructor for the set of defined list-objects filters.
- * Returns a generic "void *filter_data".
- *
- * The returned "filter_fn" will be used by traverse_commit_list()
- * to filter the results.
- *
- * The returned "filter_free_fn" is a destructor for the
- * filter_data.
- */
-void *list_objects_filter__init(
-	struct oidset *omitted,
-	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn);
+/* Destroys `filter`. Does nothing if `filter` is null. */
+void list_objects_filter__free(struct filter *filter);
 
 #endif /* LIST_OBJECTS_FILTER_H */
diff --git a/list-objects.c b/list-objects.c
index b5651ddd5b..9307d91fb3 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -11,32 +11,31 @@
 #include "list-objects-filter-options.h"
 #include "packfile.h"
 #include "object-store.h"
 #include "trace.h"
 
 struct traversal_context {
 	struct rev_info *revs;
 	show_object_fn show_object;
 	show_commit_fn show_commit;
 	void *show_data;
-	filter_object_fn filter_fn;
-	void *filter_data;
+	struct filter *filter;
 };
 
 static void process_blob(struct traversal_context *ctx,
 			 struct blob *blob,
 			 struct strbuf *path,
 			 const char *name)
 {
 	struct object *obj = &blob->object;
 	size_t pathlen;
-	enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW;
+	enum list_objects_filter_result r;
 
 	if (!ctx->revs->blob_objects)
 		return;
 	if (!obj)
 		die("bad blob object");
 	if (obj->flags & (UNINTERESTING | SEEN))
 		return;
 
 	/*
 	 * Pre-filter known-missing objects when explicitly requested.
@@ -47,25 +46,24 @@ static void process_blob(struct traversal_context *ctx,
 	 * may cause the actual filter to report an incomplete list
 	 * of missing objects.
 	 */
 	if (ctx->revs->exclude_promisor_objects &&
 	    !has_object_file(&obj->oid) &&
 	    is_promisor_object(&obj->oid))
 		return;
 
 	pathlen = path->len;
 	strbuf_addstr(path, name);
-	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
-		r = ctx->filter_fn(ctx->revs->repo,
-				   LOFS_BLOB, obj,
-				   path->buf, &path->buf[pathlen],
-				   ctx->filter_data);
+	r = list_objects_filter__filter_object(ctx->revs->repo,
+					       LOFS_BLOB, obj,
+					       path->buf, &path->buf[pathlen],
+					       ctx->filter);
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
 		ctx->show_object(obj, path->buf, ctx->show_data);
 	strbuf_setlen(path, pathlen);
 }
 
 /*
  * Processing a gitlink entry currently does nothing, since
  * we do not recurse into the subproject.
@@ -150,21 +148,21 @@ static void process_tree_contents(struct traversal_context *ctx,
 }
 
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
 			 const char *name)
 {
 	struct object *obj = &tree->object;
 	struct rev_info *revs = ctx->revs;
 	int baselen = base->len;
-	enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW;
+	enum list_objects_filter_result r;
 	int failed_parse;
 
 	if (!revs->tree_objects)
 		return;
 	if (!obj)
 		die("bad tree object");
 	if (obj->flags & (UNINTERESTING | SEEN))
 		return;
 
 	failed_parse = parse_tree_gently(tree, 1);
@@ -179,47 +177,44 @@ static void process_tree(struct traversal_context *ctx,
 		 */
 		if (revs->exclude_promisor_objects &&
 		    is_promisor_object(&obj->oid))
 			return;
 
 		if (!revs->do_not_die_on_missing_tree)
 			die("bad tree object %s", oid_to_hex(&obj->oid));
 	}
 
 	strbuf_addstr(base, name);
-	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
-		r = ctx->filter_fn(ctx->revs->repo,
-				   LOFS_BEGIN_TREE, obj,
-				   base->buf, &base->buf[baselen],
-				   ctx->filter_data);
+	r = list_objects_filter__filter_object(ctx->revs->repo,
+					       LOFS_BEGIN_TREE, obj,
+					       base->buf, &base->buf[baselen],
+					       ctx->filter);
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
 		ctx->show_object(obj, base->buf, ctx->show_data);
 	if (base->len)
 		strbuf_addch(base, '/');
 
 	if (r & LOFR_SKIP_TREE)
 		trace_printf("Skipping contents of tree %s...\n", base->buf);
 	else if (!failed_parse)
 		process_tree_contents(ctx, tree, base);
 
-	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) {
-		r = ctx->filter_fn(ctx->revs->repo,
-				   LOFS_END_TREE, obj,
-				   base->buf, &base->buf[baselen],
-				   ctx->filter_data);
-		if (r & LOFR_MARK_SEEN)
-			obj->flags |= SEEN;
-		if (r & LOFR_DO_SHOW)
-			ctx->show_object(obj, base->buf, ctx->show_data);
-	}
+	r = list_objects_filter__filter_object(ctx->revs->repo,
+					       LOFS_END_TREE, obj,
+					       base->buf, &base->buf[baselen],
+					       ctx->filter);
+	if (r & LOFR_MARK_SEEN)
+		obj->flags |= SEEN;
+	if (r & LOFR_DO_SHOW)
+		ctx->show_object(obj, base->buf, ctx->show_data);
 
 	strbuf_setlen(base, baselen);
 	free_tree_buffer(tree);
 }
 
 static void mark_edge_parents_uninteresting(struct commit *commit,
 					    struct rev_info *revs,
 					    show_edge_fn show_edge)
 {
 	struct commit_list *parents;
@@ -395,38 +390,32 @@ static void do_traverse(struct traversal_context *ctx)
 void traverse_commit_list(struct rev_info *revs,
 			  show_commit_fn show_commit,
 			  show_object_fn show_object,
 			  void *show_data)
 {
 	struct traversal_context ctx;
 	ctx.revs = revs;
 	ctx.show_commit = show_commit;
 	ctx.show_object = show_object;
 	ctx.show_data = show_data;
-	ctx.filter_fn = NULL;
-	ctx.filter_data = NULL;
+	ctx.filter = NULL;
 	do_traverse(&ctx);
 }
 
 void traverse_commit_list_filtered(
 	struct list_objects_filter_options *filter_options,
 	struct rev_info *revs,
 	show_commit_fn show_commit,
 	show_object_fn show_object,
 	void *show_data,
 	struct oidset *omitted)
 {
 	struct traversal_context ctx;
-	filter_free_fn filter_free_fn = NULL;
 
 	ctx.revs = revs;
 	ctx.show_object = show_object;
 	ctx.show_commit = show_commit;
 	ctx.show_data = show_data;
-	ctx.filter_fn = NULL;
-
-	ctx.filter_data = list_objects_filter__init(omitted, filter_options,
-						    &ctx.filter_fn, &filter_free_fn);
+	ctx.filter = list_objects_filter__init(omitted, filter_options);
 	do_traverse(&ctx);
-	if (ctx.filter_data && filter_free_fn)
-		filter_free_fn(ctx.filter_data);
+	list_objects_filter__free(ctx.filter);
 }
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 02/10] list-objects-filter: put omits set in filter struct
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 01/10] list-objects-filter: make API easier to use Matthew DeVore
@ 2019-06-13 21:51   ` Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 03/10] list-objects-filter-options: always supply *errbuf Matthew DeVore
                     ` (8 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-13 21:51 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

The oidset *omits pointer must be accessed by the combine filter in a
type-agnostic way once the graph traversal is over. Store that pointer
in the general `filter` struct. This will be used in a follow-up patch
to implement the combine filter.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter.c | 70 ++++++++++++++++---------------------------
 1 file changed, 26 insertions(+), 44 deletions(-)

diff --git a/list-objects-filter.c b/list-objects-filter.c
index 35e0bbe123..57bbf6ec1c 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -26,88 +26,76 @@
  */
 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
 
 struct filter {
 	enum list_objects_filter_result (*filter_object_fn)(
 		struct repository *r,
 		enum list_objects_filter_situation filter_situation,
 		struct object *obj,
 		const char *pathname,
 		const char *filename,
+		struct oidset *omits,
 		void *filter_data);
 
 	void (*free_fn)(void *filter_data);
 
 	void *filter_data;
-};
 
-/*
- * A filter for list-objects to omit ALL blobs from the traversal.
- * And to OPTIONALLY collect a list of the omitted OIDs.
- */
-struct filter_blobs_none_data {
+	/* If non-NULL, the filter collects a list of the omitted OIDs here. */
 	struct oidset *omits;
 };
 
 static enum list_objects_filter_result filter_blobs_none(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
+	struct oidset *omits,
 	void *filter_data_)
 {
-	struct filter_blobs_none_data *filter_data = filter_data_;
-
 	switch (filter_situation) {
 	default:
 		BUG("unknown filter_situation: %d", filter_situation);
 
 	case LOFS_BEGIN_TREE:
 		assert(obj->type == OBJ_TREE);
 		/* always include all tree objects */
 		return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 
 	case LOFS_END_TREE:
 		assert(obj->type == OBJ_TREE);
 		return LOFR_ZERO;
 
 	case LOFS_BLOB:
 		assert(obj->type == OBJ_BLOB);
 		assert((obj->flags & SEEN) == 0);
 
-		if (filter_data->omits)
-			oidset_insert(filter_data->omits, &obj->oid);
+		if (omits)
+			oidset_insert(omits, &obj->oid);
 		return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 	}
 }
 
 static void filter_blobs_none__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
-	struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
-
-	filter->filter_data = d;
 	filter->filter_object_fn = filter_blobs_none;
 	filter->free_fn = free;
 }
 
 /*
  * A filter for list-objects to omit ALL trees and blobs from the traversal.
  * Can OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_trees_depth_data {
-	struct oidset *omits;
-
 	/*
 	 * Maps trees to the minimum depth at which they were seen. It is not
 	 * necessary to re-traverse a tree at deeper or equal depths than it has
 	 * already been traversed.
 	 *
 	 * We can't use LOFR_MARK_SEEN for tree objects since this will prevent
 	 * it from being traversed at shallower depths.
 	 */
 	struct oidmap seen_at_depth;
 
@@ -116,38 +104,39 @@ struct filter_trees_depth_data {
 };
 
 struct seen_map_entry {
 	struct oidmap_entry base;
 	size_t depth;
 };
 
 /* Returns 1 if the oid was in the omits set before it was invoked. */
 static int filter_trees_update_omits(
 	struct object *obj,
-	struct filter_trees_depth_data *filter_data,
+	struct oidset *omits,
 	int include_it)
 {
-	if (!filter_data->omits)
+	if (!omits)
 		return 0;
 
 	if (include_it)
-		return oidset_remove(filter_data->omits, &obj->oid);
+		return oidset_remove(omits, &obj->oid);
 	else
-		return oidset_insert(filter_data->omits, &obj->oid);
+		return oidset_insert(omits, &obj->oid);
 }
 
 static enum list_objects_filter_result filter_trees_depth(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
+	struct oidset *omits,
 	void *filter_data_)
 {
 	struct filter_trees_depth_data *filter_data = filter_data_;
 	struct seen_map_entry *seen_info;
 	int include_it = filter_data->current_depth <
 		filter_data->exclude_depth;
 	int filter_res;
 	int already_seen;
 
 	/*
@@ -158,47 +147,47 @@ static enum list_objects_filter_result filter_trees_depth(
 	switch (filter_situation) {
 	default:
 		BUG("unknown filter_situation: %d", filter_situation);
 
 	case LOFS_END_TREE:
 		assert(obj->type == OBJ_TREE);
 		filter_data->current_depth--;
 		return LOFR_ZERO;
 
 	case LOFS_BLOB:
-		filter_trees_update_omits(obj, filter_data, include_it);
+		filter_trees_update_omits(obj, omits, include_it);
 		return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO;
 
 	case LOFS_BEGIN_TREE:
 		seen_info = oidmap_get(
 			&filter_data->seen_at_depth, &obj->oid);
 		if (!seen_info) {
 			seen_info = xcalloc(1, sizeof(*seen_info));
 			oidcpy(&seen_info->base.oid, &obj->oid);
 			seen_info->depth = filter_data->current_depth;
 			oidmap_put(&filter_data->seen_at_depth, seen_info);
 			already_seen = 0;
 		} else {
 			already_seen =
 				filter_data->current_depth >= seen_info->depth;
 		}
 
 		if (already_seen) {
 			filter_res = LOFR_SKIP_TREE;
 		} else {
 			int been_omitted = filter_trees_update_omits(
-				obj, filter_data, include_it);
+				obj, omits, include_it);
 			seen_info->depth = filter_data->current_depth;
 
 			if (include_it)
 				filter_res = LOFR_DO_SHOW;
-			else if (filter_data->omits && !been_omitted)
+			else if (omits && !been_omitted)
 				/*
 				 * Must update omit information of children
 				 * recursively; they have not been omitted yet.
 				 */
 				filter_res = LOFR_ZERO;
 			else
 				filter_res = LOFR_SKIP_TREE;
 		}
 
 		filter_data->current_depth++;
@@ -208,50 +197,48 @@ static enum list_objects_filter_result filter_trees_depth(
 
 static void filter_trees_free(void *filter_data) {
 	struct filter_trees_depth_data *d = filter_data;
 	if (!d)
 		return;
 	oidmap_free(&d->seen_at_depth, 1);
 	free(d);
 }
 
 static void filter_trees_depth__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
 	struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
 	oidmap_init(&d->seen_at_depth, 0);
 	d->exclude_depth = filter_options->tree_exclude_depth;
 	d->current_depth = 0;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_trees_depth;
 	filter->free_fn = filter_trees_free;
 }
 
 /*
  * A filter for list-objects to omit large blobs.
  * And to OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_blobs_limit_data {
-	struct oidset *omits;
 	unsigned long max_bytes;
 };
 
 static enum list_objects_filter_result filter_blobs_limit(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
+	struct oidset *omits,
 	void *filter_data_)
 {
 	struct filter_blobs_limit_data *filter_data = filter_data_;
 	unsigned long object_length;
 	enum object_type t;
 
 	switch (filter_situation) {
 	default:
 		BUG("unknown filter_situation: %d", filter_situation);
 
@@ -275,38 +262,36 @@ static enum list_objects_filter_result filter_blobs_limit(
 			 * apply the size filter criteria.  Be conservative
 			 * and force show it (and let the caller deal with
 			 * the ambiguity).
 			 */
 			goto include_it;
 		}
 
 		if (object_length < filter_data->max_bytes)
 			goto include_it;
 
-		if (filter_data->omits)
-			oidset_insert(filter_data->omits, &obj->oid);
+		if (omits)
+			oidset_insert(omits, &obj->oid);
 		return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 	}
 
 include_it:
-	if (filter_data->omits)
-		oidset_remove(filter_data->omits, &obj->oid);
+	if (omits)
+		oidset_remove(omits, &obj->oid);
 	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
 static void filter_blobs_limit__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
 	struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
 	d->max_bytes = filter_options->blob_limit_value;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_blobs_limit;
 	filter->free_fn = free;
 }
 
 /*
  * A filter driven by a sparse-checkout specification to only
  * include blobs that a sparse checkout would populate.
@@ -330,33 +315,33 @@ struct frame {
 	 * omitted objects.
 	 *
 	 * 0 if everything (recursively) contained in this directory
 	 * has been explicitly included (SHOWN) in the result and
 	 * the directory may be short-cut later in the traversal.
 	 */
 	unsigned child_prov_omit : 1;
 };
 
 struct filter_sparse_data {
-	struct oidset *omits;
 	struct exclude_list el;
 
 	size_t nr, alloc;
 	struct frame *array_frame;
 };
 
 static enum list_objects_filter_result filter_sparse(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
+	struct oidset *omits,
 	void *filter_data_)
 {
 	struct filter_sparse_data *filter_data = filter_data_;
 	int val, dtype;
 	struct frame *frame;
 
 	switch (filter_situation) {
 	default:
 		BUG("unknown filter_situation: %d", filter_situation);
 
@@ -425,98 +410,93 @@ static enum list_objects_filter_result filter_sparse(
 
 		frame = &filter_data->array_frame[filter_data->nr];
 
 		dtype = DT_REG;
 		val = is_excluded_from_list(pathname, strlen(pathname),
 					    filename, &dtype, &filter_data->el,
 					    r->index);
 		if (val < 0)
 			val = frame->defval;
 		if (val > 0) {
-			if (filter_data->omits)
-				oidset_remove(filter_data->omits, &obj->oid);
+			if (omits)
+				oidset_remove(omits, &obj->oid);
 			return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 		}
 
 		/*
 		 * Provisionally omit it.  We've already established that
 		 * this pathname is not in the sparse-checkout specification
 		 * with the CURRENT pathname, so we *WANT* to omit this blob.
 		 *
 		 * However, a pathname elsewhere in the tree may also
 		 * reference this same blob, so we cannot reject it yet.
 		 * Leave the LOFR_ bits unset so that if the blob appears
 		 * again in the traversal, we will be asked again.
 		 */
-		if (filter_data->omits)
-			oidset_insert(filter_data->omits, &obj->oid);
+		if (omits)
+			oidset_insert(omits, &obj->oid);
 
 		/*
 		 * Remember that at least 1 blob in this tree was
 		 * provisionally omitted.  This prevents us from short
 		 * cutting the tree in future iterations.
 		 */
 		frame->child_prov_omit = 1;
 		return LOFR_ZERO;
 	}
 }
 
 
 static void filter_sparse_free(void *filter_data)
 {
 	struct filter_sparse_data *d = filter_data;
 	/* TODO free contents of 'd' */
 	free(d);
 }
 
 static void filter_sparse_oid__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
 	struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
 	if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 					   NULL, 0, &d->el) < 0)
 		die("could not load filter specification");
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_sparse;
 	filter->free_fn = filter_sparse_free;
 }
 
 static void filter_sparse_path__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
 	struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
 	if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
 					   NULL, 0, &d->el, NULL) < 0)
 		die("could not load filter specification");
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_sparse;
 	filter->free_fn = filter_sparse_free;
 }
 
 typedef void (*filter_init_fn)(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter);
 
 /*
  * Must match "enum list_objects_filter_choice".
  */
 static filter_init_fn s_filters[] = {
 	NULL,
 	filter_blobs_none__init,
 	filter_blobs_limit__init,
@@ -536,35 +516,37 @@ struct filter *list_objects_filter__init(
 
 	if (filter_options->choice >= LOFC__COUNT)
 		BUG("invalid list-objects filter choice: %d",
 		    filter_options->choice);
 
 	init_fn = s_filters[filter_options->choice];
 	if (!init_fn)
 		return NULL;
 
 	filter = xcalloc(1, sizeof(*filter));
-	init_fn(omitted, filter_options, filter);
+	filter->omits = omitted;
+	init_fn(filter_options, filter);
 	return filter;
 }
 
 enum list_objects_filter_result list_objects_filter__filter_object(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
 	struct filter *filter)
 {
 	if (filter && (obj->flags & NOT_USER_GIVEN))
 		return filter->filter_object_fn(r, filter_situation, obj,
 						pathname, filename,
+						filter->omits,
 						filter->filter_data);
 	/*
 	 * No filter is active or user gave object explicitly. Choose default
 	 * behavior based on filter situation.
 	 */
 	if (filter_situation == LOFS_END_TREE)
 		return 0;
 	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 03/10] list-objects-filter-options: always supply *errbuf
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 01/10] list-objects-filter: make API easier to use Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 02/10] list-objects-filter: put omits set in filter struct Matthew DeVore
@ 2019-06-13 21:51   ` Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 04/10] list-objects-filter: implement composite filters Matthew DeVore
                     ` (7 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-13 21:51 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

Making errbuf an optional argument complicates error reporting. Fix this
by making all callers supply an errbuf, even if they may ignore it. This
will be important in follow-up patches where the filter-spec parsing has
more pitfalls and possible errors.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter-options.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index c0036f7378..aef24ddae3 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -23,47 +23,40 @@
  * convenience of the current command.
  */
 static int gently_parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
 	struct strbuf *errbuf)
 {
 	const char *v0;
 
 	if (filter_options->choice) {
-		if (errbuf) {
-			strbuf_addstr(
-				errbuf,
-				_("multiple filter-specs cannot be combined"));
-		}
+		strbuf_addstr(
+			errbuf, _("multiple filter-specs cannot be combined"));
 		return 1;
 	}
 
 	filter_options->filter_spec = strdup(arg);
 
 	if (!strcmp(arg, "blob:none")) {
 		filter_options->choice = LOFC_BLOB_NONE;
 		return 0;
 
 	} else if (skip_prefix(arg, "blob:limit=", &v0)) {
 		if (git_parse_ulong(v0, &filter_options->blob_limit_value)) {
 			filter_options->choice = LOFC_BLOB_LIMIT;
 			return 0;
 		}
 
 	} else if (skip_prefix(arg, "tree:", &v0)) {
 		if (!git_parse_ulong(v0, &filter_options->tree_exclude_depth)) {
-			if (errbuf) {
-				strbuf_addstr(
-					errbuf,
-					_("expected 'tree:<depth>'"));
-			}
+			strbuf_addstr(errbuf, _("expected 'tree:<depth>'"));
 			return 1;
 		}
 		filter_options->choice = LOFC_TREE_DEPTH;
 		return 0;
 
 	} else if (skip_prefix(arg, "sparse:oid=", &v0)) {
 		struct object_context oc;
 		struct object_id sparse_oid;
 
 		/*
@@ -80,22 +73,21 @@ static int gently_parse_list_objects_filter(
 	} else if (skip_prefix(arg, "sparse:path=", &v0)) {
 		filter_options->choice = LOFC_SPARSE_PATH;
 		filter_options->sparse_path_value = strdup(v0);
 		return 0;
 	}
 	/*
 	 * Please update _git_fetch() in git-completion.bash when you
 	 * add new filters
 	 */
 
-	if (errbuf)
-		strbuf_addf(errbuf, "invalid filter-spec '%s'", arg);
+	strbuf_addf(errbuf, "invalid filter-spec '%s'", arg);
 
 	memset(filter_options, 0, sizeof(*filter_options));
 	return 1;
 }
 
 int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 			      const char *arg)
 {
 	struct strbuf buf = STRBUF_INIT;
 	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
@@ -166,19 +158,22 @@ void partial_clone_register(
 	 */
 	core_partial_clone_filter_default =
 		xstrdup(filter_options->filter_spec);
 	git_config_set("core.partialclonefilter",
 		       core_partial_clone_filter_default);
 }
 
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options)
 {
+	struct strbuf errbuf = STRBUF_INIT;
+
 	/*
 	 * Parse default value, but silently ignore it if it is invalid.
 	 */
 	if (!core_partial_clone_filter_default)
 		return;
 	gently_parse_list_objects_filter(filter_options,
 					 core_partial_clone_filter_default,
-					 NULL);
+					 &errbuf);
+	strbuf_release(&errbuf);
 }
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 04/10] list-objects-filter: implement composite filters
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
                     ` (2 preceding siblings ...)
  2019-06-13 21:51   ` [PATCH v3 03/10] list-objects-filter-options: always supply *errbuf Matthew DeVore
@ 2019-06-13 21:51   ` Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 05/10] list-objects-filter-options: move error check up Matthew DeVore
                     ` (6 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-13 21:51 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore, Jeff Hostetler, Junio C Hamano

Allow combining filters such that only objects accepted by all filters
are shown. The motivation for this is to allow getting directory
listings without also fetching blobs. This can be done by combining
blob:none with tree:<depth>. There are massive repositories that have
larger-than-expected trees - even if you include only a single commit.

The current usage requires passing the filter to rev-list in the
following form:

	--filter=<FILTER1> --filter=<FILTER2> ...

Such usage is currently an error, so giving it a meaning is backwards-
compatible.

The URL-encoding scheme is being introduced before the repeated flag
logic, and the user-facing documentation for URL-encoding is being
withheld until the repeated flag feature is implemented. The
URL-encoding is in general not meant to be used directly by the user,
and it is better to describe the URL-encoding feature in terms of the
repeated flag.

Helped-by: Emily Shaffer <emilyshaffer@google.com>
Helped-by: Jeff Hostetler <git@jeffhostetler.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter-options.c       | 106 ++++++++++++++++++-
 list-objects-filter-options.h       |  17 ++-
 list-objects-filter.c               | 159 ++++++++++++++++++++++++++++
 t/t6112-rev-list-filters-objects.sh | 151 +++++++++++++++++++++++++-
 url.c                               |   6 ++
 url.h                               |   8 ++
 6 files changed, 441 insertions(+), 6 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index aef24ddae3..ffbadf337b 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -1,18 +1,24 @@
 #include "cache.h"
 #include "commit.h"
 #include "config.h"
 #include "revision.h"
 #include "argv-array.h"
 #include "list-objects.h"
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
+#include "url.h"
+
+static int parse_combine_filter(
+	struct list_objects_filter_options *filter_options,
+	const char *arg,
+	struct strbuf *errbuf);
 
 /*
  * Parse value of the argument to the "filter" keyword.
  * On the command line this looks like:
  *       --filter=<arg>
  * and in the pack protocol as:
  *       "filter" SP <arg>
  *
  * The filter keyword will be used by many commands.
  * See Documentation/rev-list-options.txt for allowed values for <arg>.
@@ -28,22 +34,20 @@ static int gently_parse_list_objects_filter(
 	struct strbuf *errbuf)
 {
 	const char *v0;
 
 	if (filter_options->choice) {
 		strbuf_addstr(
 			errbuf, _("multiple filter-specs cannot be combined"));
 		return 1;
 	}
 
-	filter_options->filter_spec = strdup(arg);
-
 	if (!strcmp(arg, "blob:none")) {
 		filter_options->choice = LOFC_BLOB_NONE;
 		return 0;
 
 	} else if (skip_prefix(arg, "blob:limit=", &v0)) {
 		if (git_parse_ulong(v0, &filter_options->blob_limit_value)) {
 			filter_options->choice = LOFC_BLOB_LIMIT;
 			return 0;
 		}
 
@@ -67,36 +71,125 @@ static int gently_parse_list_objects_filter(
 		if (!get_oid_with_context(the_repository, v0, GET_OID_BLOB,
 					  &sparse_oid, &oc))
 			filter_options->sparse_oid_value = oiddup(&sparse_oid);
 		filter_options->choice = LOFC_SPARSE_OID;
 		return 0;
 
 	} else if (skip_prefix(arg, "sparse:path=", &v0)) {
 		filter_options->choice = LOFC_SPARSE_PATH;
 		filter_options->sparse_path_value = strdup(v0);
 		return 0;
+
+	} else if (skip_prefix(arg, "combine:", &v0)) {
+		return parse_combine_filter(filter_options, v0, errbuf);
+
 	}
 	/*
 	 * Please update _git_fetch() in git-completion.bash when you
 	 * add new filters
 	 */
 
 	strbuf_addf(errbuf, "invalid filter-spec '%s'", arg);
 
 	memset(filter_options, 0, sizeof(*filter_options));
 	return 1;
 }
 
+static const char *RESERVED_NON_WS = "~`!@#$^&*()[]{}\\;'\",<>?";
+
+static int has_reserved_character(
+	struct strbuf *sub_spec, struct strbuf *errbuf)
+{
+	const char *c = sub_spec->buf;
+	while (*c) {
+		if (*c <= ' ' || strchr(RESERVED_NON_WS, *c)) {
+			strbuf_addf(errbuf,
+				    "must escape char in sub-filter-spec: '%c'",
+				    *c);
+			return 1;
+		}
+		c++;
+	}
+
+	return 0;
+}
+
+static int parse_combine_subfilter(
+	struct list_objects_filter_options *filter_options,
+	struct strbuf *subspec,
+	struct strbuf *errbuf)
+{
+	size_t new_index = filter_options->sub_nr++;
+	char *decoded;
+	int result;
+
+	ALLOC_GROW(filter_options->sub, filter_options->sub_nr,
+		   filter_options->sub_alloc);
+	memset(&filter_options->sub[new_index], 0,
+	       sizeof(*filter_options->sub));
+
+	decoded = url_percent_decode(subspec->buf);
+
+	result = has_reserved_character(subspec, errbuf) ||
+		gently_parse_list_objects_filter(
+			&filter_options->sub[new_index], decoded, errbuf);
+
+	free(decoded);
+	return result;
+}
+
+static int parse_combine_filter(
+	struct list_objects_filter_options *filter_options,
+	const char *arg,
+	struct strbuf *errbuf)
+{
+	struct strbuf **subspecs = strbuf_split_str(arg, '+', 0);
+	size_t sub;
+	int result = 0;
+
+	if (!subspecs[0]) {
+		strbuf_addf(errbuf,
+			    _("expected something after combine:"));
+		result = 1;
+		goto cleanup;
+	}
+
+	for (sub = 0; subspecs[sub] && !result; sub++) {
+		if (subspecs[sub + 1]) {
+			/*
+			 * This is not the last subspec. Remove trailing "+" so
+			 * we can parse it.
+			 */
+			size_t last = subspecs[sub]->len - 1;
+			assert(subspecs[sub]->buf[last] == '+');
+			strbuf_remove(subspecs[sub], last, 1);
+		}
+		result = parse_combine_subfilter(
+			filter_options, subspecs[sub], errbuf);
+	}
+
+	filter_options->choice = LOFC_COMBINE;
+
+cleanup:
+	strbuf_list_free(subspecs);
+	if (result) {
+		list_objects_filter_release(filter_options);
+		memset(filter_options, 0, sizeof(*filter_options));
+	}
+	return result;
+}
+
 int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 			      const char *arg)
 {
 	struct strbuf buf = STRBUF_INIT;
+	filter_options->filter_spec = strdup(arg);
 	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
 		die("%s", buf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
@@ -119,23 +212,30 @@ void expand_list_objects_filter_spec(
 	else if (filter->choice == LOFC_TREE_DEPTH)
 		strbuf_addf(expanded_spec, "tree:%lu",
 			    filter->tree_exclude_depth);
 	else
 		strbuf_addstr(expanded_spec, filter->filter_spec);
 }
 
 void list_objects_filter_release(
 	struct list_objects_filter_options *filter_options)
 {
+	size_t sub;
+
+	if (!filter_options)
+		return;
 	free(filter_options->filter_spec);
 	free(filter_options->sparse_oid_value);
 	free(filter_options->sparse_path_value);
+	for (sub = 0; sub < filter_options->sub_nr; sub++)
+		list_objects_filter_release(&filter_options->sub[sub]);
+	free(filter_options->sub);
 	memset(filter_options, 0, sizeof(*filter_options));
 }
 
 void partial_clone_register(
 	const char *remote,
 	const struct list_objects_filter_options *filter_options)
 {
 	/*
 	 * Record the name of the partial clone remote in the
 	 * config and in the global variable -- the latter is
@@ -165,15 +265,17 @@ void partial_clone_register(
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 
 	/*
 	 * Parse default value, but silently ignore it if it is invalid.
 	 */
 	if (!core_partial_clone_filter_default)
 		return;
+
+	filter_options->filter_spec = strdup(core_partial_clone_filter_default);
 	gently_parse_list_objects_filter(filter_options,
 					 core_partial_clone_filter_default,
 					 &errbuf);
 	strbuf_release(&errbuf);
 }
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index e3adc78ebf..8f08ed74a1 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -7,20 +7,21 @@
 /*
  * The list of defined filters for list-objects.
  */
 enum list_objects_filter_choice {
 	LOFC_DISABLED = 0,
 	LOFC_BLOB_NONE,
 	LOFC_BLOB_LIMIT,
 	LOFC_TREE_DEPTH,
 	LOFC_SPARSE_OID,
 	LOFC_SPARSE_PATH,
+	LOFC_COMBINE,
 	LOFC__COUNT /* must be last */
 };
 
 struct list_objects_filter_options {
 	/*
 	 * 'filter_spec' is the raw argument value given on the command line
 	 * or protocol request.  (The part after the "--keyword=".)  For
 	 * commands that launch filtering sub-processes, or for communication
 	 * over the network, don't use this value; use the result of
 	 * expand_list_objects_filter_spec() instead.
@@ -32,28 +33,38 @@ struct list_objects_filter_options {
 	 * the filtering algorithm to use.
 	 */
 	enum list_objects_filter_choice choice;
 
 	/*
 	 * Choice is LOFC_DISABLED because "--no-filter" was requested.
 	 */
 	unsigned int no_filter : 1;
 
 	/*
-	 * Parsed values (fields) from within the filter-spec.  These are
-	 * choice-specific; not all values will be defined for any given
-	 * choice.
+	 * BEGIN choice-specific parsed values from within the filter-spec. Only
+	 * some values will be defined for any given choice.
 	 */
+
 	struct object_id *sparse_oid_value;
 	char *sparse_path_value;
 	unsigned long blob_limit_value;
 	unsigned long tree_exclude_depth;
+
+	/* LOFC_COMBINE values */
+
+	/* This array contains all the subfilters which this filter combines. */
+	size_t sub_nr, sub_alloc;
+	struct list_objects_filter_options *sub;
+
+	/*
+	 * END choice-specific parsed values.
+	 */
 };
 
 /* Normalized command line arguments */
 #define CL_ARG__FILTER "filter"
 
 int parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg);
 
 int opt_parse_list_objects_filter(const struct option *opt,
diff --git a/list-objects-filter.c b/list-objects-filter.c
index 57bbf6ec1c..c8a006edf9 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -19,30 +19,45 @@
  * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
  * that have been shown, but should be revisited if they appear
  * in the traversal (until we mark it SEEN).  This is a way to
  * let us silently de-dup calls to show() in the caller.  This
  * is subtly different from the "revision.h:SHOWN" and the
  * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
  * the non-de-dup usage in pack-bitmap.c
  */
 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
 
+struct subfilter {
+	struct filter *filter;
+	struct oidset seen;
+	struct oidset omits;
+	struct object_id skip_tree;
+	unsigned is_skipping_tree : 1;
+};
+
 struct filter {
 	enum list_objects_filter_result (*filter_object_fn)(
 		struct repository *r,
 		enum list_objects_filter_situation filter_situation,
 		struct object *obj,
 		const char *pathname,
 		const char *filename,
 		struct oidset *omits,
 		void *filter_data);
 
+	/*
+	 * Optional. If this function is supplied and the filter needs to
+	 * collect omits, then this function is called once before free_fn is
+	 * called.
+	 */
+	void (*finalize_omits_fn)(struct oidset *omits, void *filter_data);
+
 	void (*free_fn)(void *filter_data);
 
 	void *filter_data;
 
 	/* If non-NULL, the filter collects a list of the omitted OIDs here. */
 	struct oidset *omits;
 };
 
 static enum list_objects_filter_result filter_blobs_none(
 	struct repository *r,
@@ -482,34 +497,176 @@ static void filter_sparse_path__init(
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_sparse;
 	filter->free_fn = filter_sparse_free;
 }
 
+/* A filter which only shows objects shown by all sub-filters. */
+struct combine_filter_data {
+	struct subfilter *sub;
+	size_t nr;
+};
+
+static int should_delegate(enum list_objects_filter_situation filter_situation,
+			   struct object *obj,
+			   struct subfilter *sub)
+{
+	if (!sub->is_skipping_tree)
+		return 1;
+	if (filter_situation == LOFS_END_TREE &&
+		oideq(&obj->oid, &sub->skip_tree)) {
+		sub->is_skipping_tree = 0;
+		return 1;
+	}
+	return 0;
+}
+
+static enum list_objects_filter_result process_subfilter(
+	struct repository *r,
+	enum list_objects_filter_situation filter_situation,
+	struct object *obj,
+	const char *pathname,
+	const char *filename,
+	struct subfilter *sub)
+{
+	enum list_objects_filter_result result;
+
+	/*
+	 * Check should_delegate before oidset_contains so that
+	 * is_skipping_tree gets unset even when the object is marked as seen.
+	 * As of this writing, no filter uses LOFR_MARK_SEEN on trees that also
+	 * uses LOFR_SKIP_TREE, so the ordering is only theoretically
+	 * important. Be cautious if you change the order of the below checks
+	 * and more filters have been added!
+	 */
+	if (!should_delegate(filter_situation, obj, sub))
+		return LOFR_ZERO;
+	if (oidset_contains(&sub->seen, &obj->oid))
+		return LOFR_ZERO;
+
+	result = list_objects_filter__filter_object(
+		r, filter_situation, obj, pathname, filename, sub->filter);
+
+	if (result & LOFR_MARK_SEEN)
+		oidset_insert(&sub->seen, &obj->oid);
+
+	if (result & LOFR_SKIP_TREE) {
+		sub->is_skipping_tree = 1;
+		sub->skip_tree = obj->oid;
+	}
+
+	return result;
+}
+
+static enum list_objects_filter_result filter_combine(
+	struct repository *r,
+	enum list_objects_filter_situation filter_situation,
+	struct object *obj,
+	const char *pathname,
+	const char *filename,
+	struct oidset *omits,
+	void *filter_data)
+{
+	struct combine_filter_data *d = filter_data;
+	enum list_objects_filter_result combined_result =
+		LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE;
+	size_t sub;
+
+	for (sub = 0; sub < d->nr; sub++) {
+		enum list_objects_filter_result sub_result = process_subfilter(
+			r, filter_situation, obj, pathname, filename,
+			&d->sub[sub]);
+		if (!(sub_result & LOFR_DO_SHOW))
+			combined_result &= ~LOFR_DO_SHOW;
+		if (!(sub_result & LOFR_MARK_SEEN))
+			combined_result &= ~LOFR_MARK_SEEN;
+		if (!d->sub[sub].is_skipping_tree)
+			combined_result &= ~LOFR_SKIP_TREE;
+	}
+
+	return combined_result;
+}
+
+static void filter_combine__free(void *filter_data)
+{
+	struct combine_filter_data *d = filter_data;
+	size_t sub;
+	for (sub = 0; sub < d->nr; sub++) {
+		list_objects_filter__free(d->sub[sub].filter);
+		oidset_clear(&d->sub[sub].seen);
+		if (d->sub[sub].omits.set.size)
+			BUG("expected oidset to be cleared already");
+	}
+	free(d->sub);
+}
+
+static void add_all(struct oidset *dest, struct oidset *src) {
+	struct oidset_iter iter;
+	struct object_id *src_oid;
+
+	oidset_iter_init(src, &iter);
+	while ((src_oid = oidset_iter_next(&iter)) != NULL)
+		oidset_insert(dest, src_oid);
+}
+
+static void filter_combine__finalize_omits(
+	struct oidset *omits,
+	void *filter_data)
+{
+	struct combine_filter_data *d = filter_data;
+	size_t sub;
+
+	for (sub = 0; sub < d->nr; sub++) {
+		add_all(omits, &d->sub[sub].omits);
+		oidset_clear(&d->sub[sub].omits);
+	}
+}
+
+static void filter_combine__init(
+	struct list_objects_filter_options *filter_options,
+	struct filter* filter)
+{
+	struct combine_filter_data *d = xcalloc(1, sizeof(*d));
+	size_t sub;
+
+	d->nr = filter_options->sub_nr;
+	d->sub = xcalloc(d->nr, sizeof(*d->sub));
+	for (sub = 0; sub < d->nr; sub++)
+		d->sub[sub].filter = list_objects_filter__init(
+			filter->omits ? &d->sub[sub].omits : NULL,
+			&filter_options->sub[sub]);
+
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_combine;
+	filter->free_fn = filter_combine__free;
+	filter->finalize_omits_fn = filter_combine__finalize_omits;
+}
+
 typedef void (*filter_init_fn)(
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter);
 
 /*
  * Must match "enum list_objects_filter_choice".
  */
 static filter_init_fn s_filters[] = {
 	NULL,
 	filter_blobs_none__init,
 	filter_blobs_limit__init,
 	filter_trees_depth__init,
 	filter_sparse_oid__init,
 	filter_sparse_path__init,
+	filter_combine__init,
 };
 
 struct filter *list_objects_filter__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options)
 {
 	struct filter *filter;
 	filter_init_fn init_fn;
 
 	assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
@@ -547,13 +704,15 @@ enum list_objects_filter_result list_objects_filter__filter_object(
 	 */
 	if (filter_situation == LOFS_END_TREE)
 		return 0;
 	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
 void list_objects_filter__free(struct filter *filter)
 {
 	if (!filter)
 		return;
+	if (filter->finalize_omits_fn && filter->omits)
+		filter->finalize_omits_fn(filter->omits, filter->filter_data);
 	filter->free_fn(filter->filter_data);
 	free(filter);
 }
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index 9c11427719..a87341e051 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -284,21 +284,33 @@ test_expect_success 'verify tree:0 includes trees in "filtered" output' '
 # Make sure tree:0 does not iterate through any trees.
 
 test_expect_success 'verify skipping tree iteration when not collecting omits' '
 	GIT_TRACE=1 git -C r3 rev-list \
 		--objects --filter=tree:0 HEAD 2>filter_trace &&
 	grep "Skipping contents of tree [.][.][.]" filter_trace >actual &&
 	# One line for each commit traversed.
 	test_line_count = 2 actual &&
 
 	# Make sure no other trees were considered besides the root.
-	! grep "Skipping contents of tree [^.]" filter_trace
+	! grep "Skipping contents of tree [^.]" filter_trace &&
+
+	# Try this again with "combine:". If both sub-filters are skipping
+	# trees, the composite filter should also skip trees. This is not
+	# important unless the user does combine:tree:X+tree:Y or another filter
+	# besides "tree:" is implemented in the future which can skip trees.
+	GIT_TRACE=1 git -C r3 rev-list \
+		--objects --filter=combine:tree:1+tree:3 HEAD 2>filter_trace &&
+
+	# Only skip the dir1/ tree, which is shared between the two commits.
+	grep "Skipping contents of tree " filter_trace >actual &&
+	test_write_lines "Skipping contents of tree dir1/..." >expected &&
+	test_cmp expected actual
 '
 
 # Test tree:# filters.
 
 expect_has () {
 	commit=$1 &&
 	name=$2 &&
 
 	hash=$(git -C r3 rev-parse $commit:$name) &&
 	grep "^$hash $name$" actual
@@ -336,20 +348,126 @@ test_expect_success 'verify tree:3 includes everything expected' '
 	expect_has HEAD dir1/sparse1 &&
 	expect_has HEAD dir1/sparse2 &&
 	expect_has HEAD pattern &&
 	expect_has HEAD sparse1 &&
 	expect_has HEAD sparse2 &&
 
 	# There are also 2 commit objects
 	test_line_count = 10 actual
 '
 
+test_expect_success 'combine:... for a simple combination' '
+	git -C r3 rev-list --objects --filter=combine:tree:2+blob:none HEAD \
+		>actual &&
+
+	expect_has HEAD "" &&
+	expect_has HEAD~1 "" &&
+	expect_has HEAD dir1 &&
+
+	# There are also 2 commit objects
+	test_line_count = 5 actual
+'
+
+test_expect_success 'combine:... with URL encoding' '
+	git -C r3 rev-list --objects \
+		--filter=combine:tree%3a2+blob:%6Eon%65 HEAD >actual &&
+
+	expect_has HEAD "" &&
+	expect_has HEAD~1 "" &&
+	expect_has HEAD dir1 &&
+
+	# There are also 2 commit objects
+	test_line_count = 5 actual
+'
+
+expect_invalid_filter_spec () {
+	spec="$1" &&
+	err="$2" &&
+
+	test_must_fail git -C r3 rev-list --objects --filter="$spec" HEAD \
+		>actual 2>actual_stderr &&
+	test_must_be_empty actual &&
+	test_i18ngrep "$err" actual_stderr
+}
+
+test_expect_success 'combine:... while URL-encoding things that should not be' '
+	expect_invalid_filter_spec combine%3Atree:2+blob:none \
+		"invalid filter-spec"
+'
+
+test_expect_success 'combine: with nothing after the :' '
+	expect_invalid_filter_spec combine: "expected something after combine:"
+'
+
+test_expect_success 'parse error in first sub-filter in combine:' '
+	expect_invalid_filter_spec combine:tree:asdf+blob:none \
+		"expected .tree:<depth>."
+'
+
+test_expect_success 'combine:... with non-encoded reserved chars' '
+	expect_invalid_filter_spec combine:tree:2+sparse:@xyz \
+		"must escape char in sub-filter-spec: .@." &&
+	expect_invalid_filter_spec combine:tree:2+sparse:\` \
+		"must escape char in sub-filter-spec: .\`." &&
+	expect_invalid_filter_spec combine:tree:2+sparse:~abc \
+		"must escape char in sub-filter-spec: .\~."
+'
+
+test_expect_success 'validate err msg for "combine:<valid-filter>+"' '
+	expect_invalid_filter_spec combine:tree:2+ "expected .tree:<depth>."
+'
+
+test_expect_success 'combine:... with edge-case hex digits: Ff Aa 0 9' '
+	git -C r3 rev-list --objects --filter="combine:tree:2+bl%6Fb:n%6fne" \
+		HEAD >actual &&
+	test_line_count = 5 actual &&
+	git -C r3 rev-list --objects --filter="combine:tree%3A2+blob%3anone" \
+		HEAD >actual &&
+	test_line_count = 5 actual &&
+	git -C r3 rev-list --objects --filter="combine:tree:%30" HEAD >actual &&
+	test_line_count = 2 actual &&
+	git -C r3 rev-list --objects --filter="combine:tree:%39+blob:none" \
+		HEAD >actual &&
+	test_line_count = 5 actual
+'
+
+test_expect_success 'add a sparse pattern blob whose path has reserved chars' '
+	cp r3/pattern r3/pattern1+renamed% &&
+	git -C r3 add pattern1+renamed% &&
+	git -C r3 commit -m "add sparse pattern file with reserved chars"
+'
+
+test_expect_success 'combine:... with more than two sub-filters' '
+	git -C r3 rev-list --objects \
+		--filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern \
+		HEAD >actual &&
+
+	expect_has HEAD "" &&
+	expect_has HEAD~1 "" &&
+	expect_has HEAD~2 "" &&
+	expect_has HEAD dir1 &&
+	expect_has HEAD dir1/sparse1 &&
+	expect_has HEAD dir1/sparse2 &&
+
+	# Should also have 3 commits
+	test_line_count = 9 actual &&
+
+	# Try again, this time making sure the last sub-filter is only
+	# URL-decoded once.
+	cp actual expect &&
+
+	git -C r3 rev-list --objects \
+		--filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern1%2brenamed%25 \
+		HEAD >actual &&
+	test_cmp expect actual
+'
+
 # Test provisional omit collection logic with a repo that has objects appearing
 # at multiple depths - first deeper than the filter's threshold, then shallow.
 
 test_expect_success 'setup r4' '
 	git init r4 &&
 
 	echo foo > r4/foo &&
 	mkdir r4/subdir &&
 	echo bar > r4/subdir/bar &&
 
@@ -379,20 +497,51 @@ test_expect_success 'test tree:# filter provisional omit for blob and tree' '
 
 test_expect_success 'verify skipping tree iteration when collecting omits' '
 	GIT_TRACE=1 git -C r4 rev-list --filter-print-omitted \
 		--objects --filter=tree:0 HEAD 2>filter_trace &&
 	grep "^Skipping contents of tree " filter_trace >actual &&
 
 	echo "Skipping contents of tree subdir/..." >expect &&
 	test_cmp expect actual
 '
 
+test_expect_success 'setup r5' '
+	git init r5 &&
+	mkdir -p r5/subdir &&
+
+	echo 1     >r5/short-root          &&
+	echo 12345 >r5/long-root           &&
+	echo a     >r5/subdir/short-subdir &&
+	echo abcde >r5/subdir/long-subdir  &&
+
+	git -C r5 add short-root long-root subdir &&
+	git -C r5 commit -m "commit msg"
+'
+
+test_expect_success 'verify collecting omits in combined: filter' '
+	# Note that this test guards against the naive implementation of simply
+	# giving both filters the same "omits" set and expecting it to
+	# automatically merge them.
+	git -C r5 rev-list --objects --quiet --filter-print-omitted \
+		--filter=combine:tree:2+blob:limit=3 HEAD >actual &&
+
+	# Expect 0 trees/commits, 3 blobs omitted (all blobs except short-root)
+	omitted_1=$(echo 12345 | git hash-object --stdin) &&
+	omitted_2=$(echo a     | git hash-object --stdin) &&
+	omitted_3=$(echo abcde | git hash-object --stdin) &&
+
+	grep ~$omitted_1 actual &&
+	grep ~$omitted_2 actual &&
+	grep ~$omitted_3 actual &&
+	test_line_count = 3 actual
+'
+
 # Test tree:<depth> where a tree is iterated to twice - once where a subentry is
 # too deep to be included, and again where the blob inside it is shallow enough
 # to be included. This makes sure we don't use LOFR_MARK_SEEN incorrectly (we
 # can't use it because a tree can be iterated over again at a lower depth).
 
 test_expect_success 'tree:<depth> where we iterate over tree at two levels' '
 	git init r5 &&
 
 	mkdir -p r5/a/subdir/b &&
 	echo foo > r5/a/subdir/b/foo &&
diff --git a/url.c b/url.c
index 25576c390b..bdede647bc 100644
--- a/url.c
+++ b/url.c
@@ -79,20 +79,26 @@ char *url_decode_mem(const char *url, int len)
 
 	/* Skip protocol part if present */
 	if (colon && url < colon) {
 		strbuf_add(&out, url, colon - url);
 		len -= colon - url;
 		url = colon;
 	}
 	return url_decode_internal(&url, len, NULL, &out, 0);
 }
 
+char *url_percent_decode(const char *encoded)
+{
+	struct strbuf out = STRBUF_INIT;
+	return url_decode_internal(&encoded, strlen(encoded), NULL, &out, 0);
+}
+
 char *url_decode_parameter_name(const char **query)
 {
 	struct strbuf out = STRBUF_INIT;
 	return url_decode_internal(query, -1, "&=", &out, 1);
 }
 
 char *url_decode_parameter_value(const char **query)
 {
 	struct strbuf out = STRBUF_INIT;
 	return url_decode_internal(query, -1, "&", &out, 1);
diff --git a/url.h b/url.h
index 00b7d58c33..2a27c34277 100644
--- a/url.h
+++ b/url.h
@@ -1,16 +1,24 @@
 #ifndef URL_H
 #define URL_H
 
 struct strbuf;
 
 int is_url(const char *url);
 int is_urlschemechar(int first_flag, int ch);
 char *url_decode(const char *url);
 char *url_decode_mem(const char *url, int len);
+
+/*
+ * Similar to the url_decode_{,mem} methods above, but doesn't assume there
+ * is a scheme followed by a : at the start of the string. Instead, %-sequences
+ * before any : are also parsed.
+ */
+char *url_percent_decode(const char *encoded);
+
 char *url_decode_parameter_name(const char **query);
 char *url_decode_parameter_value(const char **query);
 
 void end_url_with_slash(struct strbuf *buf, const char *url);
 void str_end_url_with_slash(const char *url, char **dest);
 
 #endif /* URL_H */
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 05/10] list-objects-filter-options: move error check up
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
                     ` (3 preceding siblings ...)
  2019-06-13 21:51   ` [PATCH v3 04/10] list-objects-filter: implement composite filters Matthew DeVore
@ 2019-06-13 21:51   ` Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 06/10] list-objects-filter-options: make filter_spec a string_list Matthew DeVore
                     ` (5 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-13 21:51 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

Move the check that filter_options->choice is already set higher up the
call stack. This can only be set when the gentle parse function is
called from one of the two call sites.

This is important because in an upcoming patch this may or may not be an
error, and whether it is an error is only known to the
parse_list_objects_filter function.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter-options.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index ffbadf337b..5ff5135a91 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -28,25 +28,22 @@ static int parse_combine_filter(
  * expand_list_objects_filter_spec() first).  We also "intern" the arg for the
  * convenience of the current command.
  */
 static int gently_parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
 	struct strbuf *errbuf)
 {
 	const char *v0;
 
-	if (filter_options->choice) {
-		strbuf_addstr(
-			errbuf, _("multiple filter-specs cannot be combined"));
-		return 1;
-	}
+	if (filter_options->choice)
+		BUG("filter_options already populated");
 
 	if (!strcmp(arg, "blob:none")) {
 		filter_options->choice = LOFC_BLOB_NONE;
 		return 0;
 
 	} else if (skip_prefix(arg, "blob:limit=", &v0)) {
 		if (git_parse_ulong(v0, &filter_options->blob_limit_value)) {
 			filter_options->choice = LOFC_BLOB_LIMIT;
 			return 0;
 		}
@@ -175,20 +172,22 @@ static int parse_combine_filter(
 		list_objects_filter_release(filter_options);
 		memset(filter_options, 0, sizeof(*filter_options));
 	}
 	return result;
 }
 
 int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 			      const char *arg)
 {
 	struct strbuf buf = STRBUF_INIT;
+	if (filter_options->choice)
+		die(_("multiple filter-specs cannot be combined"));
 	filter_options->filter_spec = strdup(arg);
 	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
 		die("%s", buf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 06/10] list-objects-filter-options: make filter_spec a string_list
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
                     ` (4 preceding siblings ...)
  2019-06-13 21:51   ` [PATCH v3 05/10] list-objects-filter-options: move error check up Matthew DeVore
@ 2019-06-13 21:51   ` Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 07/10] strbuf: give URL-encoding API a char predicate fn Matthew DeVore
                     ` (4 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-13 21:51 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore, Junio C Hamano

Make the filter_spec string a string_list rather than a raw C string.
The list of strings must be concatenated to make a complete
filter_spec. A future patch will use this capability to build "combine:"
filter specs gradually.

A strbuf would seem to be a more natural choice for this object, but it
unfortunately requires initialization besides just zero'ing out the
memory.  This results in all container structs, and all containers of
those structs, etc., to also require initialization. Initializing them
all would be more cumbersome than simply using a string_list, which
behaves properly when its contents are zero'd.

For the purposes of code simplification, change behavior in how filter
specs are conveyed over the protocol: do not normalize the tree:<depth>
filter specs since there should be no server in existence that supports
tree:# but not tree:#k etc.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 builtin/clone.c                     |  8 ++---
 builtin/fetch.c                     |  9 ++----
 builtin/rev-list.c                  |  6 ++--
 fetch-pack.c                        | 20 ++++--------
 list-objects-filter-options.c       | 50 ++++++++++++++++++++---------
 list-objects-filter-options.h       | 27 +++++++++++-----
 t/t6112-rev-list-filters-objects.sh |  7 ----
 transport-helper.c                  | 10 ++----
 upload-pack.c                       | 11 +++----
 9 files changed, 78 insertions(+), 70 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 85b0d3155d..81e6010779 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1135,27 +1135,25 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 		transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
 
 	if (option_upload_pack)
 		transport_set_option(transport, TRANS_OPT_UPLOADPACK,
 				     option_upload_pack);
 
 	if (server_options.nr)
 		transport->server_options = &server_options;
 
 	if (filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&filter_options,
-						&expanded_filter_spec);
+		const char *spec =
+			expand_list_objects_filter_spec(&filter_options);
 		transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
-				     expanded_filter_spec.buf);
+				     spec);
 		transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
-		strbuf_release(&expanded_filter_spec);
 	}
 
 	if (transport->smart_options && !deepen && !filter_options.choice)
 		transport->smart_options->check_self_contained_and_connected = 1;
 
 
 	argv_array_push(&ref_prefixes, "HEAD");
 	refspec_ref_prefixes(&remote->fetch, &ref_prefixes);
 	if (option_branch)
 		expand_ref_prefix(&ref_prefixes, option_branch);
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 4ba63d5ac6..dee89e1a19 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1181,27 +1181,24 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
 	if (deepen && deepen_since)
 		set_option(transport, TRANS_OPT_DEEPEN_SINCE, deepen_since);
 	if (deepen && deepen_not.nr)
 		set_option(transport, TRANS_OPT_DEEPEN_NOT,
 			   (const char *)&deepen_not);
 	if (deepen_relative)
 		set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes");
 	if (update_shallow)
 		set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
 	if (filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&filter_options,
-						&expanded_filter_spec);
-		set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
-			   expanded_filter_spec.buf);
+		const char *spec =
+			expand_list_objects_filter_spec(&filter_options);
+		set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, spec);
 		set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
-		strbuf_release(&expanded_filter_spec);
 	}
 	if (negotiation_tip.nr) {
 		if (transport->smart_options)
 			add_negotiation_tips(transport->smart_options);
 		else
 			warning("Ignoring --negotiation-tip because the protocol does not support it.");
 	}
 	return transport;
 }
 
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 9f31837d30..823e87c1c9 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -459,22 +459,24 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 			show_progress = arg;
 			continue;
 		}
 
 		if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) {
 			parse_list_objects_filter(&filter_options, arg);
 			if (filter_options.choice && !revs.blob_objects)
 				die(_("object filtering requires --objects"));
 			if (filter_options.choice == LOFC_SPARSE_OID &&
 			    !filter_options.sparse_oid_value)
-				die(_("invalid sparse value '%s'"),
-				    filter_options.filter_spec);
+				die(
+					_("invalid sparse value '%s'"),
+					list_objects_filter_spec(
+						&filter_options));
 			continue;
 		}
 		if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) {
 			list_objects_filter_set_no_filter(&filter_options);
 			continue;
 		}
 		if (!strcmp(arg, "--filter-print-omitted")) {
 			arg_print_omitted = 1;
 			continue;
 		}
diff --git a/fetch-pack.c b/fetch-pack.c
index 1c10f54e78..72e13b0a1d 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -332,26 +332,23 @@ static int find_common(struct fetch_negotiator *negotiator,
 		packet_buf_write(&req_buf, "deepen-since %"PRItime, max_age);
 	}
 	if (args->deepen_not) {
 		int i;
 		for (i = 0; i < args->deepen_not->nr; i++) {
 			struct string_list_item *s = args->deepen_not->items + i;
 			packet_buf_write(&req_buf, "deepen-not %s", s->string);
 		}
 	}
 	if (server_supports_filtering && args->filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&args->filter_options,
-						&expanded_filter_spec);
-		packet_buf_write(&req_buf, "filter %s",
-				 expanded_filter_spec.buf);
-		strbuf_release(&expanded_filter_spec);
+		const char *spec =
+			expand_list_objects_filter_spec(&args->filter_options);
+		packet_buf_write(&req_buf, "filter %s", spec);
 	}
 	packet_buf_flush(&req_buf);
 	state_len = req_buf.len;
 
 	if (args->deepen) {
 		const char *arg;
 		struct object_id oid;
 
 		send_request(args, fd[1], &req_buf);
 		while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
@@ -1092,21 +1089,21 @@ static int add_haves(struct fetch_negotiator *negotiator,
 		ret = 1;
 	}
 
 	/* Increase haves to send on next round */
 	*haves_to_send = next_flush(1, *haves_to_send);
 
 	return ret;
 }
 
 static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
-			      const struct fetch_pack_args *args,
+			      struct fetch_pack_args *args,
 			      const struct ref *wants, struct oidset *common,
 			      int *haves_to_send, int *in_vain,
 			      int sideband_all)
 {
 	int ret = 0;
 	struct strbuf req_buf = STRBUF_INIT;
 
 	if (server_supports_v2("fetch", 1))
 		packet_buf_write(&req_buf, "command=fetch");
 	if (server_supports_v2("agent", 0))
@@ -1133,27 +1130,24 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 
 	/* Add shallow-info and deepen request */
 	if (server_supports_feature("fetch", "shallow", 0))
 		add_shallow_requests(&req_buf, args);
 	else if (is_repository_shallow(the_repository) || args->deepen)
 		die(_("Server does not support shallow requests"));
 
 	/* Add filter */
 	if (server_supports_feature("fetch", "filter", 0) &&
 	    args->filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
+		const char *spec =
+			expand_list_objects_filter_spec(&args->filter_options);
 		print_verbose(args, _("Server supports filter"));
-		expand_list_objects_filter_spec(&args->filter_options,
-						&expanded_filter_spec);
-		packet_buf_write(&req_buf, "filter %s",
-				 expanded_filter_spec.buf);
-		strbuf_release(&expanded_filter_spec);
+		packet_buf_write(&req_buf, "filter %s", spec);
 	} else if (args->filter_options.choice) {
 		warning("filtering not recognized by server, ignoring");
 	}
 
 	/* add wants */
 	add_wants(args->no_dependents, wants, &req_buf);
 
 	if (args->no_dependents) {
 		packet_buf_write(&req_buf, "done");
 		ret = 1;
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 5ff5135a91..c9dd41cd06 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -174,73 +174,90 @@ static int parse_combine_filter(
 	}
 	return result;
 }
 
 int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 			      const char *arg)
 {
 	struct strbuf buf = STRBUF_INIT;
 	if (filter_options->choice)
 		die(_("multiple filter-specs cannot be combined"));
-	filter_options->filter_spec = strdup(arg);
+	string_list_append(&filter_options->filter_spec, xstrdup(arg));
 	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
 		die("%s", buf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
 	if (unset || !arg) {
 		list_objects_filter_set_no_filter(filter_options);
 		return 0;
 	}
 
 	return parse_list_objects_filter(filter_options, arg);
 }
 
-void expand_list_objects_filter_spec(
-	const struct list_objects_filter_options *filter,
-	struct strbuf *expanded_spec)
+const char *list_objects_filter_spec(struct list_objects_filter_options *filter)
 {
-	strbuf_init(expanded_spec, strlen(filter->filter_spec));
-	if (filter->choice == LOFC_BLOB_LIMIT)
-		strbuf_addf(expanded_spec, "blob:limit=%lu",
+	if (!filter->filter_spec.nr)
+		BUG("no filter_spec available for this filter");
+	if (filter->filter_spec.nr != 1) {
+		struct strbuf concatted = STRBUF_INIT;
+		strbuf_add_separated_string_list(
+			&concatted, "", &filter->filter_spec);
+		string_list_clear(&filter->filter_spec, /*free_util=*/0);
+		string_list_append(
+			&filter->filter_spec, strbuf_detach(&concatted, NULL));
+	}
+
+	return filter->filter_spec.items[0].string;
+}
+
+const char *expand_list_objects_filter_spec(
+	struct list_objects_filter_options *filter)
+{
+	if (filter->choice == LOFC_BLOB_LIMIT) {
+		struct strbuf expanded_spec = STRBUF_INIT;
+		strbuf_addf(&expanded_spec, "blob:limit=%lu",
 			    filter->blob_limit_value);
-	else if (filter->choice == LOFC_TREE_DEPTH)
-		strbuf_addf(expanded_spec, "tree:%lu",
-			    filter->tree_exclude_depth);
-	else
-		strbuf_addstr(expanded_spec, filter->filter_spec);
+		string_list_clear(&filter->filter_spec, /*free_util=*/0);
+		string_list_append(
+			&filter->filter_spec,
+			strbuf_detach(&expanded_spec, NULL));
+	}
+
+	return list_objects_filter_spec(filter);
 }
 
 void list_objects_filter_release(
 	struct list_objects_filter_options *filter_options)
 {
 	size_t sub;
 
 	if (!filter_options)
 		return;
-	free(filter_options->filter_spec);
+	string_list_clear(&filter_options->filter_spec, /*free_util=*/0);
 	free(filter_options->sparse_oid_value);
 	free(filter_options->sparse_path_value);
 	for (sub = 0; sub < filter_options->sub_nr; sub++)
 		list_objects_filter_release(&filter_options->sub[sub]);
 	free(filter_options->sub);
 	memset(filter_options, 0, sizeof(*filter_options));
 }
 
 void partial_clone_register(
 	const char *remote,
-	const struct list_objects_filter_options *filter_options)
+	struct list_objects_filter_options *filter_options)
 {
 	/*
 	 * Record the name of the partial clone remote in the
 	 * config and in the global variable -- the latter is
 	 * used throughout to indicate that partial clone is
 	 * enabled and to expect missing objects.
 	 */
 	if (repository_format_partial_clone &&
 	    *repository_format_partial_clone &&
 	    strcmp(remote, repository_format_partial_clone))
@@ -249,32 +266,33 @@ void partial_clone_register(
 	git_config_set("core.repositoryformatversion", "1");
 	git_config_set("extensions.partialclone", remote);
 
 	repository_format_partial_clone = xstrdup(remote);
 
 	/*
 	 * Record the initial filter-spec in the config as
 	 * the default for subsequent fetches from this remote.
 	 */
 	core_partial_clone_filter_default =
-		xstrdup(filter_options->filter_spec);
+		xstrdup(expand_list_objects_filter_spec(filter_options));
 	git_config_set("core.partialclonefilter",
 		       core_partial_clone_filter_default);
 }
 
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 
 	/*
 	 * Parse default value, but silently ignore it if it is invalid.
 	 */
 	if (!core_partial_clone_filter_default)
 		return;
 
-	filter_options->filter_spec = strdup(core_partial_clone_filter_default);
+	string_list_append(&filter_options->filter_spec,
+			   core_partial_clone_filter_default);
 	gently_parse_list_objects_filter(filter_options,
 					 core_partial_clone_filter_default,
 					 &errbuf);
 	strbuf_release(&errbuf);
 }
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index 8f08ed74a1..1786c80eb4 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -1,15 +1,15 @@
 #ifndef LIST_OBJECTS_FILTER_OPTIONS_H
 #define LIST_OBJECTS_FILTER_OPTIONS_H
 
 #include "parse-options.h"
-#include "strbuf.h"
+#include "string-list.h"
 
 /*
  * The list of defined filters for list-objects.
  */
 enum list_objects_filter_choice {
 	LOFC_DISABLED = 0,
 	LOFC_BLOB_NONE,
 	LOFC_BLOB_LIMIT,
 	LOFC_TREE_DEPTH,
 	LOFC_SPARSE_OID,
@@ -18,22 +18,24 @@ enum list_objects_filter_choice {
 	LOFC__COUNT /* must be last */
 };
 
 struct list_objects_filter_options {
 	/*
 	 * 'filter_spec' is the raw argument value given on the command line
 	 * or protocol request.  (The part after the "--keyword=".)  For
 	 * commands that launch filtering sub-processes, or for communication
 	 * over the network, don't use this value; use the result of
 	 * expand_list_objects_filter_spec() instead.
+	 * To get the raw filter spec given by the user, use the result of
+	 * list_objects_filter_spec().
 	 */
-	char *filter_spec;
+	struct string_list filter_spec;
 
 	/*
 	 * 'choice' is determined by parsing the filter-spec.  This indicates
 	 * the filtering algorithm to use.
 	 */
 	enum list_objects_filter_choice choice;
 
 	/*
 	 * Choice is LOFC_DISABLED because "--no-filter" was requested.
 	 */
@@ -71,35 +73,44 @@ int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset);
 
 #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
 	{ OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \
 	  N_("object filtering"), 0, \
 	  opt_parse_list_objects_filter }
 
 /*
  * Translates abbreviated numbers in the filter's filter_spec into their
  * fully-expanded forms (e.g., "limit:blob=1k" becomes "limit:blob=1024").
+ * Returns a string owned by the list_objects_filter_options object.
  *
- * This form should be used instead of the raw filter_spec field when
- * communicating with a remote process or subprocess.
+ * This form should be used instead of the raw list_objects_filter_spec()
+ * value when communicating with a remote process or subprocess.
  */
-void expand_list_objects_filter_spec(
-	const struct list_objects_filter_options *filter,
-	struct strbuf *expanded_spec);
+const char *expand_list_objects_filter_spec(
+	struct list_objects_filter_options *filter);
+
+/*
+ * Returns the filter spec string more or less in the form in which the user
+ * entered it. This form of the filter_spec can be used in user-facing
+ * messages.  Returns a string owned by the list_objects_filter_options
+ * object.
+ */
+const char *list_objects_filter_spec(
+	struct list_objects_filter_options *filter);
 
 void list_objects_filter_release(
 	struct list_objects_filter_options *filter_options);
 
 static inline void list_objects_filter_set_no_filter(
 	struct list_objects_filter_options *filter_options)
 {
 	list_objects_filter_release(filter_options);
 	filter_options->no_filter = 1;
 }
 
 void partial_clone_register(
 	const char *remote,
-	const struct list_objects_filter_options *filter_options);
+	struct list_objects_filter_options *filter_options);
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options);
 
 #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index a87341e051..4523c8f066 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -596,18 +596,11 @@ test_expect_success 'rev-list W/ missing=allow-any' '
 # Test expansion of filter specs.
 
 test_expect_success 'expand blob limit in protocol' '
 	git -C r2 config --local uploadpack.allowfilter 1 &&
 	GIT_TRACE_PACKET="$(pwd)/trace" git -c protocol.version=2 clone \
 		--filter=blob:limit=1k "file://$(pwd)/r2" limit &&
 	! grep "blob:limit=1k" trace &&
 	grep "blob:limit=1024" trace
 '
 
-test_expect_success 'expand tree depth limit in protocol' '
-	GIT_TRACE_PACKET="$(pwd)/tree_trace" git -c protocol.version=2 clone \
-		--filter=tree:0k "file://$(pwd)/r2" tree &&
-	! grep "tree:0k" tree_trace &&
-	grep "tree:0" tree_trace
-'
-
 test_done
diff --git a/transport-helper.c b/transport-helper.c
index cec83bd663..d6313ef9f5 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -675,27 +675,23 @@ static int fetch(struct transport *transport,
 	    data->transport_options.check_self_contained_and_connected)
 		set_helper_option(transport, "check-connectivity", "true");
 
 	if (transport->cloning)
 		set_helper_option(transport, "cloning", "true");
 
 	if (data->transport_options.update_shallow)
 		set_helper_option(transport, "update-shallow", "true");
 
 	if (data->transport_options.filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(
-			&data->transport_options.filter_options,
-			&expanded_filter_spec);
-		set_helper_option(transport, "filter",
-				  expanded_filter_spec.buf);
-		strbuf_release(&expanded_filter_spec);
+		const char *spec = expand_list_objects_filter_spec(
+			&data->transport_options.filter_options);
+		set_helper_option(transport, "filter", spec);
 	}
 
 	if (data->transport_options.negotiation_tips)
 		warning("Ignoring --negotiation-tip because the protocol does not support it.");
 
 	if (data->fetch)
 		return fetch_with_fetch(transport, nr_heads, to_fetch);
 
 	if (data->import)
 		return fetch_with_import(transport, nr_heads, to_fetch);
diff --git a/upload-pack.c b/upload-pack.c
index 24298913c0..a74d293fef 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -133,32 +133,31 @@ static void create_pack_file(const struct object_array *have_obj,
 
 	argv_array_push(&pack_objects.args, "--stdout");
 	if (shallow_nr)
 		argv_array_push(&pack_objects.args, "--shallow");
 	if (!no_progress)
 		argv_array_push(&pack_objects.args, "--progress");
 	if (use_ofs_delta)
 		argv_array_push(&pack_objects.args, "--delta-base-offset");
 	if (use_include_tag)
 		argv_array_push(&pack_objects.args, "--include-tag");
-	if (filter_options.filter_spec) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&filter_options,
-						&expanded_filter_spec);
+	if (filter_options.choice) {
+		const char *spec =
+			expand_list_objects_filter_spec(&filter_options);
 		if (pack_objects.use_shell) {
 			struct strbuf buf = STRBUF_INIT;
-			sq_quote_buf(&buf, expanded_filter_spec.buf);
+			sq_quote_buf(&buf, spec);
 			argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf);
 			strbuf_release(&buf);
 		} else {
 			argv_array_pushf(&pack_objects.args, "--filter=%s",
-					 expanded_filter_spec.buf);
+					 spec);
 		}
 	}
 
 	pack_objects.in = -1;
 	pack_objects.out = -1;
 	pack_objects.err = -1;
 
 	if (start_command(&pack_objects))
 		die("git upload-pack: unable to fork git-pack-objects");
 
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 07/10] strbuf: give URL-encoding API a char predicate fn
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
                     ` (5 preceding siblings ...)
  2019-06-13 21:51   ` [PATCH v3 06/10] list-objects-filter-options: make filter_spec a string_list Matthew DeVore
@ 2019-06-13 21:51   ` Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 08/10] list-objects-filter-options: allow mult. --filter Matthew DeVore
                     ` (3 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-13 21:51 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

Allow callers to specify exactly what characters need to be URL-encoded
and which do not. This new API will be taken advantage of in a patch
later in this set.

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 credential-store.c |  9 +++++----
 http.c             |  6 ++++--
 strbuf.c           | 15 ++++++++-------
 strbuf.h           |  7 ++++++-
 4 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/credential-store.c b/credential-store.c
index ac295420dd..c010497cb2 100644
--- a/credential-store.c
+++ b/credential-store.c
@@ -65,29 +65,30 @@ static void rewrite_credential_file(const char *fn, struct credential *c,
 	parse_credential_file(fn, c, NULL, print_line);
 	if (commit_lock_file(&credential_lock) < 0)
 		die_errno("unable to write credential store");
 }
 
 static void store_credential_file(const char *fn, struct credential *c)
 {
 	struct strbuf buf = STRBUF_INIT;
 
 	strbuf_addf(&buf, "%s://", c->protocol);
-	strbuf_addstr_urlencode(&buf, c->username, 1);
+	strbuf_addstr_urlencode(&buf, c->username, is_rfc3986_unreserved);
 	strbuf_addch(&buf, ':');
-	strbuf_addstr_urlencode(&buf, c->password, 1);
+	strbuf_addstr_urlencode(&buf, c->password, is_rfc3986_unreserved);
 	strbuf_addch(&buf, '@');
 	if (c->host)
-		strbuf_addstr_urlencode(&buf, c->host, 1);
+		strbuf_addstr_urlencode(&buf, c->host, is_rfc3986_unreserved);
 	if (c->path) {
 		strbuf_addch(&buf, '/');
-		strbuf_addstr_urlencode(&buf, c->path, 0);
+		strbuf_addstr_urlencode(&buf, c->path,
+					is_rfc3986_reserved_or_unreserved);
 	}
 
 	rewrite_credential_file(fn, c, &buf);
 	strbuf_release(&buf);
 }
 
 static void store_credential(const struct string_list *fns, struct credential *c)
 {
 	struct string_list_item *fn;
 
diff --git a/http.c b/http.c
index 27aa0a3192..938b9e55af 100644
--- a/http.c
+++ b/http.c
@@ -506,23 +506,25 @@ static void var_override(const char **var, char *value)
 static void set_proxyauth_name_password(CURL *result)
 {
 #if LIBCURL_VERSION_NUM >= 0x071301
 		curl_easy_setopt(result, CURLOPT_PROXYUSERNAME,
 			proxy_auth.username);
 		curl_easy_setopt(result, CURLOPT_PROXYPASSWORD,
 			proxy_auth.password);
 #else
 		struct strbuf s = STRBUF_INIT;
 
-		strbuf_addstr_urlencode(&s, proxy_auth.username, 1);
+		strbuf_addstr_urlencode(&s, proxy_auth.username,
+					is_rfc3986_unreserved);
 		strbuf_addch(&s, ':');
-		strbuf_addstr_urlencode(&s, proxy_auth.password, 1);
+		strbuf_addstr_urlencode(&s, proxy_auth.password,
+					is_rfc3986_unreserved);
 		curl_proxyuserpwd = strbuf_detach(&s, NULL);
 		curl_easy_setopt(result, CURLOPT_PROXYUSERPWD, curl_proxyuserpwd);
 #endif
 }
 
 static void init_curl_proxy_auth(CURL *result)
 {
 	if (proxy_auth.username) {
 		if (!proxy_auth.password)
 			credential_fill(&proxy_auth);
diff --git a/strbuf.c b/strbuf.c
index 0e18b259ce..60ab5144f2 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -767,55 +767,56 @@ void strbuf_addstr_xml_quoted(struct strbuf *buf, const char *s)
 		case '&':
 			strbuf_addstr(buf, "&amp;");
 			break;
 		case 0:
 			return;
 		}
 		s++;
 	}
 }
 
-static int is_rfc3986_reserved(char ch)
+int is_rfc3986_reserved_or_unreserved(char ch)
 {
+	if (is_rfc3986_unreserved(ch))
+		return 1;
 	switch (ch) {
 		case '!': case '*': case '\'': case '(': case ')': case ';':
 		case ':': case '@': case '&': case '=': case '+': case '$':
 		case ',': case '/': case '?': case '#': case '[': case ']':
 			return 1;
 	}
 	return 0;
 }
 
-static int is_rfc3986_unreserved(char ch)
+int is_rfc3986_unreserved(char ch)
 {
 	return isalnum(ch) ||
 		ch == '-' || ch == '_' || ch == '.' || ch == '~';
 }
 
 static void strbuf_add_urlencode(struct strbuf *sb, const char *s, size_t len,
-				 int reserved)
+				 char_predicate allow_unencoded_fn)
 {
 	strbuf_grow(sb, len);
 	while (len--) {
 		char ch = *s++;
-		if (is_rfc3986_unreserved(ch) ||
-		    (!reserved && is_rfc3986_reserved(ch)))
+		if (allow_unencoded_fn(ch))
 			strbuf_addch(sb, ch);
 		else
 			strbuf_addf(sb, "%%%02x", (unsigned char)ch);
 	}
 }
 
 void strbuf_addstr_urlencode(struct strbuf *sb, const char *s,
-			     int reserved)
+			     char_predicate allow_unencoded_fn)
 {
-	strbuf_add_urlencode(sb, s, strlen(s), reserved);
+	strbuf_add_urlencode(sb, s, strlen(s), allow_unencoded_fn);
 }
 
 void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes)
 {
 	if (bytes > 1 << 30) {
 		strbuf_addf(buf, "%u.%2.2u GiB",
 			    (unsigned)(bytes >> 30),
 			    (unsigned)(bytes & ((1 << 30) - 1)) / 10737419);
 	} else if (bytes > 1 << 20) {
 		unsigned x = bytes + 5243;  /* for rounding */
diff --git a/strbuf.h b/strbuf.h
index c8d98dfb95..346d722492 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -659,22 +659,27 @@ void strbuf_branchname(struct strbuf *sb, const char *name,
 		       unsigned allowed);
 
 /*
  * Like strbuf_branchname() above, but confirm that the result is
  * syntactically valid to be used as a local branch name in refs/heads/.
  *
  * The return value is "0" if the result is valid, and "-1" otherwise.
  */
 int strbuf_check_branch_ref(struct strbuf *sb, const char *name);
 
+typedef int (*char_predicate)(char ch);
+
+int is_rfc3986_unreserved(char ch);
+int is_rfc3986_reserved_or_unreserved(char ch);
+
 void strbuf_addstr_urlencode(struct strbuf *sb, const char *name,
-			     int reserved);
+			     char_predicate allow_unencoded_fn);
 
 __attribute__((format (printf,1,2)))
 int printf_ln(const char *fmt, ...);
 __attribute__((format (printf,2,3)))
 int fprintf_ln(FILE *fp, const char *fmt, ...);
 
 char *xstrdup_tolower(const char *);
 char *xstrdup_toupper(const char *);
 
 /**
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 08/10] list-objects-filter-options: allow mult. --filter
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
                     ` (6 preceding siblings ...)
  2019-06-13 21:51   ` [PATCH v3 07/10] strbuf: give URL-encoding API a char predicate fn Matthew DeVore
@ 2019-06-13 21:51   ` Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 09/10] list-objects-filter-options: clean up use of ALLOC_GROW Matthew DeVore
                     ` (2 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-13 21:51 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore, Jeff Hostetler, Junio C Hamano

Allow combining of multiple filters by simply repeating the --filter
flag. Before this patch, the user had to combine them in a single flag
somewhat awkwardly (e.g. --filter=combine:FOO+BAR), including
URL-encoding the individual filters.

To make this work, in the --filter flag parsing callback, rather than
error out when we detect that the filter_options struct is already
populated, we modify it in-place to contain the added sub-filter. The
existing sub-filter becomes the lhs of the combined filter, and the
next sub-filter becomes the rhs. We also have to URL-encode the LHS and
RHS sub-filters.

We can simplify the operation if the LHS is already a combine: filter.
In that case, we just append the URL-encoded RHS sub-filter to the LHS
spec to get the new spec.

Helped-by: Emily Shaffer <emilyshaffer@google.com>
Helped-by: Jeff Hostetler <git@jeffhostetler.com>
Helped-by: Jeff King <peff@peff.net>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 Documentation/rev-list-options.txt  | 16 ++++++
 list-objects-filter-options.c       | 88 +++++++++++++++++++++++++++--
 list-objects-filter-options.h       | 11 ++++
 t/t5616-partial-clone.sh            | 19 +++++++
 t/t6112-rev-list-filters-objects.sh | 46 +++++++++++++--
 transport.c                         |  1 +
 upload-pack.c                       |  2 +
 7 files changed, 173 insertions(+), 10 deletions(-)

diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index ddbc1de43f..7b4116f279 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -730,20 +730,36 @@ specification contained in <path>.
 +
 The form '--filter=tree:<depth>' omits all blobs and trees whose depth
 from the root tree is >= <depth> (minimum depth if an object is located
 at multiple depths in the commits traversed). <depth>=0 will not include
 any trees or blobs unless included explicitly in the command-line (or
 standard input when --stdin is used). <depth>=1 will include only the
 tree and blobs which are referenced directly by a commit reachable from
 <commit> or an explicitly-given object. <depth>=2 is like <depth>=1
 while also including trees and blobs one more level removed from an
 explicitly-given commit or tree.
++
+Multiple '--filter=' flags can be specified to combine filters. Only
+objects which are accepted by every filter are included.
++
+The form '--filter=combine:<filter1>+<filter2>+...<filterN>' can also be
+used to combine several filters, but this is harder than just repeating
+the '--filter' flag and is usually not necessary. Filters are joined by
+'{plus}' and individual filters are %-encoded (i.e. URL-encoded).
+Besides the '{plus}' and '%' characters, the following characters are
+reserved and also must be encoded: `~!@#$^&*()[]{}\;",<>?`+&#39;&#96;+
+as well as all characters with ASCII code &lt;= `0x20`, which includes
+space and newline.
++
+Other arbitrary characters can also be encoded. For instance,
+'combine:tree:3+blob:none' and 'combine:tree%3A3+blob%3Anone' are
+equivalent.
 
 --no-filter::
 	Turn off any previous `--filter=` argument.
 
 --filter-print-omitted::
 	Only useful with `--filter=`; prints a list of the objects omitted
 	by the filter.  Object IDs are prefixed with a ``~'' character.
 
 --missing=<missing-action>::
 	A debug option to help with future "partial clone" development.
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index c9dd41cd06..ce274b1f35 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -1,18 +1,19 @@
 #include "cache.h"
 #include "commit.h"
 #include "config.h"
 #include "revision.h"
 #include "argv-array.h"
 #include "list-objects.h"
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
+#include "trace.h"
 #include "url.h"
 
 static int parse_combine_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
 	struct strbuf *errbuf);
 
 /*
  * Parse value of the argument to the "filter" keyword.
  * On the command line this looks like:
@@ -168,29 +169,106 @@ static int parse_combine_filter(
 
 cleanup:
 	strbuf_list_free(subspecs);
 	if (result) {
 		list_objects_filter_release(filter_options);
 		memset(filter_options, 0, sizeof(*filter_options));
 	}
 	return result;
 }
 
-int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
-			      const char *arg)
+static int allow_unencoded(char ch)
+{
+	if (ch <= ' ' || ch == '%' || ch == '+')
+		return 0;
+	return !strchr(RESERVED_NON_WS, ch);
+}
+
+static void filter_spec_append_urlencode(
+	struct list_objects_filter_options *filter, const char *raw)
 {
 	struct strbuf buf = STRBUF_INIT;
+	strbuf_addstr_urlencode(&buf, raw, allow_unencoded);
+	trace_printf("Add to combine filter-spec: %s\n", buf.buf);
+	string_list_append(&filter->filter_spec, strbuf_detach(&buf, NULL));
+}
+
+/*
+ * Changes filter_options into an equivalent LOFC_COMBINE filter options
+ * instance. Does not do anything if filter_options is already LOFC_COMBINE.
+ */
+static void transform_to_combine_type(
+	struct list_objects_filter_options *filter_options)
+{
+	assert(filter_options->choice);
+	if (filter_options->choice == LOFC_COMBINE)
+		return;
+	{
+		const int initial_sub_alloc = 2;
+		struct list_objects_filter_options *sub_array =
+			xcalloc(initial_sub_alloc, sizeof(*sub_array));
+		sub_array[0] = *filter_options;
+		memset(filter_options, 0, sizeof(*filter_options));
+		filter_options->sub = sub_array;
+		filter_options->sub_alloc = initial_sub_alloc;
+	}
+	filter_options->sub_nr = 1;
+	filter_options->choice = LOFC_COMBINE;
+	string_list_append(&filter_options->filter_spec, xstrdup("combine:"));
+	filter_spec_append_urlencode(
+		filter_options,
+		list_objects_filter_spec(&filter_options->sub[0]));
+	/*
+	 * We don't need the filter_spec strings for subfilter specs, only the
+	 * top level.
+	 */
+	string_list_clear(&filter_options->sub[0].filter_spec, /*free_util=*/0);
+}
+
+void list_objects_filter_die_if_populated(
+	struct list_objects_filter_options *filter_options)
+{
 	if (filter_options->choice)
 		die(_("multiple filter-specs cannot be combined"));
-	string_list_append(&filter_options->filter_spec, xstrdup(arg));
-	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
-		die("%s", buf.buf);
+}
+
+int parse_list_objects_filter(
+	struct list_objects_filter_options *filter_options,
+	const char *arg)
+{
+	struct strbuf errbuf = STRBUF_INIT;
+	int parse_error;
+
+	if (!filter_options->choice) {
+		string_list_append(&filter_options->filter_spec, xstrdup(arg));
+
+		parse_error = gently_parse_list_objects_filter(
+			filter_options, arg, &errbuf);
+	} else {
+		/*
+		 * Make filter_options an LOFC_COMBINE spec so we can trivially
+		 * add subspecs to it.
+		 */
+		transform_to_combine_type(filter_options);
+
+		string_list_append(&filter_options->filter_spec, xstrdup("+"));
+		filter_spec_append_urlencode(filter_options, arg);
+		ALLOC_GROW(filter_options->sub, filter_options->sub_nr + 1,
+			   filter_options->sub_alloc);
+		filter_options = &filter_options->sub[filter_options->sub_nr++];
+		memset(filter_options, 0, sizeof(*filter_options));
+
+		parse_error = gently_parse_list_objects_filter(
+			filter_options, arg, &errbuf);
+	}
+	if (parse_error)
+		die("%s", errbuf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
 	if (unset || !arg) {
 		list_objects_filter_set_no_filter(filter_options);
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index 1786c80eb4..fe2e4d5649 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -58,20 +58,31 @@ struct list_objects_filter_options {
 	struct list_objects_filter_options *sub;
 
 	/*
 	 * END choice-specific parsed values.
 	 */
 };
 
 /* Normalized command line arguments */
 #define CL_ARG__FILTER "filter"
 
+void list_objects_filter_die_if_populated(
+	struct list_objects_filter_options *filter_options);
+
+/*
+ * Parses the filter spec string given by arg and either (1) simply places the
+ * result in filter_options if it is not yet populated or (2) combines it with
+ * the filter already in filter_options if it is already populated. In the case
+ * of (2), the filter specs are combined as if specified with 'combine:'.
+ *
+ * Dies and prints a user-facing message if an error occurs.
+ */
 int parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg);
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset);
 
 #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
 	{ OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \
 	  N_("object filtering"), 0, \
diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh
index 9a8f9886b3..11536f4028 100755
--- a/t/t5616-partial-clone.sh
+++ b/t/t5616-partial-clone.sh
@@ -201,20 +201,39 @@ test_expect_success 'use fsck before and after manually fetching a missing subtr
 	test_line_count = 70 fetched_objects &&
 
 	awk -f print_1.awk fetched_objects |
 	xargs -n1 git -C dst cat-file -t >fetched_types &&
 
 	sort -u fetched_types >unique_types.observed &&
 	test_write_lines blob commit tree >unique_types.expected &&
 	test_cmp unique_types.expected unique_types.observed
 '
 
+test_expect_success 'implicitly construct combine: filter with repeated flags' '
+	GIT_TRACE=$(pwd)/trace git clone --bare \
+		--filter=blob:none --filter=tree:1 \
+		"file://$(pwd)/srv.bare" pc2 &&
+	grep "trace:.* git pack-objects .*--filter=combine:blob:none+tree:1" \
+		trace &&
+	git -C pc2 rev-list --objects --missing=allow-any HEAD >objects &&
+
+	# We should have gotten some root trees.
+	grep " $" objects &&
+	# Should not have gotten any non-root trees or blobs.
+	! grep " ." objects &&
+
+	xargs -n 1 git -C pc2 cat-file -t <objects >types &&
+	sort -u types >unique_types.actual &&
+	test_write_lines commit tree >unique_types.expected &&
+	test_cmp unique_types.expected unique_types.actual
+'
+
 test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' '
 	rm -rf src dst &&
 	git init src &&
 	test_commit -C src x &&
 	test_config -C src uploadpack.allowfilter 1 &&
 	test_config -C src uploadpack.allowanysha1inwant 1 &&
 
 	# Create a tag pointing to a blob.
 	BLOB=$(echo blob-contents | git -C src hash-object --stdin -w) &&
 	git -C src tag myblob "$BLOB" &&
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index 4523c8f066..fd8aec4b4f 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -357,21 +357,30 @@ test_expect_success 'verify tree:3 includes everything expected' '
 
 test_expect_success 'combine:... for a simple combination' '
 	git -C r3 rev-list --objects --filter=combine:tree:2+blob:none HEAD \
 		>actual &&
 
 	expect_has HEAD "" &&
 	expect_has HEAD~1 "" &&
 	expect_has HEAD dir1 &&
 
 	# There are also 2 commit objects
-	test_line_count = 5 actual
+	test_line_count = 5 actual &&
+
+	cp actual expected &&
+
+	# Try again using repeated --filter - this is equivalent to a manual
+	# combine with "combine:...+..."
+	git -C r3 rev-list --objects --filter=combine:tree:2 \
+		--filter=blob:none HEAD >actual &&
+
+	test_cmp expected actual
 '
 
 test_expect_success 'combine:... with URL encoding' '
 	git -C r3 rev-list --objects \
 		--filter=combine:tree%3a2+blob:%6Eon%65 HEAD >actual &&
 
 	expect_has HEAD "" &&
 	expect_has HEAD~1 "" &&
 	expect_has HEAD dir1 &&
 
@@ -423,24 +432,26 @@ test_expect_success 'combine:... with edge-case hex digits: Ff Aa 0 9' '
 	git -C r3 rev-list --objects --filter="combine:tree%3A2+blob%3anone" \
 		HEAD >actual &&
 	test_line_count = 5 actual &&
 	git -C r3 rev-list --objects --filter="combine:tree:%30" HEAD >actual &&
 	test_line_count = 2 actual &&
 	git -C r3 rev-list --objects --filter="combine:tree:%39+blob:none" \
 		HEAD >actual &&
 	test_line_count = 5 actual
 '
 
-test_expect_success 'add a sparse pattern blob whose path has reserved chars' '
+test_expect_success 'add sparse pattern blobs whose paths have reserved chars' '
 	cp r3/pattern r3/pattern1+renamed% &&
-	git -C r3 add pattern1+renamed% &&
-	git -C r3 commit -m "add sparse pattern file with reserved chars"
+	cp r3/pattern "r3/p;at%ter+n" &&
+	cp r3/pattern r3/^~pattern &&
+	git -C r3 add pattern1+renamed% "p;at%ter+n" ^~pattern &&
+	git -C r3 commit -m "add sparse pattern files with reserved chars"
 '
 
 test_expect_success 'combine:... with more than two sub-filters' '
 	git -C r3 rev-list --objects \
 		--filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern \
 		HEAD >actual &&
 
 	expect_has HEAD "" &&
 	expect_has HEAD~1 "" &&
 	expect_has HEAD~2 "" &&
@@ -451,21 +462,46 @@ test_expect_success 'combine:... with more than two sub-filters' '
 	# Should also have 3 commits
 	test_line_count = 9 actual &&
 
 	# Try again, this time making sure the last sub-filter is only
 	# URL-decoded once.
 	cp actual expect &&
 
 	git -C r3 rev-list --objects \
 		--filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern1%2brenamed%25 \
 		HEAD >actual &&
-	test_cmp expect actual
+	test_cmp expect actual &&
+
+	# Use the same composite filter again, but with a pattern file name that
+	# requires encoding multiple characters, and use implicit filter
+	# combining.
+	test_when_finished "rm -f trace1" &&
+	GIT_TRACE=$(pwd)/trace1 git -C r3 rev-list --objects \
+		--filter=tree:3 --filter=blob:limit=40 \
+		--filter=sparse:oid="master:p;at%ter+n" \
+		HEAD >actual &&
+
+	test_cmp expect actual &&
+	grep "Add to combine filter-spec: sparse:oid=master:p%3bat%25ter%2bn" \
+		trace1 &&
+
+	# Repeat the above test, but this time, the characters to encode are in
+	# the LHS of the combined filter.
+	test_when_finished "rm -f trace2" &&
+	GIT_TRACE=$(pwd)/trace2 git -C r3 rev-list --objects \
+		--filter=sparse:oid=master:^~pattern \
+		--filter=tree:3 --filter=blob:limit=40 \
+		HEAD >actual &&
+
+	test_cmp expect actual &&
+	grep "Add to combine filter-spec: sparse:oid=master:%5e%7epattern" \
+		trace2
 '
 
 # Test provisional omit collection logic with a repo that has objects appearing
 # at multiple depths - first deeper than the filter's threshold, then shallow.
 
 test_expect_success 'setup r4' '
 	git init r4 &&
 
 	echo foo > r4/foo &&
 	mkdir r4/subdir &&
diff --git a/transport.c b/transport.c
index f1fcd2c4b0..ee7dd1c062 100644
--- a/transport.c
+++ b/transport.c
@@ -217,20 +217,21 @@ static int set_git_option(struct git_transport_options *opts,
 	} else if (!strcmp(name, TRANS_OPT_DEEPEN_RELATIVE)) {
 		opts->deepen_relative = !!value;
 		return 0;
 	} else if (!strcmp(name, TRANS_OPT_FROM_PROMISOR)) {
 		opts->from_promisor = !!value;
 		return 0;
 	} else if (!strcmp(name, TRANS_OPT_NO_DEPENDENTS)) {
 		opts->no_dependents = !!value;
 		return 0;
 	} else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) {
+		list_objects_filter_die_if_populated(&opts->filter_options);
 		parse_list_objects_filter(&opts->filter_options, value);
 		return 0;
 	}
 	return 1;
 }
 
 static int connect_setup(struct transport *transport, int for_push)
 {
 	struct git_transport_data *data = transport->data;
 	int flags = transport->verbose > 0 ? CONNECT_VERBOSE : 0;
diff --git a/upload-pack.c b/upload-pack.c
index a74d293fef..dda2ac6f44 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -876,20 +876,21 @@ static void receive_needs(struct packet_reader *reader, struct object_array *wan
 		if (process_deepen(reader->line, &depth))
 			continue;
 		if (process_deepen_since(reader->line, &deepen_since, &deepen_rev_list))
 			continue;
 		if (process_deepen_not(reader->line, &deepen_not, &deepen_rev_list))
 			continue;
 
 		if (skip_prefix(reader->line, "filter ", &arg)) {
 			if (!filter_capability_requested)
 				die("git upload-pack: filtering capability not negotiated");
+			list_objects_filter_die_if_populated(&filter_options);
 			parse_list_objects_filter(&filter_options, arg);
 			continue;
 		}
 
 		if (!skip_prefix(reader->line, "want ", &arg) ||
 		    parse_oid_hex(arg, &oid_buf, &features))
 			die("git upload-pack: protocol error, "
 			    "expected to get object ID, not '%s'", reader->line);
 
 		if (parse_feature_request(features, "deepen-relative"))
@@ -1297,20 +1298,21 @@ static void process_args(struct packet_reader *request,
 			continue;
 		if (process_deepen_not(arg, &data->deepen_not,
 				       &data->deepen_rev_list))
 			continue;
 		if (!strcmp(arg, "deepen-relative")) {
 			data->deepen_relative = 1;
 			continue;
 		}
 
 		if (allow_filter && skip_prefix(arg, "filter ", &p)) {
+			list_objects_filter_die_if_populated(&filter_options);
 			parse_list_objects_filter(&filter_options, p);
 			continue;
 		}
 
 		if ((git_env_bool("GIT_TEST_SIDEBAND_ALL", 0) ||
 		     allow_sideband_all) &&
 		    !strcmp(arg, "sideband-all")) {
 			data->writer.use_sideband = 1;
 			continue;
 		}
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 09/10] list-objects-filter-options: clean up use of ALLOC_GROW
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
                     ` (7 preceding siblings ...)
  2019-06-13 21:51   ` [PATCH v3 08/10] list-objects-filter-options: allow mult. --filter Matthew DeVore
@ 2019-06-13 21:51   ` Matthew DeVore
  2019-06-13 21:51   ` [PATCH v3 10/10] list-objects-filter-options: make parser void Matthew DeVore
  2019-06-14 19:50   ` [PATCH v3 00/10] Filter combination Junio C Hamano
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-13 21:51 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

Introduce a new macro ALLOC_GROW_BY which automatically zeros the added
array elements and takes care of updating the nr value. Use the macro in
code introduced earlier in this patchset.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 cache.h                       | 22 ++++++++++++++++++++++
 list-objects-filter-options.c | 17 +++++++----------
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/cache.h b/cache.h
index b4bb2e2c11..48fb0f63c2 100644
--- a/cache.h
+++ b/cache.h
@@ -653,33 +653,55 @@ int init_db(const char *git_dir, const char *real_git_dir,
 void sanitize_stdfds(void);
 int daemonize(void);
 
 #define alloc_nr(x) (((x)+16)*3/2)
 
 /*
  * Realloc the buffer pointed at by variable 'x' so that it can hold
  * at least 'nr' entries; the number of entries currently allocated
  * is 'alloc', using the standard growing factor alloc_nr() macro.
  *
+ * Consider using ALLOC_GROW_BY instead of ALLOC_GROW as it has some
+ * added niceties.
+ *
  * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
  */
 #define ALLOC_GROW(x, nr, alloc) \
 	do { \
 		if ((nr) > alloc) { \
 			if (alloc_nr(alloc) < (nr)) \
 				alloc = (nr); \
 			else \
 				alloc = alloc_nr(alloc); \
 			REALLOC_ARRAY(x, alloc); \
 		} \
 	} while (0)
 
+/*
+ * Similar to ALLOC_GROW but handles updating of the nr value and
+ * zeroing the bytes of the newly-grown array elements.
+ *
+ * DO NOT USE any expression with side-effect for any of the
+ * arguments.
+ */
+#define ALLOC_GROW_BY(x, nr, increase, alloc) \
+	do { \
+		if (increase) { \
+			size_t new_nr = nr + (increase); \
+			if (new_nr < nr) \
+				BUG("negative growth in ALLOC_GROW_BY"); \
+			ALLOC_GROW(x, new_nr, alloc); \
+			memset((x) + nr, 0, sizeof(*(x)) * (increase)); \
+			nr = new_nr; \
+		} \
+	} while (0)
+
 /* Initialize and use the cache information */
 struct lock_file;
 void preload_index(struct index_state *index,
 		   const struct pathspec *pathspec,
 		   unsigned int refresh_flags);
 int do_read_index(struct index_state *istate, const char *path,
 		  int must_exist); /* for testting only! */
 int read_index_from(struct index_state *, const char *path,
 		    const char *gitdir);
 int is_index_unborn(struct index_state *);
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index ce274b1f35..9f08390628 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -109,28 +109,26 @@ static int has_reserved_character(
 	}
 
 	return 0;
 }
 
 static int parse_combine_subfilter(
 	struct list_objects_filter_options *filter_options,
 	struct strbuf *subspec,
 	struct strbuf *errbuf)
 {
-	size_t new_index = filter_options->sub_nr++;
+	size_t new_index = filter_options->sub_nr;
 	char *decoded;
 	int result;
 
-	ALLOC_GROW(filter_options->sub, filter_options->sub_nr,
-		   filter_options->sub_alloc);
-	memset(&filter_options->sub[new_index], 0,
-	       sizeof(*filter_options->sub));
+	ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
+		      filter_options->sub_alloc);
 
 	decoded = url_percent_decode(subspec->buf);
 
 	result = has_reserved_character(subspec, errbuf) ||
 		gently_parse_list_objects_filter(
 			&filter_options->sub[new_index], decoded, errbuf);
 
 	free(decoded);
 	return result;
 }
@@ -245,27 +243,26 @@ int parse_list_objects_filter(
 			filter_options, arg, &errbuf);
 	} else {
 		/*
 		 * Make filter_options an LOFC_COMBINE spec so we can trivially
 		 * add subspecs to it.
 		 */
 		transform_to_combine_type(filter_options);
 
 		string_list_append(&filter_options->filter_spec, xstrdup("+"));
 		filter_spec_append_urlencode(filter_options, arg);
-		ALLOC_GROW(filter_options->sub, filter_options->sub_nr + 1,
-			   filter_options->sub_alloc);
-		filter_options = &filter_options->sub[filter_options->sub_nr++];
-		memset(filter_options, 0, sizeof(*filter_options));
+		ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
+			      filter_options->sub_alloc);
 
 		parse_error = gently_parse_list_objects_filter(
-			filter_options, arg, &errbuf);
+			&filter_options->sub[filter_options->sub_nr - 1], arg,
+			&errbuf);
 	}
 	if (parse_error)
 		die("%s", errbuf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 10/10] list-objects-filter-options: make parser void
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
                     ` (8 preceding siblings ...)
  2019-06-13 21:51   ` [PATCH v3 09/10] list-objects-filter-options: clean up use of ALLOC_GROW Matthew DeVore
@ 2019-06-13 21:51   ` Matthew DeVore
  2019-06-14 19:50   ` [PATCH v3 00/10] Filter combination Junio C Hamano
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-13 21:51 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

This function always returns 0, so make it return void instead.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter-options.c | 12 +++++-------
 list-objects-filter-options.h |  2 +-
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 9f08390628..a4ebf21a5b 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -222,21 +222,21 @@ static void transform_to_combine_type(
 	string_list_clear(&filter_options->sub[0].filter_spec, /*free_util=*/0);
 }
 
 void list_objects_filter_die_if_populated(
 	struct list_objects_filter_options *filter_options)
 {
 	if (filter_options->choice)
 		die(_("multiple filter-specs cannot be combined"));
 }
 
-int parse_list_objects_filter(
+void parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 	int parse_error;
 
 	if (!filter_options->choice) {
 		string_list_append(&filter_options->filter_spec, xstrdup(arg));
 
 		parse_error = gently_parse_list_objects_filter(
@@ -252,34 +252,32 @@ int parse_list_objects_filter(
 		filter_spec_append_urlencode(filter_options, arg);
 		ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
 			      filter_options->sub_alloc);
 
 		parse_error = gently_parse_list_objects_filter(
 			&filter_options->sub[filter_options->sub_nr - 1], arg,
 			&errbuf);
 	}
 	if (parse_error)
 		die("%s", errbuf.buf);
-	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
-	if (unset || !arg) {
+	if (unset || !arg)
 		list_objects_filter_set_no_filter(filter_options);
-		return 0;
-	}
-
-	return parse_list_objects_filter(filter_options, arg);
+	else
+		parse_list_objects_filter(filter_options, arg);
+	return 0;
 }
 
 const char *list_objects_filter_spec(struct list_objects_filter_options *filter)
 {
 	if (!filter->filter_spec.nr)
 		BUG("no filter_spec available for this filter");
 	if (filter->filter_spec.nr != 1) {
 		struct strbuf concatted = STRBUF_INIT;
 		strbuf_add_separated_string_list(
 			&concatted, "", &filter->filter_spec);
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index fe2e4d5649..0a48f541d2 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -69,21 +69,21 @@ void list_objects_filter_die_if_populated(
 	struct list_objects_filter_options *filter_options);
 
 /*
  * Parses the filter spec string given by arg and either (1) simply places the
  * result in filter_options if it is not yet populated or (2) combines it with
  * the filter already in filter_options if it is already populated. In the case
  * of (2), the filter specs are combined as if specified with 'combine:'.
  *
  * Dies and prints a user-facing message if an error occurs.
  */
-int parse_list_objects_filter(
+void parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg);
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset);
 
 #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
 	{ OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \
 	  N_("object filtering"), 0, \
 	  opt_parse_list_objects_filter }
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 00/10] Filter combination
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
                     ` (9 preceding siblings ...)
  2019-06-13 21:51   ` [PATCH v3 10/10] list-objects-filter-options: make parser void Matthew DeVore
@ 2019-06-14 19:50   ` Junio C Hamano
  10 siblings, 0 replies; 57+ messages in thread
From: Junio C Hamano @ 2019-06-14 19:50 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer, matvore

Matthew DeVore <matvore@google.com> writes:

> It has been a while since I sent a roll-up. Here are the changes since v2:
>
>  - Re-use more URL-encoding logic in strbuf.c
>    * This was partially achieved by changing the helper function to accept a
>      function that will indicate whether some character must be escaped.
>  - Re-use more URL-decoding logic in url.c
>  - changed the filter_spec strbuf to a string_list to avoid explicit
>    initialization
>  - Remove logic to "expand" tree:#k and tree:#m filter specs since there is no
>    server that supports tree:# but does not support tree:#k, as they were
>    implemented at the same time.

Since the v2 of this topic, cc/list-objects-filter-wo-sparse-path
was merged to the mainline before Git 2.22 was tagged.  As we won't
be merging this topic to any maintenance track anyway, it is
probably a good time to rebase it on v2.22.0, to avoid unnecessary
conflicts.

Thanks.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 00/10] Filter combination
  2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
                   ` (10 preceding siblings ...)
  2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
@ 2019-06-15  0:40 ` " Matthew DeVore
  2019-06-15  0:40   ` [PATCH v4 01/10] list-objects-filter: make API easier to use Matthew DeVore
                     ` (10 more replies)
  11 siblings, 11 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-15  0:40 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

I had to rebase this onto the latest master rev. master now has the patch which
disables the sparse:path filter, and v3 of this patch set has conflicts with it.
This version does not, so it can be patched in and tried out by others.

I have re-run the test suite on each commit. Sorry for the spamminess.

Thanks,

Matthew DeVore (10):
  list-objects-filter: make API easier to use
  list-objects-filter: put omits set in filter struct
  list-objects-filter-options: always supply *errbuf
  list-objects-filter: implement composite filters
  list-objects-filter-options: move error check up
  list-objects-filter-options: make filter_spec a string_list
  strbuf: give URL-encoding API a char predicate fn
  list-objects-filter-options: allow mult. --filter
  list-objects-filter-options: clean up use of ALLOC_GROW
  list-objects-filter-options: make parser void

 Documentation/rev-list-options.txt  |  16 ++
 builtin/clone.c                     |   8 +-
 builtin/fetch.c                     |   9 +-
 builtin/rev-list.c                  |   6 +-
 cache.h                             |  22 ++
 credential-store.c                  |   9 +-
 fetch-pack.c                        |  20 +-
 http.c                              |   6 +-
 list-objects-filter-options.c       | 267 ++++++++++++++++++----
 list-objects-filter-options.h       |  57 ++++-
 list-objects-filter.c               | 332 +++++++++++++++++++++-------
 list-objects-filter.h               |  35 ++-
 list-objects.c                      |  55 ++---
 strbuf.c                            |  15 +-
 strbuf.h                            |   7 +-
 t/t5616-partial-clone.sh            |  19 ++
 t/t6112-rev-list-filters-objects.sh | 194 +++++++++++++++-
 transport-helper.c                  |  10 +-
 transport.c                         |   1 +
 upload-pack.c                       |  13 +-
 url.c                               |   6 +
 url.h                               |   8 +
 22 files changed, 874 insertions(+), 241 deletions(-)

-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 01/10] list-objects-filter: make API easier to use
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
@ 2019-06-15  0:40   ` Matthew DeVore
  2019-06-21 22:58     ` Jonathan Tan
  2019-06-15  0:40   ` [PATCH v4 02/10] list-objects-filter: put omits set in filter struct Matthew DeVore
                     ` (9 subsequent siblings)
  10 siblings, 1 reply; 57+ messages in thread
From: Matthew DeVore @ 2019-06-15  0:40 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore, Jeff Hostetler, Junio C Hamano

Make the list-objects-filter.h API more opaque and easier to use. This
prepares for combined filter support, where filters will be created and
used in a new context.

Helped-by: Jeff Hostetler <git@jeffhostetler.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter.c | 111 ++++++++++++++++++++++++++++--------------
 list-objects-filter.h |  35 ++++++-------
 list-objects.c        |  55 +++++++++------------
 3 files changed, 112 insertions(+), 89 deletions(-)

diff --git a/list-objects-filter.c b/list-objects-filter.c
index 53f90442c5..a8c9d8dfe0 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -19,20 +19,34 @@
  * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
  * that have been shown, but should be revisited if they appear
  * in the traversal (until we mark it SEEN).  This is a way to
  * let us silently de-dup calls to show() in the caller.  This
  * is subtly different from the "revision.h:SHOWN" and the
  * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
  * the non-de-dup usage in pack-bitmap.c
  */
 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
 
+struct filter {
+	enum list_objects_filter_result (*filter_object_fn)(
+		struct repository *r,
+		enum list_objects_filter_situation filter_situation,
+		struct object *obj,
+		const char *pathname,
+		const char *filename,
+		void *filter_data);
+
+	void (*free_fn)(void *filter_data);
+
+	void *filter_data;
+};
+
 /*
  * A filter for list-objects to omit ALL blobs from the traversal.
  * And to OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_blobs_none_data {
 	struct oidset *omits;
 };
 
 static enum list_objects_filter_result filter_blobs_none(
 	struct repository *r,
@@ -60,32 +74,31 @@ static enum list_objects_filter_result filter_blobs_none(
 	case LOFS_BLOB:
 		assert(obj->type == OBJ_BLOB);
 		assert((obj->flags & SEEN) == 0);
 
 		if (filter_data->omits)
 			oidset_insert(filter_data->omits, &obj->oid);
 		return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 	}
 }
 
-static void *filter_blobs_none__init(
+static void filter_blobs_none__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 
-	*filter_fn = filter_blobs_none;
-	*filter_free_fn = free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_blobs_none;
+	filter->free_fn = free;
 }
 
 /*
  * A filter for list-objects to omit ALL trees and blobs from the traversal.
  * Can OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_trees_depth_data {
 	struct oidset *omits;
 
 	/*
@@ -194,35 +207,34 @@ static enum list_objects_filter_result filter_trees_depth(
 }
 
 static void filter_trees_free(void *filter_data) {
 	struct filter_trees_depth_data *d = filter_data;
 	if (!d)
 		return;
 	oidmap_free(&d->seen_at_depth, 1);
 	free(d);
 }
 
-static void *filter_trees_depth__init(
+static void filter_trees_depth__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 	oidmap_init(&d->seen_at_depth, 0);
 	d->exclude_depth = filter_options->tree_exclude_depth;
 	d->current_depth = 0;
 
-	*filter_fn = filter_trees_depth;
-	*filter_free_fn = filter_trees_free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_trees_depth;
+	filter->free_fn = filter_trees_free;
 }
 
 /*
  * A filter for list-objects to omit large blobs.
  * And to OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_blobs_limit_data {
 	struct oidset *omits;
 	unsigned long max_bytes;
 };
@@ -274,33 +286,32 @@ static enum list_objects_filter_result filter_blobs_limit(
 			oidset_insert(filter_data->omits, &obj->oid);
 		return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 	}
 
 include_it:
 	if (filter_data->omits)
 		oidset_remove(filter_data->omits, &obj->oid);
 	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
-static void *filter_blobs_limit__init(
+static void filter_blobs_limit__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 	d->max_bytes = filter_options->blob_limit_value;
 
-	*filter_fn = filter_blobs_limit;
-	*filter_free_fn = free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_blobs_limit;
+	filter->free_fn = free;
 }
 
 /*
  * A filter driven by a sparse-checkout specification to only
  * include blobs that a sparse checkout would populate.
  *
  * The sparse-checkout spec can be loaded from a blob with the
  * given OID or from a local pathname.  We allow an OID because
  * the repo may be bare or we may be doing the filtering on the
  * server.
@@ -450,70 +461,96 @@ static enum list_objects_filter_result filter_sparse(
 }
 
 
 static void filter_sparse_free(void *filter_data)
 {
 	struct filter_sparse_data *d = filter_data;
 	/* TODO free contents of 'd' */
 	free(d);
 }
 
-static void *filter_sparse_oid__init(
+static void filter_sparse_oid__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct filter *filter)
 {
 	struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 	d->omits = omitted;
 	if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 					   NULL, 0, &d->el) < 0)
 		die("could not load filter specification");
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
-	*filter_fn = filter_sparse;
-	*filter_free_fn = filter_sparse_free;
-	return d;
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_sparse;
+	filter->free_fn = filter_sparse_free;
 }
 
-typedef void *(*filter_init_fn)(
+typedef void (*filter_init_fn)(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn);
+	struct filter *filter);
 
 /*
  * Must match "enum list_objects_filter_choice".
  */
 static filter_init_fn s_filters[] = {
 	NULL,
 	filter_blobs_none__init,
 	filter_blobs_limit__init,
 	filter_trees_depth__init,
 	filter_sparse_oid__init,
 };
 
-void *list_objects_filter__init(
+struct filter *list_objects_filter__init(
 	struct oidset *omitted,
-	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn)
+	struct list_objects_filter_options *filter_options)
 {
+	struct filter *filter;
 	filter_init_fn init_fn;
 
 	assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
 
 	if (filter_options->choice >= LOFC__COUNT)
 		BUG("invalid list-objects filter choice: %d",
 		    filter_options->choice);
 
 	init_fn = s_filters[filter_options->choice];
-	if (init_fn)
-		return init_fn(omitted, filter_options,
-			       filter_fn, filter_free_fn);
-	*filter_fn = NULL;
-	*filter_free_fn = NULL;
-	return NULL;
+	if (!init_fn)
+		return NULL;
+
+	filter = xcalloc(1, sizeof(*filter));
+	init_fn(omitted, filter_options, filter);
+	return filter;
+}
+
+enum list_objects_filter_result list_objects_filter__filter_object(
+	struct repository *r,
+	enum list_objects_filter_situation filter_situation,
+	struct object *obj,
+	const char *pathname,
+	const char *filename,
+	struct filter *filter)
+{
+	if (filter && (obj->flags & NOT_USER_GIVEN))
+		return filter->filter_object_fn(r, filter_situation, obj,
+						pathname, filename,
+						filter->filter_data);
+	/*
+	 * No filter is active or user gave object explicitly. Choose default
+	 * behavior based on filter situation.
+	 */
+	if (filter_situation == LOFS_END_TREE)
+		return 0;
+	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+}
+
+void list_objects_filter__free(struct filter *filter)
+{
+	if (!filter)
+		return;
+	filter->free_fn(filter->filter_data);
+	free(filter);
 }
diff --git a/list-objects-filter.h b/list-objects-filter.h
index 1d45a4ad57..6908954266 100644
--- a/list-objects-filter.h
+++ b/list-objects-filter.h
@@ -53,37 +53,34 @@ enum list_objects_filter_result {
 	LOFR_DO_SHOW   = 1<<1,
 	LOFR_SKIP_TREE = 1<<2,
 };
 
 enum list_objects_filter_situation {
 	LOFS_BEGIN_TREE,
 	LOFS_END_TREE,
 	LOFS_BLOB
 };
 
-typedef enum list_objects_filter_result (*filter_object_fn)(
+struct filter;
+
+/* Constructor for the set of defined list-objects filters. */
+struct filter *list_objects_filter__init(
+	struct oidset *omitted,
+	struct list_objects_filter_options *filter_options);
+
+/*
+ * Lets `filter` decide how to handle the `obj`. If `filter` is NULL, this
+ * function behaves as expected if no filter is configured: all objects are
+ * included.
+ */
+enum list_objects_filter_result list_objects_filter__filter_object(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
-	void *filter_data);
-
-typedef void (*filter_free_fn)(void *filter_data);
+	struct filter *filter);
 
-/*
- * Constructor for the set of defined list-objects filters.
- * Returns a generic "void *filter_data".
- *
- * The returned "filter_fn" will be used by traverse_commit_list()
- * to filter the results.
- *
- * The returned "filter_free_fn" is a destructor for the
- * filter_data.
- */
-void *list_objects_filter__init(
-	struct oidset *omitted,
-	struct list_objects_filter_options *filter_options,
-	filter_object_fn *filter_fn,
-	filter_free_fn *filter_free_fn);
+/* Destroys `filter`. Does nothing if `filter` is null. */
+void list_objects_filter__free(struct filter *filter);
 
 #endif /* LIST_OBJECTS_FILTER_H */
diff --git a/list-objects.c b/list-objects.c
index b5651ddd5b..9307d91fb3 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -11,32 +11,31 @@
 #include "list-objects-filter-options.h"
 #include "packfile.h"
 #include "object-store.h"
 #include "trace.h"
 
 struct traversal_context {
 	struct rev_info *revs;
 	show_object_fn show_object;
 	show_commit_fn show_commit;
 	void *show_data;
-	filter_object_fn filter_fn;
-	void *filter_data;
+	struct filter *filter;
 };
 
 static void process_blob(struct traversal_context *ctx,
 			 struct blob *blob,
 			 struct strbuf *path,
 			 const char *name)
 {
 	struct object *obj = &blob->object;
 	size_t pathlen;
-	enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW;
+	enum list_objects_filter_result r;
 
 	if (!ctx->revs->blob_objects)
 		return;
 	if (!obj)
 		die("bad blob object");
 	if (obj->flags & (UNINTERESTING | SEEN))
 		return;
 
 	/*
 	 * Pre-filter known-missing objects when explicitly requested.
@@ -47,25 +46,24 @@ static void process_blob(struct traversal_context *ctx,
 	 * may cause the actual filter to report an incomplete list
 	 * of missing objects.
 	 */
 	if (ctx->revs->exclude_promisor_objects &&
 	    !has_object_file(&obj->oid) &&
 	    is_promisor_object(&obj->oid))
 		return;
 
 	pathlen = path->len;
 	strbuf_addstr(path, name);
-	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
-		r = ctx->filter_fn(ctx->revs->repo,
-				   LOFS_BLOB, obj,
-				   path->buf, &path->buf[pathlen],
-				   ctx->filter_data);
+	r = list_objects_filter__filter_object(ctx->revs->repo,
+					       LOFS_BLOB, obj,
+					       path->buf, &path->buf[pathlen],
+					       ctx->filter);
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
 		ctx->show_object(obj, path->buf, ctx->show_data);
 	strbuf_setlen(path, pathlen);
 }
 
 /*
  * Processing a gitlink entry currently does nothing, since
  * we do not recurse into the subproject.
@@ -150,21 +148,21 @@ static void process_tree_contents(struct traversal_context *ctx,
 }
 
 static void process_tree(struct traversal_context *ctx,
 			 struct tree *tree,
 			 struct strbuf *base,
 			 const char *name)
 {
 	struct object *obj = &tree->object;
 	struct rev_info *revs = ctx->revs;
 	int baselen = base->len;
-	enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW;
+	enum list_objects_filter_result r;
 	int failed_parse;
 
 	if (!revs->tree_objects)
 		return;
 	if (!obj)
 		die("bad tree object");
 	if (obj->flags & (UNINTERESTING | SEEN))
 		return;
 
 	failed_parse = parse_tree_gently(tree, 1);
@@ -179,47 +177,44 @@ static void process_tree(struct traversal_context *ctx,
 		 */
 		if (revs->exclude_promisor_objects &&
 		    is_promisor_object(&obj->oid))
 			return;
 
 		if (!revs->do_not_die_on_missing_tree)
 			die("bad tree object %s", oid_to_hex(&obj->oid));
 	}
 
 	strbuf_addstr(base, name);
-	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
-		r = ctx->filter_fn(ctx->revs->repo,
-				   LOFS_BEGIN_TREE, obj,
-				   base->buf, &base->buf[baselen],
-				   ctx->filter_data);
+	r = list_objects_filter__filter_object(ctx->revs->repo,
+					       LOFS_BEGIN_TREE, obj,
+					       base->buf, &base->buf[baselen],
+					       ctx->filter);
 	if (r & LOFR_MARK_SEEN)
 		obj->flags |= SEEN;
 	if (r & LOFR_DO_SHOW)
 		ctx->show_object(obj, base->buf, ctx->show_data);
 	if (base->len)
 		strbuf_addch(base, '/');
 
 	if (r & LOFR_SKIP_TREE)
 		trace_printf("Skipping contents of tree %s...\n", base->buf);
 	else if (!failed_parse)
 		process_tree_contents(ctx, tree, base);
 
-	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) {
-		r = ctx->filter_fn(ctx->revs->repo,
-				   LOFS_END_TREE, obj,
-				   base->buf, &base->buf[baselen],
-				   ctx->filter_data);
-		if (r & LOFR_MARK_SEEN)
-			obj->flags |= SEEN;
-		if (r & LOFR_DO_SHOW)
-			ctx->show_object(obj, base->buf, ctx->show_data);
-	}
+	r = list_objects_filter__filter_object(ctx->revs->repo,
+					       LOFS_END_TREE, obj,
+					       base->buf, &base->buf[baselen],
+					       ctx->filter);
+	if (r & LOFR_MARK_SEEN)
+		obj->flags |= SEEN;
+	if (r & LOFR_DO_SHOW)
+		ctx->show_object(obj, base->buf, ctx->show_data);
 
 	strbuf_setlen(base, baselen);
 	free_tree_buffer(tree);
 }
 
 static void mark_edge_parents_uninteresting(struct commit *commit,
 					    struct rev_info *revs,
 					    show_edge_fn show_edge)
 {
 	struct commit_list *parents;
@@ -395,38 +390,32 @@ static void do_traverse(struct traversal_context *ctx)
 void traverse_commit_list(struct rev_info *revs,
 			  show_commit_fn show_commit,
 			  show_object_fn show_object,
 			  void *show_data)
 {
 	struct traversal_context ctx;
 	ctx.revs = revs;
 	ctx.show_commit = show_commit;
 	ctx.show_object = show_object;
 	ctx.show_data = show_data;
-	ctx.filter_fn = NULL;
-	ctx.filter_data = NULL;
+	ctx.filter = NULL;
 	do_traverse(&ctx);
 }
 
 void traverse_commit_list_filtered(
 	struct list_objects_filter_options *filter_options,
 	struct rev_info *revs,
 	show_commit_fn show_commit,
 	show_object_fn show_object,
 	void *show_data,
 	struct oidset *omitted)
 {
 	struct traversal_context ctx;
-	filter_free_fn filter_free_fn = NULL;
 
 	ctx.revs = revs;
 	ctx.show_object = show_object;
 	ctx.show_commit = show_commit;
 	ctx.show_data = show_data;
-	ctx.filter_fn = NULL;
-
-	ctx.filter_data = list_objects_filter__init(omitted, filter_options,
-						    &ctx.filter_fn, &filter_free_fn);
+	ctx.filter = list_objects_filter__init(omitted, filter_options);
 	do_traverse(&ctx);
-	if (ctx.filter_data && filter_free_fn)
-		filter_free_fn(ctx.filter_data);
+	list_objects_filter__free(ctx.filter);
 }
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 02/10] list-objects-filter: put omits set in filter struct
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
  2019-06-15  0:40   ` [PATCH v4 01/10] list-objects-filter: make API easier to use Matthew DeVore
@ 2019-06-15  0:40   ` Matthew DeVore
  2019-06-15  0:40   ` [PATCH v4 03/10] list-objects-filter-options: always supply *errbuf Matthew DeVore
                     ` (8 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-15  0:40 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

The oidset *omits pointer must be accessed by the combine filter in a
type-agnostic way once the graph traversal is over. Store that pointer
in the general `filter` struct. This will be used in a follow-up patch
to implement the combine filter.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter.c | 68 +++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 42 deletions(-)

diff --git a/list-objects-filter.c b/list-objects-filter.c
index a8c9d8dfe0..b259039bd0 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -26,88 +26,76 @@
  */
 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
 
 struct filter {
 	enum list_objects_filter_result (*filter_object_fn)(
 		struct repository *r,
 		enum list_objects_filter_situation filter_situation,
 		struct object *obj,
 		const char *pathname,
 		const char *filename,
+		struct oidset *omits,
 		void *filter_data);
 
 	void (*free_fn)(void *filter_data);
 
 	void *filter_data;
-};
 
-/*
- * A filter for list-objects to omit ALL blobs from the traversal.
- * And to OPTIONALLY collect a list of the omitted OIDs.
- */
-struct filter_blobs_none_data {
+	/* If non-NULL, the filter collects a list of the omitted OIDs here. */
 	struct oidset *omits;
 };
 
 static enum list_objects_filter_result filter_blobs_none(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
+	struct oidset *omits,
 	void *filter_data_)
 {
-	struct filter_blobs_none_data *filter_data = filter_data_;
-
 	switch (filter_situation) {
 	default:
 		BUG("unknown filter_situation: %d", filter_situation);
 
 	case LOFS_BEGIN_TREE:
 		assert(obj->type == OBJ_TREE);
 		/* always include all tree objects */
 		return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 
 	case LOFS_END_TREE:
 		assert(obj->type == OBJ_TREE);
 		return LOFR_ZERO;
 
 	case LOFS_BLOB:
 		assert(obj->type == OBJ_BLOB);
 		assert((obj->flags & SEEN) == 0);
 
-		if (filter_data->omits)
-			oidset_insert(filter_data->omits, &obj->oid);
+		if (omits)
+			oidset_insert(omits, &obj->oid);
 		return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 	}
 }
 
 static void filter_blobs_none__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
-	struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
-
-	filter->filter_data = d;
 	filter->filter_object_fn = filter_blobs_none;
 	filter->free_fn = free;
 }
 
 /*
  * A filter for list-objects to omit ALL trees and blobs from the traversal.
  * Can OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_trees_depth_data {
-	struct oidset *omits;
-
 	/*
 	 * Maps trees to the minimum depth at which they were seen. It is not
 	 * necessary to re-traverse a tree at deeper or equal depths than it has
 	 * already been traversed.
 	 *
 	 * We can't use LOFR_MARK_SEEN for tree objects since this will prevent
 	 * it from being traversed at shallower depths.
 	 */
 	struct oidmap seen_at_depth;
 
@@ -116,38 +104,39 @@ struct filter_trees_depth_data {
 };
 
 struct seen_map_entry {
 	struct oidmap_entry base;
 	size_t depth;
 };
 
 /* Returns 1 if the oid was in the omits set before it was invoked. */
 static int filter_trees_update_omits(
 	struct object *obj,
-	struct filter_trees_depth_data *filter_data,
+	struct oidset *omits,
 	int include_it)
 {
-	if (!filter_data->omits)
+	if (!omits)
 		return 0;
 
 	if (include_it)
-		return oidset_remove(filter_data->omits, &obj->oid);
+		return oidset_remove(omits, &obj->oid);
 	else
-		return oidset_insert(filter_data->omits, &obj->oid);
+		return oidset_insert(omits, &obj->oid);
 }
 
 static enum list_objects_filter_result filter_trees_depth(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
+	struct oidset *omits,
 	void *filter_data_)
 {
 	struct filter_trees_depth_data *filter_data = filter_data_;
 	struct seen_map_entry *seen_info;
 	int include_it = filter_data->current_depth <
 		filter_data->exclude_depth;
 	int filter_res;
 	int already_seen;
 
 	/*
@@ -158,47 +147,47 @@ static enum list_objects_filter_result filter_trees_depth(
 	switch (filter_situation) {
 	default:
 		BUG("unknown filter_situation: %d", filter_situation);
 
 	case LOFS_END_TREE:
 		assert(obj->type == OBJ_TREE);
 		filter_data->current_depth--;
 		return LOFR_ZERO;
 
 	case LOFS_BLOB:
-		filter_trees_update_omits(obj, filter_data, include_it);
+		filter_trees_update_omits(obj, omits, include_it);
 		return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO;
 
 	case LOFS_BEGIN_TREE:
 		seen_info = oidmap_get(
 			&filter_data->seen_at_depth, &obj->oid);
 		if (!seen_info) {
 			seen_info = xcalloc(1, sizeof(*seen_info));
 			oidcpy(&seen_info->base.oid, &obj->oid);
 			seen_info->depth = filter_data->current_depth;
 			oidmap_put(&filter_data->seen_at_depth, seen_info);
 			already_seen = 0;
 		} else {
 			already_seen =
 				filter_data->current_depth >= seen_info->depth;
 		}
 
 		if (already_seen) {
 			filter_res = LOFR_SKIP_TREE;
 		} else {
 			int been_omitted = filter_trees_update_omits(
-				obj, filter_data, include_it);
+				obj, omits, include_it);
 			seen_info->depth = filter_data->current_depth;
 
 			if (include_it)
 				filter_res = LOFR_DO_SHOW;
-			else if (filter_data->omits && !been_omitted)
+			else if (omits && !been_omitted)
 				/*
 				 * Must update omit information of children
 				 * recursively; they have not been omitted yet.
 				 */
 				filter_res = LOFR_ZERO;
 			else
 				filter_res = LOFR_SKIP_TREE;
 		}
 
 		filter_data->current_depth++;
@@ -208,50 +197,48 @@ static enum list_objects_filter_result filter_trees_depth(
 
 static void filter_trees_free(void *filter_data) {
 	struct filter_trees_depth_data *d = filter_data;
 	if (!d)
 		return;
 	oidmap_free(&d->seen_at_depth, 1);
 	free(d);
 }
 
 static void filter_trees_depth__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
 	struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
 	oidmap_init(&d->seen_at_depth, 0);
 	d->exclude_depth = filter_options->tree_exclude_depth;
 	d->current_depth = 0;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_trees_depth;
 	filter->free_fn = filter_trees_free;
 }
 
 /*
  * A filter for list-objects to omit large blobs.
  * And to OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_blobs_limit_data {
-	struct oidset *omits;
 	unsigned long max_bytes;
 };
 
 static enum list_objects_filter_result filter_blobs_limit(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
+	struct oidset *omits,
 	void *filter_data_)
 {
 	struct filter_blobs_limit_data *filter_data = filter_data_;
 	unsigned long object_length;
 	enum object_type t;
 
 	switch (filter_situation) {
 	default:
 		BUG("unknown filter_situation: %d", filter_situation);
 
@@ -275,38 +262,36 @@ static enum list_objects_filter_result filter_blobs_limit(
 			 * apply the size filter criteria.  Be conservative
 			 * and force show it (and let the caller deal with
 			 * the ambiguity).
 			 */
 			goto include_it;
 		}
 
 		if (object_length < filter_data->max_bytes)
 			goto include_it;
 
-		if (filter_data->omits)
-			oidset_insert(filter_data->omits, &obj->oid);
+		if (omits)
+			oidset_insert(omits, &obj->oid);
 		return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 	}
 
 include_it:
-	if (filter_data->omits)
-		oidset_remove(filter_data->omits, &obj->oid);
+	if (omits)
+		oidset_remove(omits, &obj->oid);
 	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
 static void filter_blobs_limit__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
 	struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
 	d->max_bytes = filter_options->blob_limit_value;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_blobs_limit;
 	filter->free_fn = free;
 }
 
 /*
  * A filter driven by a sparse-checkout specification to only
  * include blobs that a sparse checkout would populate.
@@ -330,33 +315,33 @@ struct frame {
 	 * omitted objects.
 	 *
 	 * 0 if everything (recursively) contained in this directory
 	 * has been explicitly included (SHOWN) in the result and
 	 * the directory may be short-cut later in the traversal.
 	 */
 	unsigned child_prov_omit : 1;
 };
 
 struct filter_sparse_data {
-	struct oidset *omits;
 	struct exclude_list el;
 
 	size_t nr, alloc;
 	struct frame *array_frame;
 };
 
 static enum list_objects_filter_result filter_sparse(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
+	struct oidset *omits,
 	void *filter_data_)
 {
 	struct filter_sparse_data *filter_data = filter_data_;
 	int val, dtype;
 	struct frame *frame;
 
 	switch (filter_situation) {
 	default:
 		BUG("unknown filter_situation: %d", filter_situation);
 
@@ -425,78 +410,75 @@ static enum list_objects_filter_result filter_sparse(
 
 		frame = &filter_data->array_frame[filter_data->nr];
 
 		dtype = DT_REG;
 		val = is_excluded_from_list(pathname, strlen(pathname),
 					    filename, &dtype, &filter_data->el,
 					    r->index);
 		if (val < 0)
 			val = frame->defval;
 		if (val > 0) {
-			if (filter_data->omits)
-				oidset_remove(filter_data->omits, &obj->oid);
+			if (omits)
+				oidset_remove(omits, &obj->oid);
 			return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 		}
 
 		/*
 		 * Provisionally omit it.  We've already established that
 		 * this pathname is not in the sparse-checkout specification
 		 * with the CURRENT pathname, so we *WANT* to omit this blob.
 		 *
 		 * However, a pathname elsewhere in the tree may also
 		 * reference this same blob, so we cannot reject it yet.
 		 * Leave the LOFR_ bits unset so that if the blob appears
 		 * again in the traversal, we will be asked again.
 		 */
-		if (filter_data->omits)
-			oidset_insert(filter_data->omits, &obj->oid);
+		if (omits)
+			oidset_insert(omits, &obj->oid);
 
 		/*
 		 * Remember that at least 1 blob in this tree was
 		 * provisionally omitted.  This prevents us from short
 		 * cutting the tree in future iterations.
 		 */
 		frame->child_prov_omit = 1;
 		return LOFR_ZERO;
 	}
 }
 
 
 static void filter_sparse_free(void *filter_data)
 {
 	struct filter_sparse_data *d = filter_data;
 	/* TODO free contents of 'd' */
 	free(d);
 }
 
 static void filter_sparse_oid__init(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter)
 {
 	struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
-	d->omits = omitted;
 	if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 					   NULL, 0, &d->el) < 0)
 		die("could not load filter specification");
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_sparse;
 	filter->free_fn = filter_sparse_free;
 }
 
 typedef void (*filter_init_fn)(
-	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter);
 
 /*
  * Must match "enum list_objects_filter_choice".
  */
 static filter_init_fn s_filters[] = {
 	NULL,
 	filter_blobs_none__init,
 	filter_blobs_limit__init,
@@ -515,35 +497,37 @@ struct filter *list_objects_filter__init(
 
 	if (filter_options->choice >= LOFC__COUNT)
 		BUG("invalid list-objects filter choice: %d",
 		    filter_options->choice);
 
 	init_fn = s_filters[filter_options->choice];
 	if (!init_fn)
 		return NULL;
 
 	filter = xcalloc(1, sizeof(*filter));
-	init_fn(omitted, filter_options, filter);
+	filter->omits = omitted;
+	init_fn(filter_options, filter);
 	return filter;
 }
 
 enum list_objects_filter_result list_objects_filter__filter_object(
 	struct repository *r,
 	enum list_objects_filter_situation filter_situation,
 	struct object *obj,
 	const char *pathname,
 	const char *filename,
 	struct filter *filter)
 {
 	if (filter && (obj->flags & NOT_USER_GIVEN))
 		return filter->filter_object_fn(r, filter_situation, obj,
 						pathname, filename,
+						filter->omits,
 						filter->filter_data);
 	/*
 	 * No filter is active or user gave object explicitly. Choose default
 	 * behavior based on filter situation.
 	 */
 	if (filter_situation == LOFS_END_TREE)
 		return 0;
 	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 03/10] list-objects-filter-options: always supply *errbuf
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
  2019-06-15  0:40   ` [PATCH v4 01/10] list-objects-filter: make API easier to use Matthew DeVore
  2019-06-15  0:40   ` [PATCH v4 02/10] list-objects-filter: put omits set in filter struct Matthew DeVore
@ 2019-06-15  0:40   ` Matthew DeVore
  2019-06-15  0:40   ` [PATCH v4 04/10] list-objects-filter: implement composite filters Matthew DeVore
                     ` (7 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-15  0:40 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

Making errbuf an optional argument complicates error reporting. Fix this
by making all callers supply an errbuf, even if they may ignore it. This
will be important in follow-up patches where the filter-spec parsing has
more pitfalls and possible errors.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter-options.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index a15d0f7829..8e7b4f96fa 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -23,47 +23,40 @@
  * convenience of the current command.
  */
 static int gently_parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
 	struct strbuf *errbuf)
 {
 	const char *v0;
 
 	if (filter_options->choice) {
-		if (errbuf) {
-			strbuf_addstr(
-				errbuf,
-				_("multiple filter-specs cannot be combined"));
-		}
+		strbuf_addstr(
+			errbuf, _("multiple filter-specs cannot be combined"));
 		return 1;
 	}
 
 	filter_options->filter_spec = strdup(arg);
 
 	if (!strcmp(arg, "blob:none")) {
 		filter_options->choice = LOFC_BLOB_NONE;
 		return 0;
 
 	} else if (skip_prefix(arg, "blob:limit=", &v0)) {
 		if (git_parse_ulong(v0, &filter_options->blob_limit_value)) {
 			filter_options->choice = LOFC_BLOB_LIMIT;
 			return 0;
 		}
 
 	} else if (skip_prefix(arg, "tree:", &v0)) {
 		if (!git_parse_ulong(v0, &filter_options->tree_exclude_depth)) {
-			if (errbuf) {
-				strbuf_addstr(
-					errbuf,
-					_("expected 'tree:<depth>'"));
-			}
+			strbuf_addstr(errbuf, _("expected 'tree:<depth>'"));
 			return 1;
 		}
 		filter_options->choice = LOFC_TREE_DEPTH;
 		return 0;
 
 	} else if (skip_prefix(arg, "sparse:oid=", &v0)) {
 		struct object_context oc;
 		struct object_id sparse_oid;
 
 		/*
@@ -83,22 +76,21 @@ static int gently_parse_list_objects_filter(
 				errbuf,
 				_("sparse:path filters support has been dropped"));
 		}
 		return 1;
 	}
 	/*
 	 * Please update _git_fetch() in git-completion.bash when you
 	 * add new filters
 	 */
 
-	if (errbuf)
-		strbuf_addf(errbuf, "invalid filter-spec '%s'", arg);
+	strbuf_addf(errbuf, "invalid filter-spec '%s'", arg);
 
 	memset(filter_options, 0, sizeof(*filter_options));
 	return 1;
 }
 
 int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 			      const char *arg)
 {
 	struct strbuf buf = STRBUF_INIT;
 	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
@@ -168,19 +160,22 @@ void partial_clone_register(
 	 */
 	core_partial_clone_filter_default =
 		xstrdup(filter_options->filter_spec);
 	git_config_set("core.partialclonefilter",
 		       core_partial_clone_filter_default);
 }
 
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options)
 {
+	struct strbuf errbuf = STRBUF_INIT;
+
 	/*
 	 * Parse default value, but silently ignore it if it is invalid.
 	 */
 	if (!core_partial_clone_filter_default)
 		return;
 	gently_parse_list_objects_filter(filter_options,
 					 core_partial_clone_filter_default,
-					 NULL);
+					 &errbuf);
+	strbuf_release(&errbuf);
 }
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 04/10] list-objects-filter: implement composite filters
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
                     ` (2 preceding siblings ...)
  2019-06-15  0:40   ` [PATCH v4 03/10] list-objects-filter-options: always supply *errbuf Matthew DeVore
@ 2019-06-15  0:40   ` Matthew DeVore
  2019-06-18  8:42     ` Johannes Schindelin
  2019-06-22  0:26     ` Jonathan Tan
  2019-06-15  0:40   ` [PATCH v4 05/10] list-objects-filter-options: move error check up Matthew DeVore
                     ` (6 subsequent siblings)
  10 siblings, 2 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-15  0:40 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore, Jeff Hostetler, Junio C Hamano

Allow combining filters such that only objects accepted by all filters
are shown. The motivation for this is to allow getting directory
listings without also fetching blobs. This can be done by combining
blob:none with tree:<depth>. There are massive repositories that have
larger-than-expected trees - even if you include only a single commit.

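As of this patch, the combined filter must be passed to rev-list in the
following form, with each sub-filter URL-encoded:

	--filter=combine:<FILTER1>+<FILTER2>

A follow-up patch in this series adds the repeated-flag form:

	--filter=<FILTER1> --filter=<FILTER2> ...

Such usage is currently an error, so giving it a meaning is backwards-
compatible.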

The URL-encoding scheme is being introduced before the repeated flag
logic, and the user-facing documentation for URL-encoding is being
withheld until the repeated flag feature is implemented. The
URL-encoding is in general not meant to be used directly by the user,
and it is better to describe the URL-encoding feature in terms of the
repeated flag.

Helped-by: Emily Shaffer <emilyshaffer@google.com>
Helped-by: Jeff Hostetler <git@jeffhostetler.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter-options.c       | 106 ++++++++++++++++++-
 list-objects-filter-options.h       |  17 ++-
 list-objects-filter.c               | 159 ++++++++++++++++++++++++++++
 t/t6112-rev-list-filters-objects.sh | 151 +++++++++++++++++++++++++-
 url.c                               |   6 ++
 url.h                               |   8 ++
 6 files changed, 441 insertions(+), 6 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 8e7b4f96fa..1c402c6059 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -1,18 +1,24 @@
 #include "cache.h"
 #include "commit.h"
 #include "config.h"
 #include "revision.h"
 #include "argv-array.h"
 #include "list-objects.h"
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
+#include "url.h"
+
+static int parse_combine_filter(
+	struct list_objects_filter_options *filter_options,
+	const char *arg,
+	struct strbuf *errbuf);
 
 /*
  * Parse value of the argument to the "filter" keyword.
  * On the command line this looks like:
  *       --filter=<arg>
  * and in the pack protocol as:
  *       "filter" SP <arg>
  *
  * The filter keyword will be used by many commands.
  * See Documentation/rev-list-options.txt for allowed values for <arg>.
@@ -28,22 +34,20 @@ static int gently_parse_list_objects_filter(
 	struct strbuf *errbuf)
 {
 	const char *v0;
 
 	if (filter_options->choice) {
 		strbuf_addstr(
 			errbuf, _("multiple filter-specs cannot be combined"));
 		return 1;
 	}
 
-	filter_options->filter_spec = strdup(arg);
-
 	if (!strcmp(arg, "blob:none")) {
 		filter_options->choice = LOFC_BLOB_NONE;
 		return 0;
 
 	} else if (skip_prefix(arg, "blob:limit=", &v0)) {
 		if (git_parse_ulong(v0, &filter_options->blob_limit_value)) {
 			filter_options->choice = LOFC_BLOB_LIMIT;
 			return 0;
 		}
 
@@ -70,36 +74,125 @@ static int gently_parse_list_objects_filter(
 		filter_options->choice = LOFC_SPARSE_OID;
 		return 0;
 
 	} else if (skip_prefix(arg, "sparse:path=", &v0)) {
 		if (errbuf) {
 			strbuf_addstr(
 				errbuf,
 				_("sparse:path filters support has been dropped"));
 		}
 		return 1;
+
+	} else if (skip_prefix(arg, "combine:", &v0)) {
+		return parse_combine_filter(filter_options, v0, errbuf);
+
 	}
 	/*
 	 * Please update _git_fetch() in git-completion.bash when you
 	 * add new filters
 	 */
 
 	strbuf_addf(errbuf, "invalid filter-spec '%s'", arg);
 
 	memset(filter_options, 0, sizeof(*filter_options));
 	return 1;
 }
 
+static const char *RESERVED_NON_WS = "~`!@#$^&*()[]{}\\;'\",<>?";
+
+static int has_reserved_character(
+	struct strbuf *sub_spec, struct strbuf *errbuf)
+{
+	const char *c = sub_spec->buf;
+	while (*c) {
+		if (*c <= ' ' || strchr(RESERVED_NON_WS, *c)) {
+			strbuf_addf(errbuf,
+				    "must escape char in sub-filter-spec: '%c'",
+				    *c);
+			return 1;
+		}
+		c++;
+	}
+
+	return 0;
+}
+
+static int parse_combine_subfilter(
+	struct list_objects_filter_options *filter_options,
+	struct strbuf *subspec,
+	struct strbuf *errbuf)
+{
+	size_t new_index = filter_options->sub_nr++;
+	char *decoded;
+	int result;
+
+	ALLOC_GROW(filter_options->sub, filter_options->sub_nr,
+		   filter_options->sub_alloc);
+	memset(&filter_options->sub[new_index], 0,
+	       sizeof(*filter_options->sub));
+
+	decoded = url_percent_decode(subspec->buf);
+
+	result = has_reserved_character(subspec, errbuf) ||
+		gently_parse_list_objects_filter(
+			&filter_options->sub[new_index], decoded, errbuf);
+
+	free(decoded);
+	return result;
+}
+
+static int parse_combine_filter(
+	struct list_objects_filter_options *filter_options,
+	const char *arg,
+	struct strbuf *errbuf)
+{
+	struct strbuf **subspecs = strbuf_split_str(arg, '+', 0);
+	size_t sub;
+	int result = 0;
+
+	if (!subspecs[0]) {
+		strbuf_addf(errbuf,
+			    _("expected something after combine:"));
+		result = 1;
+		goto cleanup;
+	}
+
+	for (sub = 0; subspecs[sub] && !result; sub++) {
+		if (subspecs[sub + 1]) {
+			/*
+			 * This is not the last subspec. Remove trailing "+" so
+			 * we can parse it.
+			 */
+			size_t last = subspecs[sub]->len - 1;
+			assert(subspecs[sub]->buf[last] == '+');
+			strbuf_remove(subspecs[sub], last, 1);
+		}
+		result = parse_combine_subfilter(
+			filter_options, subspecs[sub], errbuf);
+	}
+
+	filter_options->choice = LOFC_COMBINE;
+
+cleanup:
+	strbuf_list_free(subspecs);
+	if (result) {
+		list_objects_filter_release(filter_options);
+		memset(filter_options, 0, sizeof(*filter_options));
+	}
+	return result;
+}
+
 int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 			      const char *arg)
 {
 	struct strbuf buf = STRBUF_INIT;
+	filter_options->filter_spec = strdup(arg);
 	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
 		die("%s", buf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
@@ -122,22 +215,29 @@ void expand_list_objects_filter_spec(
 	else if (filter->choice == LOFC_TREE_DEPTH)
 		strbuf_addf(expanded_spec, "tree:%lu",
 			    filter->tree_exclude_depth);
 	else
 		strbuf_addstr(expanded_spec, filter->filter_spec);
 }
 
 void list_objects_filter_release(
 	struct list_objects_filter_options *filter_options)
 {
+	size_t sub;
+
+	if (!filter_options)
+		return;
 	free(filter_options->filter_spec);
 	free(filter_options->sparse_oid_value);
+	for (sub = 0; sub < filter_options->sub_nr; sub++)
+		list_objects_filter_release(&filter_options->sub[sub]);
+	free(filter_options->sub);
 	memset(filter_options, 0, sizeof(*filter_options));
 }
 
 void partial_clone_register(
 	const char *remote,
 	const struct list_objects_filter_options *filter_options)
 {
 	/*
 	 * Record the name of the partial clone remote in the
 	 * config and in the global variable -- the latter is
@@ -167,15 +267,17 @@ void partial_clone_register(
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 
 	/*
 	 * Parse default value, but silently ignore it if it is invalid.
 	 */
 	if (!core_partial_clone_filter_default)
 		return;
+
+	filter_options->filter_spec = strdup(core_partial_clone_filter_default);
 	gently_parse_list_objects_filter(filter_options,
 					 core_partial_clone_filter_default,
 					 &errbuf);
 	strbuf_release(&errbuf);
 }
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index c54f0000fb..789faef1e5 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -6,20 +6,21 @@
 
 /*
  * The list of defined filters for list-objects.
  */
 enum list_objects_filter_choice {
 	LOFC_DISABLED = 0,
 	LOFC_BLOB_NONE,
 	LOFC_BLOB_LIMIT,
 	LOFC_TREE_DEPTH,
 	LOFC_SPARSE_OID,
+	LOFC_COMBINE,
 	LOFC__COUNT /* must be last */
 };
 
 struct list_objects_filter_options {
 	/*
 	 * 'filter_spec' is the raw argument value given on the command line
 	 * or protocol request.  (The part after the "--keyword=".)  For
 	 * commands that launch filtering sub-processes, or for communication
 	 * over the network, don't use this value; use the result of
 	 * expand_list_objects_filter_spec() instead.
@@ -31,27 +32,37 @@ struct list_objects_filter_options {
 	 * the filtering algorithm to use.
 	 */
 	enum list_objects_filter_choice choice;
 
 	/*
 	 * Choice is LOFC_DISABLED because "--no-filter" was requested.
 	 */
 	unsigned int no_filter : 1;
 
 	/*
-	 * Parsed values (fields) from within the filter-spec.  These are
-	 * choice-specific; not all values will be defined for any given
-	 * choice.
+	 * BEGIN choice-specific parsed values from within the filter-spec. Only
+	 * some values will be defined for any given choice.
 	 */
+
 	struct object_id *sparse_oid_value;
 	unsigned long blob_limit_value;
 	unsigned long tree_exclude_depth;
+
+	/* LOFC_COMBINE values */
+
+	/* This array contains all the subfilters which this filter combines. */
+	size_t sub_nr, sub_alloc;
+	struct list_objects_filter_options *sub;
+
+	/*
+	 * END choice-specific parsed values.
+	 */
 };
 
 /* Normalized command line arguments */
 #define CL_ARG__FILTER "filter"
 
 int parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg);
 
 int opt_parse_list_objects_filter(const struct option *opt,
diff --git a/list-objects-filter.c b/list-objects-filter.c
index b259039bd0..8d015bf164 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -19,30 +19,45 @@
  * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
  * that have been shown, but should be revisited if they appear
  * in the traversal (until we mark it SEEN).  This is a way to
  * let us silently de-dup calls to show() in the caller.  This
  * is subtly different from the "revision.h:SHOWN" and the
  * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
  * the non-de-dup usage in pack-bitmap.c
  */
 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
 
+struct subfilter {
+	struct filter *filter;
+	struct oidset seen;
+	struct oidset omits;
+	struct object_id skip_tree;
+	unsigned is_skipping_tree : 1;
+};
+
 struct filter {
 	enum list_objects_filter_result (*filter_object_fn)(
 		struct repository *r,
 		enum list_objects_filter_situation filter_situation,
 		struct object *obj,
 		const char *pathname,
 		const char *filename,
 		struct oidset *omits,
 		void *filter_data);
 
+	/*
+	 * Optional. If this function is supplied and the filter needs to
+	 * collect omits, then this function is called once before free_fn is
+	 * called.
+	 */
+	void (*finalize_omits_fn)(struct oidset *omits, void *filter_data);
+
 	void (*free_fn)(void *filter_data);
 
 	void *filter_data;
 
 	/* If non-NULL, the filter collects a list of the omitted OIDs here. */
 	struct oidset *omits;
 };
 
 static enum list_objects_filter_result filter_blobs_none(
 	struct repository *r,
@@ -464,33 +479,175 @@ static void filter_sparse_oid__init(
 
 	ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 	d->array_frame[d->nr].defval = 0; /* default to include */
 	d->array_frame[d->nr].child_prov_omit = 0;
 
 	filter->filter_data = d;
 	filter->filter_object_fn = filter_sparse;
 	filter->free_fn = filter_sparse_free;
 }
 
+/* A filter which only shows objects shown by all sub-filters. */
+struct combine_filter_data {
+	struct subfilter *sub;
+	size_t nr;
+};
+
+static int should_delegate(enum list_objects_filter_situation filter_situation,
+			   struct object *obj,
+			   struct subfilter *sub)
+{
+	if (!sub->is_skipping_tree)
+		return 1;
+	if (filter_situation == LOFS_END_TREE &&
+		oideq(&obj->oid, &sub->skip_tree)) {
+		sub->is_skipping_tree = 0;
+		return 1;
+	}
+	return 0;
+}
+
+static enum list_objects_filter_result process_subfilter(
+	struct repository *r,
+	enum list_objects_filter_situation filter_situation,
+	struct object *obj,
+	const char *pathname,
+	const char *filename,
+	struct subfilter *sub)
+{
+	enum list_objects_filter_result result;
+
+	/*
+	 * Check should_delegate before oidset_contains so that
+	 * is_skipping_tree gets unset even when the object is marked as seen.
+	 * As of this writing, no filter uses LOFR_MARK_SEEN on trees that also
+	 * uses LOFR_SKIP_TREE, so the ordering is only theoretically
+	 * important. Be cautious if you change the order of the below checks
+	 * and more filters have been added!
+	 */
+	if (!should_delegate(filter_situation, obj, sub))
+		return LOFR_ZERO;
+	if (oidset_contains(&sub->seen, &obj->oid))
+		return LOFR_ZERO;
+
+	result = list_objects_filter__filter_object(
+		r, filter_situation, obj, pathname, filename, sub->filter);
+
+	if (result & LOFR_MARK_SEEN)
+		oidset_insert(&sub->seen, &obj->oid);
+
+	if (result & LOFR_SKIP_TREE) {
+		sub->is_skipping_tree = 1;
+		sub->skip_tree = obj->oid;
+	}
+
+	return result;
+}
+
+static enum list_objects_filter_result filter_combine(
+	struct repository *r,
+	enum list_objects_filter_situation filter_situation,
+	struct object *obj,
+	const char *pathname,
+	const char *filename,
+	struct oidset *omits,
+	void *filter_data)
+{
+	struct combine_filter_data *d = filter_data;
+	enum list_objects_filter_result combined_result =
+		LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE;
+	size_t sub;
+
+	for (sub = 0; sub < d->nr; sub++) {
+		enum list_objects_filter_result sub_result = process_subfilter(
+			r, filter_situation, obj, pathname, filename,
+			&d->sub[sub]);
+		if (!(sub_result & LOFR_DO_SHOW))
+			combined_result &= ~LOFR_DO_SHOW;
+		if (!(sub_result & LOFR_MARK_SEEN))
+			combined_result &= ~LOFR_MARK_SEEN;
+		if (!d->sub[sub].is_skipping_tree)
+			combined_result &= ~LOFR_SKIP_TREE;
+	}
+
+	return combined_result;
+}
+
+static void filter_combine__free(void *filter_data)
+{
+	struct combine_filter_data *d = filter_data;
+	size_t sub;
+	for (sub = 0; sub < d->nr; sub++) {
+		list_objects_filter__free(d->sub[sub].filter);
+		oidset_clear(&d->sub[sub].seen);
+		if (d->sub[sub].omits.set.size)
+			BUG("expected oidset to be cleared already");
+	}
+	free(d->sub);
+}
+
+static void add_all(struct oidset *dest, struct oidset *src) {
+	struct oidset_iter iter;
+	struct object_id *src_oid;
+
+	oidset_iter_init(src, &iter);
+	while ((src_oid = oidset_iter_next(&iter)) != NULL)
+		oidset_insert(dest, src_oid);
+}
+
+static void filter_combine__finalize_omits(
+	struct oidset *omits,
+	void *filter_data)
+{
+	struct combine_filter_data *d = filter_data;
+	size_t sub;
+
+	for (sub = 0; sub < d->nr; sub++) {
+		add_all(omits, &d->sub[sub].omits);
+		oidset_clear(&d->sub[sub].omits);
+	}
+}
+
+static void filter_combine__init(
+	struct list_objects_filter_options *filter_options,
+	struct filter* filter)
+{
+	struct combine_filter_data *d = xcalloc(1, sizeof(*d));
+	size_t sub;
+
+	d->nr = filter_options->sub_nr;
+	d->sub = xcalloc(d->nr, sizeof(*d->sub));
+	for (sub = 0; sub < d->nr; sub++)
+		d->sub[sub].filter = list_objects_filter__init(
+			filter->omits ? &d->sub[sub].omits : NULL,
+			&filter_options->sub[sub]);
+
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_combine;
+	filter->free_fn = filter_combine__free;
+	filter->finalize_omits_fn = filter_combine__finalize_omits;
+}
+
 typedef void (*filter_init_fn)(
 	struct list_objects_filter_options *filter_options,
 	struct filter *filter);
 
 /*
  * Must match "enum list_objects_filter_choice".
  */
 static filter_init_fn s_filters[] = {
 	NULL,
 	filter_blobs_none__init,
 	filter_blobs_limit__init,
 	filter_trees_depth__init,
 	filter_sparse_oid__init,
+	filter_combine__init,
 };
 
 struct filter *list_objects_filter__init(
 	struct oidset *omitted,
 	struct list_objects_filter_options *filter_options)
 {
 	struct filter *filter;
 	filter_init_fn init_fn;
 
 	assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
@@ -528,13 +685,15 @@ enum list_objects_filter_result list_objects_filter__filter_object(
 	 */
 	if (filter_situation == LOFS_END_TREE)
 		return 0;
 	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
 void list_objects_filter__free(struct filter *filter)
 {
 	if (!filter)
 		return;
+	if (filter->finalize_omits_fn && filter->omits)
+		filter->finalize_omits_fn(filter->omits, filter->filter_data);
 	filter->free_fn(filter->filter_data);
 	free(filter);
 }
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index acd7f5ab80..05d4f2e9c2 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -271,21 +271,33 @@ test_expect_success 'verify tree:0 includes trees in "filtered" output' '
 # Make sure tree:0 does not iterate through any trees.
 
 test_expect_success 'verify skipping tree iteration when not collecting omits' '
 	GIT_TRACE=1 git -C r3 rev-list \
 		--objects --filter=tree:0 HEAD 2>filter_trace &&
 	grep "Skipping contents of tree [.][.][.]" filter_trace >actual &&
 	# One line for each commit traversed.
 	test_line_count = 2 actual &&
 
 	# Make sure no other trees were considered besides the root.
-	! grep "Skipping contents of tree [^.]" filter_trace
+	! grep "Skipping contents of tree [^.]" filter_trace &&
+
+	# Try this again with "combine:". If both sub-filters are skipping
+	# trees, the composite filter should also skip trees. This is not
+	# important unless the user does combine:tree:X+tree:Y or another filter
+	# besides "tree:" is implemented in the future which can skip trees.
+	GIT_TRACE=1 git -C r3 rev-list \
+		--objects --filter=combine:tree:1+tree:3 HEAD 2>filter_trace &&
+
+	# Only skip the dir1/ tree, which is shared between the two commits.
+	grep "Skipping contents of tree " filter_trace >actual &&
+	test_write_lines "Skipping contents of tree dir1/..." >expected &&
+	test_cmp expected actual
 '
 
 # Test tree:# filters.
 
 expect_has () {
 	commit=$1 &&
 	name=$2 &&
 
 	hash=$(git -C r3 rev-parse $commit:$name) &&
 	grep "^$hash $name$" actual
@@ -323,20 +335,126 @@ test_expect_success 'verify tree:3 includes everything expected' '
 	expect_has HEAD dir1/sparse1 &&
 	expect_has HEAD dir1/sparse2 &&
 	expect_has HEAD pattern &&
 	expect_has HEAD sparse1 &&
 	expect_has HEAD sparse2 &&
 
 	# There are also 2 commit objects
 	test_line_count = 10 actual
 '
 
+test_expect_success 'combine:... for a simple combination' '
+	git -C r3 rev-list --objects --filter=combine:tree:2+blob:none HEAD \
+		>actual &&
+
+	expect_has HEAD "" &&
+	expect_has HEAD~1 "" &&
+	expect_has HEAD dir1 &&
+
+	# There are also 2 commit objects
+	test_line_count = 5 actual
+'
+
+test_expect_success 'combine:... with URL encoding' '
+	git -C r3 rev-list --objects \
+		--filter=combine:tree%3a2+blob:%6Eon%65 HEAD >actual &&
+
+	expect_has HEAD "" &&
+	expect_has HEAD~1 "" &&
+	expect_has HEAD dir1 &&
+
+	# There are also 2 commit objects
+	test_line_count = 5 actual
+'
+
+expect_invalid_filter_spec () {
+	spec="$1" &&
+	err="$2" &&
+
+	test_must_fail git -C r3 rev-list --objects --filter="$spec" HEAD \
+		>actual 2>actual_stderr &&
+	test_must_be_empty actual &&
+	test_i18ngrep "$err" actual_stderr
+}
+
+test_expect_success 'combine:... while URL-encoding things that should not be' '
+	expect_invalid_filter_spec combine%3Atree:2+blob:none \
+		"invalid filter-spec"
+'
+
+test_expect_success 'combine: with nothing after the :' '
+	expect_invalid_filter_spec combine: "expected something after combine:"
+'
+
+test_expect_success 'parse error in first sub-filter in combine:' '
+	expect_invalid_filter_spec combine:tree:asdf+blob:none \
+		"expected .tree:<depth>."
+'
+
+test_expect_success 'combine:... with non-encoded reserved chars' '
+	expect_invalid_filter_spec combine:tree:2+sparse:@xyz \
+		"must escape char in sub-filter-spec: .@." &&
+	expect_invalid_filter_spec combine:tree:2+sparse:\` \
+		"must escape char in sub-filter-spec: .\`." &&
+	expect_invalid_filter_spec combine:tree:2+sparse:~abc \
+		"must escape char in sub-filter-spec: .\~."
+'
+
+test_expect_success 'validate err msg for "combine:<valid-filter>+"' '
+	expect_invalid_filter_spec combine:tree:2+ "expected .tree:<depth>."
+'
+
+test_expect_success 'combine:... with edge-case hex digits: Ff Aa 0 9' '
+	git -C r3 rev-list --objects --filter="combine:tree:2+bl%6Fb:n%6fne" \
+		HEAD >actual &&
+	test_line_count = 5 actual &&
+	git -C r3 rev-list --objects --filter="combine:tree%3A2+blob%3anone" \
+		HEAD >actual &&
+	test_line_count = 5 actual &&
+	git -C r3 rev-list --objects --filter="combine:tree:%30" HEAD >actual &&
+	test_line_count = 2 actual &&
+	git -C r3 rev-list --objects --filter="combine:tree:%39+blob:none" \
+		HEAD >actual &&
+	test_line_count = 5 actual
+'
+
+test_expect_success 'add a sparse pattern blob whose path has reserved chars' '
+	cp r3/pattern r3/pattern1+renamed% &&
+	git -C r3 add pattern1+renamed% &&
+	git -C r3 commit -m "add sparse pattern file with reserved chars"
+'
+
+test_expect_success 'combine:... with more than two sub-filters' '
+	git -C r3 rev-list --objects \
+		--filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern \
+		HEAD >actual &&
+
+	expect_has HEAD "" &&
+	expect_has HEAD~1 "" &&
+	expect_has HEAD~2 "" &&
+	expect_has HEAD dir1 &&
+	expect_has HEAD dir1/sparse1 &&
+	expect_has HEAD dir1/sparse2 &&
+
+	# Should also have 3 commits
+	test_line_count = 9 actual &&
+
+	# Try again, this time making sure the last sub-filter is only
+	# URL-decoded once.
+	cp actual expect &&
+
+	git -C r3 rev-list --objects \
+		--filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern1%2brenamed%25 \
+		HEAD >actual &&
+	test_cmp expect actual
+'
+
 # Test provisional omit collection logic with a repo that has objects appearing
 # at multiple depths - first deeper than the filter's threshold, then shallow.
 
 test_expect_success 'setup r4' '
 	git init r4 &&
 
 	echo foo > r4/foo &&
 	mkdir r4/subdir &&
 	echo bar > r4/subdir/bar &&
 
@@ -366,20 +484,51 @@ test_expect_success 'test tree:# filter provisional omit for blob and tree' '
 
 test_expect_success 'verify skipping tree iteration when collecting omits' '
 	GIT_TRACE=1 git -C r4 rev-list --filter-print-omitted \
 		--objects --filter=tree:0 HEAD 2>filter_trace &&
 	grep "^Skipping contents of tree " filter_trace >actual &&
 
 	echo "Skipping contents of tree subdir/..." >expect &&
 	test_cmp expect actual
 '
 
+test_expect_success 'setup r5' '
+	git init r5 &&
+	mkdir -p r5/subdir &&
+
+	echo 1     >r5/short-root          &&
+	echo 12345 >r5/long-root           &&
+	echo a     >r5/subdir/short-subdir &&
+	echo abcde >r5/subdir/long-subdir  &&
+
+	git -C r5 add short-root long-root subdir &&
+	git -C r5 commit -m "commit msg"
+'
+
+test_expect_success 'verify collecting omits in combined: filter' '
+	# Note that this test guards against the naive implementation of simply
+	# giving both filters the same "omits" set and expecting it to
+	# automatically merge them.
+	git -C r5 rev-list --objects --quiet --filter-print-omitted \
+		--filter=combine:tree:2+blob:limit=3 HEAD >actual &&
+
+	# Expect 0 trees/commits, 3 blobs omitted (all blobs except short-root)
+	omitted_1=$(echo 12345 | git hash-object --stdin) &&
+	omitted_2=$(echo a     | git hash-object --stdin) &&
+	omitted_3=$(echo abcde | git hash-object --stdin) &&
+
+	grep ~$omitted_1 actual &&
+	grep ~$omitted_2 actual &&
+	grep ~$omitted_3 actual &&
+	test_line_count = 3 actual
+'
+
 # Test tree:<depth> where a tree is iterated to twice - once where a subentry is
 # too deep to be included, and again where the blob inside it is shallow enough
 # to be included. This makes sure we don't use LOFR_MARK_SEEN incorrectly (we
 # can't use it because a tree can be iterated over again at a lower depth).
 
 test_expect_success 'tree:<depth> where we iterate over tree at two levels' '
 	git init r5 &&
 
 	mkdir -p r5/a/subdir/b &&
 	echo foo > r5/a/subdir/b/foo &&
diff --git a/url.c b/url.c
index 25576c390b..bdede647bc 100644
--- a/url.c
+++ b/url.c
@@ -79,20 +79,26 @@ char *url_decode_mem(const char *url, int len)
 
 	/* Skip protocol part if present */
 	if (colon && url < colon) {
 		strbuf_add(&out, url, colon - url);
 		len -= colon - url;
 		url = colon;
 	}
 	return url_decode_internal(&url, len, NULL, &out, 0);
 }
 
+char *url_percent_decode(const char *encoded)
+{
+	struct strbuf out = STRBUF_INIT;
+	return url_decode_internal(&encoded, strlen(encoded), NULL, &out, 0);
+}
+
 char *url_decode_parameter_name(const char **query)
 {
 	struct strbuf out = STRBUF_INIT;
 	return url_decode_internal(query, -1, "&=", &out, 1);
 }
 
 char *url_decode_parameter_value(const char **query)
 {
 	struct strbuf out = STRBUF_INIT;
 	return url_decode_internal(query, -1, "&", &out, 1);
diff --git a/url.h b/url.h
index 00b7d58c33..2a27c34277 100644
--- a/url.h
+++ b/url.h
@@ -1,16 +1,24 @@
 #ifndef URL_H
 #define URL_H
 
 struct strbuf;
 
 int is_url(const char *url);
 int is_urlschemechar(int first_flag, int ch);
 char *url_decode(const char *url);
 char *url_decode_mem(const char *url, int len);
+
+/*
+ * Similar to the url_decode_{,mem} methods above, but doesn't assume there
+ * is a scheme followed by a : at the start of the string. Instead, %-sequences
+ * before any : are also parsed.
+ */
+char *url_percent_decode(const char *encoded);
+
 char *url_decode_parameter_name(const char **query);
 char *url_decode_parameter_value(const char **query);
 
 void end_url_with_slash(struct strbuf *buf, const char *url);
 void str_end_url_with_slash(const char *url, char **dest);
 
 #endif /* URL_H */
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 05/10] list-objects-filter-options: move error check up
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
                     ` (3 preceding siblings ...)
  2019-06-15  0:40   ` [PATCH v4 04/10] list-objects-filter: implement composite filters Matthew DeVore
@ 2019-06-15  0:40   ` Matthew DeVore
  2019-06-15  0:40   ` [PATCH v4 06/10] list-objects-filter-options: make filter_spec a string_list Matthew DeVore
                     ` (5 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-15  0:40 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

Move the check that filter_options->choice is already set to a point higher
in the call stack. It can only be already set when the gentle parse function
is called from one of the two call sites.

This is important because in an upcoming patch this may or may not be an
error, and whether it is an error is only known to the
parse_list_objects_filter function.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter-options.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 1c402c6059..ab2c983031 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -28,25 +28,22 @@ static int parse_combine_filter(
  * expand_list_objects_filter_spec() first).  We also "intern" the arg for the
  * convenience of the current command.
  */
 static int gently_parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
 	struct strbuf *errbuf)
 {
 	const char *v0;
 
-	if (filter_options->choice) {
-		strbuf_addstr(
-			errbuf, _("multiple filter-specs cannot be combined"));
-		return 1;
-	}
+	if (filter_options->choice)
+		BUG("filter_options already populated");
 
 	if (!strcmp(arg, "blob:none")) {
 		filter_options->choice = LOFC_BLOB_NONE;
 		return 0;
 
 	} else if (skip_prefix(arg, "blob:limit=", &v0)) {
 		if (git_parse_ulong(v0, &filter_options->blob_limit_value)) {
 			filter_options->choice = LOFC_BLOB_LIMIT;
 			return 0;
 		}
@@ -178,20 +175,22 @@ static int parse_combine_filter(
 		list_objects_filter_release(filter_options);
 		memset(filter_options, 0, sizeof(*filter_options));
 	}
 	return result;
 }
 
 int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 			      const char *arg)
 {
 	struct strbuf buf = STRBUF_INIT;
+	if (filter_options->choice)
+		die(_("multiple filter-specs cannot be combined"));
 	filter_options->filter_spec = strdup(arg);
 	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
 		die("%s", buf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 06/10] list-objects-filter-options: make filter_spec a string_list
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
                     ` (4 preceding siblings ...)
  2019-06-15  0:40   ` [PATCH v4 05/10] list-objects-filter-options: move error check up Matthew DeVore
@ 2019-06-15  0:40   ` Matthew DeVore
  2019-06-22  0:37     ` Jonathan Tan
  2019-06-15  0:40   ` [PATCH v4 07/10] strbuf: give URL-encoding API a char predicate fn Matthew DeVore
                     ` (4 subsequent siblings)
  10 siblings, 1 reply; 57+ messages in thread
From: Matthew DeVore @ 2019-06-15  0:40 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore, Junio C Hamano

Make the filter_spec string a string_list rather than a raw C string.
The list of strings must be concatenated to make a complete
filter_spec. A future patch will use this capability to build "combine:"
filter specs gradually.

A strbuf would seem to be a more natural choice for this object, but it
unfortunately requires initialization besides just zero'ing out the
memory.  This results in all container structs, and all containers of
those structs, etc., also requiring initialization. Initializing them
all would be more cumbersome than simply using a string_list, which
behaves properly when its contents are zero'd.

For the purposes of code simplification, change behavior in how filter
specs are conveyed over the protocol: do not normalize the tree:<depth>
filter specs since there should be no server in existence that supports
tree:# but not tree:#k etc.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 builtin/clone.c                     |  8 ++---
 builtin/fetch.c                     |  9 ++----
 builtin/rev-list.c                  |  6 ++--
 fetch-pack.c                        | 20 ++++--------
 list-objects-filter-options.c       | 50 ++++++++++++++++++++---------
 list-objects-filter-options.h       | 27 +++++++++++-----
 t/t6112-rev-list-filters-objects.sh |  7 ----
 transport-helper.c                  | 10 ++----
 upload-pack.c                       | 11 +++----
 9 files changed, 78 insertions(+), 70 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index e3231864ca..921df72d84 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1134,27 +1134,25 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 		transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
 
 	if (option_upload_pack)
 		transport_set_option(transport, TRANS_OPT_UPLOADPACK,
 				     option_upload_pack);
 
 	if (server_options.nr)
 		transport->server_options = &server_options;
 
 	if (filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&filter_options,
-						&expanded_filter_spec);
+		const char *spec =
+			expand_list_objects_filter_spec(&filter_options);
 		transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
-				     expanded_filter_spec.buf);
+				     spec);
 		transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
-		strbuf_release(&expanded_filter_spec);
 	}
 
 	if (transport->smart_options && !deepen && !filter_options.choice)
 		transport->smart_options->check_self_contained_and_connected = 1;
 
 
 	argv_array_push(&ref_prefixes, "HEAD");
 	refspec_ref_prefixes(&remote->fetch, &ref_prefixes);
 	if (option_branch)
 		expand_ref_prefix(&ref_prefixes, option_branch);
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 4ba63d5ac6..dee89e1a19 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1181,27 +1181,24 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
 	if (deepen && deepen_since)
 		set_option(transport, TRANS_OPT_DEEPEN_SINCE, deepen_since);
 	if (deepen && deepen_not.nr)
 		set_option(transport, TRANS_OPT_DEEPEN_NOT,
 			   (const char *)&deepen_not);
 	if (deepen_relative)
 		set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes");
 	if (update_shallow)
 		set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
 	if (filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&filter_options,
-						&expanded_filter_spec);
-		set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
-			   expanded_filter_spec.buf);
+		const char *spec =
+			expand_list_objects_filter_spec(&filter_options);
+		set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, spec);
 		set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
-		strbuf_release(&expanded_filter_spec);
 	}
 	if (negotiation_tip.nr) {
 		if (transport->smart_options)
 			add_negotiation_tips(transport->smart_options);
 		else
 			warning("Ignoring --negotiation-tip because the protocol does not support it.");
 	}
 	return transport;
 }
 
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 660172b014..68acbe8fd2 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -459,22 +459,24 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 			show_progress = arg;
 			continue;
 		}
 
 		if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) {
 			parse_list_objects_filter(&filter_options, arg);
 			if (filter_options.choice && !revs.blob_objects)
 				die(_("object filtering requires --objects"));
 			if (filter_options.choice == LOFC_SPARSE_OID &&
 			    !filter_options.sparse_oid_value)
-				die(_("invalid sparse value '%s'"),
-				    filter_options.filter_spec);
+				die(
+					_("invalid sparse value '%s'"),
+					list_objects_filter_spec(
+						&filter_options));
 			continue;
 		}
 		if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) {
 			list_objects_filter_set_no_filter(&filter_options);
 			continue;
 		}
 		if (!strcmp(arg, "--filter-print-omitted")) {
 			arg_print_omitted = 1;
 			continue;
 		}
diff --git a/fetch-pack.c b/fetch-pack.c
index 1c10f54e78..72e13b0a1d 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -332,26 +332,23 @@ static int find_common(struct fetch_negotiator *negotiator,
 		packet_buf_write(&req_buf, "deepen-since %"PRItime, max_age);
 	}
 	if (args->deepen_not) {
 		int i;
 		for (i = 0; i < args->deepen_not->nr; i++) {
 			struct string_list_item *s = args->deepen_not->items + i;
 			packet_buf_write(&req_buf, "deepen-not %s", s->string);
 		}
 	}
 	if (server_supports_filtering && args->filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&args->filter_options,
-						&expanded_filter_spec);
-		packet_buf_write(&req_buf, "filter %s",
-				 expanded_filter_spec.buf);
-		strbuf_release(&expanded_filter_spec);
+		const char *spec =
+			expand_list_objects_filter_spec(&args->filter_options);
+		packet_buf_write(&req_buf, "filter %s", spec);
 	}
 	packet_buf_flush(&req_buf);
 	state_len = req_buf.len;
 
 	if (args->deepen) {
 		const char *arg;
 		struct object_id oid;
 
 		send_request(args, fd[1], &req_buf);
 		while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
@@ -1092,21 +1089,21 @@ static int add_haves(struct fetch_negotiator *negotiator,
 		ret = 1;
 	}
 
 	/* Increase haves to send on next round */
 	*haves_to_send = next_flush(1, *haves_to_send);
 
 	return ret;
 }
 
 static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
-			      const struct fetch_pack_args *args,
+			      struct fetch_pack_args *args,
 			      const struct ref *wants, struct oidset *common,
 			      int *haves_to_send, int *in_vain,
 			      int sideband_all)
 {
 	int ret = 0;
 	struct strbuf req_buf = STRBUF_INIT;
 
 	if (server_supports_v2("fetch", 1))
 		packet_buf_write(&req_buf, "command=fetch");
 	if (server_supports_v2("agent", 0))
@@ -1133,27 +1130,24 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 
 	/* Add shallow-info and deepen request */
 	if (server_supports_feature("fetch", "shallow", 0))
 		add_shallow_requests(&req_buf, args);
 	else if (is_repository_shallow(the_repository) || args->deepen)
 		die(_("Server does not support shallow requests"));
 
 	/* Add filter */
 	if (server_supports_feature("fetch", "filter", 0) &&
 	    args->filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
+		const char *spec =
+			expand_list_objects_filter_spec(&args->filter_options);
 		print_verbose(args, _("Server supports filter"));
-		expand_list_objects_filter_spec(&args->filter_options,
-						&expanded_filter_spec);
-		packet_buf_write(&req_buf, "filter %s",
-				 expanded_filter_spec.buf);
-		strbuf_release(&expanded_filter_spec);
+		packet_buf_write(&req_buf, "filter %s", spec);
 	} else if (args->filter_options.choice) {
 		warning("filtering not recognized by server, ignoring");
 	}
 
 	/* add wants */
 	add_wants(args->no_dependents, wants, &req_buf);
 
 	if (args->no_dependents) {
 		packet_buf_write(&req_buf, "done");
 		ret = 1;
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index ab2c983031..411d23004c 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -177,72 +177,89 @@ static int parse_combine_filter(
 	}
 	return result;
 }
 
 int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 			      const char *arg)
 {
 	struct strbuf buf = STRBUF_INIT;
 	if (filter_options->choice)
 		die(_("multiple filter-specs cannot be combined"));
-	filter_options->filter_spec = strdup(arg);
+	string_list_append(&filter_options->filter_spec, xstrdup(arg));
 	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
 		die("%s", buf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
 	if (unset || !arg) {
 		list_objects_filter_set_no_filter(filter_options);
 		return 0;
 	}
 
 	return parse_list_objects_filter(filter_options, arg);
 }
 
-void expand_list_objects_filter_spec(
-	const struct list_objects_filter_options *filter,
-	struct strbuf *expanded_spec)
+const char *list_objects_filter_spec(struct list_objects_filter_options *filter)
 {
-	strbuf_init(expanded_spec, strlen(filter->filter_spec));
-	if (filter->choice == LOFC_BLOB_LIMIT)
-		strbuf_addf(expanded_spec, "blob:limit=%lu",
+	if (!filter->filter_spec.nr)
+		BUG("no filter_spec available for this filter");
+	if (filter->filter_spec.nr != 1) {
+		struct strbuf concatted = STRBUF_INIT;
+		strbuf_add_separated_string_list(
+			&concatted, "", &filter->filter_spec);
+		string_list_clear(&filter->filter_spec, /*free_util=*/0);
+		string_list_append(
+			&filter->filter_spec, strbuf_detach(&concatted, NULL));
+	}
+
+	return filter->filter_spec.items[0].string;
+}
+
+const char *expand_list_objects_filter_spec(
+	struct list_objects_filter_options *filter)
+{
+	if (filter->choice == LOFC_BLOB_LIMIT) {
+		struct strbuf expanded_spec = STRBUF_INIT;
+		strbuf_addf(&expanded_spec, "blob:limit=%lu",
 			    filter->blob_limit_value);
-	else if (filter->choice == LOFC_TREE_DEPTH)
-		strbuf_addf(expanded_spec, "tree:%lu",
-			    filter->tree_exclude_depth);
-	else
-		strbuf_addstr(expanded_spec, filter->filter_spec);
+		string_list_clear(&filter->filter_spec, /*free_util=*/0);
+		string_list_append(
+			&filter->filter_spec,
+			strbuf_detach(&expanded_spec, NULL));
+	}
+
+	return list_objects_filter_spec(filter);
 }
 
 void list_objects_filter_release(
 	struct list_objects_filter_options *filter_options)
 {
 	size_t sub;
 
 	if (!filter_options)
 		return;
-	free(filter_options->filter_spec);
+	string_list_clear(&filter_options->filter_spec, /*free_util=*/0);
 	free(filter_options->sparse_oid_value);
 	for (sub = 0; sub < filter_options->sub_nr; sub++)
 		list_objects_filter_release(&filter_options->sub[sub]);
 	free(filter_options->sub);
 	memset(filter_options, 0, sizeof(*filter_options));
 }
 
 void partial_clone_register(
 	const char *remote,
-	const struct list_objects_filter_options *filter_options)
+	struct list_objects_filter_options *filter_options)
 {
 	/*
 	 * Record the name of the partial clone remote in the
 	 * config and in the global variable -- the latter is
 	 * used throughout to indicate that partial clone is
 	 * enabled and to expect missing objects.
 	 */
 	if (repository_format_partial_clone &&
 	    *repository_format_partial_clone &&
 	    strcmp(remote, repository_format_partial_clone))
@@ -251,32 +268,33 @@ void partial_clone_register(
 	git_config_set("core.repositoryformatversion", "1");
 	git_config_set("extensions.partialclone", remote);
 
 	repository_format_partial_clone = xstrdup(remote);
 
 	/*
 	 * Record the initial filter-spec in the config as
 	 * the default for subsequent fetches from this remote.
 	 */
 	core_partial_clone_filter_default =
-		xstrdup(filter_options->filter_spec);
+		xstrdup(expand_list_objects_filter_spec(filter_options));
 	git_config_set("core.partialclonefilter",
 		       core_partial_clone_filter_default);
 }
 
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 
 	/*
 	 * Parse default value, but silently ignore it if it is invalid.
 	 */
 	if (!core_partial_clone_filter_default)
 		return;
 
-	filter_options->filter_spec = strdup(core_partial_clone_filter_default);
+	string_list_append(&filter_options->filter_spec,
+			   core_partial_clone_filter_default);
 	gently_parse_list_objects_filter(filter_options,
 					 core_partial_clone_filter_default,
 					 &errbuf);
 	strbuf_release(&errbuf);
 }
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index 789faef1e5..bb33303f9b 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -1,15 +1,15 @@
 #ifndef LIST_OBJECTS_FILTER_OPTIONS_H
 #define LIST_OBJECTS_FILTER_OPTIONS_H
 
 #include "parse-options.h"
-#include "strbuf.h"
+#include "string-list.h"
 
 /*
  * The list of defined filters for list-objects.
  */
 enum list_objects_filter_choice {
 	LOFC_DISABLED = 0,
 	LOFC_BLOB_NONE,
 	LOFC_BLOB_LIMIT,
 	LOFC_TREE_DEPTH,
 	LOFC_SPARSE_OID,
@@ -17,22 +17,24 @@ enum list_objects_filter_choice {
 	LOFC__COUNT /* must be last */
 };
 
 struct list_objects_filter_options {
 	/*
 	 * 'filter_spec' is the raw argument value given on the command line
 	 * or protocol request.  (The part after the "--keyword=".)  For
 	 * commands that launch filtering sub-processes, or for communication
 	 * over the network, don't use this value; use the result of
 	 * expand_list_objects_filter_spec() instead.
+	 * To get the raw filter spec given by the user, use the result of
+	 * list_objects_filter_spec().
 	 */
-	char *filter_spec;
+	struct string_list filter_spec;
 
 	/*
 	 * 'choice' is determined by parsing the filter-spec.  This indicates
 	 * the filtering algorithm to use.
 	 */
 	enum list_objects_filter_choice choice;
 
 	/*
 	 * Choice is LOFC_DISABLED because "--no-filter" was requested.
 	 */
@@ -69,35 +71,44 @@ int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset);
 
 #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
 	{ OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \
 	  N_("object filtering"), 0, \
 	  opt_parse_list_objects_filter }
 
 /*
  * Translates abbreviated numbers in the filter's filter_spec into their
  * fully-expanded forms (e.g., "limit:blob=1k" becomes "limit:blob=1024").
+ * Returns a string owned by the list_objects_filter_options object.
  *
- * This form should be used instead of the raw filter_spec field when
- * communicating with a remote process or subprocess.
+ * This form should be used instead of the raw list_objects_filter_spec()
+ * value when communicating with a remote process or subprocess.
  */
-void expand_list_objects_filter_spec(
-	const struct list_objects_filter_options *filter,
-	struct strbuf *expanded_spec);
+const char *expand_list_objects_filter_spec(
+	struct list_objects_filter_options *filter);
+
+/*
+ * Returns the filter spec string in more or less the form in which the user
+ * entered it. This form of the filter_spec can be used in user-facing
+ * messages.  Returns a string owned by the list_objects_filter_options
+ * object.
+ */
+const char *list_objects_filter_spec(
+	struct list_objects_filter_options *filter);
 
 void list_objects_filter_release(
 	struct list_objects_filter_options *filter_options);
 
 static inline void list_objects_filter_set_no_filter(
 	struct list_objects_filter_options *filter_options)
 {
 	list_objects_filter_release(filter_options);
 	filter_options->no_filter = 1;
 }
 
 void partial_clone_register(
 	const char *remote,
-	const struct list_objects_filter_options *filter_options);
+	struct list_objects_filter_options *filter_options);
 void partial_clone_get_default_filter_spec(
 	struct list_objects_filter_options *filter_options);
 
 #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index 05d4f2e9c2..27ba15719a 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -583,18 +583,11 @@ test_expect_success 'rev-list W/ missing=allow-any' '
 # Test expansion of filter specs.
 
 test_expect_success 'expand blob limit in protocol' '
 	git -C r2 config --local uploadpack.allowfilter 1 &&
 	GIT_TRACE_PACKET="$(pwd)/trace" git -c protocol.version=2 clone \
 		--filter=blob:limit=1k "file://$(pwd)/r2" limit &&
 	! grep "blob:limit=1k" trace &&
 	grep "blob:limit=1024" trace
 '
 
-test_expect_success 'expand tree depth limit in protocol' '
-	GIT_TRACE_PACKET="$(pwd)/tree_trace" git -c protocol.version=2 clone \
-		--filter=tree:0k "file://$(pwd)/r2" tree &&
-	! grep "tree:0k" tree_trace &&
-	grep "tree:0" tree_trace
-'
-
 test_done
diff --git a/transport-helper.c b/transport-helper.c
index c7e17ec9cb..0a34544df0 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -675,27 +675,23 @@ static int fetch(struct transport *transport,
 	    data->transport_options.check_self_contained_and_connected)
 		set_helper_option(transport, "check-connectivity", "true");
 
 	if (transport->cloning)
 		set_helper_option(transport, "cloning", "true");
 
 	if (data->transport_options.update_shallow)
 		set_helper_option(transport, "update-shallow", "true");
 
 	if (data->transport_options.filter_options.choice) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(
-			&data->transport_options.filter_options,
-			&expanded_filter_spec);
-		set_helper_option(transport, "filter",
-				  expanded_filter_spec.buf);
-		strbuf_release(&expanded_filter_spec);
+		const char *spec = expand_list_objects_filter_spec(
+			&data->transport_options.filter_options);
+		set_helper_option(transport, "filter", spec);
 	}
 
 	if (data->transport_options.negotiation_tips)
 		warning("Ignoring --negotiation-tip because the protocol does not support it.");
 
 	if (data->fetch)
 		return fetch_with_fetch(transport, nr_heads, to_fetch);
 
 	if (data->import)
 		return fetch_with_import(transport, nr_heads, to_fetch);
diff --git a/upload-pack.c b/upload-pack.c
index 24298913c0..a74d293fef 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -133,32 +133,31 @@ static void create_pack_file(const struct object_array *have_obj,
 
 	argv_array_push(&pack_objects.args, "--stdout");
 	if (shallow_nr)
 		argv_array_push(&pack_objects.args, "--shallow");
 	if (!no_progress)
 		argv_array_push(&pack_objects.args, "--progress");
 	if (use_ofs_delta)
 		argv_array_push(&pack_objects.args, "--delta-base-offset");
 	if (use_include_tag)
 		argv_array_push(&pack_objects.args, "--include-tag");
-	if (filter_options.filter_spec) {
-		struct strbuf expanded_filter_spec = STRBUF_INIT;
-		expand_list_objects_filter_spec(&filter_options,
-						&expanded_filter_spec);
+	if (filter_options.choice) {
+		const char *spec =
+			expand_list_objects_filter_spec(&filter_options);
 		if (pack_objects.use_shell) {
 			struct strbuf buf = STRBUF_INIT;
-			sq_quote_buf(&buf, expanded_filter_spec.buf);
+			sq_quote_buf(&buf, spec);
 			argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf);
 			strbuf_release(&buf);
 		} else {
 			argv_array_pushf(&pack_objects.args, "--filter=%s",
-					 expanded_filter_spec.buf);
+					 spec);
 		}
 	}
 
 	pack_objects.in = -1;
 	pack_objects.out = -1;
 	pack_objects.err = -1;
 
 	if (start_command(&pack_objects))
 		die("git upload-pack: unable to fork git-pack-objects");
 
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 07/10] strbuf: give URL-encoding API a char predicate fn
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
                     ` (5 preceding siblings ...)
  2019-06-15  0:40   ` [PATCH v4 06/10] list-objects-filter-options: make filter_spec a string_list Matthew DeVore
@ 2019-06-15  0:40   ` Matthew DeVore
  2019-06-15  0:40   ` [PATCH v4 08/10] list-objects-filter-options: allow mult. --filter Matthew DeVore
                     ` (3 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-15  0:40 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

Allow callers to specify exactly which characters need to be URL-encoded
and which do not. A later patch in this series takes advantage of this
new API.

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 credential-store.c |  9 +++++----
 http.c             |  6 ++++--
 strbuf.c           | 15 ++++++++-------
 strbuf.h           |  7 ++++++-
 4 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/credential-store.c b/credential-store.c
index ac295420dd..c010497cb2 100644
--- a/credential-store.c
+++ b/credential-store.c
@@ -65,29 +65,30 @@ static void rewrite_credential_file(const char *fn, struct credential *c,
 	parse_credential_file(fn, c, NULL, print_line);
 	if (commit_lock_file(&credential_lock) < 0)
 		die_errno("unable to write credential store");
 }
 
 static void store_credential_file(const char *fn, struct credential *c)
 {
 	struct strbuf buf = STRBUF_INIT;
 
 	strbuf_addf(&buf, "%s://", c->protocol);
-	strbuf_addstr_urlencode(&buf, c->username, 1);
+	strbuf_addstr_urlencode(&buf, c->username, is_rfc3986_unreserved);
 	strbuf_addch(&buf, ':');
-	strbuf_addstr_urlencode(&buf, c->password, 1);
+	strbuf_addstr_urlencode(&buf, c->password, is_rfc3986_unreserved);
 	strbuf_addch(&buf, '@');
 	if (c->host)
-		strbuf_addstr_urlencode(&buf, c->host, 1);
+		strbuf_addstr_urlencode(&buf, c->host, is_rfc3986_unreserved);
 	if (c->path) {
 		strbuf_addch(&buf, '/');
-		strbuf_addstr_urlencode(&buf, c->path, 0);
+		strbuf_addstr_urlencode(&buf, c->path,
+					is_rfc3986_reserved_or_unreserved);
 	}
 
 	rewrite_credential_file(fn, c, &buf);
 	strbuf_release(&buf);
 }
 
 static void store_credential(const struct string_list *fns, struct credential *c)
 {
 	struct string_list_item *fn;
 
diff --git a/http.c b/http.c
index 27aa0a3192..938b9e55af 100644
--- a/http.c
+++ b/http.c
@@ -506,23 +506,25 @@ static void var_override(const char **var, char *value)
 static void set_proxyauth_name_password(CURL *result)
 {
 #if LIBCURL_VERSION_NUM >= 0x071301
 		curl_easy_setopt(result, CURLOPT_PROXYUSERNAME,
 			proxy_auth.username);
 		curl_easy_setopt(result, CURLOPT_PROXYPASSWORD,
 			proxy_auth.password);
 #else
 		struct strbuf s = STRBUF_INIT;
 
-		strbuf_addstr_urlencode(&s, proxy_auth.username, 1);
+		strbuf_addstr_urlencode(&s, proxy_auth.username,
+					is_rfc3986_unreserved);
 		strbuf_addch(&s, ':');
-		strbuf_addstr_urlencode(&s, proxy_auth.password, 1);
+		strbuf_addstr_urlencode(&s, proxy_auth.password,
+					is_rfc3986_unreserved);
 		curl_proxyuserpwd = strbuf_detach(&s, NULL);
 		curl_easy_setopt(result, CURLOPT_PROXYUSERPWD, curl_proxyuserpwd);
 #endif
 }
 
 static void init_curl_proxy_auth(CURL *result)
 {
 	if (proxy_auth.username) {
 		if (!proxy_auth.password)
 			credential_fill(&proxy_auth);
diff --git a/strbuf.c b/strbuf.c
index 0e18b259ce..60ab5144f2 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -767,55 +767,56 @@ void strbuf_addstr_xml_quoted(struct strbuf *buf, const char *s)
 		case '&':
 			strbuf_addstr(buf, "&amp;");
 			break;
 		case 0:
 			return;
 		}
 		s++;
 	}
 }
 
-static int is_rfc3986_reserved(char ch)
+int is_rfc3986_reserved_or_unreserved(char ch)
 {
+	if (is_rfc3986_unreserved(ch))
+		return 1;
 	switch (ch) {
 		case '!': case '*': case '\'': case '(': case ')': case ';':
 		case ':': case '@': case '&': case '=': case '+': case '$':
 		case ',': case '/': case '?': case '#': case '[': case ']':
 			return 1;
 	}
 	return 0;
 }
 
-static int is_rfc3986_unreserved(char ch)
+int is_rfc3986_unreserved(char ch)
 {
 	return isalnum(ch) ||
 		ch == '-' || ch == '_' || ch == '.' || ch == '~';
 }
 
 static void strbuf_add_urlencode(struct strbuf *sb, const char *s, size_t len,
-				 int reserved)
+				 char_predicate allow_unencoded_fn)
 {
 	strbuf_grow(sb, len);
 	while (len--) {
 		char ch = *s++;
-		if (is_rfc3986_unreserved(ch) ||
-		    (!reserved && is_rfc3986_reserved(ch)))
+		if (allow_unencoded_fn(ch))
 			strbuf_addch(sb, ch);
 		else
 			strbuf_addf(sb, "%%%02x", (unsigned char)ch);
 	}
 }
 
 void strbuf_addstr_urlencode(struct strbuf *sb, const char *s,
-			     int reserved)
+			     char_predicate allow_unencoded_fn)
 {
-	strbuf_add_urlencode(sb, s, strlen(s), reserved);
+	strbuf_add_urlencode(sb, s, strlen(s), allow_unencoded_fn);
 }
 
 void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes)
 {
 	if (bytes > 1 << 30) {
 		strbuf_addf(buf, "%u.%2.2u GiB",
 			    (unsigned)(bytes >> 30),
 			    (unsigned)(bytes & ((1 << 30) - 1)) / 10737419);
 	} else if (bytes > 1 << 20) {
 		unsigned x = bytes + 5243;  /* for rounding */
diff --git a/strbuf.h b/strbuf.h
index c8d98dfb95..346d722492 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -659,22 +659,27 @@ void strbuf_branchname(struct strbuf *sb, const char *name,
 		       unsigned allowed);
 
 /*
  * Like strbuf_branchname() above, but confirm that the result is
  * syntactically valid to be used as a local branch name in refs/heads/.
  *
  * The return value is "0" if the result is valid, and "-1" otherwise.
  */
 int strbuf_check_branch_ref(struct strbuf *sb, const char *name);
 
+typedef int (*char_predicate)(char ch);
+
+int is_rfc3986_unreserved(char ch);
+int is_rfc3986_reserved_or_unreserved(char ch);
+
 void strbuf_addstr_urlencode(struct strbuf *sb, const char *name,
-			     int reserved);
+			     char_predicate allow_unencoded_fn);
 
 __attribute__((format (printf,1,2)))
 int printf_ln(const char *fmt, ...);
 __attribute__((format (printf,2,3)))
 int fprintf_ln(FILE *fp, const char *fmt, ...);
 
 char *xstrdup_tolower(const char *);
 char *xstrdup_toupper(const char *);
 
 /**
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 08/10] list-objects-filter-options: allow mult. --filter
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
                     ` (6 preceding siblings ...)
  2019-06-15  0:40   ` [PATCH v4 07/10] strbuf: give URL-encoding API a char predicate fn Matthew DeVore
@ 2019-06-15  0:40   ` Matthew DeVore
  2019-06-15  0:40   ` [PATCH v4 09/10] list-objects-filter-options: clean up use of ALLOC_GROW Matthew DeVore
                     ` (2 subsequent siblings)
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-15  0:40 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore, Jeff Hostetler, Junio C Hamano

Allow combining of multiple filters by simply repeating the --filter
flag. Before this patch, the user had to combine them in a single flag
somewhat awkwardly (e.g. --filter=combine:FOO+BAR), including
URL-encoding the individual filters.

To make this work, in the --filter flag parsing callback, rather than
error out when we detect that the filter_options struct is already
populated, we modify it in place to contain the added sub-filter. The
existing sub-filter becomes the LHS of the combined filter, and the
next sub-filter becomes the RHS. We also have to URL-encode the LHS and
RHS sub-filters.

We can simplify the operation if the LHS is already a combine: filter.
In that case, we just append the URL-encoded RHS sub-filter to the LHS
spec to get the new spec.

Helped-by: Emily Shaffer <emilyshaffer@google.com>
Helped-by: Jeff Hostetler <git@jeffhostetler.com>
Helped-by: Jeff King <peff@peff.net>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 Documentation/rev-list-options.txt  | 16 ++++++
 list-objects-filter-options.c       | 88 +++++++++++++++++++++++++++--
 list-objects-filter-options.h       | 11 ++++
 t/t5616-partial-clone.sh            | 19 +++++++
 t/t6112-rev-list-filters-objects.sh | 46 +++++++++++++--
 transport.c                         |  1 +
 upload-pack.c                       |  2 +
 7 files changed, 173 insertions(+), 10 deletions(-)

diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 71a1fcc093..d1f080bf6d 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -731,20 +731,36 @@ at multiple depths in the commits traversed). <depth>=0 will not include
 any trees or blobs unless included explicitly in the command-line (or
 standard input when --stdin is used). <depth>=1 will include only the
 tree and blobs which are referenced directly by a commit reachable from
 <commit> or an explicitly-given object. <depth>=2 is like <depth>=1
 while also including trees and blobs one more level removed from an
 explicitly-given commit or tree.
 +
 Note that the form '--filter=sparse:path=<path>' that wants to read
 from an arbitrary path on the filesystem has been dropped for security
 reasons.
++
+Multiple '--filter=' flags can be specified to combine filters. Only
+objects which are accepted by every filter are included.
++
+The form '--filter=combine:<filter1>+<filter2>+...<filterN>' can also be
+used to combine several filters, but this is harder than just repeating
+the '--filter' flag and is usually not necessary. Filters are joined by
+'{plus}' and individual filters are %-encoded (i.e. URL-encoded).
+Besides the '{plus}' and '%' characters, the following characters are
+reserved and also must be encoded: `~!@#$^&*()[]{}\;",<>?`+&#39;&#96;+
+as well as all characters with ASCII code &lt;= `0x20`, which includes
+space and newline.
++
+Other arbitrary characters can also be encoded. For instance,
+'combine:tree:3+blob:none' and 'combine:tree%3A3+blob%3Anone' are
+equivalent.
 
 --no-filter::
 	Turn off any previous `--filter=` argument.
 
 --filter-print-omitted::
 	Only useful with `--filter=`; prints a list of the objects omitted
 	by the filter.  Object IDs are prefixed with a ``~'' character.
 
 --missing=<missing-action>::
 	A debug option to help with future "partial clone" development.
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 411d23004c..ab9c455fbd 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -1,18 +1,19 @@
 #include "cache.h"
 #include "commit.h"
 #include "config.h"
 #include "revision.h"
 #include "argv-array.h"
 #include "list-objects.h"
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
+#include "trace.h"
 #include "url.h"
 
 static int parse_combine_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
 	struct strbuf *errbuf);
 
 /*
  * Parse value of the argument to the "filter" keyword.
  * On the command line this looks like:
@@ -171,29 +172,106 @@ static int parse_combine_filter(
 
 cleanup:
 	strbuf_list_free(subspecs);
 	if (result) {
 		list_objects_filter_release(filter_options);
 		memset(filter_options, 0, sizeof(*filter_options));
 	}
 	return result;
 }
 
-int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
-			      const char *arg)
+static int allow_unencoded(char ch)
+{
+	if (ch <= ' ' || ch == '%' || ch == '+')
+		return 0;
+	return !strchr(RESERVED_NON_WS, ch);
+}
+
+static void filter_spec_append_urlencode(
+	struct list_objects_filter_options *filter, const char *raw)
 {
 	struct strbuf buf = STRBUF_INIT;
+	strbuf_addstr_urlencode(&buf, raw, allow_unencoded);
+	trace_printf("Add to combine filter-spec: %s\n", buf.buf);
+	string_list_append(&filter->filter_spec, strbuf_detach(&buf, NULL));
+}
+
+/*
+ * Changes filter_options into an equivalent LOFC_COMBINE filter options
+ * instance. Does not do anything if filter_options is already LOFC_COMBINE.
+ */
+static void transform_to_combine_type(
+	struct list_objects_filter_options *filter_options)
+{
+	assert(filter_options->choice);
+	if (filter_options->choice == LOFC_COMBINE)
+		return;
+	{
+		const int initial_sub_alloc = 2;
+		struct list_objects_filter_options *sub_array =
+			xcalloc(initial_sub_alloc, sizeof(*sub_array));
+		sub_array[0] = *filter_options;
+		memset(filter_options, 0, sizeof(*filter_options));
+		filter_options->sub = sub_array;
+		filter_options->sub_alloc = initial_sub_alloc;
+	}
+	filter_options->sub_nr = 1;
+	filter_options->choice = LOFC_COMBINE;
+	string_list_append(&filter_options->filter_spec, xstrdup("combine:"));
+	filter_spec_append_urlencode(
+		filter_options,
+		list_objects_filter_spec(&filter_options->sub[0]));
+	/*
+	 * We don't need the filter_spec strings for subfilter specs, only the
+	 * top level.
+	 */
+	string_list_clear(&filter_options->sub[0].filter_spec, /*free_util=*/0);
+}
+
+void list_objects_filter_die_if_populated(
+	struct list_objects_filter_options *filter_options)
+{
 	if (filter_options->choice)
 		die(_("multiple filter-specs cannot be combined"));
-	string_list_append(&filter_options->filter_spec, xstrdup(arg));
-	if (gently_parse_list_objects_filter(filter_options, arg, &buf))
-		die("%s", buf.buf);
+}
+
+int parse_list_objects_filter(
+	struct list_objects_filter_options *filter_options,
+	const char *arg)
+{
+	struct strbuf errbuf = STRBUF_INIT;
+	int parse_error;
+
+	if (!filter_options->choice) {
+		string_list_append(&filter_options->filter_spec, xstrdup(arg));
+
+		parse_error = gently_parse_list_objects_filter(
+			filter_options, arg, &errbuf);
+	} else {
+		/*
+		 * Make filter_options an LOFC_COMBINE spec so we can trivially
+		 * add subspecs to it.
+		 */
+		transform_to_combine_type(filter_options);
+
+		string_list_append(&filter_options->filter_spec, xstrdup("+"));
+		filter_spec_append_urlencode(filter_options, arg);
+		ALLOC_GROW(filter_options->sub, filter_options->sub_nr + 1,
+			   filter_options->sub_alloc);
+		filter_options = &filter_options->sub[filter_options->sub_nr++];
+		memset(filter_options, 0, sizeof(*filter_options));
+
+		parse_error = gently_parse_list_objects_filter(
+			filter_options, arg, &errbuf);
+	}
+	if (parse_error)
+		die("%s", errbuf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
 	if (unset || !arg) {
 		list_objects_filter_set_no_filter(filter_options);
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index bb33303f9b..d8bc7e946e 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -56,20 +56,31 @@ struct list_objects_filter_options {
 	struct list_objects_filter_options *sub;
 
 	/*
 	 * END choice-specific parsed values.
 	 */
 };
 
 /* Normalized command line arguments */
 #define CL_ARG__FILTER "filter"
 
+void list_objects_filter_die_if_populated(
+	struct list_objects_filter_options *filter_options);
+
+/*
+ * Parses the filter spec string given by arg and either (1) simply places the
+ * result in filter_options if it is not yet populated or (2) combines it with
+ * the filter already in filter_options if it is already populated. In the case
+ * of (2), the filter specs are combined as if specified with 'combine:'.
+ *
+ * Dies and prints a user-facing message if an error occurs.
+ */
 int parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg);
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset);
 
 #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
 	{ OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \
 	  N_("object filtering"), 0, \
diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh
index 9a8f9886b3..11536f4028 100755
--- a/t/t5616-partial-clone.sh
+++ b/t/t5616-partial-clone.sh
@@ -201,20 +201,39 @@ test_expect_success 'use fsck before and after manually fetching a missing subtr
 	test_line_count = 70 fetched_objects &&
 
 	awk -f print_1.awk fetched_objects |
 	xargs -n1 git -C dst cat-file -t >fetched_types &&
 
 	sort -u fetched_types >unique_types.observed &&
 	test_write_lines blob commit tree >unique_types.expected &&
 	test_cmp unique_types.expected unique_types.observed
 '
 
+test_expect_success 'implicitly construct combine: filter with repeated flags' '
+	GIT_TRACE=$(pwd)/trace git clone --bare \
+		--filter=blob:none --filter=tree:1 \
+		"file://$(pwd)/srv.bare" pc2 &&
+	grep "trace:.* git pack-objects .*--filter=combine:blob:none+tree:1" \
+		trace &&
+	git -C pc2 rev-list --objects --missing=allow-any HEAD >objects &&
+
+	# We should have gotten some root trees.
+	grep " $" objects &&
+	# Should not have gotten any non-root trees or blobs.
+	! grep " ." objects &&
+
+	xargs -n 1 git -C pc2 cat-file -t <objects >types &&
+	sort -u types >unique_types.actual &&
+	test_write_lines commit tree >unique_types.expected &&
+	test_cmp unique_types.expected unique_types.actual
+'
+
 test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' '
 	rm -rf src dst &&
 	git init src &&
 	test_commit -C src x &&
 	test_config -C src uploadpack.allowfilter 1 &&
 	test_config -C src uploadpack.allowanysha1inwant 1 &&
 
 	# Create a tag pointing to a blob.
 	BLOB=$(echo blob-contents | git -C src hash-object --stdin -w) &&
 	git -C src tag myblob "$BLOB" &&
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index 27ba15719a..de0e5a5d36 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -344,21 +344,30 @@ test_expect_success 'verify tree:3 includes everything expected' '
 
 test_expect_success 'combine:... for a simple combination' '
 	git -C r3 rev-list --objects --filter=combine:tree:2+blob:none HEAD \
 		>actual &&
 
 	expect_has HEAD "" &&
 	expect_has HEAD~1 "" &&
 	expect_has HEAD dir1 &&
 
 	# There are also 2 commit objects
-	test_line_count = 5 actual
+	test_line_count = 5 actual &&
+
+	cp actual expected &&
+
+	# Try again using repeated --filter - this is equivalent to a manual
+	# combine with "combine:...+..."
+	git -C r3 rev-list --objects --filter=combine:tree:2 \
+		--filter=blob:none HEAD >actual &&
+
+	test_cmp expected actual
 '
 
 test_expect_success 'combine:... with URL encoding' '
 	git -C r3 rev-list --objects \
 		--filter=combine:tree%3a2+blob:%6Eon%65 HEAD >actual &&
 
 	expect_has HEAD "" &&
 	expect_has HEAD~1 "" &&
 	expect_has HEAD dir1 &&
 
@@ -410,24 +419,26 @@ test_expect_success 'combine:... with edge-case hex digits: Ff Aa 0 9' '
 	git -C r3 rev-list --objects --filter="combine:tree%3A2+blob%3anone" \
 		HEAD >actual &&
 	test_line_count = 5 actual &&
 	git -C r3 rev-list --objects --filter="combine:tree:%30" HEAD >actual &&
 	test_line_count = 2 actual &&
 	git -C r3 rev-list --objects --filter="combine:tree:%39+blob:none" \
 		HEAD >actual &&
 	test_line_count = 5 actual
 '
 
-test_expect_success 'add a sparse pattern blob whose path has reserved chars' '
+test_expect_success 'add sparse pattern blobs whose paths have reserved chars' '
 	cp r3/pattern r3/pattern1+renamed% &&
-	git -C r3 add pattern1+renamed% &&
-	git -C r3 commit -m "add sparse pattern file with reserved chars"
+	cp r3/pattern "r3/p;at%ter+n" &&
+	cp r3/pattern r3/^~pattern &&
+	git -C r3 add pattern1+renamed% "p;at%ter+n" ^~pattern &&
+	git -C r3 commit -m "add sparse pattern files with reserved chars"
 '
 
 test_expect_success 'combine:... with more than two sub-filters' '
 	git -C r3 rev-list --objects \
 		--filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern \
 		HEAD >actual &&
 
 	expect_has HEAD "" &&
 	expect_has HEAD~1 "" &&
 	expect_has HEAD~2 "" &&
@@ -438,21 +449,46 @@ test_expect_success 'combine:... with more than two sub-filters' '
 	# Should also have 3 commits
 	test_line_count = 9 actual &&
 
 	# Try again, this time making sure the last sub-filter is only
 	# URL-decoded once.
 	cp actual expect &&
 
 	git -C r3 rev-list --objects \
 		--filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern1%2brenamed%25 \
 		HEAD >actual &&
-	test_cmp expect actual
+	test_cmp expect actual &&
+
+	# Use the same composite filter again, but with a pattern file name that
+	# requires encoding multiple characters, and use implicit filter
+	# combining.
+	test_when_finished "rm -f trace1" &&
+	GIT_TRACE=$(pwd)/trace1 git -C r3 rev-list --objects \
+		--filter=tree:3 --filter=blob:limit=40 \
+		--filter=sparse:oid="master:p;at%ter+n" \
+		HEAD >actual &&
+
+	test_cmp expect actual &&
+	grep "Add to combine filter-spec: sparse:oid=master:p%3bat%25ter%2bn" \
+		trace1 &&
+
+	# Repeat the above test, but this time, the characters to encode are in
+	# the LHS of the combined filter.
+	test_when_finished "rm -f trace2" &&
+	GIT_TRACE=$(pwd)/trace2 git -C r3 rev-list --objects \
+		--filter=sparse:oid=master:^~pattern \
+		--filter=tree:3 --filter=blob:limit=40 \
+		HEAD >actual &&
+
+	test_cmp expect actual &&
+	grep "Add to combine filter-spec: sparse:oid=master:%5e%7epattern" \
+		trace2
 '
 
 # Test provisional omit collection logic with a repo that has objects appearing
 # at multiple depths - first deeper than the filter's threshold, then shallow.
 
 test_expect_success 'setup r4' '
 	git init r4 &&
 
 	echo foo > r4/foo &&
 	mkdir r4/subdir &&
diff --git a/transport.c b/transport.c
index f1fcd2c4b0..ee7dd1c062 100644
--- a/transport.c
+++ b/transport.c
@@ -217,20 +217,21 @@ static int set_git_option(struct git_transport_options *opts,
 	} else if (!strcmp(name, TRANS_OPT_DEEPEN_RELATIVE)) {
 		opts->deepen_relative = !!value;
 		return 0;
 	} else if (!strcmp(name, TRANS_OPT_FROM_PROMISOR)) {
 		opts->from_promisor = !!value;
 		return 0;
 	} else if (!strcmp(name, TRANS_OPT_NO_DEPENDENTS)) {
 		opts->no_dependents = !!value;
 		return 0;
 	} else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) {
+		list_objects_filter_die_if_populated(&opts->filter_options);
 		parse_list_objects_filter(&opts->filter_options, value);
 		return 0;
 	}
 	return 1;
 }
 
 static int connect_setup(struct transport *transport, int for_push)
 {
 	struct git_transport_data *data = transport->data;
 	int flags = transport->verbose > 0 ? CONNECT_VERBOSE : 0;
diff --git a/upload-pack.c b/upload-pack.c
index a74d293fef..dda2ac6f44 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -876,20 +876,21 @@ static void receive_needs(struct packet_reader *reader, struct object_array *wan
 		if (process_deepen(reader->line, &depth))
 			continue;
 		if (process_deepen_since(reader->line, &deepen_since, &deepen_rev_list))
 			continue;
 		if (process_deepen_not(reader->line, &deepen_not, &deepen_rev_list))
 			continue;
 
 		if (skip_prefix(reader->line, "filter ", &arg)) {
 			if (!filter_capability_requested)
 				die("git upload-pack: filtering capability not negotiated");
+			list_objects_filter_die_if_populated(&filter_options);
 			parse_list_objects_filter(&filter_options, arg);
 			continue;
 		}
 
 		if (!skip_prefix(reader->line, "want ", &arg) ||
 		    parse_oid_hex(arg, &oid_buf, &features))
 			die("git upload-pack: protocol error, "
 			    "expected to get object ID, not '%s'", reader->line);
 
 		if (parse_feature_request(features, "deepen-relative"))
@@ -1297,20 +1298,21 @@ static void process_args(struct packet_reader *request,
 			continue;
 		if (process_deepen_not(arg, &data->deepen_not,
 				       &data->deepen_rev_list))
 			continue;
 		if (!strcmp(arg, "deepen-relative")) {
 			data->deepen_relative = 1;
 			continue;
 		}
 
 		if (allow_filter && skip_prefix(arg, "filter ", &p)) {
+			list_objects_filter_die_if_populated(&filter_options);
 			parse_list_objects_filter(&filter_options, p);
 			continue;
 		}
 
 		if ((git_env_bool("GIT_TEST_SIDEBAND_ALL", 0) ||
 		     allow_sideband_all) &&
 		    !strcmp(arg, "sideband-all")) {
 			data->writer.use_sideband = 1;
 			continue;
 		}
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 09/10] list-objects-filter-options: clean up use of ALLOC_GROW
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
                     ` (7 preceding siblings ...)
  2019-06-15  0:40   ` [PATCH v4 08/10] list-objects-filter-options: allow mult. --filter Matthew DeVore
@ 2019-06-15  0:40   ` Matthew DeVore
  2019-06-15  0:40   ` [PATCH v4 10/10] list-objects-filter-options: make parser void Matthew DeVore
  2019-06-18  1:25   ` [PATCH v4 00/10] Filter combination Junio C Hamano
  10 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-15  0:40 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

Introduce a new macro ALLOC_GROW_BY which automatically zeros the added
array elements and takes care of updating the nr value. Use the macro in
code introduced earlier in this patchset.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 cache.h                       | 22 ++++++++++++++++++++++
 list-objects-filter-options.c | 17 +++++++----------
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/cache.h b/cache.h
index b4bb2e2c11..48fb0f63c2 100644
--- a/cache.h
+++ b/cache.h
@@ -653,33 +653,55 @@ int init_db(const char *git_dir, const char *real_git_dir,
 void sanitize_stdfds(void);
 int daemonize(void);
 
 #define alloc_nr(x) (((x)+16)*3/2)
 
 /*
  * Realloc the buffer pointed at by variable 'x' so that it can hold
  * at least 'nr' entries; the number of entries currently allocated
  * is 'alloc', using the standard growing factor alloc_nr() macro.
  *
+ * Consider using ALLOC_GROW_BY instead of ALLOC_GROW as it has some
+ * added niceties.
+ *
  * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
  */
 #define ALLOC_GROW(x, nr, alloc) \
 	do { \
 		if ((nr) > alloc) { \
 			if (alloc_nr(alloc) < (nr)) \
 				alloc = (nr); \
 			else \
 				alloc = alloc_nr(alloc); \
 			REALLOC_ARRAY(x, alloc); \
 		} \
 	} while (0)
 
+/*
+ * Similar to ALLOC_GROW but handles updating of the nr value and
+ * zeroing the bytes of the newly-grown array elements.
+ *
+ * DO NOT USE any expression with side-effect for any of the
+ * arguments.
+ */
+#define ALLOC_GROW_BY(x, nr, increase, alloc) \
+	do { \
+		if (increase) { \
+			size_t new_nr = nr + (increase); \
+			if (new_nr < nr) \
+				BUG("negative growth in ALLOC_GROW_BY"); \
+			ALLOC_GROW(x, new_nr, alloc); \
+			memset((x) + nr, 0, sizeof(*(x)) * (increase)); \
+			nr = new_nr; \
+		} \
+	} while (0)
+
 /* Initialize and use the cache information */
 struct lock_file;
 void preload_index(struct index_state *index,
 		   const struct pathspec *pathspec,
 		   unsigned int refresh_flags);
 int do_read_index(struct index_state *istate, const char *path,
 		  int must_exist); /* for testting only! */
 int read_index_from(struct index_state *, const char *path,
 		    const char *gitdir);
 int is_index_unborn(struct index_state *);
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index ab9c455fbd..f07928ea21 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -112,28 +112,26 @@ static int has_reserved_character(
 	}
 
 	return 0;
 }
 
 static int parse_combine_subfilter(
 	struct list_objects_filter_options *filter_options,
 	struct strbuf *subspec,
 	struct strbuf *errbuf)
 {
-	size_t new_index = filter_options->sub_nr++;
+	size_t new_index = filter_options->sub_nr;
 	char *decoded;
 	int result;
 
-	ALLOC_GROW(filter_options->sub, filter_options->sub_nr,
-		   filter_options->sub_alloc);
-	memset(&filter_options->sub[new_index], 0,
-	       sizeof(*filter_options->sub));
+	ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
+		      filter_options->sub_alloc);
 
 	decoded = url_percent_decode(subspec->buf);
 
 	result = has_reserved_character(subspec, errbuf) ||
 		gently_parse_list_objects_filter(
 			&filter_options->sub[new_index], decoded, errbuf);
 
 	free(decoded);
 	return result;
 }
@@ -248,27 +246,26 @@ int parse_list_objects_filter(
 			filter_options, arg, &errbuf);
 	} else {
 		/*
 		 * Make filter_options an LOFC_COMBINE spec so we can trivially
 		 * add subspecs to it.
 		 */
 		transform_to_combine_type(filter_options);
 
 		string_list_append(&filter_options->filter_spec, xstrdup("+"));
 		filter_spec_append_urlencode(filter_options, arg);
-		ALLOC_GROW(filter_options->sub, filter_options->sub_nr + 1,
-			   filter_options->sub_alloc);
-		filter_options = &filter_options->sub[filter_options->sub_nr++];
-		memset(filter_options, 0, sizeof(*filter_options));
+		ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
+			      filter_options->sub_alloc);
 
 		parse_error = gently_parse_list_objects_filter(
-			filter_options, arg, &errbuf);
+			&filter_options->sub[filter_options->sub_nr - 1], arg,
+			&errbuf);
 	}
 	if (parse_error)
 		die("%s", errbuf.buf);
 	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 10/10] list-objects-filter-options: make parser void
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
                     ` (8 preceding siblings ...)
  2019-06-15  0:40   ` [PATCH v4 09/10] list-objects-filter-options: clean up use of ALLOC_GROW Matthew DeVore
@ 2019-06-15  0:40   ` Matthew DeVore
  2019-06-22  0:46     ` Jonathan Tan
  2019-06-18  1:25   ` [PATCH v4 00/10] Filter combination Junio C Hamano
  10 siblings, 1 reply; 57+ messages in thread
From: Matthew DeVore @ 2019-06-15  0:40 UTC (permalink / raw)
  To: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer
  Cc: Matthew DeVore, matvore

This function always returns 0, so make it return void instead.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 list-objects-filter-options.c | 12 +++++-------
 list-objects-filter-options.h |  2 +-
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index f07928ea21..e19ecdcafa 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -225,21 +225,21 @@ static void transform_to_combine_type(
 	string_list_clear(&filter_options->sub[0].filter_spec, /*free_util=*/0);
 }
 
 void list_objects_filter_die_if_populated(
 	struct list_objects_filter_options *filter_options)
 {
 	if (filter_options->choice)
 		die(_("multiple filter-specs cannot be combined"));
 }
 
-int parse_list_objects_filter(
+void parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg)
 {
 	struct strbuf errbuf = STRBUF_INIT;
 	int parse_error;
 
 	if (!filter_options->choice) {
 		string_list_append(&filter_options->filter_spec, xstrdup(arg));
 
 		parse_error = gently_parse_list_objects_filter(
@@ -255,34 +255,32 @@ int parse_list_objects_filter(
 		filter_spec_append_urlencode(filter_options, arg);
 		ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
 			      filter_options->sub_alloc);
 
 		parse_error = gently_parse_list_objects_filter(
 			&filter_options->sub[filter_options->sub_nr - 1], arg,
 			&errbuf);
 	}
 	if (parse_error)
 		die("%s", errbuf.buf);
-	return 0;
 }
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset)
 {
 	struct list_objects_filter_options *filter_options = opt->value;
 
-	if (unset || !arg) {
+	if (unset || !arg)
 		list_objects_filter_set_no_filter(filter_options);
-		return 0;
-	}
-
-	return parse_list_objects_filter(filter_options, arg);
+	else
+		parse_list_objects_filter(filter_options, arg);
+	return 0;
 }
 
 const char *list_objects_filter_spec(struct list_objects_filter_options *filter)
 {
 	if (!filter->filter_spec.nr)
 		BUG("no filter_spec available for this filter");
 	if (filter->filter_spec.nr != 1) {
 		struct strbuf concatted = STRBUF_INIT;
 		strbuf_add_separated_string_list(
 			&concatted, "", &filter->filter_spec);
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index d8bc7e946e..db37dfb34a 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -67,21 +67,21 @@ void list_objects_filter_die_if_populated(
 	struct list_objects_filter_options *filter_options);
 
 /*
  * Parses the filter spec string given by arg and either (1) simply places the
  * result in filter_options if it is not yet populated or (2) combines it with
  * the filter already in filter_options if it is already populated. In the case
  * of (2), the filter specs are combined as if specified with 'combine:'.
  *
  * Dies and prints a user-facing message if an error occurs.
  */
-int parse_list_objects_filter(
+void parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg);
 
 int opt_parse_list_objects_filter(const struct option *opt,
 				  const char *arg, int unset);
 
 #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
 	{ OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \
 	  N_("object filtering"), 0, \
 	  opt_parse_list_objects_filter }
-- 
2.21.0


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v4 00/10] Filter combination
  2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
                     ` (9 preceding siblings ...)
  2019-06-15  0:40   ` [PATCH v4 10/10] list-objects-filter-options: make parser void Matthew DeVore
@ 2019-06-18  1:25   ` Junio C Hamano
  10 siblings, 0 replies; 57+ messages in thread
From: Junio C Hamano @ 2019-06-18  1:25 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer, matvore

Matthew DeVore <matvore@google.com> writes:

> I had to rebase this onto the latest master rev. master now has the patch which
> disables the sparse:path filter, and v3 of this patch set has conflicts with it.
> This version does not so it can be patched in and tried out by others.
>
> I have re-run the test suite on each commit. Sorry for the spamminess.

Thanks.  Will queue.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v4 04/10] list-objects-filter: implement composite filters
  2019-06-15  0:40   ` [PATCH v4 04/10] list-objects-filter: implement composite filters Matthew DeVore
@ 2019-06-18  8:42     ` Johannes Schindelin
  2019-06-18 20:22       ` Matthew DeVore
  2019-06-22  0:26     ` Jonathan Tan
  1 sibling, 1 reply; 57+ messages in thread
From: Johannes Schindelin @ 2019-06-18  8:42 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: git, jonathantanmy, jrn, dstolee, jeffhost, jrnieder, pclouds,
	peff, emilyshaffer, matvore, Jeff Hostetler, Junio C Hamano

Hi Matthew,

On Fri, 14 Jun 2019, Matthew DeVore wrote:

> diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
> index 8e7b4f96fa..1c402c6059 100644
> --- a/list-objects-filter-options.c
> +++ b/list-objects-filter-options.c
> [...]
> +
> +static int parse_combine_filter(
> +	struct list_objects_filter_options *filter_options,
> +	const char *arg,
> +	struct strbuf *errbuf)
> +{
> +	struct strbuf **subspecs = strbuf_split_str(arg, '+', 0);
> +	size_t sub;
> +	int result = 0;
> +
> +	if (!subspecs[0]) {
> +		strbuf_addf(errbuf,
> +			    _("expected something after combine:"));

Please squash this in, to pacify Coccinelle:

-- snipsnap --
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 5e5e30bc6a17..483ab512e24c 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -150,7 +150,7 @@ static int parse_combine_filter(
 	int result = 0;

 	if (!subspecs[0]) {
-		strbuf_addf(errbuf,
+		strbuf_addstr(errbuf,
 			    _("expected something after combine:"));
 		result = 1;
 		goto cleanup;


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v4 04/10] list-objects-filter: implement composite filters
  2019-06-18  8:42     ` Johannes Schindelin
@ 2019-06-18 20:22       ` Matthew DeVore
  2019-06-21 18:17         ` Johannes Schindelin
  0 siblings, 1 reply; 57+ messages in thread
From: Matthew DeVore @ 2019-06-18 20:22 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, peff, emilyshaffer, Jeff Hostetler,
	Junio C Hamano

On Tue, Jun 18, 2019 at 10:42:10AM +0200, Johannes Schindelin wrote:
> > +	if (!subspecs[0]) {
> > +		strbuf_addf(errbuf,
> > +			    _("expected something after combine:"));
> 
> Please squash this in, to pacify Coccinelle:
> 
> -- snipsnap --
> diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
> index 5e5e30bc6a17..483ab512e24c 100644
> --- a/list-objects-filter-options.c
> +++ b/list-objects-filter-options.c
> @@ -150,7 +150,7 @@ static int parse_combine_filter(
>  	int result = 0;
> 
>  	if (!subspecs[0]) {
> -		strbuf_addf(errbuf,
> +		strbuf_addstr(errbuf,

Thank you - fixed locally for the next re-roll.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v4 04/10] list-objects-filter: implement composite filters
  2019-06-18 20:22       ` Matthew DeVore
@ 2019-06-21 18:17         ` Johannes Schindelin
  0 siblings, 0 replies; 57+ messages in thread
From: Johannes Schindelin @ 2019-06-21 18:17 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: Matthew DeVore, git, jonathantanmy, jrn, dstolee, jeffhost,
	jrnieder, pclouds, peff, emilyshaffer, Jeff Hostetler,
	Junio C Hamano

Hi,

On Tue, 18 Jun 2019, Matthew DeVore wrote:

> On Tue, Jun 18, 2019 at 10:42:10AM +0200, Johannes Schindelin wrote:
> > > +	if (!subspecs[0]) {
> > > +		strbuf_addf(errbuf,
> > > +			    _("expected something after combine:"));
> >
> > Please squash this in, to pacify Coccinelle:

Junio, maybe you can apply a SQUASH??? on top? This is the reason why `pu`
is failing in the Azure Pipeline.

Thanks,
Dscho

> >
> > -- snipsnap --
> > diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
> > index 5e5e30bc6a17..483ab512e24c 100644
> > --- a/list-objects-filter-options.c
> > +++ b/list-objects-filter-options.c
> > @@ -150,7 +150,7 @@ static int parse_combine_filter(
> >  	int result = 0;
> >
> >  	if (!subspecs[0]) {
> > -		strbuf_addf(errbuf,
> > +		strbuf_addstr(errbuf,
>
> Thank you - fixed locally for the next re-roll.
>

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v4 01/10] list-objects-filter: make API easier to use
  2019-06-15  0:40   ` [PATCH v4 01/10] list-objects-filter: make API easier to use Matthew DeVore
@ 2019-06-21 22:58     ` Jonathan Tan
  2019-06-27  0:46       ` Matthew DeVore
  0 siblings, 1 reply; 57+ messages in thread
From: Jonathan Tan @ 2019-06-21 22:58 UTC (permalink / raw)
  To: matvore; +Cc: git, Jonathan Tan

> Make the list-objects-filter.h API more opaque and easier to use. This
> prepares for combined filter support, where filters will be created and
> used in a new context.
> 
> Helped-by: Jeff Hostetler <git@jeffhostetler.com>
> Helped-by: Junio C Hamano <gitster@pobox.com>
> Signed-off-by: Matthew DeVore <matvore@google.com>

So what happens is that filter_fn, filter_free_fn, and filter_data are
encapsulated into one opaque object, and users will now use filter_fn
and filter_free_fn through other functions that we expose, allowing us
to add some conveniences that currently have to be repeated at each call
site.

I would prefer the following commit message:

  list-objects-filter: encapsulate filter components

  Encapsulate filter_fn, filter_free_fn, and filter_data into their own
  opaque struct.

  Due to opaqueness, filter_fn and filter_free_fn can no longer be
  accessed directly by users. Currently, all usages of filter_fn are
  guarded by a necessary check:

    (obj->flags & NOT_USER_GIVEN) && filter_fn

  Take the opportunity to include this check into the new function
  list_objects_filter__filter_object(), so that we no longer need to
  write this check at every caller of the filter function.

  Also, the init functions in list-objects-filter.c no longer need to
  confusingly return the filter constituents in various places
  (filter_fn and filter_free_fn as out parameters, and filter_data as
  the function's return value); they can just initialize the "struct
  filter" passed in.

> +enum list_objects_filter_result list_objects_filter__filter_object(
> +	struct repository *r,
> +	enum list_objects_filter_situation filter_situation,
> +	struct object *obj,
> +	const char *pathname,
> +	const char *filename,
> +	struct filter *filter)
> +{
> +	if (filter && (obj->flags & NOT_USER_GIVEN))
> +		return filter->filter_object_fn(r, filter_situation, obj,
> +						pathname, filename,
> +						filter->filter_data);
> +	/*
> +	 * No filter is active or user gave object explicitly. Choose default
> +	 * behavior based on filter situation.
> +	 */

This part is when we do not need to apply the filter (or none exists). I
think the comment will be better if stated more explicitly:

  No filter is active or user gave object explicitly. In this case,
  always show the object (except when LOFS_END_TREE, since this tree had
  already been shown when LOFS_BEGIN_TREE).

> +	if (filter_situation == LOFS_END_TREE)
> +		return 0;
> +	return LOFR_MARK_SEEN | LOFR_DO_SHOW;
> +}

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v4 04/10] list-objects-filter: implement composite filters
  2019-06-15  0:40   ` [PATCH v4 04/10] list-objects-filter: implement composite filters Matthew DeVore
  2019-06-18  8:42     ` Johannes Schindelin
@ 2019-06-22  0:26     ` Jonathan Tan
  1 sibling, 0 replies; 57+ messages in thread
From: Jonathan Tan @ 2019-06-22  0:26 UTC (permalink / raw)
  To: matvore; +Cc: git, Jonathan Tan

> Allow combining filters such that only objects accepted by all filters
> are shown. The motivation for this is to allow getting directory
> listings without also fetching blobs. This can be done by combining
> blob:none with tree:<depth>. There are massive repositories that have
> larger-than-expected trees - even if you include only a single commit.

First of all, patches 2 and 3 are straightforward and LGTM. On to patch
4...

[snip]

> The current usage requires passing the filter to rev-list in the
> following form:
> 
> 	--filter=<FILTER1> --filter=<FILTER2> ...
> 
> Such usage is currently an error, so giving it a meaning is backwards-
> compatible.
> 
> The URL-encoding scheme is being introduced before the repeated flag
> logic, and the user-facing documentation for URL-encoding is being
> withheld until the repeated flag feature is implemented. The
> URL-encoding is in general not meant to be used directly by the user,
> and it is better to describe the URL-encoding feature in terms of the
> repeated flag.

As of this commit, we don't support such arguments passed to rev-list in
this way, so I would write these paragraphs as:

  A combined filter supports any number of subfilters, and is written in
  the following form:

    combine:<filter 1>+<filter 2>+<filter 3>

  Certain non-alphanumeric characters in each filter must be
  URL-encoded.

  For now, combined filters must be specified in this form. In a
  subsequent commit, rev-list will support multiple --filter arguments
  which will have the same effect as specifying one filter argument
  starting with "combine:".

> Helped-by: Emily Shaffer <emilyshaffer@google.com>
> Helped-by: Jeff Hostetler <git@jeffhostetler.com>
> Helped-by: Junio C Hamano <gitster@pobox.com>
> Signed-off-by: Matthew DeVore <matvore@google.com>
> ---
>  list-objects-filter-options.c       | 106 ++++++++++++++++++-
>  list-objects-filter-options.h       |  17 ++-
>  list-objects-filter.c               | 159 ++++++++++++++++++++++++++++
>  t/t6112-rev-list-filters-objects.sh | 151 +++++++++++++++++++++++++-
>  url.c                               |   6 ++
>  url.h                               |   8 ++
>  6 files changed, 441 insertions(+), 6 deletions(-)
> 
> @@ -28,22 +34,20 @@ static int gently_parse_list_objects_filter(
>  	struct strbuf *errbuf)
>  {
>  	const char *v0;
>  
>  	if (filter_options->choice) {
>  		strbuf_addstr(
>  			errbuf, _("multiple filter-specs cannot be combined"));
>  		return 1;
>  	}
>  
> -	filter_options->filter_spec = strdup(arg);
> -

This line has been removed from gently_parse_list_objects_filter()
because this function gains another caller that does not need it.
To compensate, this line has been added to both its existing callers.

> @@ -31,27 +32,37 @@ struct list_objects_filter_options {
>  	 * the filtering algorithm to use.
>  	 */
>  	enum list_objects_filter_choice choice;
>  
>  	/*
>  	 * Choice is LOFC_DISABLED because "--no-filter" was requested.
>  	 */
>  	unsigned int no_filter : 1;
>  
>  	/*
> -	 * Parsed values (fields) from within the filter-spec.  These are
> -	 * choice-specific; not all values will be defined for any given
> -	 * choice.
> +	 * BEGIN choice-specific parsed values from within the filter-spec. Only
> +	 * some values will be defined for any given choice.
>  	 */
> +
>  	struct object_id *sparse_oid_value;
>  	unsigned long blob_limit_value;
>  	unsigned long tree_exclude_depth;
> +
> +	/* LOFC_COMBINE values */
> +
> +	/* This array contains all the subfilters which this filter combines. */
> +	size_t sub_nr, sub_alloc;
> +	struct list_objects_filter_options *sub;
> +
> +	/*
> +	 * END choice-specific parsed values.
> +	 */
>  };

I still think it's cleaner to just have a "left subfilter" and "right
subfilter", but I don't feel strongly about it. In any case, this is an
internal detail and can always be changed in the future.

> +	/*
> +	 * Optional. If this function is supplied and the filter needs to
> +	 * collect omits, then this function is called once before free_fn is
> +	 * called.
> +	 */
> +	void (*finalize_omits_fn)(struct oidset *omits, void *filter_data);

This is needed because a combined filter's omits actually lie in the
subfilters. Resolving it this way means that callers must call
list_objects_filter__free() before using the omits set. Can you add
documentation to __init() (which is the first function to take in the
omits set) and __free() describing this?

(As stated in the test below, we cannot just share one omits set amongst
all the subfilters - see filter_trees_update_omits and the call site
that relies on its return value.)

Here comes the tricky part...

> +static int should_delegate(enum list_objects_filter_situation filter_situation,
> +			   struct object *obj,
> +			   struct subfilter *sub)
> +{
> +	if (!sub->is_skipping_tree)
> +		return 1;
> +	if (filter_situation == LOFS_END_TREE &&
> +		oideq(&obj->oid, &sub->skip_tree)) {
> +		sub->is_skipping_tree = 0;
> +		return 1;
> +	}
> +	return 0;
> +}

Optional: I think this should be called "test_and_set_skip_tree" or
something like that, made to return the inverse of its current return
value, and documented:

  Returns the value of sub->is_skipping_tree at the moment of
  invocation. If iteration is at the LOFS_END_TREE of the tree currently
  being skipped, first clears sub->is_skipping_tree before returning.

> +static enum list_objects_filter_result process_subfilter(
> +	struct repository *r,
> +	enum list_objects_filter_situation filter_situation,
> +	struct object *obj,
> +	const char *pathname,
> +	const char *filename,
> +	struct subfilter *sub)
> +{
> +	enum list_objects_filter_result result;
> +
> +	/*
> +	 * Check should_delegate before oidset_contains so that
> +	 * is_skipping_tree gets unset even when the object is marked as seen.
> +	 * As of this writing, no filter uses LOFR_MARK_SEEN on trees that also
> +	 * uses LOFR_SKIP_TREE, so the ordering is only theoretically
> +	 * important. Be cautious if you change the order of the below checks
> +	 * and more filters have been added!
> +	 */
> +	if (!should_delegate(filter_situation, obj, sub))
> +		return LOFR_ZERO;
> +	if (oidset_contains(&sub->seen, &obj->oid))
> +		return LOFR_ZERO;
> +
> +	result = list_objects_filter__filter_object(
> +		r, filter_situation, obj, pathname, filename, sub->filter);
> +
> +	if (result & LOFR_MARK_SEEN)
> +		oidset_insert(&sub->seen, &obj->oid);
> +
> +	if (result & LOFR_SKIP_TREE) {
> +		sub->is_skipping_tree = 1;
> +		sub->skip_tree = obj->oid;
> +	}
> +
> +	return result;
> +}

Looks good.

> +static enum list_objects_filter_result filter_combine(
> +	struct repository *r,
> +	enum list_objects_filter_situation filter_situation,
> +	struct object *obj,
> +	const char *pathname,
> +	const char *filename,
> +	struct oidset *omits,
> +	void *filter_data)
> +{
> +	struct combine_filter_data *d = filter_data;
> +	enum list_objects_filter_result combined_result =
> +		LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE;
> +	size_t sub;
> +
> +	for (sub = 0; sub < d->nr; sub++) {
> +		enum list_objects_filter_result sub_result = process_subfilter(
> +			r, filter_situation, obj, pathname, filename,
> +			&d->sub[sub]);
> +		if (!(sub_result & LOFR_DO_SHOW))
> +			combined_result &= ~LOFR_DO_SHOW;
> +		if (!(sub_result & LOFR_MARK_SEEN))
> +			combined_result &= ~LOFR_MARK_SEEN;
> +		if (!d->sub[sub].is_skipping_tree)
> +			combined_result &= ~LOFR_SKIP_TREE;
> +	}
> +
> +	return combined_result;
> +}

And also looks good. Might be confusing for tree skipping to be
communicated through is_skipping_tree instead of the return value, but
is_skipping_tree needs to be set anyway for other reasons, so that's
convenient.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v4 06/10] list-objects-filter-options: make filter_spec a string_list
  2019-06-15  0:40   ` [PATCH v4 06/10] list-objects-filter-options: make filter_spec a string_list Matthew DeVore
@ 2019-06-22  0:37     ` Jonathan Tan
  0 siblings, 0 replies; 57+ messages in thread
From: Jonathan Tan @ 2019-06-22  0:37 UTC (permalink / raw)
  To: matvore; +Cc: git, Jonathan Tan

Patch 5 and this patch look good to me.

> @@ -1134,27 +1134,25 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
>  		transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
>  
>  	if (option_upload_pack)
>  		transport_set_option(transport, TRANS_OPT_UPLOADPACK,
>  				     option_upload_pack);
>  
>  	if (server_options.nr)
>  		transport->server_options = &server_options;
>  
>  	if (filter_options.choice) {
> -		struct strbuf expanded_filter_spec = STRBUF_INIT;
> -		expand_list_objects_filter_spec(&filter_options,
> -						&expanded_filter_spec);
> +		const char *spec =
> +			expand_list_objects_filter_spec(&filter_options);
>  		transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
> -				     expanded_filter_spec.buf);
> +				     spec);
>  		transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
> -		strbuf_release(&expanded_filter_spec);

So expand_list_objects_filter_spec() now returns a filter_options-owned
string (instead of previously writing to a strbuf), which is why we no
longer need to do any freeing or releasing. That makes sense. (Same for
the other call sites.)

> @@ -177,72 +177,89 @@ static int parse_combine_filter(
>  	}
>  	return result;
>  }
>  
>  int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
>  			      const char *arg)
>  {
>  	struct strbuf buf = STRBUF_INIT;
>  	if (filter_options->choice)
>  		die(_("multiple filter-specs cannot be combined"));
> -	filter_options->filter_spec = strdup(arg);
> +	string_list_append(&filter_options->filter_spec, xstrdup(arg));

This append needs to be called with xstrdup, because a zero-initialized
string list is NODUP. OK.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v4 10/10] list-objects-filter-options: make parser void
  2019-06-15  0:40   ` [PATCH v4 10/10] list-objects-filter-options: make parser void Matthew DeVore
@ 2019-06-22  0:46     ` Jonathan Tan
  0 siblings, 0 replies; 57+ messages in thread
From: Jonathan Tan @ 2019-06-22  0:46 UTC (permalink / raw)
  To: matvore; +Cc: git, Jonathan Tan

> This function always returns 0, so make it return void instead.

And...patches 7-10 look straightforward and good to me.

In summary, I don't think any changes need to be made to all 10 patches
other than textual ones (commit messages, documentation, and function
names).

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v4 01/10] list-objects-filter: make API easier to use
  2019-06-21 22:58     ` Jonathan Tan
@ 2019-06-27  0:46       ` Matthew DeVore
  0 siblings, 0 replies; 57+ messages in thread
From: Matthew DeVore @ 2019-06-27  0:46 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: matvore, git

On Fri, Jun 21, 2019 at 03:58:38PM -0700, Jonathan Tan wrote:
> So what happens is that filter_fn, filter_free_fn, and filter_data are
> encapsulated into one opaque object, and users will now use filter_fn
> and filter_free_fn through other functions that we expose, allowing us
> to add some conveniences that currently have to be repeated at each call
> site.
> 
> I would prefer the following commit message:
> 
>   list-objects-filter: encapsulate filter components
> 
>   Encapsulate filter_fn, filter_free_fn, and filter_data into their own
>   opaque struct.
> 
>   Due to opaqueness, filter_fn and filter_free_fn can no longer be
>   accessed directly by users. Currently, all usages of filter_fn are
>   guarded by a necessary check:
> 
>     (obj->flags & NOT_USER_GIVEN) && filter_fn
> 
>   Take the opportunity to include this check into the new function
>   list_objects_filter__filter_object(), so that we no longer need to
>   write this check at every caller of the filter function.
> 
>   Also, the init functions in list-objects-filter.c no longer need to
>   confusingly return the filter constituents in various places
>   (filter_fn and filter_free_fn as out parameters, and filter_data as
>   the function's return value); they can just initialize the "struct
>   filter" passed in.
> 

Very nice, applied. I think your commit message is much more helpful than mine
and doesn't use the filter combination feature as an excuse for the change.

> > +	/*
> > +	 * No filter is active or user gave object explicitly. Choose default
> > +	 * behavior based on filter situation.
> > +	 */
> 
> This part is when we do not need to apply the filter (or none exists). I
> think the comment will be better if stated more explicitly:
> 
>   No filter is active or the user gave the object explicitly. In this
>   case, always show the object (except at LOFS_END_TREE, since the tree
>   has already been shown at LOFS_BEGIN_TREE).
> 

Agreed, this is a little better. Applied.

^ permalink raw reply	[flat|nested] 57+ messages in thread

end of thread, back to index

Thread overview: 57+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-01  0:35 [PATCH v2 0/9] Filter combination Matthew DeVore
2019-06-01  0:35 ` [PATCH v2 1/9] list-objects-filter: make API easier to use Matthew DeVore
2019-06-01  0:35 ` [PATCH v2 2/9] list-objects-filter: put omits set in filter struct Matthew DeVore
2019-06-01  0:35 ` [PATCH v2 3/9] list-objects-filter-options: always supply *errbuf Matthew DeVore
2019-06-01  0:35 ` [PATCH v2 4/9] list-objects-filter: implement composite filters Matthew DeVore
2019-06-03 21:51   ` Jeff Hostetler
2019-06-06 22:32     ` Matthew DeVore
2019-06-07 17:58       ` Jeff Hostetler
2019-06-01  0:35 ` [PATCH v2 5/9] list-objects-filter-options: move error check up Matthew DeVore
2019-06-01  0:36 ` [PATCH v2 6/9] list-objects-filter-options: make filter_spec a strbuf Matthew DeVore
2019-06-10 20:13   ` Junio C Hamano
2019-06-11  0:34     ` Matthew DeVore
2019-06-11 17:33       ` Junio C Hamano
2019-06-11 18:44         ` Matthew DeVore
2019-06-11 21:34           ` Matthew DeVore
2019-06-11 21:48           ` Junio C Hamano
2019-06-12  0:37             ` Matthew DeVore
2019-06-12 14:55               ` Matthew DeVore
2019-06-01  0:36 ` [PATCH v2 7/9] list-objects-filter-options: allow mult. --filter Matthew DeVore
2019-06-01  0:36 ` [PATCH v2 8/9] list-objects-filter-options: clean up use of ALLOC_GROW Matthew DeVore
2019-06-03 22:07   ` Jacob Keller
2019-06-03 22:39     ` Matthew DeVore
2019-06-04  3:16       ` Jacob Keller
2019-06-01  0:36 ` [PATCH v2 9/9] list-objects-filter-options: make parser void Matthew DeVore
2019-06-03 21:35 ` [PATCH v2 0/9] Filter combination Jeff Hostetler
2019-06-13 21:51 ` [PATCH v3 00/10] " Matthew DeVore
2019-06-13 21:51   ` [PATCH v3 01/10] list-objects-filter: make API easier to use Matthew DeVore
2019-06-13 21:51   ` [PATCH v3 02/10] list-objects-filter: put omits set in filter struct Matthew DeVore
2019-06-13 21:51   ` [PATCH v3 03/10] list-objects-filter-options: always supply *errbuf Matthew DeVore
2019-06-13 21:51   ` [PATCH v3 04/10] list-objects-filter: implement composite filters Matthew DeVore
2019-06-13 21:51   ` [PATCH v3 05/10] list-objects-filter-options: move error check up Matthew DeVore
2019-06-13 21:51   ` [PATCH v3 06/10] list-objects-filter-options: make filter_spec a string_list Matthew DeVore
2019-06-13 21:51   ` [PATCH v3 07/10] strbuf: give URL-encoding API a char predicate fn Matthew DeVore
2019-06-13 21:51   ` [PATCH v3 08/10] list-objects-filter-options: allow mult. --filter Matthew DeVore
2019-06-13 21:51   ` [PATCH v3 09/10] list-objects-filter-options: clean up use of ALLOC_GROW Matthew DeVore
2019-06-13 21:51   ` [PATCH v3 10/10] list-objects-filter-options: make parser void Matthew DeVore
2019-06-14 19:50   ` [PATCH v3 00/10] Filter combination Junio C Hamano
2019-06-15  0:40 ` [PATCH v4 " Matthew DeVore
2019-06-15  0:40   ` [PATCH v4 01/10] list-objects-filter: make API easier to use Matthew DeVore
2019-06-21 22:58     ` Jonathan Tan
2019-06-27  0:46       ` Matthew DeVore
2019-06-15  0:40   ` [PATCH v4 02/10] list-objects-filter: put omits set in filter struct Matthew DeVore
2019-06-15  0:40   ` [PATCH v4 03/10] list-objects-filter-options: always supply *errbuf Matthew DeVore
2019-06-15  0:40   ` [PATCH v4 04/10] list-objects-filter: implement composite filters Matthew DeVore
2019-06-18  8:42     ` Johannes Schindelin
2019-06-18 20:22       ` Matthew DeVore
2019-06-21 18:17         ` Johannes Schindelin
2019-06-22  0:26     ` Jonathan Tan
2019-06-15  0:40   ` [PATCH v4 05/10] list-objects-filter-options: move error check up Matthew DeVore
2019-06-15  0:40   ` [PATCH v4 06/10] list-objects-filter-options: make filter_spec a string_list Matthew DeVore
2019-06-22  0:37     ` Jonathan Tan
2019-06-15  0:40   ` [PATCH v4 07/10] strbuf: give URL-encoding API a char predicate fn Matthew DeVore
2019-06-15  0:40   ` [PATCH v4 08/10] list-objects-filter-options: allow mult. --filter Matthew DeVore
2019-06-15  0:40   ` [PATCH v4 09/10] list-objects-filter-options: clean up use of ALLOC_GROW Matthew DeVore
2019-06-15  0:40   ` [PATCH v4 10/10] list-objects-filter-options: make parser void Matthew DeVore
2019-06-22  0:46     ` Jonathan Tan
2019-06-18  1:25   ` [PATCH v4 00/10] Filter combination Junio C Hamano

git@vger.kernel.org list mirror (unofficial, one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox