From: Jeff King <peff@peff.net>
To: git@vger.kernel.org
Subject: [PATCH 12/13] pack-bitmap: implement BLOB_LIMIT filtering
Date: Wed, 12 Feb 2020 21:25:23 -0500 [thread overview]
Message-ID: <20200213022523.GL1126038@coredump.intra.peff.net> (raw)
In-Reply-To: <20200213021506.GA1124607@coredump.intra.peff.net>
Just as the previous commit implemented BLOB_NONE, we can support
BLOB_LIMIT filters by looking at the sizes of any blobs in the result
and unsetting their bits as appropriate. This is slightly more expensive
than BLOB_NONE, but still produces a noticeable speedup (these results
are on git.git):
Test                                         HEAD~2            HEAD
------------------------------------------------------------------------------------
5310.7: rev-list count with blob:none        1.80(1.77+0.02)   0.22(0.20+0.02) -87.8%
5310.8: rev-list count with blob:limit=1k    1.99(1.96+0.03)   0.29(0.25+0.03) -85.4%
The implementation is similar to the BLOB_NONE one, with the exception
that we have to go object-by-object while walking the blob-type bitmap
(since we can't mask out the matches, but must look up the size
individually for each blob). The trick with using ctz64() is taken from
show_objects_for_type(), which likewise needs to find individual bits
(but wants to quickly skip over big chunks without blobs).
Signed-off-by: Jeff King <peff@peff.net>
---
pack-bitmap.c | 80 ++++++++++++++++++++++++++++++
t/perf/p5310-pack-bitmaps.sh | 5 ++
t/t6113-rev-list-bitmap-filters.sh | 20 +++++++-
3 files changed, 104 insertions(+), 1 deletion(-)
diff --git a/pack-bitmap.c b/pack-bitmap.c
index f430ddc3d2..76cb60e8c3 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -773,6 +773,78 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git,
bitmap_free(tips);
}
+static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
+ uint32_t pos)
+{ /*
+ * Return the size of the object found at bit position `pos` of the
+ * bitmap, die()ing if the size lookup fails.
+ */
+ struct packed_git *pack = bitmap_git->pack;
+ unsigned long size;
+ struct object_info oi = OBJECT_INFO_INIT;
+ /* Have the lookups below report the object's size into `size`. */
+ oi.sizep = &size;
+ /*
+ * Positions below pack->num_objects are resolved through the pack's
+ * revindex; anything at or above that indexes into ext_index.
+ */
+ if (pos < pack->num_objects) {
+ struct revindex_entry *entry = &pack->revindex[pos];
+ if (packed_object_info(the_repository, pack,
+ entry->offset, &oi) < 0) {
+ struct object_id oid; /* looked up only to name the object in the error */
+ nth_packed_object_oid(&oid, pack, entry->nr);
+ die(_("unable to get size of %s"), oid_to_hex(&oid));
+ }
+ } else {
+ struct eindex *eindex = &bitmap_git->ext_index;
+ struct object *obj = eindex->objects[pos - pack->num_objects];
+ if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
+ }
+
+ return size;
+}
+
+static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git,
+ struct object_list *tip_objects,
+ struct bitmap *to_filter,
+ unsigned long limit)
+{ /*
+ * Clear from `to_filter` the bit of every blob whose size is at least
+ * `limit`, except for blobs whose bit is set in the bitmap that
+ * find_tip_blobs() returns for `tip_objects`.
+ */
+ struct eindex *eindex = &bitmap_git->ext_index;
+ struct bitmap *tips;
+ struct ewah_iterator it;
+ eword_t mask;
+ uint32_t i;
+ /* NOTE(review): presumably the blobs named directly as tips -- confirm in find_tip_blobs(). */
+ tips = find_tip_blobs(bitmap_git, tip_objects);
+ /*
+ * Walk to_filter word by word in step with the blob-type bitmap, so
+ * that `word` holds only bits that are both selected and blobs.
+ */
+ for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB);
+ i < to_filter->word_alloc && ewah_iterator_next(&mask, &it);
+ i++) {
+ eword_t word = to_filter->words[i] & mask;
+ unsigned offset;
+ /* Visit the set bits of `word` one at a time. */
+ for (offset = 0; offset < BITS_IN_EWORD; offset++) {
+ uint32_t pos;
+ /* Stop early once no set bits remain at or above `offset`. */
+ if ((word >> offset) == 0)
+ break;
+ offset += ewah_bit_ctz64(word >> offset); /* jump to the next set bit */
+ pos = i * BITS_IN_EWORD + offset;
+ /* Sizes cannot be masked in bulk: each candidate needs its own lookup. */
+ if (!bitmap_get(tips, pos) &&
+ get_size_by_pos(bitmap_git, pos) >= limit)
+ bitmap_unset(to_filter, pos);
+ }
+ }
+ /*
+ * Apply the same test to the ext_index objects, whose bit positions
+ * start at pack->num_objects.
+ */
+ for (i = 0; i < eindex->count; i++) {
+ uint32_t pos = i + bitmap_git->pack->num_objects;
+ if (eindex->objects[i]->type == OBJ_BLOB &&
+ bitmap_get(to_filter, pos) &&
+ !bitmap_get(tips, pos) &&
+ get_size_by_pos(bitmap_git, pos) >= limit)
+ bitmap_unset(to_filter, pos);
+ }
+
+ bitmap_free(tips);
+}
+
static int filter_bitmap(struct bitmap_index *bitmap_git,
struct object_list *tip_objects,
struct bitmap *to_filter,
@@ -788,6 +860,14 @@ static int filter_bitmap(struct bitmap_index *bitmap_git,
return 0;
}
+ if (filter->choice == LOFC_BLOB_LIMIT) {
+ if (bitmap_git)
+ filter_bitmap_blob_limit(bitmap_git, tip_objects,
+ to_filter,
+ filter->blob_limit_value);
+ return 0;
+ }
+
/* filter choice not handled */
return -1;
}
diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh
index 3383983450..bbe1eb26a9 100755
--- a/t/perf/p5310-pack-bitmaps.sh
+++ b/t/perf/p5310-pack-bitmaps.sh
@@ -44,6 +44,11 @@ test_perf 'rev-list count with blob:none' '
--filter=blob:none >/dev/null
'
+test_perf 'rev-list count with blob:limit=1k' '
+ git rev-list --use-bitmap-index --count --objects --all \
+ --filter=blob:limit=1k >/dev/null
+'
+
test_expect_success 'create partial bitmap state' '
# pick a commit to represent the repo tip in the past
cutoff=$(git rev-list HEAD~100 -1) &&
diff --git a/t/t6113-rev-list-bitmap-filters.sh b/t/t6113-rev-list-bitmap-filters.sh
index feaa6c0989..0878f72828 100755
--- a/t/t6113-rev-list-bitmap-filters.sh
+++ b/t/t6113-rev-list-bitmap-filters.sh
@@ -6,8 +6,10 @@ test_description='rev-list combining bitmaps and filters'
test_expect_success 'set up bitmapped repo' '
# one commit will have bitmaps, the other will not
test_commit one &&
+ test_commit much-larger-blob-one &&
git repack -adb &&
- test_commit two
+ test_commit two &&
+ test_commit much-larger-blob-two
'
test_expect_success 'filters fallback to non-bitmap traversal' '
@@ -56,4 +58,20 @@ test_expect_success 'blob:none filter with specified blob' '
cmp_bitmap_traversal expect actual
'
+test_expect_success 'blob:limit filter' '
+ git rev-list --objects --filter=blob:limit=5 HEAD >expect &&
+ git rev-list --use-bitmap-index \
+ --objects --filter=blob:limit=5 HEAD >actual &&
+ cmp_bitmap_traversal expect actual
+'
+
+test_expect_success 'blob:limit filter with specified blob' '
+ git rev-list --objects --filter=blob:limit=5 \
+ HEAD HEAD:much-larger-blob-two.t >expect &&
+ git rev-list --use-bitmap-index \
+ --objects --filter=blob:limit=5 \
+ HEAD HEAD:much-larger-blob-two.t >actual &&
+ cmp_bitmap_traversal expect actual
+'
+
test_done
--
2.25.0.785.g49bcbe7794
next prev parent reply other threads:[~2020-02-13 2:25 UTC|newest]
Thread overview: 73+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-13 2:15 [PATCH 0/13] combining object filters and bitmaps Jeff King
2020-02-13 2:16 ` [PATCH 01/13] pack-bitmap: factor out type iterator initialization Jeff King
2020-02-13 17:45 ` Junio C Hamano
2020-02-13 2:16 ` [PATCH 02/13] pack-bitmap: fix leak of haves/wants object lists Jeff King
2020-02-13 18:12 ` Junio C Hamano
2020-02-13 2:17 ` [PATCH 03/13] rev-list: fallback to non-bitmap traversal when filtering Jeff King
2020-02-13 18:19 ` Junio C Hamano
2020-02-13 18:40 ` Jeff King
2020-02-13 2:17 ` [PATCH 04/13] rev-list: consolidate bitmap-disabling options Jeff King
2020-02-13 2:18 ` [PATCH 05/13] rev-list: factor out bitmap-optimized routines Jeff King
2020-02-13 18:34 ` Junio C Hamano
2020-02-13 2:19 ` [PATCH 06/13] rev-list: make --count work with --objects Jeff King
2020-02-13 19:14 ` Junio C Hamano
2020-02-13 20:27 ` Jeff King
2020-02-13 2:20 ` [PATCH 07/13] rev-list: allow bitmaps when counting objects Jeff King
2020-02-13 21:47 ` Junio C Hamano
2020-02-13 22:27 ` Jeff King
2020-02-13 2:20 ` [PATCH 08/13] pack-bitmap: basic noop bitmap filter infrastructure Jeff King
2020-02-13 2:21 ` [PATCH 09/13] rev-list: use bitmap filters for traversal Jeff King
2020-02-13 22:22 ` Junio C Hamano
2020-02-13 22:34 ` Jeff King
2020-02-13 2:21 ` [PATCH 10/13] bitmap: add bitmap_unset() function Jeff King
2020-02-13 2:23 ` [PATCH 11/13] pack-bitmap: implement BLOB_NONE filtering Jeff King
2020-02-13 2:25 ` Jeff King [this message]
2020-02-13 23:17 ` [PATCH 12/13] pack-bitmap: implement BLOB_LIMIT filtering Junio C Hamano
2020-02-13 2:25 ` [PATCH 13/13] pack-objects: support filters with bitmaps Jeff King
2020-02-14 18:21 ` [PATCH v2 0/15] combining object filters and bitmaps Jeff King
2020-02-14 18:22 ` [PATCH v2 01/15] pack-bitmap: factor out type iterator initialization Jeff King
2020-02-15 0:10 ` Taylor Blau
2020-02-14 18:22 ` [PATCH v2 02/15] pack-bitmap: fix leak of haves/wants object lists Jeff King
2020-02-15 0:15 ` Taylor Blau
2020-02-15 6:46 ` Jeff King
2020-02-18 17:58 ` Derrick Stolee
2020-02-18 20:02 ` Jeff King
2020-02-14 18:22 ` [PATCH v2 03/15] rev-list: fallback to non-bitmap traversal when filtering Jeff King
2020-02-15 0:22 ` Taylor Blau
2020-02-14 18:22 ` [PATCH v2 04/15] pack-bitmap: refuse to do a bitmap traversal with pathspecs Jeff King
2020-02-14 19:03 ` Junio C Hamano
2020-02-14 20:51 ` Jeff King
2020-02-14 18:22 ` [PATCH v2 05/15] rev-list: factor out bitmap-optimized routines Jeff King
2020-02-15 0:35 ` Taylor Blau
2020-02-14 18:22 ` [PATCH v2 06/15] rev-list: make --count work with --objects Jeff King
2020-02-15 0:42 ` Taylor Blau
2020-02-15 6:48 ` Jeff King
2020-02-16 23:34 ` Junio C Hamano
2020-02-18 5:24 ` Jeff King
2020-02-18 17:28 ` Junio C Hamano
2020-02-18 19:55 ` Jeff King
2020-02-18 21:19 ` Junio C Hamano
2020-02-18 21:23 ` Jeff King
2020-02-18 18:05 ` Derrick Stolee
2020-02-18 19:59 ` Jeff King
2020-02-14 18:22 ` [PATCH v2 07/15] rev-list: allow bitmaps when counting objects Jeff King
2020-02-15 0:45 ` Taylor Blau
2020-02-15 6:55 ` Jeff King
2020-02-16 23:36 ` Junio C Hamano
2020-02-14 18:22 ` [PATCH v2 08/15] t5310: factor out bitmap traversal comparison Jeff King
2020-02-15 2:14 ` Taylor Blau
2020-02-15 7:00 ` Jeff King
2020-02-14 18:22 ` [PATCH v2 09/15] rev-list: allow commit-only bitmap traversals Jeff King
2020-02-18 18:18 ` Derrick Stolee
2020-02-18 20:05 ` Jeff King
2020-02-18 20:11 ` Derrick Stolee
2020-02-14 18:22 ` [PATCH v2 10/15] pack-bitmap: basic noop bitmap filter infrastructure Jeff King
2020-02-14 18:22 ` [PATCH v2 11/15] rev-list: use bitmap filters for traversal Jeff King
2020-02-14 18:22 ` [PATCH v2 12/15] bitmap: add bitmap_unset() function Jeff King
2020-02-14 18:22 ` [PATCH v2 13/15] pack-bitmap: implement BLOB_NONE filtering Jeff King
2020-02-18 19:26 ` Derrick Stolee
2020-02-18 19:36 ` Derrick Stolee
2020-02-18 20:30 ` Jeff King
2020-02-18 20:24 ` Jeff King
2020-02-14 18:22 ` [PATCH v2 14/15] pack-bitmap: implement BLOB_LIMIT filtering Jeff King
2020-02-14 18:22 ` [PATCH v2 15/15] pack-objects: support filters with bitmaps Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200213022523.GL1126038@coredump.intra.peff.net \
--to=peff@peff.net \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).