From: Derrick Stolee <dstolee@microsoft.com>
To: git@vger.kernel.org
Cc: stolee@gmail.com, gitster@pobox.com, peff@peff.net,
Derrick Stolee <dstolee@microsoft.com>
Subject: [PATCH v2] cleanup: fix possible overflow errors in binary search
Date: Sun, 8 Oct 2017 14:29:37 -0400 [thread overview]
Message-ID: <20171008182937.69205-1-dstolee@microsoft.com> (raw)
In-Reply-To: <bd3c5fe2-ac6a-b978-163b-da07208b879c@gmail.com>
A common mistake when writing binary search is to allow possible
integer overflow by using the simple average:
mid = (min + max) / 2;
Instead, use the overflow-safe version:
mid = min + (max - min) / 2;
This translation is safe since the operation occurs inside a loop
conditioned on "min < max". The included changes were found using
the following git grep:
git grep '/ *2;' '*.c'
Making this cleanup will prevent future review friction when a new
binary search is constructed based on existing code.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
builtin/index-pack.c | 4 ++--
builtin/pack-objects.c | 2 +-
builtin/unpack-objects.c | 2 +-
cache-tree.c | 2 +-
compat/regex/regex_internal.c | 4 ++--
compat/regex/regexec.c | 2 +-
packfile.c | 2 +-
sha1-lookup.c | 4 ++--
sha1_name.c | 2 +-
string-list.c | 2 +-
utf8.c | 2 +-
xdiff/xpatience.c | 2 +-
12 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index f2be145e1..8ec459f52 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -633,7 +633,7 @@ static int find_ofs_delta(const off_t offset, enum object_type type)
int first = 0, last = nr_ofs_deltas;
while (first < last) {
- int next = (first + last) / 2;
+ int next = first + (last - first) / 2;
struct ofs_delta_entry *delta = &ofs_deltas[next];
int cmp;
@@ -687,7 +687,7 @@ static int find_ref_delta(const unsigned char *sha1, enum object_type type)
int first = 0, last = nr_ref_deltas;
while (first < last) {
- int next = (first + last) / 2;
+ int next = first + (last - first) / 2;
struct ref_delta_entry *delta = &ref_deltas[next];
int cmp;
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 5ee2c48ff..6e77dfd44 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1277,7 +1277,7 @@ static int done_pbase_path_pos(unsigned hash)
int lo = 0;
int hi = done_pbase_paths_num;
while (lo < hi) {
- int mi = (hi + lo) / 2;
+ int mi = lo + (hi - lo) / 2;
if (done_pbase_paths[mi] == hash)
return mi;
if (done_pbase_paths[mi] < hash)
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 689a29fac..62ea264c4 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -394,7 +394,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
lo = 0;
hi = nr;
while (lo < hi) {
- mid = (lo + hi)/2;
+ mid = lo + (hi - lo) / 2;
if (base_offset < obj_list[mid].offset) {
hi = mid;
} else if (base_offset > obj_list[mid].offset) {
diff --git a/cache-tree.c b/cache-tree.c
index 71d092ed5..d3f740127 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -49,7 +49,7 @@ static int subtree_pos(struct cache_tree *it, const char *path, int pathlen)
lo = 0;
hi = it->subtree_nr;
while (lo < hi) {
- int mi = (lo + hi) / 2;
+ int mi = lo + (hi - lo) / 2;
struct cache_tree_sub *mdl = down[mi];
int cmp = subtree_name_cmp(path, pathlen,
mdl->name, mdl->namelen);
diff --git a/compat/regex/regex_internal.c b/compat/regex/regex_internal.c
index d4121f2f4..98342b831 100644
--- a/compat/regex/regex_internal.c
+++ b/compat/regex/regex_internal.c
@@ -613,7 +613,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
int low = 0, high = pstr->valid_len, mid;
do
{
- mid = (high + low) / 2;
+ mid = low + (high - low) / 2;
if (pstr->offsets[mid] > offset)
high = mid;
else if (pstr->offsets[mid] < offset)
@@ -1394,7 +1394,7 @@ re_node_set_contains (const re_node_set *set, int elem)
right = set->nelem - 1;
while (idx < right)
{
- mid = (idx + right) / 2;
+ mid = idx + (right - idx) / 2;
if (set->elems[mid] < elem)
idx = mid + 1;
else
diff --git a/compat/regex/regexec.c b/compat/regex/regexec.c
index 0a745d9c3..6f2b48a78 100644
--- a/compat/regex/regexec.c
+++ b/compat/regex/regexec.c
@@ -4284,7 +4284,7 @@ search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
last = right = mctx->nbkref_ents;
for (left = 0; left < right;)
{
- mid = (left + right) / 2;
+ mid = left + (right - left) / 2;
if (mctx->bkref_ents[mid].str_idx < str_idx)
left = mid + 1;
else
diff --git a/packfile.c b/packfile.c
index eab754248..4a5fe7ab1 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1743,7 +1743,7 @@ off_t find_pack_entry_one(const unsigned char *sha1,
sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);
while (lo < hi) {
- unsigned mi = (lo + hi) / 2;
+ unsigned mi = lo + (hi - lo) / 2;
int cmp = hashcmp(index + mi * stride, sha1);
if (debug_lookup)
diff --git a/sha1-lookup.c b/sha1-lookup.c
index 2552b7902..4cf3ebd92 100644
--- a/sha1-lookup.c
+++ b/sha1-lookup.c
@@ -10,7 +10,7 @@ static uint32_t take2(const unsigned char *sha1)
* Conventional binary search loop looks like this:
*
* do {
- * int mi = (lo + hi) / 2;
+ * int mi = lo + (hi - lo) / 2;
* int cmp = "entry pointed at by mi" minus "target";
* if (!cmp)
* return (mi is the wanted one)
@@ -95,7 +95,7 @@ int sha1_pos(const unsigned char *sha1, void *table, size_t nr,
hi = mi;
else
lo = mi + 1;
- mi = (hi + lo) / 2;
+ mi = lo + (hi - lo) / 2;
} while (lo < hi);
return -lo-1;
}
diff --git a/sha1_name.c b/sha1_name.c
index 134ac9742..c7c5ab376 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -157,7 +157,7 @@ static void unique_in_pack(struct packed_git *p,
num = p->num_objects;
last = num;
while (first < last) {
- uint32_t mid = (first + last) / 2;
+ uint32_t mid = first + (last - first) / 2;
const unsigned char *current;
int cmp;
diff --git a/string-list.c b/string-list.c
index 806b4c872..a0cf0cfe8 100644
--- a/string-list.c
+++ b/string-list.c
@@ -16,7 +16,7 @@ static int get_entry_index(const struct string_list *list, const char *string,
compare_strings_fn cmp = list->cmp ? list->cmp : strcmp;
while (left + 1 < right) {
- int middle = (left + right) / 2;
+ int middle = left + (right - left) / 2;
int compare = cmp(string, list->items[middle].string);
if (compare < 0)
right = middle;
diff --git a/utf8.c b/utf8.c
index 47a42047c..2c27ce013 100644
--- a/utf8.c
+++ b/utf8.c
@@ -32,7 +32,7 @@ static int bisearch(ucs_char_t ucs, const struct interval *table, int max)
if (ucs < table[0].first || ucs > table[max].last)
return 0;
while (max >= min) {
- mid = (min + max) / 2;
+ mid = min + (max - min) / 2;
if (ucs > table[mid].last)
min = mid + 1;
else if (ucs < table[mid].first)
diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
index a613efc70..9f91702de 100644
--- a/xdiff/xpatience.c
+++ b/xdiff/xpatience.c
@@ -166,7 +166,7 @@ static int binary_search(struct entry **sequence, int longest,
int left = -1, right = longest;
while (left + 1 < right) {
- int middle = (left + right) / 2;
+ int middle = left + (right - left) / 2;
/* by construction, no two entries can be equal */
if (sequence[middle]->line2 > entry->line2)
right = middle;
--
2.14.1.538.g56ec8fc98.dirty
next prev parent reply other threads:[~2017-10-08 18:29 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-09-25 9:54 [PATCH v2 0/5] Improve abbreviation disambiguation Derrick Stolee
2017-09-25 9:54 ` [PATCH v2 1/5] test-list-objects: List a subset of object ids Derrick Stolee
2017-09-26 9:24 ` Junio C Hamano
2017-10-05 8:42 ` Jeff King
2017-10-05 9:48 ` Junio C Hamano
2017-10-05 10:00 ` Jeff King
2017-10-05 10:16 ` Junio C Hamano
2017-10-05 12:39 ` Derrick Stolee
2017-10-06 14:11 ` Jeff King
2017-10-07 19:12 ` Derrick Stolee
2017-10-07 19:33 ` Jeff King
2017-10-08 1:46 ` Junio C Hamano
2017-09-25 9:54 ` [PATCH v2 2/5] p0008-abbrev.sh: Test find_unique_abbrev() perf Derrick Stolee
2017-09-26 9:27 ` Junio C Hamano
2017-10-05 8:55 ` Jeff King
2017-10-05 8:57 ` Jeff King
2017-09-25 9:54 ` [PATCH v2 3/5] sha1_name: Unroll len loop in find_unique_abbrev_r Derrick Stolee
2017-09-25 9:54 ` [PATCH v2 4/5] sha1_name: Parse less while finding common prefix Derrick Stolee
2017-09-25 23:42 ` Stefan Beller
2017-10-02 14:52 ` Derrick Stolee
2017-09-25 9:54 ` [PATCH v2 5/5] sha1_name: Minimize OID comparisons during disambiguation Derrick Stolee
2017-10-02 14:56 ` [PATCH v3 0/5] Improve abbreviation disambituation Derrick Stolee
2017-10-05 9:49 ` Jeff King
2017-10-02 14:56 ` [PATCH v3 1/5] test-list-objects: List a subset of object ids Derrick Stolee
2017-10-03 4:16 ` Junio C Hamano
2017-10-02 14:56 ` [PATCH v3 2/5] p0008-abbrev.sh: Test find_unique_abbrev() perf Derrick Stolee
2017-10-02 14:56 ` [PATCH v3 3/5] sha1_name: Unroll len loop in find_unique_abbrev_r Derrick Stolee
2017-10-03 10:49 ` Junio C Hamano
2017-10-03 11:26 ` Derrick Stolee
2017-10-04 6:10 ` Junio C Hamano
2017-10-04 13:06 ` Derrick Stolee
2017-10-04 6:07 ` Junio C Hamano
2017-10-04 13:19 ` Derrick Stolee
2017-10-05 1:26 ` Junio C Hamano
2017-10-05 9:13 ` Jeff King
2017-10-05 9:50 ` Junio C Hamano
2017-10-02 14:56 ` [PATCH v3 4/5] sha1_name: Parse less while finding common prefix Derrick Stolee
2017-10-04 6:14 ` Junio C Hamano
2017-10-02 14:56 ` [PATCH v3 5/5] sha1_name: Minimize OID comparisons during disambiguation Derrick Stolee
2017-10-03 15:55 ` Stefan Beller
2017-10-03 17:05 ` Derrick Stolee
2017-10-05 9:44 ` Jeff King
2017-10-06 13:52 ` [PATCH] cleanup: fix possible overflow errors in binary search Derrick Stolee
2017-10-06 14:18 ` Jeff King
2017-10-06 14:41 ` Derrick Stolee
2017-10-08 18:29 ` Derrick Stolee [this message]
2017-10-09 13:33 ` [PATCH v2] " Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171008182937.69205-1-dstolee@microsoft.com \
--to=dstolee@microsoft.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=peff@peff.net \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).