* [PATCH v2 01/18] Add a function to solve least-cost assignment problems
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-05 18:24 ` Jeff King
2018-05-30 13:55 ` SZEDER Gábor
2018-05-04 15:34 ` [PATCH v2 02/18] Add a new builtin: branch-diff Johannes Schindelin
` (20 subsequent siblings)
21 siblings, 2 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
The Jonker-Volgenant algorithm was implemented to answer questions such
as: given two different versions of a topic branch (or iterations of a
patch series), what is the best pairing of commits/patches between the
different versions?
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
Makefile | 1 +
hungarian.c | 205 ++++++++++++++++++++++++++++++++++++++++++++++++++++
hungarian.h | 19 +++++
3 files changed, 225 insertions(+)
create mode 100644 hungarian.c
create mode 100644 hungarian.h
diff --git a/Makefile b/Makefile
index 50da82b0169..96f2e76a904 100644
--- a/Makefile
+++ b/Makefile
@@ -829,6 +829,7 @@ LIB_OBJS += gpg-interface.o
LIB_OBJS += graph.o
LIB_OBJS += grep.o
LIB_OBJS += hashmap.o
+LIB_OBJS += hungarian.o
LIB_OBJS += help.o
LIB_OBJS += hex.o
LIB_OBJS += ident.o
diff --git a/hungarian.c b/hungarian.c
new file mode 100644
index 00000000000..346299a97d9
--- /dev/null
+++ b/hungarian.c
@@ -0,0 +1,205 @@
+/*
+ * Based on: Jonker, R., & Volgenant, A. (1987). <i>A shortest augmenting path
+ * algorithm for dense and sparse linear assignment problems</i>. Computing,
+ * 38(4), 325-340.
+ */
+#include "cache.h"
+#include "hungarian.h"
+#include <float.h>
+
+/*
+ * Cost of assigning `column` to `row`. The expansion refers to `cost` and
+ * `column_count`, so both must be in scope wherever the macro is used.
+ */
+#define COST(column, row) cost[(column) + column_count * (row)]
+
+/*
+ * Solve the linear assignment problem for the given cost matrix, following
+ * the Jonker-Volgenant shortest augmenting path algorithm.
+ *
+ * The parameter `cost` is the cost matrix: the cost to assign column j to row
+ * i is `cost[j + column_count * i]`.
+ *
+ * The caller provides column2row (column_count entries) and row2column
+ * (row_count entries). On return, column2row[j] is the row assigned to column
+ * j and row2column[i] is the column assigned to row i, with -1 meaning
+ * "unassigned".
+ *
+ * Always returns 0.
+ */
+int compute_assignment(int column_count, int row_count, double *cost,
+		       int *column2row, int *row2column)
+{
+	double *v, *d;
+	int *free_row, free_count = 0, saved_free_count, *pred, *col;
+	int i, j, phase;
+
+	memset(column2row, -1, sizeof(int) * column_count);
+	memset(row2column, -1, sizeof(int) * row_count);
+
+	/*
+	 * With no rows or no columns nothing can be assigned; bail out before
+	 * the column reduction reads COST(j, 0) and row2column[0].
+	 */
+	if (!column_count || !row_count)
+		return 0;
+
+	/* v[j] is the amount subtracted from every cost in column j */
+	ALLOC_ARRAY(v, column_count);
+
+	/* column reduction */
+	for (j = column_count - 1; j >= 0; j--) {
+		int i1 = 0;
+
+		/* i1: cheapest row for column j (the first one on ties) */
+		for (i = 1; i < row_count; i++)
+			if (COST(j, i1) > COST(j, i))
+				i1 = i;
+		v[j] = COST(j, i1);
+		if (row2column[i1] == -1) {
+			/* row i1 unassigned */
+			row2column[i1] = j;
+			column2row[j] = i1;
+		} else {
+			/*
+			 * Row i1 already has a column: flag the row by
+			 * encoding its column as -2 - column (decoded again
+			 * in the reduction transfer) and leave j unassigned.
+			 */
+			if (row2column[i1] >= 0)
+				row2column[i1] = -2 - row2column[i1];
+			column2row[j] = -1;
+		}
+	}
+
+	/* reduction transfer */
+	ALLOC_ARRAY(free_row, row_count);
+	for (i = 0; i < row_count; i++) {
+		int j1 = row2column[i];
+
+		if (j1 == -1) {
+			free_row[free_count++] = i;
+		} else if (j1 < -1) {
+			/* flagged during column reduction: decode the column */
+			row2column[i] = -2 - j1;
+		} else if (column_count > 1) {
+			/*
+			 * Lower v[j1] by the smallest COST(j, i) - v[j] over
+			 * all columns j other than j1. Starting from DBL_MAX
+			 * and skipping j1 avoids indexing column `!j1`, which
+			 * does not exist when there is only a single column.
+			 */
+			double min = DBL_MAX;
+
+			for (j = 0; j < column_count; j++)
+				if (j != j1 && min > COST(j, i) - v[j])
+					min = COST(j, i) - v[j];
+			v[j1] -= min;
+		}
+	}
+
+	/*
+	 * Done if the only free rows left are the ones that cannot get a
+	 * column because there are more rows than columns.
+	 */
+	if (free_count ==
+	    (column_count < row_count ? row_count - column_count : 0)) {
+		free(v);
+		free(free_row);
+		return 0;
+	}
+
+	/* augmenting row reduction */
+	for (phase = 0; phase < 2; phase++) {
+		int k = 0;
+
+		saved_free_count = free_count;
+		free_count = 0;
+		while (k < saved_free_count) {
+			double u1, u2;
+			int j1 = 0, j2, i0;
+
+			/*
+			 * Find the smallest (u1, in column j1) and the second
+			 * smallest (u2, in column j2) value of
+			 * COST(j, i) - v[j] for the free row i.
+			 */
+			i = free_row[k++];
+			u1 = COST(j1, i) - v[j1];
+			j2 = -1;
+			u2 = DBL_MAX;
+			for (j = 1; j < column_count; j++) {
+				double c = COST(j, i) - v[j];
+				if (u2 > c) {
+					if (u1 < c) {
+						u2 = c;
+						j2 = j;
+					} else {
+						u2 = u1;
+						u1 = c;
+						j2 = j1;
+						j1 = j;
+					}
+				}
+			}
+			if (j2 < 0) {
+				/* no second candidate was found */
+				j2 = j1;
+				u2 = u1;
+			}
+
+			i0 = column2row[j1];
+			if (u1 < u2)
+				v[j1] -= u2 - u1;
+			else if (i0 >= 0) {
+				/* tie and j1 is taken: try j2 instead */
+				j1 = j2;
+				i0 = column2row[j1];
+			}
+
+			if (i0 >= 0) {
+				/*
+				 * Row i0 loses its column: handle it right
+				 * away if v[j1] was lowered, otherwise defer
+				 * it to the next phase / the augmentation.
+				 */
+				if (u1 < u2)
+					free_row[--k] = i0;
+				else
+					free_row[free_count++] = i0;
+			}
+			row2column[i] = j1;
+			column2row[j1] = i;
+		}
+	}
+
+	/* augmentation */
+	saved_free_count = free_count;
+	ALLOC_ARRAY(d, column_count);
+	ALLOC_ARRAY(pred, column_count);
+	ALLOC_ARRAY(col, column_count);
+	for (free_count = 0; free_count < saved_free_count; free_count++) {
+		int i1 = free_row[free_count], low = 0, up = 0, last, k;
+		double min, c, u1;
+
+		/*
+		 * d[j]: cost of the cheapest path found so far from row i1 to
+		 * column j; pred[j]: the row from which column j was reached.
+		 */
+		for (j = 0; j < column_count; j++) {
+			d[j] = COST(j, i1) - v[j];
+			pred[j] = i1;
+			col[j] = j;
+		}
+
+		j = -1;
+		do {
+			/*
+			 * Collect the columns whose d[] equals the current
+			 * minimum in col[low..up).
+			 */
+			last = low;
+			min = d[col[up++]];
+			for (k = up; k < column_count; k++) {
+				j = col[k];
+				c = d[j];
+				if (c <= min) {
+					if (c < min) {
+						up = low;
+						min = c;
+					}
+					col[k] = col[up];
+					col[up++] = j;
+				}
+			}
+			for (k = low; k < up; k++) {
+				/*
+				 * The augmentation below has to start at the
+				 * unassigned column found here, not at
+				 * whichever column the loop above examined
+				 * last.
+				 */
+				j = col[k];
+				if (column2row[j] == -1)
+					goto update;
+			}
+
+			/* scan a row */
+			do {
+				int j1 = col[low++];
+
+				i = column2row[j1];
+				u1 = COST(j1, i) - v[j1] - min;
+				for (k = up; k < column_count; k++) {
+					j = col[k];
+					c = COST(j, i) - v[j] - u1;
+					if (c < d[j]) {
+						d[j] = c;
+						pred[j] = i;
+						if (c == min) {
+							if (column2row[j] == -1)
+								goto update;
+							col[k] = col[up];
+							col[up++] = j;
+						}
+					}
+				}
+			} while (low != up);
+		} while (low == up);
+
+update:
+		/* updating of the column pieces */
+		for (k = 0; k < last; k++) {
+			int j1 = col[k];
+			v[j1] += d[j1] - min;
+		}
+
+		/*
+		 * augmentation: walk back along pred[] from column j until the
+		 * free row i1 is reached, handing each row the column that
+		 * leads to it and moving on with the column it held before.
+		 */
+		do {
+			if (j < 0)
+				BUG("negative j: %d", j);
+			i = pred[j];
+			column2row[j] = i;
+			SWAP(j, row2column[i]);
+		} while (i1 != i);
+	}
+
+	free(col);
+	free(pred);
+	free(d);
+	free(v);
+	free(free_row);
+
+	return 0;
+}
diff --git a/hungarian.h b/hungarian.h
new file mode 100644
index 00000000000..e7cee4bb039
--- /dev/null
+++ b/hungarian.h
@@ -0,0 +1,19 @@
+#ifndef HUNGARIAN_H
+#define HUNGARIAN_H
+
+/*
+ * Compute an assignment of columns -> rows (and vice versa) such that every
+ * column is assigned to at most one row (and vice versa) minimizing the
+ * overall cost.
+ *
+ * The parameter `cost` is the cost matrix: the cost to assign column j to row
+ * i is `cost[j + column_count * i]`.
+ *
+ * The arrays column2row and row2column will be populated with the respective
+ * assignments (-1 for unassigned, which can happen only if column_count !=
+ * row_count).
+ *
+ * The return value is always 0.
+ */
+int compute_assignment(int column_count, int row_count, double *cost,
+		       int *column2row, int *row2column);
+
+#endif
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* Re: [PATCH v2 01/18] Add a function to solve least-cost assignment problems
2018-05-04 15:34 ` [PATCH v2 01/18] Add a function to solve least-cost assignment problems Johannes Schindelin
@ 2018-05-05 18:24 ` Jeff King
2018-05-05 21:55 ` Johannes Schindelin
2018-05-30 13:55 ` SZEDER Gábor
1 sibling, 1 reply; 387+ messages in thread
From: Jeff King @ 2018-05-05 18:24 UTC (permalink / raw)
To: Johannes Schindelin
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On Fri, May 04, 2018 at 05:34:29PM +0200, Johannes Schindelin wrote:
> The Jonker-Volgenant algorithm was implemented to answer questions such
> as: given two different versions of a topic branch (or iterations of a
> patch series), what is the best pairing of commits/patches between the
> different versions?
I love git-tbdiff, so I'm excited to see it getting more exposure (and a
speedup). Thanks for working on this!
Two minor nits on this patch:
> +/*
> + * The parameter `cost` is the cost matrix: the cost to assign column j to row
> + * i is `cost[j + column_count * i].
> + */
> +int compute_assignment(int column_count, int row_count, double *cost,
> + int *column2row, int *row2column)
> +{
> + double *v = xmalloc(sizeof(double) * column_count), *d;
Please use st_mult, xcalloc, or ALLOC_ARRAY here to avoid unchecked
multiplication in an allocation. This is probably hard to exploit in
practice (since you'd need more than SIZE_MAX/8 columns, which presumably
requires allocating some heavier-weight struct per item). But it makes
auditing easier if we avoid the pattern altogether.
> +/*
> + * Compute an assignment of columns -> rows (and vice versa) such that every
> + * column is assigned to at most one row (and vice versa) minimizing the
> + * overall cost.
> + *
> + * The parameter `cost` is the cost matrix: the cost to assign column j to row
> + * i is `cost[j + column_count * i].
> + *
> + * The arrays column2row and row2column will be populated with the respective
> + * assignments (-1 for unassigned, which can happen only if column_count !=
> + * row_count).
> + */
> +int compute_assignment(int column_count, int row_count, double *cost,
> + int *column2row, int *row2column);
It looks like this always returns zero. Is there ever a case where we
would return an error from this? If not, should it just be void?
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 01/18] Add a function to solve least-cost assignment problems
2018-05-05 18:24 ` Jeff King
@ 2018-05-05 21:55 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-05 21:55 UTC (permalink / raw)
To: Jeff King
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Peff,
On Sat, 5 May 2018, Jeff King wrote:
> On Fri, May 04, 2018 at 05:34:29PM +0200, Johannes Schindelin wrote:
>
> > The Jonker-Volgenant algorithm was implemented to answer questions such
> > as: given two different versions of a topic branch (or iterations of a
> > patch series), what is the best pairing of commits/patches between the
> > different versions?
>
> I love git-tbdiff, so I'm excited to see it getting more exposure (and a
> speedup). Thanks for working on this!
:-)
> Two minor nits on this patch:
>
> > +/*
> > + * The parameter `cost` is the cost matrix: the cost to assign column j to row
> > + * i is `cost[j + column_count * i].
> > + */
> > +int compute_assignment(int column_count, int row_count, double *cost,
> > + int *column2row, int *row2column)
> > +{
> > + double *v = xmalloc(sizeof(double) * column_count), *d;
>
> Please use st_mult, xcalloc, or ALLOC_ARRAY here to avoid unchecked
> multiplication in an allocation. This is probably hard to exploit in
> practice (since you'd need sizeof(size_t)/8 columns, which presumably
> requires allocating some heavier-weight struct per item). But it makes
> auditing easier if we avoid the pattern altogether.
Sure. I did mean to return errors in those cases, but I guess it is not
worth the trouble (what would we do in case of out-of-memory?).
> > +/*
> > + * Compute an assignment of columns -> rows (and vice versa) such that every
> > + * column is assigned to at most one row (and vice versa) minimizing the
> > + * overall cost.
> > + *
> > + * The parameter `cost` is the cost matrix: the cost to assign column j to row
> > + * i is `cost[j + column_count * i].
> > + *
> > + * The arrays column2row and row2column will be populated with the respective
> > + * assignments (-1 for unassigned, which can happen only if column_count !=
> > + * row_count).
> > + */
> > +int compute_assignment(int column_count, int row_count, double *cost,
> > + int *column2row, int *row2column);
>
> It looks like this always returns zero. Is there a ever a case where we
> would return an error if this? If not, should it just be void?
Fixed.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 01/18] Add a function to solve least-cost assignment problems
2018-05-04 15:34 ` [PATCH v2 01/18] Add a function to solve least-cost assignment problems Johannes Schindelin
2018-05-05 18:24 ` Jeff King
@ 2018-05-30 13:55 ` SZEDER Gábor
2018-05-30 16:14 ` Stefan Beller
1 sibling, 1 reply; 387+ messages in thread
From: SZEDER Gábor @ 2018-05-30 13:55 UTC (permalink / raw)
To: Johannes Schindelin
Cc: SZEDER Gábor, git, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
> The Jonker-Volgenant algorithm was implemented to answer questions such
> as: given two different versions of a topic branch (or iterations of a
> patch series), what is the best pairing of commits/patches between the
> different versions?
>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
> Makefile | 1 +
> hungarian.c | 205 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> hungarian.h | 19 +++++
(Nit: I personally don't really like these filenames, I know they will
surprise and distract me every time I notice them for years to come... :)
> +int compute_assignment(int column_count, int row_count, double *cost,
> + int *column2row, int *row2column)
> +{
> + double *v = xmalloc(sizeof(double) * column_count), *d;
> + int *free_row, free_count = 0, saved_free_count, *pred, *col;
> + int i, j, phase;
<snip>
> + for (free_count = 0; free_count < saved_free_count; free_count++) {
> + int i1 = free_row[free_count], low = 0, up = 0, last, k;
> + double min, c, u1;
<snip most of the loop's body>
> + /* augmentation */
> + do {
> + if (j < 0)
> + BUG("negative j: %d", j);
> + i = pred[j];
> + column2row[j] = i;
> + k = j;
> + j = row2column[i];
> + row2column[i] = k;
Coccinelle suggests to replace the last three lines above with:
SWAP(j, row2column[i]);
I think it's right, using the SWAP macro makes the resulting code not
only shorter and clearer, but it also saves the reader from thinking
about whether it's important to set 'k = j' (I think it's not), or 'k'
is just used here in lieu of a dedicated 'tmp' variable (I think it
is).
> + } while (i1 != i);
> + }
> +
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 01/18] Add a function to solve least-cost assignment problems
2018-05-30 13:55 ` SZEDER Gábor
@ 2018-05-30 16:14 ` Stefan Beller
2018-05-30 23:28 ` brian m. carlson
0 siblings, 1 reply; 387+ messages in thread
From: Stefan Beller @ 2018-05-30 16:14 UTC (permalink / raw)
To: SZEDER Gábor
Cc: Johannes Schindelin, git, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Jacob Keller, Eric Sunshine
On Wed, May 30, 2018 at 6:55 AM, SZEDER Gábor <szeder.dev@gmail.com> wrote:
>> The Jonker-Volgenant algorithm was implemented to answer questions such
>> as: given two different versions of a topic branch (or iterations of a
>> patch series), what is the best pairing of commits/patches between the
>> different versions?
>>
>> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
>> ---
>> Makefile | 1 +
>> hungarian.c | 205 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>> hungarian.h | 19 +++++
>
> (Nit: I personally don't really like these filenames, I know they will
> surprise and distract me every time I notice them for years to come... :)
Good point. I remember my initial reaction to the file names was expecting
some hungarian notation, which totally didn't make sense, so I refrained from
commenting. Searching the web for the algorithm, maybe 'lapjv.c' is adequate?
(short for "Linear Assignment Problem Jonker Volgenant") Matlab has a function
named lapjv solving the same problem, so it would fall in line with the outside
world.
Out of interest, why is it called hungarian in the first place? (I presume that
comes from some background of DScho in image processing or such, so the
answer will be interesting for sure:)
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 01/18] Add a function to solve least-cost assignment problems
2018-05-30 16:14 ` Stefan Beller
@ 2018-05-30 23:28 ` brian m. carlson
2018-05-31 12:19 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: brian m. carlson @ 2018-05-30 23:28 UTC (permalink / raw)
To: Stefan Beller
Cc: SZEDER Gábor, Johannes Schindelin, git, Junio C Hamano,
Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Jacob Keller, Eric Sunshine
[-- Attachment #1: Type: text/plain, Size: 1142 bytes --]
On Wed, May 30, 2018 at 09:14:06AM -0700, Stefan Beller wrote:
> Good point. I remember my initial reaction to the file names was expecting
> some hungarian notation, which totally didn't make sense, so I refrained from
> commenting. Searching the web for the algorithm, maybe 'lapjv.c' is adequate?
> (short for "Linear Assignment Problem Jonker Volgenant") Matlab has a function
> named lapjv solving the same problem, so it would fall in line with the outside
> world.
>
> Out of interest, why is it called hungarian in the first place? (I presume that
> comes from some background of DScho in image processing or such, so the
> the answer will be interesting for sure:)
I think it's because tbdiff uses the hungarian Python module, which
implements the Hungarian method, also known as the Kuhn-Munkres
algorithm, for solving the linear assignment problem. This is the
Jonker-Volgenant algorithm, which solves the same problem. It's faster,
but less tolerant.
At least this is what I just learned after about ten minutes of
searching.
--
brian m. carlson: Houston, Texas, US
OpenPGP: https://keybase.io/bk2204
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 867 bytes --]
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 01/18] Add a function to solve least-cost assignment problems
2018-05-30 23:28 ` brian m. carlson
@ 2018-05-31 12:19 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-31 12:19 UTC (permalink / raw)
To: brian m. carlson
Cc: Stefan Beller, SZEDER Gábor, git, Junio C Hamano,
Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Jacob Keller, Eric Sunshine
Hi Brian,
On Wed, 30 May 2018, brian m. carlson wrote:
> On Wed, May 30, 2018 at 09:14:06AM -0700, Stefan Beller wrote:
> > Good point. I remember my initial reaction to the file names was
> > expecting some hungarian notation, which totally didn't make sense, so
> > I refrained from commenting. Searching the web for the algorithm,
> > maybe 'lapjv.c' is adequate? (short for "Linear Assignment Problem
> > Jonker Volgenant") Matlab has a function named lapjv solving the same
> > problem, so it would fall in line with the outside world.
> >
> > Out of interest, why is it called hungarian in the first place? (I
> > presume that comes from some background of DScho in image processing
> > or such, so the the answer will be interesting for sure:)
>
> I think it's because tbdiff uses the hungarian Python module, which
> implements the Hungarian method, also known as the Kuhn-Munkres
> algorithm, for solving the linear assignment problem. This is the
> Jonker-Volgenant algorithm, which solves the same problem. It's faster,
> but less tolerant.
>
> At least this is what I just learned after about ten minutes of
> searching.
You learned well.
The Assignment Problem (or "Linear Assignment Problem") is generally
solved by the Hungarian algorithm. I forgot why it is called that way.
Kuhn-Munkres came up with a simplification of the algorithm IIRC but it
still is O(N^4). Then Jonker-Volgenant took a very different approach that
somehow results in O(N^3). It's been *years* since I studied both
implementations, so I cannot really explain what they do, and how the
latter achieves its order-of-magnitude better performance.
And after reading these mails, I agree that the name "hungarian" might be
confusing.
I also think that "lapjv" is confusing. In general, I try to let Matlab
conventions inform on my naming as little as possible, and I find my
naming fu a lot better for it.
So in this case, how about `linear-assignment.c`?
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
2018-05-04 15:34 ` [PATCH v2 01/18] Add a function to solve least-cost assignment problems Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-05 18:26 ` Jeff King
2018-05-04 15:34 ` [PATCH v2 03/18] branch-diff: first rudimentary implementation Johannes Schindelin
` (19 subsequent siblings)
21 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
This builtin does not do a whole lot so far, apart from showing a usage
that is oddly similar to that of `git tbdiff`. And for a good reason:
the next commits will turn `branch-diff` into a full-blown replacement
for `tbdiff`.
At this point, we ignore tbdiff's color options as well as the
--no-patches option, as they will all be implemented later using
diff_options.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
.gitignore | 1 +
Makefile | 1 +
builtin.h | 1 +
builtin/branch-diff.c | 38 ++++++++++++++++++++++++++++++++++++++
command-list.txt | 1 +
git.c | 1 +
6 files changed, 43 insertions(+)
create mode 100644 builtin/branch-diff.c
diff --git a/.gitignore b/.gitignore
index 833ef3b0b78..1346a64492f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,7 @@
/git-bisect--helper
/git-blame
/git-branch
+/git-branch-diff
/git-bundle
/git-cat-file
/git-check-attr
diff --git a/Makefile b/Makefile
index 96f2e76a904..9b1984776d8 100644
--- a/Makefile
+++ b/Makefile
@@ -953,6 +953,7 @@ BUILTIN_OBJS += builtin/archive.o
BUILTIN_OBJS += builtin/bisect--helper.o
BUILTIN_OBJS += builtin/blame.o
BUILTIN_OBJS += builtin/branch.o
+BUILTIN_OBJS += builtin/branch-diff.o
BUILTIN_OBJS += builtin/bundle.o
BUILTIN_OBJS += builtin/cat-file.o
BUILTIN_OBJS += builtin/check-attr.o
diff --git a/builtin.h b/builtin.h
index 42378f3aa47..e1c4d2a529a 100644
--- a/builtin.h
+++ b/builtin.h
@@ -135,6 +135,7 @@ extern int cmd_archive(int argc, const char **argv, const char *prefix);
extern int cmd_bisect__helper(int argc, const char **argv, const char *prefix);
extern int cmd_blame(int argc, const char **argv, const char *prefix);
extern int cmd_branch(int argc, const char **argv, const char *prefix);
+extern int cmd_branch_diff(int argc, const char **argv, const char *prefix);
extern int cmd_bundle(int argc, const char **argv, const char *prefix);
extern int cmd_cat_file(int argc, const char **argv, const char *prefix);
extern int cmd_checkout(int argc, const char **argv, const char *prefix);
diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
new file mode 100644
index 00000000000..60a4b4fbe30
--- /dev/null
+++ b/builtin/branch-diff.c
@@ -0,0 +1,38 @@
+#include "cache.h"
+#include "builtin.h"
+#include "parse-options.h"
+
+/* Usage lines of `git branch-diff`; passed to parse_options() below. */
+static const char * const builtin_branch_diff_usage[] = {
+N_("git branch-diff [<options>] <old-base>..<old-tip> <new-base>..<new-tip>"),
+N_("git branch-diff [<options>] <old-tip>...<new-tip>"),
+N_("git branch-diff [<options>] <base> <old-tip> <new-tip>"),
+NULL
+};
+
+/*
+ * Option callback for --creation-weight: stores the given factor in the
+ * double that opt->value points to. When the option is negated (`unset`),
+ * the default of 0.6 is restored instead.
+ *
+ * Returns 0 on success. An argument that is not entirely a number is
+ * reported via error() and leaves the stored value untouched (atof() would
+ * have silently turned it into 0.0).
+ */
+static int parse_creation_weight(const struct option *opt, const char *arg,
+				 int unset)
+{
+	double *d = opt->value;
+	double weight;
+	char *end;
+
+	if (unset) {
+		*d = 0.6;
+		return 0;
+	}
+
+	weight = strtod(arg, &end);
+	/* reject empty input and trailing garbage */
+	if (end == arg || *end)
+		return error(_("invalid creation weight: '%s'"), arg);
+
+	*d = weight;
+	return 0;
+}
+
+/*
+ * Entry point of the `branch-diff` builtin. So far it only parses the command
+ * line: --creation-weight=<factor> is stored in creation_weight (0.6 unless
+ * given), and nothing reads that value yet. Returns 0.
+ */
+int cmd_branch_diff(int argc, const char **argv, const char *prefix)
+{
+	double creation_weight = 0.6;
+	struct option options[] = {
+		{ OPTION_CALLBACK,
+		  0, "creation-weight", &creation_weight, N_("factor"),
+		  N_("Fudge factor by which creation is weighted [0.6]"),
+		  0, parse_creation_weight },
+		OPT_END()
+	};
+
+	/* argc takes parse_options()'s return value; it is not used after */
+	argc = parse_options(argc, argv, NULL, options,
+			     builtin_branch_diff_usage, 0);
+
+	return 0;
+}
diff --git a/command-list.txt b/command-list.txt
index a1fad28fd82..d01b9063e81 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -19,6 +19,7 @@ git-archive mainporcelain
git-bisect mainporcelain info
git-blame ancillaryinterrogators
git-branch mainporcelain history
+git-branch-diff mainporcelain
git-bundle mainporcelain
git-cat-file plumbinginterrogators
git-check-attr purehelpers
diff --git a/git.c b/git.c
index f598fae7b7a..d2794fb6f5d 100644
--- a/git.c
+++ b/git.c
@@ -377,6 +377,7 @@ static struct cmd_struct commands[] = {
{ "bisect--helper", cmd_bisect__helper, RUN_SETUP },
{ "blame", cmd_blame, RUN_SETUP },
{ "branch", cmd_branch, RUN_SETUP | DELAY_PAGER_CONFIG },
+ { "branch-diff", cmd_branch_diff, RUN_SETUP | USE_PAGER },
{ "bundle", cmd_bundle, RUN_SETUP_GENTLY | NO_PARSEOPT },
{ "cat-file", cmd_cat_file, RUN_SETUP },
{ "check-attr", cmd_check_attr, RUN_SETUP },
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-04 15:34 ` [PATCH v2 02/18] Add a new builtin: branch-diff Johannes Schindelin
@ 2018-05-05 18:26 ` Jeff King
2018-05-05 21:57 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Jeff King @ 2018-05-05 18:26 UTC (permalink / raw)
To: Johannes Schindelin
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On Fri, May 04, 2018 at 05:34:32PM +0200, Johannes Schindelin wrote:
> This builtin does not do a whole lot so far, apart from showing a usage
> that is oddly similar to that of `git tbdiff`. And for a good reason:
> the next commits will turn `branch-diff` into a full-blown replacement
> for `tbdiff`.
One minor point about the name: will it become annoying as a tab
completion conflict with git-branch?
It feels really petty complaining about the name, but I just want to
raise the point, since it will never be easier to change than right now.
(And no, I don't really have another name in mind; I'm just wondering if
"subset" names like this might be a mild annoyance in the long run).
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-05 18:26 ` Jeff King
@ 2018-05-05 21:57 ` Johannes Schindelin
2018-05-06 0:25 ` Todd Zullinger
` (3 more replies)
0 siblings, 4 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-05 21:57 UTC (permalink / raw)
To: Jeff King
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Peff,
On Sat, 5 May 2018, Jeff King wrote:
> On Fri, May 04, 2018 at 05:34:32PM +0200, Johannes Schindelin wrote:
>
> > This builtin does not do a whole lot so far, apart from showing a usage
> > that is oddly similar to that of `git tbdiff`. And for a good reason:
> > the next commits will turn `branch-diff` into a full-blown replacement
> > for `tbdiff`.
>
> One minor point about the name: will it become annoying as a tab
> completion conflict with git-branch?
I did mention this in the commit message of 18/18:
Without this patch, we would only complete the `branch-diff` part but
not the options and other arguments.
This of itself may already be slightly disruptive for well-trained
fingers that assume that `git bra<TAB>ori<TAB>mas<TAB>` would expand to
`git branch origin/master`, as we now no longer automatically append a
space after completing `git branch`: this is now ambiguous.
> It feels really petty complaining about the name, but I just want to
> raise the point, since it will never be easier to change than right now.
I do hear you. Especially since I hate `git cherry` every single time that
I try to tab-complete `git cherry-pick`.
> (And no, I don't really have another name in mind; I'm just wondering if
> "subset" names like this might be a mild annoyance in the long run).
They totally are, and if you can come up with a better name, I am really
interested in changing it before this hits `next`, even.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-05 21:57 ` Johannes Schindelin
@ 2018-05-06 0:25 ` Todd Zullinger
2018-05-06 0:38 ` Todd Zullinger
2018-05-06 1:05 ` Igor Djordjevic
` (2 subsequent siblings)
3 siblings, 1 reply; 387+ messages in thread
From: Todd Zullinger @ 2018-05-06 0:25 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Jeff King, git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Johannes,
Johannes Schindelin wrote:
> On Sat, 5 May 2018, Jeff King wrote:
>> One minor point about the name: will it become annoying as a tab
>> completion conflict with git-branch?
>
> I did mention this in the commit message of 18/18:
>
> Without this patch, we would only complete the `branch-diff` part but
> not the options and other arguments.
>
> This of itself may already be slightly disruptive for well-trained
> fingers that assume that `git bra<TAB>ori<TAB>mas<TAB>` would expand to
> `git branch origin/master`, as we now no longer automatically append a
> space after completing `git branch`: this is now ambiguous.
>
>> It feels really petty complaining about the name, but I just want to
>> raise the point, since it will never be easier to change than right now.
>
> I do hear you. Especially since I hate `git cherry` every single time that
> I try to tab-complete `git cherry-pick`.
>
>> (And no, I don't really have another name in mind; I'm just wondering if
>> "subset" names like this might be a mild annoyance in the long run).
>
> They totally are, and if you can come up with a better name, I am really
> interested in changing it before this hits `next`, even.
Would it be possible and reasonable to teach 'git branch' to
call this as a subcommand, i.e. as 'git branch diff'? Then
the completion wouldn't offer git branch-diff.
Users could still call it directly if they wanted, though
I'd tend to think that should be discouraged and have it
treated as an implementation detail that it's a separate
binary.
We have a number of commands which take subcommands this way
(bundle, bisect, notes, submodule, and stash come to mind).
I don't know if any are used with and without a subcommand,
but it doesn't seem too strange from a UI point of view, to
me.
(I don't know if it's coincidental that of the existing
commands I noted above, 3 of the 5 are currently implemented
as shell scripts. But they've all seen at least some work
toward converting them to C, I believe).
The idea might be gross and/or unreasonable from an
implementation or UI view. I'm not sure, but I thought I
would toss the idea out.
This wouldn't work for git cherry{,-pick} where you wouldn't
consider 'git cherry pick' as related to 'git cherry'
though.
We also have this with git show{,-branch} and some others.
It's a mild annoyance, but muscle memory adapts eventually.
--
Todd
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A budget is just a method of worrying before you spend money, as well
as afterward.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-06 0:25 ` Todd Zullinger
@ 2018-05-06 0:38 ` Todd Zullinger
2018-05-06 12:04 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Todd Zullinger @ 2018-05-06 0:38 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Jeff King, git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
I wrote:
> Would it be possible and reasonable to teach 'git branch' to
> call this as a subcommand, i.e. as 'git branch diff'? Then
> the completion wouldn't offer git branch-diff.
Of course right after I sent this, it occurred to me that
'git branch diff' would mask the ability to create a
branch named diff. Using 'git branch --diff ...' wouldn't
suffer that problem.
It does add a bit more overhead to the 'git branch' command,
in terms of documentation and usage. I'm not sure it's too
much though. The git-branch summary wouldn't change much:
-git-branch - List, create, or delete branches
+git-branch - List, create, delete, or diff branches
I hesitate to hit send again, in case I'm once again
overlooking a glaringly obvious problem with this idea. ;)
--
Todd
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Quick to judge, quick to anger, slow to understand.
Ignorance and prejudice and fear walk hand in hand.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-06 0:38 ` Todd Zullinger
@ 2018-05-06 12:04 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-06 12:04 UTC (permalink / raw)
To: Todd Zullinger
Cc: Jeff King, git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Todd,
On Sat, 5 May 2018, Todd Zullinger wrote:
> I wrote:
> > Would it be possible and reasonable to teach 'git branch' to
> > call this as a subcommand, i.e. as 'git branch diff'? Then
> > the completion wouldn't offer git branch-diff.
>
> Of course right after I sent this, it occurred to me that
> 'git branch diff' would make mask the ability to create a
> branch named diff. Using 'git branch --diff ...' wouldn't
> suffer that problem.
Yep, I immediately thought of --diff instead of diff when I read your
previous mail on that matter. And I like this idea!
Of course, it will complicate the code to set up the pager a bit (for
`branch-diff`, I could default to "on" all the time). But IIRC we recently
changed the --list cmdmode to set the pager to "auto", so I'll just copy
that.
> It does add a bit more overhead to the 'git branch' command,
> in terms of documentation and usage. I'm not sure it's too
> much though. The git-branch summary wouldn't change much:
>
> -git-branch - List, create, or delete branches
> +git-branch - List, create, delete, or diff branches
Indeed.
Unless I hear objections, I will work on moving to `git branch --diff` (it
might take a while, though, I will be traveling for work this week).
Ciao,
Johannes
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-05 21:57 ` Johannes Schindelin
2018-05-06 0:25 ` Todd Zullinger
@ 2018-05-06 1:05 ` Igor Djordjevic
2018-05-06 4:53 ` Jacob Keller
2018-05-06 12:10 ` Johannes Schindelin
2018-05-06 2:33 ` Junio C Hamano
2018-05-07 7:50 ` Jeff King
3 siblings, 2 replies; 387+ messages in thread
From: Igor Djordjevic @ 2018-05-06 1:05 UTC (permalink / raw)
To: Johannes Schindelin, Jeff King
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Dscho,
On 05/05/2018 23:57, Johannes Schindelin wrote:
>
> > > This builtin does not do a whole lot so far, apart from showing a
> > > usage that is oddly similar to that of `git tbdiff`. And for a
> > > good reason: the next commits will turn `branch-diff` into a
> > > full-blown replacement for `tbdiff`.
> >
> > One minor point about the name: will it become annoying as a tab
> > completion conflict with git-branch?
>
> I did mention this in the commit message of 18/18:
>
> Without this patch, we would only complete the `branch-diff` part but
> not the options and other arguments.
>
> This of itself may already be slightly disruptive for well-trained
> fingers that assume that `git bra<TAB>ori<TAB>mas<TAB>` would expand to
> `git branch origin/master`, as we now no longer automatically append a
> space after completing `git branch`: this is now ambiguous.
>
> > It feels really petty complaining about the name, but I just want
> > to raise the point, since it will never be easier to change than
> > right now.
>
> I do hear you. Especially since I hate `git cherry` every single
> time that I try to tab-complete `git cherry-pick`.
>
> > (And no, I don't really have another name in mind; I'm just
> > wondering if "subset" names like this might be a mild annoyance in
> > the long run).
>
> They totally are, and if you can come up with a better name, I am
> really interested in changing it before this hits `next`, even.
I gave this just a quick glance, so I might be missing something
obvious or otherwise well-known here, but why not `diff-branch` instead?
From user interface perspective, I would (personally) rather expect a
command that does "diff of branches" to belong to "diff family" of
commands (just operating on branches, instead of "branch" command
knowing to "diff itself"), and I see we already have `diff-files`,
`diff-index` and `diff-tree`, for what that`s worth.
Heck, I might even expect something like `git diff --branch ...` to work,
but I guess that is yet a different matter :)
Thanks, Buga
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-06 1:05 ` Igor Djordjevic
@ 2018-05-06 4:53 ` Jacob Keller
2018-05-06 8:32 ` Duy Nguyen
2018-05-06 12:10 ` Johannes Schindelin
1 sibling, 1 reply; 387+ messages in thread
From: Jacob Keller @ 2018-05-06 4:53 UTC (permalink / raw)
To: Igor Djordjevic
Cc: Johannes Schindelin, Jeff King, Git mailing list, Junio C Hamano,
Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Eric Sunshine
On Sat, May 5, 2018 at 6:05 PM, Igor Djordjevic
<igor.d.djordjevic@gmail.com> wrote:
> Hi Dscho,
>
> On 05/05/2018 23:57, Johannes Schindelin wrote:
>>
>> > > This builtin does not do a whole lot so far, apart from showing a
>> > > usage that is oddly similar to that of `git tbdiff`. And for a
>> > > good reason: the next commits will turn `branch-diff` into a
>> > > full-blown replacement for `tbdiff`.
>> >
>> > One minor point about the name: will it become annoying as a tab
>> > completion conflict with git-branch?
>>
>> I did mention this in the commit message of 18/18:
>>
>> Without this patch, we would only complete the `branch-diff` part but
>> not the options and other arguments.
>>
>> This of itself may already be slightly disruptive for well-trained
>> fingers that assume that `git bra<TAB>ori<TAB>mas<TAB>` would expand to
>> `git branch origin/master`, as we now no longer automatically append a
>> space after completing `git branch`: this is now ambiguous.
>>
>> > It feels really petty complaining about the name, but I just want
>> > to raise the point, since it will never be easier to change than
>> > right now.
>>
>> I do hear you. Especially since I hate `git cherry` every single
>> time that I try to tab-complete `git cherry-pick`.
>>
>> > (And no, I don't really have another name in mind; I'm just
>> > wondering if "subset" names like this might be a mild annoyance in
>> > the long run).
>>
>> They totally are, and if you can come up with a better name, I am
>> really interested in changing it before this hits `next`, even.
>
> I gave this just a quick glance so might be I`m missing something
> obvious or otherwise well-known here, bur why not `diff-branch` instead?
>
> From user interface perspective, I would (personally) rather expect a
> command that does "diff of branches" to belong to "diff family" of
> commands (just operating on branches, instead of "branch" command
> knowing to "diff itself"), and I see we already have `diff-files`,
> `diff-index` and `diff-tree`, for what that`s worth.
>
> Heck, I might even expect something like `git diff --branch ...` to work,
> but I guess that is yet a different matter :)
>
> Thanks, Buga
I like diff-branch, though I suppose that also conflicts with diff too.
Thanks,
Jake
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-06 4:53 ` Jacob Keller
@ 2018-05-06 8:32 ` Duy Nguyen
2018-05-06 12:08 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Duy Nguyen @ 2018-05-06 8:32 UTC (permalink / raw)
To: Jacob Keller
Cc: Igor Djordjevic, Johannes Schindelin, Jeff King,
Git mailing list, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Eric Sunshine
On Sun, May 6, 2018 at 6:53 AM, Jacob Keller <jacob.keller@gmail.com> wrote:
> On Sat, May 5, 2018 at 6:05 PM, Igor Djordjevic
> <igor.d.djordjevic@gmail.com> wrote:
>> Hi Dscho,
>>
>> On 05/05/2018 23:57, Johannes Schindelin wrote:
>>>
>>> > > This builtin does not do a whole lot so far, apart from showing a
>>> > > usage that is oddly similar to that of `git tbdiff`. And for a
>>> > > good reason: the next commits will turn `branch-diff` into a
>>> > > full-blown replacement for `tbdiff`.
>>> >
>>> > One minor point about the name: will it become annoying as a tab
>>> > completion conflict with git-branch?
>>>
>>> I did mention this in the commit message of 18/18:
>>>
>>> Without this patch, we would only complete the `branch-diff` part but
>>> not the options and other arguments.
>>>
>>> This of itself may already be slightly disruptive for well-trained
>>> fingers that assume that `git bra<TAB>ori<TAB>mas<TAB>` would expand to
>>> `git branch origin/master`, as we now no longer automatically append a
>>> space after completing `git branch`: this is now ambiguous.
>>>
>>> > It feels really petty complaining about the name, but I just want
>>> > to raise the point, since it will never be easier to change than
>>> > right now.
>>>
>>> I do hear you. Especially since I hate `git cherry` every single
>>> time that I try to tab-complete `git cherry-pick`.
>>>
>>> > (And no, I don't really have another name in mind; I'm just
>>> > wondering if "subset" names like this might be a mild annoyance in
>>> > the long run).
>>>
>>> They totally are, and if you can come up with a better name, I am
>>> really interested in changing it before this hits `next`, even.
>>
>> I gave this just a quick glance so might be I`m missing something
>> obvious or otherwise well-known here, bur why not `diff-branch` instead?
>>
>> From user interface perspective, I would (personally) rather expect a
>> command that does "diff of branches" to belong to "diff family" of
>> commands (just operating on branches, instead of "branch" command
>> knowing to "diff itself"), and I see we already have `diff-files`,
>> `diff-index` and `diff-tree`, for what that`s worth.
>>
>> Heck, I might even expect something like `git diff --branch ...` to work,
>> but I guess that is yet a different matter :)
>>
>> Thanks, Buga
>
> I like diff-branch, though I suppose that also conflicts with diff too.
How about interdiff?
--
Duy
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-06 8:32 ` Duy Nguyen
@ 2018-05-06 12:08 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-06 12:08 UTC (permalink / raw)
To: Duy Nguyen
Cc: Jacob Keller, Igor Djordjevic, Jeff King, Git mailing list,
Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Eric Sunshine
Hi Duy,
On Sun, 6 May 2018, Duy Nguyen wrote:
> On Sun, May 6, 2018 at 6:53 AM, Jacob Keller <jacob.keller@gmail.com> wrote:
> > On Sat, May 5, 2018 at 6:05 PM, Igor Djordjevic
> > <igor.d.djordjevic@gmail.com> wrote:
> >>
> >> On 05/05/2018 23:57, Johannes Schindelin wrote:
> >>>
> >>> > > This builtin does not do a whole lot so far, apart from showing a
> >>> > > usage that is oddly similar to that of `git tbdiff`. And for a
> >>> > > good reason: the next commits will turn `branch-diff` into a
> >>> > > full-blown replacement for `tbdiff`.
> >>> >
> >>> > One minor point about the name: will it become annoying as a tab
> >>> > completion conflict with git-branch?
> >>>
> >>> I did mention this in the commit message of 18/18:
> >>>
> >>> Without this patch, we would only complete the `branch-diff` part but
> >>> not the options and other arguments.
> >>>
> >>> This of itself may already be slightly disruptive for well-trained
> >>> fingers that assume that `git bra<TAB>ori<TAB>mas<TAB>` would expand to
> >>> `git branch origin/master`, as we now no longer automatically append a
> >>> space after completing `git branch`: this is now ambiguous.
> >>>
> >>> > It feels really petty complaining about the name, but I just want
> >>> > to raise the point, since it will never be easier to change than
> >>> > right now.
> >>>
> >>> I do hear you. Especially since I hate `git cherry` every single
> >>> time that I try to tab-complete `git cherry-pick`.
> >>>
> >>> > (And no, I don't really have another name in mind; I'm just
> >>> > wondering if "subset" names like this might be a mild annoyance in
> >>> > the long run).
> >>>
> >>> They totally are, and if you can come up with a better name, I am
> >>> really interested in changing it before this hits `next`, even.
> >>
> >> I gave this just a quick glance so might be I`m missing something
> >> obvious or otherwise well-known here, bur why not `diff-branch` instead?
> >>
> >> From user interface perspective, I would (personally) rather expect a
> >> command that does "diff of branches" to belong to "diff family" of
> >> commands (just operating on branches, instead of "branch" command
> >> knowing to "diff itself"), and I see we already have `diff-files`,
> >> `diff-index` and `diff-tree`, for what that`s worth.
> >>
> >> Heck, I might even expect something like `git diff --branch ...` to work,
> >> but I guess that is yet a different matter :)
> >>
> >> Thanks, Buga
> >
> > I like diff-branch, though I suppose that also conflicts with diff too.
>
> How about interdiff?
No. An interdiff is well defined as the diff you would get by first
applying the first of two patches in reverse and then the second patch
forward. In other words, it turns two revisions of a patch into the diff
between the result of applying both revisions.
I tried very hard to avoid using that term in my patch series (tbdiff used
the term incorrectly: what it called an interdiff is a diff of two
patches, where a patch is an author line followed by the commit message
followed by the commit diff).
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-06 1:05 ` Igor Djordjevic
2018-05-06 4:53 ` Jacob Keller
@ 2018-05-06 12:10 ` Johannes Schindelin
2018-05-06 13:37 ` Igor Djordjevic
1 sibling, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-06 12:10 UTC (permalink / raw)
To: Igor Djordjevic
Cc: Jeff King, git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Buga,
On Sun, 6 May 2018, Igor Djordjevic wrote:
> On 05/05/2018 23:57, Johannes Schindelin wrote:
> >
> > > > This builtin does not do a whole lot so far, apart from showing a
> > > > usage that is oddly similar to that of `git tbdiff`. And for a
> > > > good reason: the next commits will turn `branch-diff` into a
> > > > full-blown replacement for `tbdiff`.
> > >
> > > One minor point about the name: will it become annoying as a tab
> > > completion conflict with git-branch?
> >
> > I did mention this in the commit message of 18/18:
> >
> > Without this patch, we would only complete the `branch-diff` part but
> > not the options and other arguments.
> >
> > This of itself may already be slightly disruptive for well-trained
> > fingers that assume that `git bra<TAB>ori<TAB>mas<TAB>` would expand to
> > `git branch origin/master`, as we now no longer automatically append a
> > space after completing `git branch`: this is now ambiguous.
> >
> > > It feels really petty complaining about the name, but I just want
> > > to raise the point, since it will never be easier to change than
> > > right now.
> >
> > I do hear you. Especially since I hate `git cherry` every single
> > time that I try to tab-complete `git cherry-pick`.
> >
> > > (And no, I don't really have another name in mind; I'm just
> > > wondering if "subset" names like this might be a mild annoyance in
> > > the long run).
> >
> > They totally are, and if you can come up with a better name, I am
> > really interested in changing it before this hits `next`, even.
>
> I gave this just a quick glance so might be I`m missing something
> obvious or otherwise well-known here, bur why not `diff-branch` instead?
I think that is just turning the problem from `branch` to `diff`.
Of course, we have precedent with diff-index and diff-files. Except that
they don't auto-complete (because they are low-level commands) and I
*would* like the subcommand discussed in this here patch series to
auto-complete.
I think Todd's idea to shift it from a full-blown builtin to a cmdmode
of `branch` makes tons of sense.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-06 12:10 ` Johannes Schindelin
@ 2018-05-06 13:37 ` Igor Djordjevic
2018-05-07 1:34 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Igor Djordjevic @ 2018-05-06 13:37 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Jeff King, git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Dscho,
On 06/05/2018 14:10, Johannes Schindelin wrote:
>
> > > > > This builtin does not do a whole lot so far, apart from showing a
> > > > > usage that is oddly similar to that of `git tbdiff`. And for a
> > > > > good reason: the next commits will turn `branch-diff` into a
> > > > > full-blown replacement for `tbdiff`.
> > > >
> > > > One minor point about the name: will it become annoying as a tab
> > > > completion conflict with git-branch?
> > >
> > > I did mention this in the commit message of 18/18:
> > >
> > > Without this patch, we would only complete the `branch-diff` part but
> > > not the options and other arguments.
> > >
> > > This of itself may already be slightly disruptive for well-trained
> > > fingers that assume that `git bra<TAB>ori<TAB>mas<TAB>` would expand to
> > > `git branch origin/master`, as we now no longer automatically append a
> > > space after completing `git branch`: this is now ambiguous.
> > >
> > > > It feels really petty complaining about the name, but I just want
> > > > to raise the point, since it will never be easier to change than
> > > > right now.
> > >
> > > I do hear you. Especially since I hate `git cherry` every single
> > > time that I try to tab-complete `git cherry-pick`.
> > >
> > > > (And no, I don't really have another name in mind; I'm just
> > > > wondering if "subset" names like this might be a mild annoyance in
> > > > the long run).
> > >
> > > They totally are, and if you can come up with a better name, I am
> > > really interested in changing it before this hits `next`, even.
> >
> > I gave this just a quick glance so might be I`m missing something
> > obvious or otherwise well-known here, bur why not `diff-branch` instead?
>
> I think that is just turning the problem from `branch` to `diff`.
>
> Of course, we have precedent with diff-index and diff-files. Except that
> they don't auto-complete (because they are low-level commands) and I
> *would* like the subcommand discussed in this here patch series to
> auto-complete.
Yeah, I did suspect it might be something like this (those other ones
not auto-completing, where we do want it here), thanks for elaborating.
> I think Todd's idea to shift it from a full-blown builtin to a cmdmode
> of `branch` makes tons of sense.
I don`t know, I still find it a bit strange that in order to "diff
something", you go to "something" and tell it to "diff itself" - not
because it`s a weird concept (OOP, anyone? :]), but because we
already have "diff" command that can accept different things, thus
just teaching it to accept additional "something" (branch, in this
case), seems more natural (to me) - "branch diff" being just another
"diff" mode of operation.
What about that side thought you left out from my original message,
making it `git diff --branch` instead?
But if "branch diff" is considered to be too special-cased mode of
"diff" so that supporting it from `diff` itself would make it feel
awkward in both usage and maintenance (in terms of many other regular
`diff` specific options being unsupported), I guess I would understand
having it outside `diff` altogether (and implemented as proposed `git
branch --diff`, or something)... for the time being, at least :)
Regards, Buga
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-06 13:37 ` Igor Djordjevic
@ 2018-05-07 1:34 ` Johannes Schindelin
2018-05-07 22:05 ` Igor Djordjevic
0 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-07 1:34 UTC (permalink / raw)
To: Igor Djordjevic
Cc: Jeff King, git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Buga,
On Sun, 6 May 2018, Igor Djordjevic wrote:
> On 06/05/2018 14:10, Johannes Schindelin wrote:
>
> > I think Todd's idea to shift it from a full-blown builtin to a cmdmode
> > of `branch` makes tons of sense.
>
> I don`t know, I still find it a bit strange that in order to "diff
> something", you go to "something" and tell it to "diff itself" - not
> because it`s a weird concept (OOP, anyone? :]), but because we already
> have "diff" command that can accept different things, thus just teaching
> it to accept additional "something" (branch, in this case), seems more
> natural (to me) - "branch diff" being just another "diff" mode of
> operation.
You also have to call `git branch` to list branches. And to rename
branches. And to delete them. So why not also compare them at the same
time?
> What about that side thought you left out from my original message,
> making it `git diff --branch` instead?
I really did not like this, as all of the `git diff` options really are
about comparing two revisions, not two *sets* of revisions.
Further, if I put my unsuspecting user hat on, I would ask myself how you
can compare branches with one another? That is what I would expect `git
diff --branch` to do, not to compare two versions of *the same* branch.
So `git diff --branch` does not at all convey the same to me as `git
branch --diff`, and I find that the latter does match better what this
patch series tries to achieve.
I briefly considered `git branch --compare` instead, but then rejected it:
it would again sound more like I try to compare two separate (and likely
unrelated) branches with one another, and that simply does not make much
sense, and tbdiff would not help with that, anyway.
> But if "branch diff" is considered to be too special-cased mode of
> "diff" so that supporting it from `diff` itself would make it feel
> awkward in both usage and maintenance (in terms of many other regular
> `diff` specific options being unsupported), I guess I would understand
> having it outside `diff` altogether (and implemented as proposed `git
> branch --diff`, or something)... for the time being, at least :)
The branch diff is not even a special-cased mode of diff. It is *way* more
complicated than that. It tries to find 1:1 correspondences between *sets*
of commits, and then only outputs a "sort" of a diff between the commits
that correspond with each other. I say "sort" of a diff because that diff
does not look like `git diff <commit1> <commit2>` at all!
So I think it would just be confusing to add that mode to `git diff`.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 1:34 ` Johannes Schindelin
@ 2018-05-07 22:05 ` Igor Djordjevic
2018-05-07 22:24 ` Stefan Beller
0 siblings, 1 reply; 387+ messages in thread
From: Igor Djordjevic @ 2018-05-07 22:05 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Jeff King, git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Dscho,
On 07/05/2018 03:34, Johannes Schindelin wrote:
>
> > > I think Todd's idea to shift it from a full-blown builtin to a cmdmode
> > > of `branch` makes tons of sense.
> >
> > I don`t know, I still find it a bit strange that in order to "diff
> > something", you go to "something" and tell it to "diff itself" - not
> > because it`s a weird concept (OOP, anyone? :]), but because we already
> > have "diff" command that can accept different things, thus just teaching
> > it to accept additional "something" (branch, in this case), seems more
> > natural (to me) - "branch diff" being just another "diff" mode of
> > operation.
>
> You also have to call `git branch` to list branches. And to rename
> branches. And to delete them. So why not also compare them at the same
> time?
Maybe because we already have a command that specifically does
comparison? :)
List, rename, delete -- all these seem more as basic CRUD operations,
where comparison is a more complex one. And not to get me wrong - I
could see "branch diff" being part of "branch", but not really when
"diff" already exists as a separate thing, already doing quite some
(but still diff related, and configurable) stuff.
> > What about that side thought you left out from my original message,
> > making it `git diff --branch` instead?
>
> I really did not like this, as all of the `git diff` options really are
> about comparing two revisions, not two *sets* of revisions.
I see what you mean, but I would argue this being a deliberate user
choice here, like picking a diff "strategy" - I`d say it still utterly
does compare two revisions (branch tips, in this case), just putting
focus on comparing revisions that lead to them (branch history),
instead of just files found in them (branch files).
> Further, if I put my unsuspecting user hat on, I would ask myself how you
> can compare branches with one another? That is what I would expect `git
> diff --branch` to do, not to compare two versions of *the same* branch.
I totally agree with you here, and thus I have a question - what
determines "two versions of *the same* branch"? :) Do you still
explicitly provide both "old" and "new" version branch tips?
I see "multiple versions of the same branch" more as a conceptual
model, and not something Git is aware of (I think?) - BUT, even if it
was, I don`t see why this should be a(n artificial) restriction?
Basically, what you (conceptually) call "two versions of the same
branch", I simply call "two branches" (from usage standpoint).
And you may have a branch that got split, or more of them that got
unified, so defining "previous branch version" may not be that
straightforward - it`s really just "two commit ranges" (as man page
defines it in general), with "two versions of a patch series" only
being the most common/expected use case of the former.
Finally, if user picks two totally unrelated "branches" to compare,
he won`t get a really useful diff - but it`s the same as if he would
compare two totally unrelated commits (where tree state massively
changed in between, or having unrelated histories, even).
Besides, while I might still not be much into the matter, but isn`t
"branch" in Git just a pointer to revision? Being so, there is really
no such thing as "branch" in terms of being a specific (sub)set of
revisions (commits), other than "everything from branch head/pointer
to root commit" (in general).
Yes, we do perceive "a branch" being a specific set of topic related
commits, but which *exact* commits we are interested in ("branch" lower
bounds) may differ in regards to what we aim for - how far do we consider
one branch to reach in the past depends solely on the use case.
> So `git diff --branch` does not at all convey the same to me as `git
> branch --diff`, and I find that the latter does match better what this
> patch series tries to achieve.
I agree with the first part, but it seems to me your finding is
biased due to your (expected) use case.
> > But if "branch diff" is considered to be too special-cased mode of
> > "diff" so that supporting it from `diff` itself would make it feel
> > awkward in both usage and maintenance (in terms of many other regular
> > `diff` specific options being unsupported), I guess I would understand
> > having it outside `diff` altogether (and implemented as proposed `git
> > branch --diff`, or something)... for the time being, at least :)
>
> The branch diff is not even a special-cased mode of diff. It is *way* more
> complicated than that. It tries to find 1:1 correspondences between *sets*
> of commits, and then only outputs a "sort" of a diff between the commits
> that correspond with each other. I say "sort" of a diff because that diff
> does not look like `git diff <commit1> <commit2>` at all!
But there is not just one way `git diff <commit1> <commit2>` output
looks; it depends on other options (like --name-status, for example),
which is my point exactly :)
With something like `git diff --branch <commit1>...<commit2>` you
would get yet another "diff look", useful for use case in question
here.
Regards, Buga
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 22:05 ` Igor Djordjevic
@ 2018-05-07 22:24 ` Stefan Beller
2018-05-07 23:39 ` Igor Djordjevic
2018-05-08 3:44 ` Jeff King
0 siblings, 2 replies; 387+ messages in thread
From: Stefan Beller @ 2018-05-07 22:24 UTC (permalink / raw)
To: Igor Djordjevic
Cc: Johannes Schindelin, Jeff King, git, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Jacob Keller, Eric Sunshine
On Mon, May 7, 2018 at 3:05 PM, Igor Djordjevic
<igor.d.djordjevic@gmail.com> wrote:
> List, rename, delete -- all these seem more as basic CRUD operations,
> where comparison is a more complex one. And not to get me wrong - I
> could see "branch diff" being part of "branch", but not really when
> "diff" already exists as a separate thing, already doing quite some
> (but still diff related, and configurable) stuff.
If we go with "branch --diff", because it has the CRUD operations already
there for branches, I might ask for "remote --diff" to diff two remotes. ;)
(That command "remote --diff" would not make any sense, would it?)
> Basically, what you (conceptually) call "two versions of the same
> branch", I simply call "two branches" (from usage standpoint).
If I diff 2 (topic) branches, which are based on a different version
from upstream, then I see changes from commits that I don't care
about, but this tool explicitly excludes them. Instead it includes
the ordering of the commits as well as its commit messages to
the diff.
So I would not say this tool "diffs two branches", as that is understood
as "diffing the trees, where each of the two branches points to",
whereas this tool diffs a patch series, or if you give Git-ranges,
then it would produce such a patch series in memory.
> And you may have a branch that got split, or more of them that got
> unified, so defining "previous branch version" may not be that
> straightforward - it`s really just "two commit ranges" (as man page
> defines it in general), with "two versions of a patch series" only
> being the most common/expected use case of the former.
>
> Finally, if user picks two totally unrelated "branches" to compare,
> he won`t get a really useful diff - but it`s the same as if he would
> compare two totally unrelated commits (where tree state massively
> changed in between, or having unrelated histories, even).
I used just that, but narrowed down the comparison to one file
instead of the whole tree.
> With something like `git diff --branch <commit1>...<commit2>` you
> would get yet another "diff look", useful for use case in question
> here.
Personally I think this patch series should neither extend git-diff
nor git-branch.
It should not extend git-diff, because currently git-diff can diff
tree-ishes (and does that very well) and compare them to the
worktree/index.
It should also not extend git-branch, as that command is for
CRUD operations that you hinted at earlier (Earlier I proposed
git-remote --diff for diffing two remotes, which makes no sense,
another one might be git-worktree, which also just does CRUD
for worktrees. It would be a bad idea to have "git worktree --diff")
Hence I propose "git range-diff", similar to topic-diff, that
was proposed earlier.
* it "diffs ranges" of commits.
* it can also deal with out-of-git things like patch series,
but that is a mere by-product and may not be desired.
Just like git-diff can also compare two files outside a git
repo, that would not be a good use case.
Keep the name Git-centric!
* it autocompletes well.
Stefan
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 22:24 ` Stefan Beller
@ 2018-05-07 23:39 ` Igor Djordjevic
2018-05-08 3:44 ` Jeff King
1 sibling, 0 replies; 387+ messages in thread
From: Igor Djordjevic @ 2018-05-07 23:39 UTC (permalink / raw)
To: Stefan Beller
Cc: Johannes Schindelin, Jeff King, git, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Jacob Keller, Eric Sunshine
Hi Stefan,
On 08/05/2018 00:24, Stefan Beller wrote:
>
> > List, rename, delete -- all these seem more as basic CRUD operations,
> > where comparison is a more complex one. And not to get me wrong - I
> > could see "branch diff" being part of "branch", but not really when
> > "diff" already exists as a separate thing, already doing quite some
> > (but still diff related, and configurable) stuff.
>
> If we go with "branch --diff", because it has the CRUD operations already
> there for branches, I might ask for "remote --diff" to diff two remotes. ;)
> (That command "remote --diff" would not make any sense, would it?)
I`m not sure if this is a reply to me or in general, and whether you
support what I said, or argue against it...? Because what you`re
saying was (or at least should have been) my exact point there :)
> > Basically, what you (conceptually) call "two versions of the same
> > branch", I simply call "two branches" (from usage standpoint).
>
> If I diff 2 (topic) branches, which are based on a different version
> from upstream, then I see changes from commits that I don't care
> about, but this tool explicitly excludes them. Instead it includes
> the ordering of the commits as well as its commit messages to
> the diff.
Here, I was merely pointing out that you still need to provide two
branch heads - which might be expected to resemble "two versions of
the same topic", but they are still (just) "two branches" in Git world.
> > And you may have a branch that got split, or more of them that got
> > unified, so defining "previous branch version" may not be that
> > straightforward - it`s really just "two commit ranges" (as man page
> > defines it in general), with "two versions of a patch series" only
> > being the most common/expected use case of the former.
> >
> > Finally, if user picks two totally unrelated "branches" to compare,
> > he won`t get a really useful diff - but it`s the same as if he would
> > compare two totally unrelated commits (where tree state massively
> > changed in between, or having unrelated histories, even).
>
> I used just that, but narrowed down the comparison to one file
> instead of the whole tree.
Again, not sure if this should support the argument, or argue against
it? :) My point was that there might be other use cases (as you seem
to have supported now), and as "diff" is pretty forgiving, maybe
"diff branch" should be as well.
> > With something like `git diff --branch <commit1>...<commit2>` you
> > would get yet another "diff look", useful for use case in question
> > here.
>
> Personally I think this patch series should neither extend git-diff
> nor git-branch.
>
> It should not extend git-diff, because currently git-diff can diff
> tree-ishs (and does that very well) and comparing to
> worktree/index.
Hmm, are you saying that `git diff` actually has a too generic name
for its (more specific) purpose?
> It should also not extend git-branch, as that command is for
> CRUD operations that you hinted at earlier (Earlier I proposed
> git-remote --diff for diffing two remote, which makes no sense,
> another one might be git-worktree, which also just does CRUD
> for worktrees. It would be a bad idea to have "git worktree --diff")
Agreed here.
> Hence I propose "git range-diff", similar to topic-diff, that
> was proposed earlier.
I find it strange that we already have both "diff" and "diff-something"
commands, and yet you still propose "something-diff" naming pattern
instead (but I guess it`s mainly because of auto-complete concerns).
Please forgive my lack of code base familiarity, but from what I`ve
seen so far, and at least from end-user perspective, I may rather expect
`git diff-range` as low level implementation, and possibly exposed
through `git diff --range` (with a nice single letter abbreviation?).
> * it "diffs ranges" of commits.
Thus "diff-range", as your description says itself :) ("range-diff"
might sound like it "ranges diffs"...?)
> * it can also deal with out-of-git things like patch series,
> but that is a mere by product and may not be desired.
> Just like git-diff can also compare two files outside a git
> repo, that would not be a good use case.
Hmm, so still follows `git diff` in general... `git diff --range`? :D
> * it autocompletes well.
Only here I`m not sure if something like `git diff --range` (with
accompanying single letter option) would be considered "auto-complete
friendly", or not?
Regards, Buga
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 22:24 ` Stefan Beller
2018-05-07 23:39 ` Igor Djordjevic
@ 2018-05-08 3:44 ` Jeff King
2018-05-08 3:48 ` Jeff King
2018-05-22 11:38 ` Ævar Arnfjörð Bjarmason
1 sibling, 2 replies; 387+ messages in thread
From: Jeff King @ 2018-05-08 3:44 UTC (permalink / raw)
To: Stefan Beller
Cc: Igor Djordjevic, Johannes Schindelin, git, Junio C Hamano,
Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Jacob Keller, Eric Sunshine
On Mon, May 07, 2018 at 03:24:59PM -0700, Stefan Beller wrote:
> Hence I propose "git range-diff", similar to topic-diff, that
> was proposed earlier.
>
> * it "diffs ranges" of commits.
> * it can also deal with out-of-git things like patch series,
> but that is a mere by product and may not be desired.
> Just like git-diff can also compare two files outside a git
> repo, that would not be a good use case.
> Keep the name Git-centric!
> * it autocompletes well.
FWIW, I like this by far the best of all of the suggested names.
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-08 3:44 ` Jeff King
@ 2018-05-08 3:48 ` Jeff King
2018-05-22 11:38 ` Ævar Arnfjörð Bjarmason
1 sibling, 0 replies; 387+ messages in thread
From: Jeff King @ 2018-05-08 3:48 UTC (permalink / raw)
To: Stefan Beller
Cc: Igor Djordjevic, Johannes Schindelin, git, Junio C Hamano,
Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Jacob Keller, Eric Sunshine
On Mon, May 07, 2018 at 11:44:29PM -0400, Jeff King wrote:
> On Mon, May 07, 2018 at 03:24:59PM -0700, Stefan Beller wrote:
>
> > Hence I propose "git range-diff", similar to topic-diff, that
> > was proposed earlier.
> >
> > * it "diffs ranges" of commits.
> > * it can also deal with out-of-git things like patch series,
> > but that is a mere by product and may not be desired.
> > Just like git-diff can also compare two files outside a git
> > repo, that would not be a good use case.
> > Keep the name Git-centric!
> > * it autocompletes well.
>
> FWIW, I like this by far of all of the suggested names.
I hit "send" before I had a chance to expound. ;)
The thing that I really like about it is that it names the _concept_.
If I were writing a manual page describing what this output is, I would
call it a "range diff". And naturally, the command to generate range
diffs is "git range-diff".
I think "git diff --range" would also be OK, but IMHO it's useful to
keep the "git diff" family as always comparing end-points.
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-08 3:44 ` Jeff King
2018-05-08 3:48 ` Jeff King
@ 2018-05-22 11:38 ` Ævar Arnfjörð Bjarmason
2018-05-25 22:06 ` Stefan Beller
1 sibling, 1 reply; 387+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2018-05-22 11:38 UTC (permalink / raw)
To: Jeff King
Cc: Stefan Beller, Igor Djordjevic, Johannes Schindelin, git,
Junio C Hamano, Thomas Rast, Thomas Gummerer, Ramsay Jones,
Jacob Keller, Eric Sunshine
On Tue, May 08 2018, Jeff King wrote:
> On Mon, May 07, 2018 at 03:24:59PM -0700, Stefan Beller wrote:
>
>> Hence I propose "git range-diff", similar to topic-diff, that
>> was proposed earlier.
>>
>> * it "diffs ranges" of commits.
>> * it can also deal with out-of-git things like patch series,
>> but that is a mere by product and may not be desired.
>> Just like git-diff can also compare two files outside a git
>> repo, that would not be a good use case.
>> Keep the name Git-centric!
>> * it autocompletes well.
>
> FWIW, I like this by far of all of the suggested names.
I agree, "range-diff" is the best one mentioned so far.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-22 11:38 ` Ævar Arnfjörð Bjarmason
@ 2018-05-25 22:06 ` Stefan Beller
[not found] ` <CAA8fPEkNjy+ETz4Mx+C2kUfLjLzR9uuOmO3GfN48ZH1SwyfE1A@mail.gmail.com>
0 siblings, 1 reply; 387+ messages in thread
From: Stefan Beller @ 2018-05-25 22:06 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Jeff King, Igor Djordjevic,
Ævar Arnfjörð Bjarmason, git, Junio C Hamano,
Thomas Rast, Thomas Gummerer, Ramsay Jones, Jacob Keller,
Eric Sunshine
Johannes,
On IRC you wrote:
<dscho> And BTW this is not bike-shedding to me. Discussing the name
of a variable, or indentation, or line wrapping, is. But improving the
user experience is important. We *suck* on that, historically, and I
do want to break with that habit.
...
<dscho> avar, _ikke_: so a colleague of mine whose opinion on naming I
respect more than all Git developers combined *also* came up with the
term `range-diff`, independently.
...
<dscho> Yes, you are looking at two ranges. But not *any* two ranges.
*That* is my point.
So I sat back and want to try again;
IIUC your dislike for "range-diff" boils down to:
(A) it doesn't diff any arbitrary range, as the output would become
very cumbersome and hard to understand,
(B) it is not a good intuitive name for users, as they would not think
of range-diff when they'd want to have this feature.
Regarding (A), I think the same can be said about input to the diff
machinery, e.g. 'git diff v2.0.0 v2.17.0' is just very much text, and
it is hardly useful (except as a patch fed to the machine).
Over time there were added tons of options that make the diff output
easier to digest, e.g. additional pathspecs to restrict to a sub tree or
ignoring certain things (white spaces mostly), such that
'git diff -w v2.0.0 v2.17.0 -- refs.h' is easier for a human to grok.
Regarding (B), I agree, but blame it on the nature of an open
source project that provides a toolbox. So the way a user is
going to discover this feature is via stackoverflow or via
asking a coworker or finding the example output somewhere.
I think that last point could be part of the feedback:
git-diff prominently hints at its name via "diff --git ..."
in the first line of its output, so maybe the output of this feature
also wants to name itself?
Other thoughts:
We could go down the route of trying to find a best possible
technical name, for which I could offer:
revision-walk-difference
revwalk-diff
As that literally describes the output: two rev walks are
performed and then the outputs of the rev walks are diffed.
Based off these technicals we could get more creative:
redo-rev-walk-spot-the-difference
re-walk-spot
retravel-spot
spot-diff
But I think all these do not address the feedback (B).
"What would a user find intuitive?"; I personally thought
a bit about how I discovered cherry-pick. I just took it as
a given name, without much thought, as I discovered it
by tell tale, not looking for it in the docs. It sort of made
sense as a command that I learned earlier about,
"interactive rebase", also has had the "pick" command,
such that "picking" made sense. I think I retroactively
made sense of the "cherry" part. Now I tried to find it
in the mailing list archive and actually learn about its origin,
but no good stories are found there.
For what the user might find most useful, I just looked
at other tools in Gerrits landscape and there the expectation
seems that you upload your code first and do the diff of the different
patches serverside. I think the same holds for Github or other
branch based reviewing systems. You can force push the
branch that is pull requested and the web UI somehow makes
sense of it.
That leads me to the (weak) conclusion that branch-diff or tbdiff
is most useful for patch based / mailing list based workflows
as there is no magic server helping you out.
Searching for "kernel +tbdiff" to find the kernel devs using tbdiff
gave me no results, so I may be mistaken there.
Trying to find "interdiffs" (for the lack of a better name) between
patches on the kernel mailing list also is not obvious to the uninitiated.
So for the various workflows, I could come up with
change-diff
pullrequest-diff
patch-series-diff
but we do not look at diffs, rather we only use this tool to work on
incremental things, so maybe instead:
change-history
pullrequest-history
patch-series-evolution
Note how these are 3 suggestions, one for each major workflow,
and I *REALLY* would want to have a tool that is agnostic to the
workflow on top (whether you use pull requests or Gerrit changes),
but now I would like to step back and remind us that this tool
is only mostly used for viewing the evolution of your new thing,
but it can also be very useful to inspect non-new things.
(backported patches to maint, or some -stable branch)
Or rather: We do not know the major use case yet. Sure
I will use it in my cover letter and that is on my mind now,
but I think there are other use cases that are not explored
yet, so we should rather make the naming decision based
off of technicals rather than anticipated use case and user
discovery methods.
I hope this is actually useful feedback on the naming discovery.
Thanks,
Stefan
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-05 21:57 ` Johannes Schindelin
2018-05-06 0:25 ` Todd Zullinger
2018-05-06 1:05 ` Igor Djordjevic
@ 2018-05-06 2:33 ` Junio C Hamano
2018-05-06 12:21 ` Johannes Schindelin
2018-05-07 7:50 ` Jeff King
3 siblings, 1 reply; 387+ messages in thread
From: Junio C Hamano @ 2018-05-06 2:33 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Jeff King, git, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> Hi Peff,
>
> On Sat, 5 May 2018, Jeff King wrote:
>
>> On Fri, May 04, 2018 at 05:34:32PM +0200, Johannes Schindelin wrote:
>>
>> > This builtin does not do a whole lot so far, apart from showing a usage
>> > that is oddly similar to that of `git tbdiff`. And for a good reason:
>> > the next commits will turn `branch-diff` into a full-blown replacement
>> > for `tbdiff`.
>>
>> One minor point about the name: will it become annoying as a tab
>> completion conflict with git-branch?
If tbdiff were "Thomas's branch diff", I would call this jbdiff ;-)
but I think the 't' in there stands for "topic", not "Thomas's".
How about "git topic-diff"?
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-06 2:33 ` Junio C Hamano
@ 2018-05-06 12:21 ` Johannes Schindelin
2018-05-06 20:51 ` Eric Sunshine
2018-05-07 1:45 ` Junio C Hamano
0 siblings, 2 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-06 12:21 UTC (permalink / raw)
To: Junio C Hamano
Cc: Jeff King, git, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Junio,
On Sun, 6 May 2018, Junio C Hamano wrote:
> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> > On Sat, 5 May 2018, Jeff King wrote:
> >
> >> On Fri, May 04, 2018 at 05:34:32PM +0200, Johannes Schindelin wrote:
> >>
> >> > This builtin does not do a whole lot so far, apart from showing a usage
> >> > that is oddly similar to that of `git tbdiff`. And for a good reason:
> >> > the next commits will turn `branch-diff` into a full-blown replacement
> >> > for `tbdiff`.
> >>
> >> One minor point about the name: will it become annoying as a tab
> >> completion conflict with git-branch?
>
> If tbdiff were "Thomas's branch diff", I would call this jbdiff ;-)
> but I think the 't' in there stands for "topic", not "Thomas's".
>
> How about "git topic-diff"?
Or `git topic-branch-diff`?
But then, we do not really use the term `topic branch` a lot in Git, *and*
the operation in question is not really about showing differences between
topic branches, but between revisions of topic branches.
So far, the solution I like best is to use `git branch --diff <...>`,
which also neatly side-steps the problem of cluttering the top-level
command list (because tab completion).
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-06 12:21 ` Johannes Schindelin
@ 2018-05-06 20:51 ` Eric Sunshine
2018-05-07 2:04 ` Johannes Schindelin
2018-05-07 1:45 ` Junio C Hamano
1 sibling, 1 reply; 387+ messages in thread
From: Eric Sunshine @ 2018-05-06 20:51 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Junio C Hamano, Jeff King, Git List, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller
On Sun, May 6, 2018 at 8:21 AM, Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
> On Sun, 6 May 2018, Junio C Hamano wrote:
>> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>> > On Sat, 5 May 2018, Jeff King wrote:
>> >> One minor point about the name: will it become annoying as a tab
>> >> completion conflict with git-branch?
>>
>> If tbdiff were "Thomas's branch diff", I would call this jbdiff ;-)
>> but I think the 't' in there stands for "topic", not "Thomas's".
>> How about "git topic-diff"?
>
> Or `git topic-branch-diff`?
>
> But then, we do not really use the term `topic branch` a lot in Git, *and*
> the operation in question is not really about showing differences between
> topic branches, but between revisions of topic branches.
>
> So far, the solution I like best is to use `git branch --diff <...>`,
> which also neatly side-steps the problem of cluttering the top-level
> command list (because tab completion).
Let's, please, not fall into the trap of polluting git-branch with
utterly unrelated functionality, as has happened a few times with
other Git commands. Let's especially not do so merely for the sake of
tab-completion. git-branch is for branch management; it's not for
diff'ing.
Of the suggestions thus far, Junio's git-topic-diff seems the least
bad, and doesn't suffer from tab-completion problems.
Building on Duy's suggestion: git-interdiff could be a superset of the
current git-branch-diff:
# standard interdiff
git interdiff womp-v1 womp-v2
# 'tbdiff'-like output
git interdiff --topic womp-v1 womp-v2
(Substitute "--topic" by any other better name.)
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-06 20:51 ` Eric Sunshine
@ 2018-05-07 2:04 ` Johannes Schindelin
2018-05-07 7:48 ` Jeff King
0 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-07 2:04 UTC (permalink / raw)
To: Eric Sunshine
Cc: Junio C Hamano, Jeff King, Git List, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller
Hi Eric,
On Sun, 6 May 2018, Eric Sunshine wrote:
> On Sun, May 6, 2018 at 8:21 AM, Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
> > On Sun, 6 May 2018, Junio C Hamano wrote:
> >> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> >> > On Sat, 5 May 2018, Jeff King wrote:
> >> >> One minor point about the name: will it become annoying as a tab
> >> >> completion conflict with git-branch?
> >>
> >> If tbdiff were "Thomas's branch diff", I would call this jbdiff ;-)
> >> but I think the 't' in there stands for "topic", not "Thomas's".
> >> How about "git topic-diff"?
> >
> > Or `git topic-branch-diff`?
> >
> > But then, we do not really use the term `topic branch` a lot in Git, *and*
> > the operation in question is not really about showing differences between
> > topic branches, but between revisions of topic branches.
> >
> > So far, the solution I like best is to use `git branch --diff <...>`,
> > which also neatly side-steps the problem of cluttering the top-level
> > command list (because tab completion).
>
> Let's, please, not fall into the trap of polluting git-branch with
> utterly unrelated functionality, as has happened a few times with
> other Git commands. Let's especially not do so merely for the sake of
> tab-completion. git-branch is for branch management; it's not for
> diff'ing.
I totally disagree. `git branch` is *the* command to work with branches.
Yes, you can manage branches. But you can also list them. And now you can
also compare them.
> Of the suggestions thus far, Junio's git-topic-diff seems the least
> worse, and doesn't suffer from tab-completion problems.
Except that this is too limited a view.
Have you seen one of the more important tidbits in the cover letter, the
one about Git for Windows' *branch thicket*? In this case, it is not *one*
topic branch that we are talking about.
And even worse: what this patch series introduces is not at all a feature
to compare topic branches!
Instead, it is a way to compare iterations of patch series, versions of
topic branches, changes introduced into a topic branch by rebasing it,
etc. And `git topic-diff` simply does not say this. It says something
different, something that my patches cannot fulfill.
> Building on Duy's suggestion: git-interdiff could be a superset of the
> current git-branch-diff:
>
> # standard interdiff
> git interdiff womp-v1 womp-v2
> # 'tbdiff'-like output
> git interdiff --topic womp-v1 womp-v2
No, no, and no. An interdiff is an interdiff is an interdiff. See e.g.
https://www.tutorialspoint.com/unix_commands/interdiff.htm for details.
The operation introduced by this patch series, or for that matter tbdiff,
*never ever* produced an interdiff. Get this "interdiff" label out of your
mind immediately when you think about this here operation.
One of my commit messages even talks about this, and says *why* we do not
generate interdiffs: they are in general not even well-defined.
Take my --rebase-merges patch series, for example. It is so long-running
that at some stages, all I did was to resolve merge conflicts incurred
from rebasing to `master`. That was literally all. Now, if you tried to
produce an interdiff, you would *already fail in the first step*, as the
previous overall diff does not apply in reverse on current `master`.
Out of all the options so far, the one that I liked was `git branch
--diff`. Seriously. I do not understand why you think that this is abusing
the `git branch` command. It is no less abusing it than `git branch
--edit-description`! And that is a *very good* command, and it is *very
good* that it is an option to `git branch`. It makes a total lot of sense,
I have never had to think "wait, in which Git command is this implemented
already?" And I would expect the exact same thing to happen with `git
branch --diff`.
Ciao,
Johannes
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 2:04 ` Johannes Schindelin
@ 2018-05-07 7:48 ` Jeff King
2018-05-07 21:33 ` Igor Djordjevic
2018-05-08 0:30 ` Junio C Hamano
0 siblings, 2 replies; 387+ messages in thread
From: Jeff King @ 2018-05-07 7:48 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Eric Sunshine, Junio C Hamano, Git List, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller
On Sun, May 06, 2018 at 10:04:31PM -0400, Johannes Schindelin wrote:
> > Let's, please, not fall into the trap of polluting git-branch with
> > utterly unrelated functionality, as has happened a few times with
> > other Git commands. Let's especially not do so merely for the sake of
> > tab-completion. git-branch is for branch management; it's not for
> > diff'ing.
>
> I totally disagree. `git branch` is *the* command to work with branches.
> Yes, you can manage branches. But you can also list them. And now you can
> also compare them.
One of the things I don't like about "git branch --diff" is that this
feature is not _just_ about branches at all. E.g., I could do:
git tbdiff HEAD~10 HEAD~5 foo
Or even:
git tbdiff v2.16.0 v2.17.0 my-rewritten-v2.17.0
Those arguments really are just commitishes, not necessarily branches.
One of the current interface rules for "git branch" is that the branch
names we hand it are interpreted _exactly_ as branch names. You cannot
"git branch -m v2.16.0", and there is no ambiguity in "git branch -d
foo" if "foo" is both a tag and a branch.
But this new mode does not fit the pattern at all.
If we were to attach this to an existing command, I think it has more to
do with "diff" than "branch". But I'm not sure we want to overload
"diff" either (which has traditionally been about two endpoints, and
does not really traverse at all, though arguably "foo...bar" is a bit of
a cheat :) ).
> > Of the suggestions thus far, Junio's git-topic-diff seems the least
> > worse, and doesn't suffer from tab-completion problems.
>
> Except that this is too limited a view.
Right, I agree with you. Topic branches are the intended use, but that's
not what it _does_, and obviously it can be applied in other cases. So
since "branch" is too specific, I think "topic branch" is even more so.
It's really "diff-history" or something, I think. That's not very
catchy, but I think the best name would imply that it was diffing a set
of commits (so even "diff-commit" would not be right, because that again
sounds like endpoints).
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 7:48 ` Jeff King
@ 2018-05-07 21:33 ` Igor Djordjevic
2018-05-21 10:33 ` Johannes Schindelin
2018-05-08 0:30 ` Junio C Hamano
1 sibling, 1 reply; 387+ messages in thread
From: Igor Djordjevic @ 2018-05-07 21:33 UTC (permalink / raw)
To: Jeff King, Johannes Schindelin
Cc: Eric Sunshine, Junio C Hamano, Git List, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller
On 07/05/2018 09:48, Jeff King wrote:
>
> > > Let's, please, not fall into the trap of polluting git-branch with
> > > utterly unrelated functionality, as has happened a few times with
> > > other Git commands. Let's especially not do so merely for the sake of
> > > tab-completion. git-branch is for branch management; it's not for
> > > diff'ing.
> >
> > I totally disagree. `git branch` is *the* command to work with branches.
> > Yes, you can manage branches. But you can also list them. And now you can
> > also compare them.
>
> One of the things I don't like about "git branch --diff" is that this
> feature is not _just_ about branches at all. E.g., I could do:
>
> git tbdiff HEAD~10 HEAD~5 foo
>
> Or even:
>
> git tbdiff v2.16.0 v2.17.0 my-rewritten-v2.17.0
>
> Those arguments really are just commitishes, not necessarily branches.
> One of the current interface rules for "git branch" is that the branch
> names we hand it are interpreted _exactly_ as branch names. You cannot
> "git branch -m v2.16.0", and there is no ambiguity in "git branch -d
> foo" if "foo" is both a tag and a branch.
>
> But this new mode does not fit the pattern at all.
>
> If we were to attach this to an existing command, I think it has more to
> do with "diff" than "branch". But I'm not sure we want to overload
> "diff" either (which has traditionally been about two endpoints, and
> does not really traverse at all, though arguably "foo...bar" is a bit of
> a cheat :) ).
>
> > > Of the suggestions thus far, Junio's git-topic-diff seems the least
> > > worse, and doesn't suffer from tab-completion problems.
> >
> > Except that this is too limited a view.
>
> Right, I agree with you. Topic branches are the intended use, but that's
> not what it _does_, and obviously it can be applied in other cases. So
> since "branch" is too specific, I think "topic branch" is even more so.
>
> It's really "diff-history" or something, I think. That's not very
> catchy, but I think the best name would imply that it was diffing a set
> of commits (so even "diff-commit" would not be right, because that again
> sounds like endpoints).
This is exactly what I feel as well, thanks for concise and
to-the-point spelling out.
From user interface perspective, I would expect something like this
to be possible (and natural):
(1) git diff topic-v1...topic-v2
(2) git diff --branch topic-v1...topic-v2
(1) is what we are all familiar with, providing a diff between two
revisions with focus on file changes, where (2) shifts focus to
history changes.
It`s all still a comparison between two revisions (pointed to by
"topic-v1" and "topic-v2" branch heads in this specific example), but
it differs in what we are comparing - (1) set of files contained in
endpoints, or (2) set of revisions contained in (or "leading to")
endpoints.
Hmm... what about `git diff --history`? :/ It does seem more "true"
to what it does, though I still like `git diff --branch` more
(catchier, indeed).
Regards, Buga
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 21:33 ` Igor Djordjevic
@ 2018-05-21 10:33 ` Johannes Schindelin
2018-05-21 17:56 ` Stefan Beller
0 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-21 10:33 UTC (permalink / raw)
To: Igor Djordjevic
Cc: Jeff King, Eric Sunshine, Junio C Hamano, Git List, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller
Hi Buga,
On Mon, 7 May 2018, Igor Djordjevic wrote:
> On 07/05/2018 09:48, Jeff King wrote:
> >
> > > > Let's, please, not fall into the trap of polluting git-branch with
> > > > utterly unrelated functionality, as has happened a few times with
> > > > other Git commands. Let's especially not do so merely for the sake of
> > > > tab-completion. git-branch is for branch management; it's not for
> > > > diff'ing.
> > >
> > > I totally disagree. `git branch` is *the* command to work with branches.
> > > Yes, you can manage branches. But you can also list them. And now you can
> > > also compare them.
> >
> > One of the things I don't like about "git branch --diff" is that this
> > feature is not _just_ about branches at all. E.g., I could do:
> >
> > git tbdiff HEAD~10 HEAD~5 foo
> >
> > Or even:
> >
> > git tbdiff v2.16.0 v2.17.0 my-rewritten-v2.17.0
> >
> > Those arguments really are just commitishes, not necessarily branches.
> > One of the current interface rules for "git branch" is that the branch
> > names we hand it are interpreted _exactly_ as branch names. You cannot
> > "git branch -m v2.16.0", and there is no ambiguity in "git branch -d
> > foo" if "foo" is both a tag and a branch.
> >
> > But this new mode does not fit the pattern at all.
> >
> > If we were to attach this to an existing command, I think it has more to
> > do with "diff" than "branch". But I'm not sure we want to overload
> > "diff" either (which has traditionally been about two endpoints, and
> > does not really traverse at all, though arguably "foo...bar" is a bit of
> > a cheat :) ).
> >
> > > > Of the suggestions thus far, Junio's git-topic-diff seems the least
> > > > worse, and doesn't suffer from tab-completion problems.
> > >
> > > Except that this is too limited a view.
> >
> > Right, I agree with you. Topic branches are the intended use, but that's
> > not what it _does_, and obviously it can be applied in other cases. So
> > since "branch" is too specific, I think "topic branch" is even more so.
> >
> > It's really "diff-history" or something, I think. That's not very
> > catchy, but I think the best name would imply that it was diffing a set
> > of commits (so even "diff-commit" would not be right, because that again
> > sounds like endpoints).
>
> This is exactly what I feel as well, thanks for concise and
> to-the-point spelling out.
>
> From user interface perspective, I would expect something like this
> to be possible (and natural):
>
> (1) git diff topic-v1...topic-v2
No, we cannot. The `git diff topic-v1...topic-v2` invocation has worked
for a looooong time, and does something very different.
We should not even allow ourselves to think of such a breakage.
> (2) git diff --branch topic-v1...topic-v2
From my point of view, `git diff --branch` indicates that I diff
*branches*. Which is not really something that makes sense, and definitely
not what this command is about.
We are not comparing branches.
We are comparing versions of the same branch.
> (1) is what we are all familiar with, providing a diff between two
> revisions with focus on file changes, where (2) shifts focus to
> history changes.
>
> It`s all still a comparison between two revisions (pointed to by
> "topic-v1" and "topic-v2" branch heads in this specific example), but
> it differs in what we are comparing - (1) set of files contained in
> endpoints, or (2) set of revisions contained in (or "leading to")
> endpoints.
It is very much not about comparing *two* revisions. It is very much about
comparing two *ranges of* revisions, and not just any ranges, no. Those
ranges need to be so related as to contain mostly identical changes.
Otherwise, `git branch --diff` will spend a ton of time, just to come back
with a series of `-` lines followed by a series of `+` lines
(figuratively, not literally). Which would be stupid, to spend that much
time on something that `git rev-list --left-right topic1...topic2` would
have computed a lot faster.
> Hmm... what about `git diff --history`? :/ It does seem more "true"
> to what it does, though I still like `git diff --branch` more
> (catchier, indeed).
It certainly is catchier. But also a ton more puzzling.
I do not want to compare histories, after all. That would be like saying:
okay, topic1 and topic2 ended up at the same stage, but *how* did they
get there?
What I *want* to ask via the command implemented by this patch series is
the question: there was a set of patches previously, and now I have a set
of revised patches, what changed?
Most fellow German software engineers (who seem to have a knack for
idiotically long variable/function names) would now probably suggest:
git compare-patch-series-with-revised-patch-series
I hope you agree that that is better *and* worse than your suggestions,
depending on what angle you look at it from: it is better because it
describes what the command is *actually* doing. But it is much worse at
the same time because it is too long.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-21 10:33 ` Johannes Schindelin
@ 2018-05-21 17:56 ` Stefan Beller
2018-05-21 20:24 ` Jeff King
0 siblings, 1 reply; 387+ messages in thread
From: Stefan Beller @ 2018-05-21 17:56 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Igor Djordjevic, Jeff King, Eric Sunshine, Junio C Hamano,
Git List, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Jacob Keller
Hi Johannes,
>> (2) git diff --branch topic-v1...topic-v2
>
> From my point of view, `git diff --branch` indicates that I diff
> *branches*. Which is not really something that makes sense, and definitely
> not what this command is about.
>
> We are not comparing branches.
>
> We are comparing versions of the same branch.
I happen to have a messier workflow than you have, as I
develop a "resend" of a topic in a new branch (or I have to
restore the old sent topic from the reflog).
Now that I have the tool I also compare two branches,
namely, the branch that Junio queued
(origin/base..origin/sb/intelligent-name) vs the resend
that I had locally (origin/base..foo).
Next time I might compare Junio's queued topic to the
local format-patch'es that I already annotated.
So in a way this diffs different versions of a topic, "diff-topic-versions".
>> It`s all still a comparison between two revisions (pointed to by
>> "topic-v1" and "topic-v2" branch heads in this specific example), but
>> it differs in what we are comparing - (1) set of files contained in
>> endpoints, or (2) set of revisions contained in (or "leading to")
>> endpoints.
>
> It is very much not about comparing *two* revisions.
I wonder if we can make the tool more intelligent to take two revisions
and it figures out the range by finding the base branch itself.
Probably as a follow up.
> It is very much about
> comparing two *ranges of* revisions, and not just any ranges, no. Those
> ranges need to be so related as to contain mostly identical changes.
range-diff, eh?
> Most fellow German software engineers (who seem to have a knack for
> idiotically long variable/function names) would now probably suggest:
>
> git compare-patch-series-with-revised-patch-series
or short:
revision-compare
compare-revs
com-revs
revised-diff
revise-diff
revised-compare
diff-revise
> I hope you agree that that is better *and* worse than your suggestions,
> depending from what angle you look at it: it is better because it
> describes what the command is *actually* doing. But it is much worse at
> the same time because it is too long.
btw, you think very much in terms of *patch series*, but there are workflows
without patches (pull requests at GitHub et al., changes in Gerrit),
and I would think the output of the tool under discussion would still be
useful.
In [1] Junio gives his use case, it is "before accepting them", which could
be comparing an mbox or patch files against a branch, or first building
up a local history on a detached head (and then wondering if to reset
the branch to the new history), which would be all in Git.
That use case still has 'patches' involved, but these are not the main
selling point for the tool, as you could turn patches into commits before
using this tool.
[1] https://public-inbox.org/git/xmqqvabh1ung.fsf@gitster-ct.c.googlers.com/
Thanks,
Stefan
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-21 17:56 ` Stefan Beller
@ 2018-05-21 20:24 ` Jeff King
2018-05-21 21:40 ` Brandon Williams
0 siblings, 1 reply; 387+ messages in thread
From: Jeff King @ 2018-05-21 20:24 UTC (permalink / raw)
To: Stefan Beller
Cc: Johannes Schindelin, Igor Djordjevic, Eric Sunshine,
Junio C Hamano, Git List, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Jacob Keller
On Mon, May 21, 2018 at 10:56:47AM -0700, Stefan Beller wrote:
> > It is very much about
> > comparing two *ranges of* revisions, and not just any ranges, no. Those
> > ranges need to be so related as to contain mostly identical changes.
>
> range-diff, eh?
>
> > Most fellow German software engineers (who seem to have a knack for
> > idiotically long variable/function names) would now probably suggest:
> >
> > git compare-patch-series-with-revised-patch-series
>
> or short:
>
> revision-compare
> compare-revs
> com-revs
>
> revised-diff
> revise-diff
> revised-compare
>
> diff-revise
I still like "range diff", but I think something around "revise" is a
good line of thought, too. Because it implies that we expect the two
ranges to be composed of almost-the-same commits.
That leads to another use case where I think focusing on topic branches
(or even branches at all) would be a misnomer. Imagine I cherry-pick a
bunch of commits with:
git cherry-pick -10 $old_commit
I might then want to see how the result differs with something like:
git range-diff $old_commit~10..$old_commit HEAD~10..HEAD
I wouldn't think of this as a topic-branch operation, but just as
comparing two sequences of commits. I guess "revise" isn't strictly
accurate here either, as I'm not revising. But I do assume the two
ranges share some kind of mapping of patches.
-Peff
PS I wish there were a nicer syntax to do that. Perhaps
"git range-diff -10 $old_commit HEAD" could work, though occasionally
the two ranges are not the same length (e.g., if you ended up
skipping one of the cherry-picked commits). Anyway, those kind of
niceties can easily come later on top. :)
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-21 20:24 ` Jeff King
@ 2018-05-21 21:40 ` Brandon Williams
2018-05-21 21:48 ` Stefan Beller
2018-05-21 21:52 ` Jeff King
0 siblings, 2 replies; 387+ messages in thread
From: Brandon Williams @ 2018-05-21 21:40 UTC (permalink / raw)
To: Jeff King
Cc: Stefan Beller, Johannes Schindelin, Igor Djordjevic,
Eric Sunshine, Junio C Hamano, Git List, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Jacob Keller
On 05/21, Jeff King wrote:
> On Mon, May 21, 2018 at 10:56:47AM -0700, Stefan Beller wrote:
>
> > > It is very much about
> > > comparing two *ranges of* revisions, and not just any ranges, no. Those
> > > ranges need to be so related as to contain mostly identical changes.
> >
> > range-diff, eh?
> >
> > > Most fellow German software engineers (who seem to have a knack for
> > > idiotically long variable/function names) would now probably suggest:
> > >
> > > git compare-patch-series-with-revised-patch-series
> >
> > or short:
> >
> > revision-compare
> > compare-revs
> > com-revs
> >
> > revised-diff
> > revise-diff
> > revised-compare
> >
> > diff-revise
I haven't really been following all of the discussion but from what I
can tell the point of this command is to generate a diff based on two
different versions of a series, so why not call it 'series-diff'? :)
--
Brandon Williams
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-21 21:40 ` Brandon Williams
@ 2018-05-21 21:48 ` Stefan Beller
2018-05-21 21:52 ` Jeff King
1 sibling, 0 replies; 387+ messages in thread
From: Stefan Beller @ 2018-05-21 21:48 UTC (permalink / raw)
To: Brandon Williams
Cc: Jeff King, Johannes Schindelin, Igor Djordjevic, Eric Sunshine,
Junio C Hamano, Git List, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Jacob Keller
On Mon, May 21, 2018 at 2:40 PM, Brandon Williams <bmwill@google.com> wrote:
>> > revised-compare
>> >
>> > diff-revise
>
> I haven't really been following all of the discussion but from what I
> can tell the point of this command is to generate a diff based on two
> different versions of a series, so why not call it 'series-diff'? :)
Upon mentioning series-diff, I misheard Brandon and thought he proposed
serious-diff
:-)
Stefan
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-21 21:40 ` Brandon Williams
2018-05-21 21:48 ` Stefan Beller
@ 2018-05-21 21:52 ` Jeff King
2018-05-22 2:08 ` Junio C Hamano
1 sibling, 1 reply; 387+ messages in thread
From: Jeff King @ 2018-05-21 21:52 UTC (permalink / raw)
To: Brandon Williams
Cc: Stefan Beller, Johannes Schindelin, Igor Djordjevic,
Eric Sunshine, Junio C Hamano, Git List, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Jacob Keller
On Mon, May 21, 2018 at 02:40:57PM -0700, Brandon Williams wrote:
> > > > Most fellow German software engineers (who seem to have a knack for
> > > > idiotically long variable/function names) would now probably suggest:
> > > >
> > > > git compare-patch-series-with-revised-patch-series
> > >
> > > or short:
> > >
> > > revision-compare
> > > compare-revs
> > > com-revs
> > >
> > > revised-diff
> > > revise-diff
> > > revised-compare
> > >
> > > diff-revise
>
> I haven't really been following all of the discussion but from what I
> can tell the point of this command is to generate a diff based on two
> different versions of a series, so why not call it 'series-diff'? :)
That's OK with me, though I prefer "range" as I think we use that term
elsewhere ("series" is usually part of "patch series", but many people
do not use a workflow with that term).
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-21 21:52 ` Jeff King
@ 2018-05-22 2:08 ` Junio C Hamano
0 siblings, 0 replies; 387+ messages in thread
From: Junio C Hamano @ 2018-05-22 2:08 UTC (permalink / raw)
To: Jeff King
Cc: Brandon Williams, Stefan Beller, Johannes Schindelin,
Igor Djordjevic, Eric Sunshine, Git List, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Jacob Keller
Jeff King <peff@peff.net> writes:
>> I haven't really been following all of the discussion but from what I
>> can tell the point of this command is to generate a diff based on two
>> different versions of a series, so why not call it 'series-diff'? :)
>
> That's OK with me, though I prefer "range" as I think we use that term
> elsewhere ("series" is usually part of "patch series", but many people
> do not use a workflow with that term).
FWIW, I am OK with either, with a bit of preference to "range" over
"series". As long as this stays to be an independent command (as
opposed to be made into a new mode to existing command) and the
command name is not overly hard to type, I am OK with anything ;-)
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 7:48 ` Jeff King
2018-05-07 21:33 ` Igor Djordjevic
@ 2018-05-08 0:30 ` Junio C Hamano
1 sibling, 0 replies; 387+ messages in thread
From: Junio C Hamano @ 2018-05-08 0:30 UTC (permalink / raw)
To: Jeff King
Cc: Johannes Schindelin, Eric Sunshine, Git List, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller
Jeff King <peff@peff.net> writes:
> One of the things I don't like about "git branch --diff" is that this
> feature is not _just_ about branches at all.
I actually wouldn't be that much against the word "branch" in
"branch-diff" on the ground that we are typically not feeding
branches to the command (we are feeding two ranges, and one endpoint
of each range typically gets expressed using branch name), as we
have a precedent in "show-branch", for example, that often takes
branches but does not have to.
> It's really "diff-history" or something, I think. That's not very
> catchy, but I think the best name would imply that it was diffing a set
> of commits (so even "diff-commit" would not be right, because that again
> sounds like endpoints).
Sure. This shouldn't be a submode "--diff" of "git branch" just like
it shouldn't be a submode of "git commit" only because it is about
comparing two sets of commits. "diff" is about comparing two
endpoints, and not about comparing two sets. "log" is the closest
thing, if we really want to coerce it into an existing set of
commands, as it is about a set of commits, but it does not do
multiple sets, let alone comparing them.
"branch-diff" was just as good as "diff-history", except that both of
them may irritate command line completion users. I do not think I
care too much about which exact command name it gets, but I think it
is a bad idea to tack it onto an existing command as a submode that
does an unrelated thing to what the main command does. So from that
point of view, "branch-diff" and "diff-history" are equally good
being a distinct command, and equally bad sharing prefix with common
existing command.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-06 12:21 ` Johannes Schindelin
2018-05-06 20:51 ` Eric Sunshine
@ 2018-05-07 1:45 ` Junio C Hamano
2018-05-07 5:39 ` Johannes Schindelin
1 sibling, 1 reply; 387+ messages in thread
From: Junio C Hamano @ 2018-05-07 1:45 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Jeff King, git, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>> If tbdiff were "Thomas's branch diff", I would call this jbdiff ;-)
>> but I think the 't' in there stands for "topic", not "Thomas's".
>>
>> How about "git topic-diff"?
>
> Or `git topic-branch-diff`?
Yeah something along that line, which is about comparing each step
in two iterations of a single topic. It would be wonderful if it
also supported a short-hand
$ git tbdiff --reflog 1.day.ago js/branch-diff
that turned into:
$ git tbdiff js/branch-diff..js/branch-diff@{1.day.ago} \
js/branch-diff@{1.day.ago}..js/branch-diff
That compares "what was on the topic a day ago" with "what is new on
the topic since that time", which is exactly what an individual
contributor wants when reviewing how the topic was polished, I would
say.
[Footnote]
A variant I often use when accepting a rerolled series is
$ git checkout js/branch-diff
$ git checkout master...
$ git am ./+js-branch-diff-v2
$ git tbdiff ..@{-1} @{-1}..
so this is not only for individual contributors but also helps
integrators.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 1:45 ` Junio C Hamano
@ 2018-05-07 5:39 ` Johannes Schindelin
2018-05-07 15:12 ` Junio C Hamano
0 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-07 5:39 UTC (permalink / raw)
To: Junio C Hamano
Cc: Jeff King, git, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Junio,
On Mon, 7 May 2018, Junio C Hamano wrote:
> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> >> If tbdiff were "Thomas's branch diff", I would call this jbdiff ;-)
> >> but I think the 't' in there stands for "topic", not "Thomas's".
> >>
> >> How about "git topic-diff"?
> >
> > Or `git topic-branch-diff`?
>
> Yeah something along that line, which is about comparing each step
> in two iterations of a single topic. It would be wonderful if it
> also supported a short-hand
>
> $ git tbdiff --reflog 1.day.ago js/branch-diff
>
> that turned into:
>
> $ git tbdiff js/branch-diff..js/branch-diff@{1.day.ago} \
> js/branch-diff@{1.day.ago}..js/branch-diff
Or even easier: `git tbdiff js/branch-diff@{1.day.ago}...js/branch-diff`.
> That compares "what was on the topic a day ago" with "what is new on
> the topic since that time", which is exactly what an individual
> contributor wants when reviewing how the topic was polished, I would
> say.
It would be easy to introduce, but I am wary about its usefulness.
Unless you re-generate the branch from patches (which I guess you do a
lot, but I don't), you are likely to compare incomplete patch series: say,
when you call `git rebase -i` to reword 05/18's commit message, your
command will only compare 05--18 of the patch series.
Worse, if js/branch-diff needs to be uprooted (e.g. because it now depends
on some different patch, or because it already depended on a separate
patch series that was now updated), your `git branch --diff` call will
compare more than just my patches: it will assume that those dependencies
are part of the patch series, because they changed, too.
> [Footnote]
>
> A variant I often use when accepting a rerolled series is
>
> $ git checkout js/branch-diff
> $ git checkout master...
> $ git am ./+js-branch-diff-v2
> $ git tbdiff ..@{-1} @{-1}..
>
> so this is not only for individual contributors but also helps
> integrators.
Yes, and I also pointed out (twice) that it will help interested parties
follow what I do with my merging-rebases in Git for Windows.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 5:39 ` Johannes Schindelin
@ 2018-05-07 15:12 ` Junio C Hamano
2018-05-21 10:41 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Junio C Hamano @ 2018-05-07 15:12 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Jeff King, git, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> It would be easy to introduce, but I am wary about its usefulness.
> Unless you re-generate the branch from patches (which I guess you do a
> lot, but I don't), you are likely to compare incomplete patch series: say,
> when you call `git rebase -i` to reword 05/18's commit message, your
> command will only compare 05--18 of the patch series.
Well that is exactly the point of that "..@{1} @{1}..", which turned
out to be very useful in practice at least for me when I am updating
a topic with "rebase -i", and then reviewing what I did with tbdiff.
I do not want 01-04 in the above case as I already know I did not
touch them.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 15:12 ` Junio C Hamano
@ 2018-05-21 10:41 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-21 10:41 UTC (permalink / raw)
To: Junio C Hamano
Cc: Jeff King, git, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Junio,
On Tue, 8 May 2018, Junio C Hamano wrote:
> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> > It would be easy to introduce, but I am wary about its usefulness.
> > Unless you re-generate the branch from patches (which I guess you do a
> > lot, but I don't), you are likely to compare incomplete patch series: say,
> > when you call `git rebase -i` to reword 05/18's commit message, your
> > command will only compare 05--18 of the patch series.
>
> Well that is exactly the point of that "..@{1} @{1}..", which turned
> out to be very useful in practice at least for me when I am updating
> a topic with "rebase -i", and then reviewing what I did with tbdiff.
>
> I do not want 01-04 in the above case as I already know I did not
> touch them.
And you are a seasoned veteran maintainer.
To the occasional contributor, this information is not obvious, and it is
not stored in their brain. It needs to be made explicit, which is why this
here command outputs those `abcdef = 012345` lines: it lists all the
commits, stating which ones are unchanged. In your 01-04 example, those
lines would be of the form `abcdef = abcdef`, of course.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-05 21:57 ` Johannes Schindelin
` (2 preceding siblings ...)
2018-05-06 2:33 ` Junio C Hamano
@ 2018-05-07 7:50 ` Jeff King
2018-05-07 15:28 ` Duy Nguyen
3 siblings, 1 reply; 387+ messages in thread
From: Jeff King @ 2018-05-07 7:50 UTC (permalink / raw)
To: Johannes Schindelin
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On Sat, May 05, 2018 at 11:57:26PM +0200, Johannes Schindelin wrote:
> > It feels really petty complaining about the name, but I just want to
> > raise the point, since it will never be easier to change than right now.
>
> I do hear you. Especially since I hate `git cherry` every single time that
> I try to tab-complete `git cherry-pick`.
Me too. :)
I've wondered if "git pick" would be a good alias for cherry-pick (the
"cherry" metaphor is probably not well understood by most users). And
"revert" should just be "pick -R", but that is a whole other discussion.
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 7:50 ` Jeff King
@ 2018-05-07 15:28 ` Duy Nguyen
2018-05-07 19:58 ` Stefan Beller
0 siblings, 1 reply; 387+ messages in thread
From: Duy Nguyen @ 2018-05-07 15:28 UTC (permalink / raw)
To: Jeff King
Cc: Johannes Schindelin, Git Mailing List, Junio C Hamano,
Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On Mon, May 7, 2018 at 9:50 AM, Jeff King <peff@peff.net> wrote:
> On Sat, May 05, 2018 at 11:57:26PM +0200, Johannes Schindelin wrote:
>
>> > It feels really petty complaining about the name, but I just want to
>> > raise the point, since it will never be easier to change than right now.
>>
>> I do hear you. Especially since I hate `git cherry` every single time that
>> I try to tab-complete `git cherry-pick`.
>
> Me too. :)
Just so you know I'm also not happy with that "git cherry". Since I'm
updating git-completion.bash in this area and we got 3 "me too" votes
(four if we count Szeder in another thread), I'm going to implement
something to at least let you exclude "cherry" from the completion
list if you want.
--
Duy
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 02/18] Add a new builtin: branch-diff
2018-05-07 15:28 ` Duy Nguyen
@ 2018-05-07 19:58 ` Stefan Beller
0 siblings, 0 replies; 387+ messages in thread
From: Stefan Beller @ 2018-05-07 19:58 UTC (permalink / raw)
To: Duy Nguyen
Cc: Jeff King, Johannes Schindelin, Git Mailing List, Junio C Hamano,
Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Jacob Keller, Eric Sunshine
On Mon, May 7, 2018 at 8:28 AM, Duy Nguyen <pclouds@gmail.com> wrote:
>>> I do hear you. Especially since I hate `git cherry` every single time that
>>> I try to tab-complete `git cherry-pick`.
>>
>> Me too. :)
>
> Just so you know I'm also not happy with that "git cherry". Since I'm
> updating git-completion.bash in this area and we got 3 "me too" votes
> (four if we count Szeder in another thread), I'm going to implement
> something to at least let you exclude "cherry" from the completion
> list if you want.
And another "me too" here.
^ permalink raw reply [flat|nested] 387+ messages in thread
* [PATCH v2 03/18] branch-diff: first rudimentary implementation
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
2018-05-04 15:34 ` [PATCH v2 01/18] Add a function to solve least-cost assignment problems Johannes Schindelin
2018-05-04 15:34 ` [PATCH v2 02/18] Add a new builtin: branch-diff Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-04 15:34 ` [PATCH v2 04/18] branch-diff: improve the order of the shown commits Johannes Schindelin
` (18 subsequent siblings)
21 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
At this stage, `git branch-diff` can determine corresponding commits of
two related commit ranges. This makes use of the recently introduced
implementation of the Hungarian algorithm.
The core of this patch is a straight port of the ideas of tbdiff, the
seemingly dormant project at https://github.com/trast/tbdiff.
The output does not at all match `tbdiff`'s output yet, as this patch
really concentrates on getting the patch matching part right.
Note: due to differences in the diff algorithm (`tbdiff` uses the
Python module `difflib`, Git uses its xdiff fork), the cost matrix
calculated by `branch-diff` is different (but very similar) to the one
calculated by `tbdiff`. Therefore, it is possible that they find
different matching commits in corner cases (e.g. when a patch was split
into two patches of roughly equal length).
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
builtin/branch-diff.c | 335 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 334 insertions(+), 1 deletion(-)
diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
index 60a4b4fbe30..c462681067c 100644
--- a/builtin/branch-diff.c
+++ b/builtin/branch-diff.c
@@ -1,6 +1,12 @@
#include "cache.h"
#include "builtin.h"
#include "parse-options.h"
+#include "string-list.h"
+#include "run-command.h"
+#include "argv-array.h"
+#include "hashmap.h"
+#include "xdiff-interface.h"
+#include "hungarian.h"
static const char * const builtin_branch_diff_usage[] = {
N_("git branch-diff [<options>] <old-base>..<old-tip> <new-base>..<new-tip>"),
@@ -20,6 +26,279 @@ static int parse_creation_weight(const struct option *opt, const char *arg,
return 0;
}
+struct patch_util {
+ /* For the search for an exact match */
+ struct hashmap_entry e;
+ const char *diff, *patch;
+
+ int i;
+ int diffsize;
+ size_t diff_offset;
+ /* the index of the matching item in the other branch, or -1 */
+ int matching;
+ struct object_id oid;
+};
+
+/*
+ * Reads the patches into a string list, with the `util` field being populated
+ * as struct patch_util (will need to be free()d).
+ */
+static int read_patches(const char *range, struct string_list *list)
+{
+ struct child_process cp = CHILD_PROCESS_INIT;
+ FILE *in;
+ struct strbuf buf = STRBUF_INIT, line = STRBUF_INIT;
+ struct patch_util *util = NULL;
+ int in_header = 1;
+
+ argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges",
+ "--reverse", "--date-order", "--decorate=no",
+ "--no-abbrev-commit", range,
+ NULL);
+ cp.out = -1;
+ cp.no_stdin = 1;
+ cp.git_cmd = 1;
+
+ if (start_command(&cp))
+ return error_errno(_("could not start `log`"));
+ in = fdopen(cp.out, "r");
+ if (!in) {
+ error_errno(_("could not read `log` output"));
+ finish_command(&cp);
+ return -1;
+ }
+
+ while (strbuf_getline(&line, in) != EOF) {
+ const char *p;
+
+ if (skip_prefix(line.buf, "commit ", &p)) {
+ if (util) {
+ string_list_append(list, buf.buf)->util = util;
+ strbuf_reset(&buf);
+ }
+ util = xcalloc(sizeof(*util), 1);
+ if (get_oid(p, &util->oid)) {
+ error(_("could not parse commit '%s'"), p);
+ free(util);
+ string_list_clear(list, 1);
+ strbuf_release(&buf);
+ strbuf_release(&line);
+ fclose(in);
+ finish_command(&cp);
+ return -1;
+ }
+ util->matching = -1;
+ in_header = 1;
+ continue;
+ }
+
+ if (starts_with(line.buf, "diff --git")) {
+ in_header = 0;
+ strbuf_addch(&buf, '\n');
+ if (!util->diff_offset)
+ util->diff_offset = buf.len;
+ strbuf_addbuf(&buf, &line);
+ } else if (in_header) {
+ if (starts_with(line.buf, "Author: ")) {
+ strbuf_addbuf(&buf, &line);
+ strbuf_addstr(&buf, "\n\n");
+ } else if (starts_with(line.buf, " ")) {
+ strbuf_addbuf(&buf, &line);
+ strbuf_addch(&buf, '\n');
+ }
+ continue;
+ } else if (starts_with(line.buf, "@@ "))
+ strbuf_addstr(&buf, "@@");
+ else if (line.buf[0] && !starts_with(line.buf, "index "))
+ /*
+ * A completely blank (not ' \n', which is context)
+ * line is not valid in a diff. We skip it
+ * silently, because this neatly handles the blank
+ * separator line between commits in git-log
+ * output.
+ */
+ strbuf_addbuf(&buf, &line);
+ else
+ continue;
+
+ strbuf_addch(&buf, '\n');
+ util->diffsize++;
+ }
+ fclose(in);
+ strbuf_release(&line);
+
+ if (util)
+ string_list_append(list, buf.buf)->util = util;
+ strbuf_release(&buf);
+
+ if (finish_command(&cp))
+ return -1;
+
+ return 0;
+}
+
+static int patch_util_cmp(const void *dummy, const struct patch_util *a,
+ const struct patch_util *b, const char *keydata)
+{
+ return strcmp(a->diff, keydata ? keydata : b->diff);
+}
+
+static void find_exact_matches(struct string_list *a, struct string_list *b)
+{
+ struct hashmap map;
+ int i;
+
+ hashmap_init(&map, (hashmap_cmp_fn)patch_util_cmp, NULL, 0);
+
+ /* First, add the patches of a to a hash map */
+ for (i = 0; i < a->nr; i++) {
+ struct patch_util *util = a->items[i].util;
+
+ util->i = i;
+ util->patch = a->items[i].string;
+ util->diff = util->patch + util->diff_offset;
+ hashmap_entry_init(util, strhash(util->diff));
+ hashmap_add(&map, util);
+ }
+
+ /* Now try to find exact matches in b */
+ for (i = 0; i < b->nr; i++) {
+ struct patch_util *util = b->items[i].util, *other;
+
+ util->i = i;
+ util->patch = b->items[i].string;
+ util->diff = util->patch + util->diff_offset;
+ hashmap_entry_init(util, strhash(util->diff));
+ other = hashmap_remove(&map, util, NULL);
+ if (other) {
+ if (other->matching >= 0)
+ BUG("already assigned!");
+
+ other->matching = i;
+ util->matching = other->i;
+ }
+ }
+
+ hashmap_free(&map, 0);
+}
+
+static void diffsize_consume(void *data, char *line, unsigned long len)
+{
+ (*(int *)data)++;
+}
+
+static int diffsize(const char *a, const char *b)
+{
+ xpparam_t pp = { 0 };
+ xdemitconf_t cfg = { 0 };
+ mmfile_t mf1, mf2;
+ int count = 0;
+
+ mf1.ptr = (char *)a;
+ mf1.size = strlen(a);
+ mf2.ptr = (char *)b;
+ mf2.size = strlen(b);
+
+ cfg.ctxlen = 3;
+ if (!xdi_diff_outf(&mf1, &mf2, diffsize_consume, &count, &pp, &cfg))
+ return count;
+
+ error(_("failed to generate diff"));
+ return INT_MAX;
+}
+
+static int get_correspondences(struct string_list *a, struct string_list *b,
+ double creation_weight)
+{
+ int n = a->nr + b->nr;
+ double *cost = xmalloc(sizeof(double) * n * n), c;
+ int *a2b = xmalloc(sizeof(int) * n), *b2a = xmalloc(sizeof(int) * n);
+ int i, j, res;
+
+ for (i = 0; i < a->nr; i++) {
+ struct patch_util *a_util = a->items[i].util;
+
+ for (j = 0; j < b->nr; j++) {
+ struct patch_util *b_util = b->items[j].util;
+
+ if (a_util->matching == j)
+ c = 0;
+ else if (a_util->matching < 0 && b_util->matching < 0)
+ c = diffsize(a_util->diff, b_util->diff);
+ else
+ c = INT_MAX;
+ cost[i + n * j] = c;
+ }
+
+ c = a_util->matching < 0 ?
+ a_util->diffsize * creation_weight : INT_MAX;
+ for (j = b->nr; j < n; j++)
+ cost[i + n * j] = c;
+ }
+
+ for (j = 0; j < b->nr; j++) {
+ struct patch_util *util = b->items[j].util;
+
+ c = util->matching < 0 ?
+ util->diffsize * creation_weight : INT_MAX;
+ for (i = a->nr; i < n; i++)
+ cost[i + n * j] = c;
+ }
+
+ for (i = a->nr; i < n; i++)
+ for (j = b->nr; j < n; j++)
+ cost[i + n * j] = 0;
+
+ res = compute_assignment(n, n, cost, a2b, b2a);
+
+ for (i = 0; i < a->nr; i++)
+ if (a2b[i] >= 0 && a2b[i] < b->nr) {
+ struct patch_util *a_util = a->items[i].util;
+ struct patch_util *b_util = b->items[a2b[i]].util;
+
+ a_util->matching = a2b[i];
+ b_util->matching = i;
+ }
+
+ free(cost);
+ free(a2b);
+ free(b2a);
+
+ return res;
+}
+
+static const char *short_oid(struct patch_util *util)
+{
+ return find_unique_abbrev(&util->oid, DEFAULT_ABBREV);
+}
+
+static void output(struct string_list *a, struct string_list *b)
+{
+ int i;
+
+ for (i = 0; i < b->nr; i++) {
+ struct patch_util *util = b->items[i].util, *prev;
+
+ if (util->matching < 0)
+ printf("-: -------- > %d: %s\n",
+ i + 1, short_oid(util));
+ else {
+ prev = a->items[util->matching].util;
+ printf("%d: %s ! %d: %s\n",
+ util->matching + 1, short_oid(prev),
+ i + 1, short_oid(util));
+ }
+ }
+
+ for (i = 0; i < a->nr; i++) {
+ struct patch_util *util = a->items[i].util;
+
+ if (util->matching < 0)
+ printf("%d: %s < -: --------\n",
+ i + 1, short_oid(util));
+ }
+}
+
int cmd_branch_diff(int argc, const char **argv, const char *prefix)
{
double creation_weight = 0.6;
@@ -30,9 +309,63 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
0, parse_creation_weight },
OPT_END()
};
+ int res = 0;
+ struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT;
+ struct string_list branch1 = STRING_LIST_INIT_DUP;
+ struct string_list branch2 = STRING_LIST_INIT_DUP;
argc = parse_options(argc, argv, NULL, options,
builtin_branch_diff_usage, 0);
- return 0;
+ if (argc == 2) {
+ if (!strstr(argv[0], ".."))
+ warning(_("no .. in range: '%s'"), argv[0]);
+ strbuf_addstr(&range1, argv[0]);
+
+ if (!strstr(argv[1], ".."))
+ warning(_("no .. in range: '%s'"), argv[1]);
+ strbuf_addstr(&range2, argv[1]);
+ } else if (argc == 3) {
+ strbuf_addf(&range1, "%s..%s", argv[0], argv[1]);
+ strbuf_addf(&range2, "%s..%s", argv[0], argv[2]);
+ } else if (argc == 1) {
+ const char *b = strstr(argv[0], "..."), *a = argv[0];
+ int a_len;
+
+ if (!b)
+ die(_("single arg format requires a symmetric range"));
+
+ a_len = (int)(b - a);
+ if (!a_len) {
+ a = "HEAD";
+ a_len = strlen(a);
+ }
+ b += 3;
+ if (!*b)
+ b = "HEAD";
+ strbuf_addf(&range1, "%s..%.*s", b, a_len, a);
+ strbuf_addf(&range2, "%.*s..%s", a_len, a, b);
+ } else {
+ error(_("need two commit ranges"));
+ usage_with_options(builtin_branch_diff_usage, options);
+ }
+
+ if (read_patches(range1.buf, &branch1))
+ res = error(_("could not parse log for '%s'"), range1.buf);
+ if (!res && read_patches(range2.buf, &branch2))
+ res = error(_("could not parse log for '%s'"), range2.buf);
+
+ if (!res) {
+ find_exact_matches(&branch1, &branch2);
+ res = get_correspondences(&branch1, &branch2, creation_weight);
+ if (!res)
+ output(&branch1, &branch2);
+ }
+
+ strbuf_release(&range1);
+ strbuf_release(&range2);
+ string_list_clear(&branch1, 1);
+ string_list_clear(&branch2, 1);
+
+ return !!res;
}
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v2 04/18] branch-diff: improve the order of the shown commits
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (2 preceding siblings ...)
2018-05-04 15:34 ` [PATCH v2 03/18] branch-diff: first rudimentary implementation Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-04 15:34 ` [PATCH v2 05/18] branch-diff: also show the diff between patches Johannes Schindelin
` (17 subsequent siblings)
21 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
This patch lets branch-diff use the same order as tbdiff.
The idea is simple: for left-to-right readers, it is natural to assume
that the branch-diff is performed between an older and a newer version of
the branch. As such, the user is probably more interested in the
question "where did this come from?" rather than "where did that one
go?".
To that end, we list the commits in the order of the second commit range
("the newer version"), inserting the unmatched commits of the first
commit range as soon as all their predecessors have been shown.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
builtin/branch-diff.c | 59 +++++++++++++++++++++++++++++--------------
1 file changed, 40 insertions(+), 19 deletions(-)
diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
index c462681067c..92302b1c339 100644
--- a/builtin/branch-diff.c
+++ b/builtin/branch-diff.c
@@ -31,7 +31,7 @@ struct patch_util {
struct hashmap_entry e;
const char *diff, *patch;
- int i;
+ int i, shown;
int diffsize;
size_t diff_offset;
/* the index of the matching item in the other branch, or -1 */
@@ -274,28 +274,49 @@ static const char *short_oid(struct patch_util *util)
static void output(struct string_list *a, struct string_list *b)
{
- int i;
-
- for (i = 0; i < b->nr; i++) {
- struct patch_util *util = b->items[i].util, *prev;
+ int i = 0, j = 0;
+
+ /*
+ * We assume the user is really more interested in the second argument
+ * ("newer" version). To that end, we print the output in the order of
+ * the RHS (the `b` parameter). To put the LHS (the `a` parameter)
+ * commits that are no longer in the RHS into a good place, we place
+ * them once we have shown all of their predecessors in the LHS.
+ */
+
+ while (i < a->nr || j < b->nr) {
+ struct patch_util *a_util, *b_util;
+ a_util = i < a->nr ? a->items[i].util : NULL;
+ b_util = j < b->nr ? b->items[j].util : NULL;
+
+ /* Skip all the already-shown commits from the LHS. */
+ while (i < a->nr && a_util->shown)
+ a_util = ++i < a->nr ? a->items[i].util : NULL;
+
+ /* Show unmatched LHS commit whose predecessors were shown. */
+ if (i < a->nr && a_util->matching < 0) {
+ printf("%d: %s < -: --------\n",
+ i + 1, short_oid(a_util));
+ i++;
+ continue;
+ }
- if (util->matching < 0)
+ /* Show unmatched RHS commits. */
+ while (j < b->nr && b_util->matching < 0) {
printf("-: -------- > %d: %s\n",
- i + 1, short_oid(util));
- else {
- prev = a->items[util->matching].util;
- printf("%d: %s ! %d: %s\n",
- util->matching + 1, short_oid(prev),
- i + 1, short_oid(util));
+ j + 1, short_oid(b_util));
+ b_util = ++j < b->nr ? b->items[j].util : NULL;
}
- }
-
- for (i = 0; i < a->nr; i++) {
- struct patch_util *util = a->items[i].util;
- if (util->matching < 0)
- printf("%d: %s < -: --------\n",
- i + 1, short_oid(util));
+ /* Show matching LHS/RHS pair. */
+ if (j < b->nr) {
+ a_util = a->items[b_util->matching].util;
+ printf("%d: %s ! %d: %s\n",
+ b_util->matching + 1, short_oid(a_util),
+ j + 1, short_oid(b_util));
+ a_util->shown = 1;
+ j++;
+ }
}
}
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v2 05/18] branch-diff: also show the diff between patches
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (3 preceding siblings ...)
2018-05-04 15:34 ` [PATCH v2 04/18] branch-diff: improve the order of the shown commits Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-06 1:14 ` Igor Djordjevic
2018-05-04 15:34 ` [PATCH v2 06/18] branch-diff: right-trim commit messages Johannes Schindelin
` (16 subsequent siblings)
21 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
Just like tbdiff, we now show the diff between matching patches. This is
a "diff of two diffs", so it can be a bit daunting to read for the
beginner.
And just like tbdiff, we now also accept the `--no-patches` option
(which is actually equivalent to the diff option `-s`).
This brings branch-diff closer to feature parity with regard to tbdiff.
An alternative would be to display an interdiff, i.e. the hypothetical
diff which is the result of first reverting the old diff and then
applying the new diff.
Especially when rebasing often, an interdiff is often not feasible,
though: if the old diff cannot be applied in reverse (due to a moving
upstream), an interdiff can simply not be inferred.
Note: while we now parse diff options such as --color, the effect is not
yet the same as in tbdiff, where also the commit pairs would be colored.
This is left for a later commit.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
builtin/branch-diff.c | 53 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 49 insertions(+), 4 deletions(-)
diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
index 92302b1c339..b23d66a3b1c 100644
--- a/builtin/branch-diff.c
+++ b/builtin/branch-diff.c
@@ -7,6 +7,8 @@
#include "hashmap.h"
#include "xdiff-interface.h"
#include "hungarian.h"
+#include "diff.h"
+#include "diffcore.h"
static const char * const builtin_branch_diff_usage[] = {
N_("git branch-diff [<options>] <old-base>..<old-tip> <new-base>..<new-tip>"),
@@ -272,7 +274,31 @@ static const char *short_oid(struct patch_util *util)
return find_unique_abbrev(&util->oid, DEFAULT_ABBREV);
}
-static void output(struct string_list *a, struct string_list *b)
+static struct diff_filespec *get_filespec(const char *name, const char *p)
+{
+ struct diff_filespec *spec = alloc_filespec(name);
+
+ fill_filespec(spec, &null_oid, 0, 0644);
+ spec->data = (char *)p;
+ spec->size = strlen(p);
+ spec->should_munmap = 0;
+ spec->is_stdin = 1;
+
+ return spec;
+}
+
+static void patch_diff(const char *a, const char *b,
+ struct diff_options *diffopt)
+{
+ diff_queue(&diff_queued_diff,
+ get_filespec("a", a), get_filespec("b", b));
+
+ diffcore_std(diffopt);
+ diff_flush(diffopt);
+}
+
+static void output(struct string_list *a, struct string_list *b,
+ struct diff_options *diffopt)
{
int i = 0, j = 0;
@@ -314,6 +340,9 @@ static void output(struct string_list *a, struct string_list *b)
printf("%d: %s ! %d: %s\n",
b_util->matching + 1, short_oid(a_util),
j + 1, short_oid(b_util));
+ if (!(diffopt->output_format & DIFF_FORMAT_NO_OUTPUT))
+ patch_diff(a->items[b_util->matching].string,
+ b->items[j].string, diffopt);
a_util->shown = 1;
j++;
}
@@ -322,21 +351,37 @@ static void output(struct string_list *a, struct string_list *b)
int cmd_branch_diff(int argc, const char **argv, const char *prefix)
{
+ struct diff_options diffopt = { NULL };
double creation_weight = 0.6;
struct option options[] = {
+ OPT_SET_INT(0, "no-patches", &diffopt.output_format,
+ N_("short format (no diffs)"),
+ DIFF_FORMAT_NO_OUTPUT),
{ OPTION_CALLBACK,
0, "creation-weight", &creation_weight, N_("factor"),
N_("Fudge factor by which creation is weighted [0.6]"),
0, parse_creation_weight },
OPT_END()
};
- int res = 0;
+ int i, j, res = 0;
struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT;
struct string_list branch1 = STRING_LIST_INIT_DUP;
struct string_list branch2 = STRING_LIST_INIT_DUP;
+ diff_setup(&diffopt);
+ diffopt.output_format = DIFF_FORMAT_PATCH;
+
argc = parse_options(argc, argv, NULL, options,
- builtin_branch_diff_usage, 0);
+ builtin_branch_diff_usage, PARSE_OPT_KEEP_UNKNOWN);
+
+ for (i = j = 0; i < argc; i++) {
+ int c = diff_opt_parse(&diffopt, argv + i, argc - i, prefix);
+
+ if (!c)
+ argv[j++] = argv[i];
+ }
+ argc = j;
+ diff_setup_done(&diffopt);
if (argc == 2) {
if (!strstr(argv[0], ".."))
@@ -380,7 +425,7 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
find_exact_matches(&branch1, &branch2);
res = get_correspondences(&branch1, &branch2, creation_weight);
if (!res)
- output(&branch1, &branch2);
+ output(&branch1, &branch2, &diffopt);
}
strbuf_release(&range1);
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* Re: [PATCH v2 05/18] branch-diff: also show the diff between patches
2018-05-04 15:34 ` [PATCH v2 05/18] branch-diff: also show the diff between patches Johannes Schindelin
@ 2018-05-06 1:14 ` Igor Djordjevic
2018-05-06 12:18 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Igor Djordjevic @ 2018-05-06 1:14 UTC (permalink / raw)
To: Johannes Schindelin, git
Cc: Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Johannes,
On 04/05/2018 17:34, Johannes Schindelin wrote:
> Just like tbdiff, we now show the diff between matching patches. This is
> a "diff of two diffs", so it can be a bit daunting to read for the
> beginner.
>
> And just like tbdiff, we now also accept the `--no-patches` option
> (which is actually equivalent to the diff option `-s`).
A quick nit - would `--no-patch` (singular form) option name be more
aligned with diff `-s` option it resembles?
Thanks, Buga
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 05/18] branch-diff: also show the diff between patches
2018-05-06 1:14 ` Igor Djordjevic
@ 2018-05-06 12:18 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-06 12:18 UTC (permalink / raw)
To: Igor Djordjevic
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Buga,
On Sun, 6 May 2018, Igor Djordjevic wrote:
> On 04/05/2018 17:34, Johannes Schindelin wrote:
> > Just like tbdiff, we now show the diff between matching patches. This is
> > a "diff of two diffs", so it can be a bit daunting to read for the
> > beginner.
> >
> > And just like tbdiff, we now also accept the `--no-patches` option
> > (which is actually equivalent to the diff option `-s`).
>
> A quick nit - would `--no-patch` (singular form) option name be more
> aligned with diff `-s` option it resembles?
The reason I used `--no-patches` is that tbdiff called it that way.
But you're right, the functionality is already available via -s, and we
*do* make this a distinct thing from tbdiff. So I'll simply drop support
for --no-patches.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* [PATCH v2 06/18] branch-diff: right-trim commit messages
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (4 preceding siblings ...)
2018-05-04 15:34 ` [PATCH v2 05/18] branch-diff: also show the diff between patches Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-04 15:34 ` [PATCH v2 07/18] branch-diff: indent the diffs just like tbdiff Johannes Schindelin
` (15 subsequent siblings)
21 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
When comparing commit messages, we need to keep in mind that they are
indented by four spaces. That is, empty lines are no longer empty, but
have "trailing whitespace". When displaying them in color, that results
in those nagging red lines.
Let's just right-trim the lines in the commit message; it's not like
trailing white-space in the commit messages is important enough to care
about in branch-diff.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
builtin/branch-diff.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
index b23d66a3b1c..e2337b905b1 100644
--- a/builtin/branch-diff.c
+++ b/builtin/branch-diff.c
@@ -105,6 +105,7 @@ static int read_patches(const char *range, struct string_list *list)
strbuf_addbuf(&buf, &line);
strbuf_addstr(&buf, "\n\n");
} else if (starts_with(line.buf, " ")) {
+ strbuf_rtrim(&line);
strbuf_addbuf(&buf, &line);
strbuf_addch(&buf, '\n');
}
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v2 07/18] branch-diff: indent the diffs just like tbdiff
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (5 preceding siblings ...)
2018-05-04 15:34 ` [PATCH v2 06/18] branch-diff: right-trim commit messages Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-06 14:15 ` Martin Ågren
2018-05-04 15:34 ` [PATCH v2 08/18] branch-diff: suppress the diff headers Johannes Schindelin
` (14 subsequent siblings)
21 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
The main information in the branch-diff view comes from the list of
matching and non-matching commits, the diffs are additional information.
Indenting them helps with the reading flow.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
builtin/branch-diff.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
index e2337b905b1..4fc9fd74531 100644
--- a/builtin/branch-diff.c
+++ b/builtin/branch-diff.c
@@ -275,6 +275,11 @@ static const char *short_oid(struct patch_util *util)
return find_unique_abbrev(&util->oid, DEFAULT_ABBREV);
}
+static struct strbuf *output_prefix_cb(struct diff_options *opt, void *data)
+{
+ return data;
+}
+
static struct diff_filespec *get_filespec(const char *name, const char *p)
{
struct diff_filespec *spec = alloc_filespec(name);
@@ -353,6 +358,7 @@ static void output(struct string_list *a, struct string_list *b,
int cmd_branch_diff(int argc, const char **argv, const char *prefix)
{
struct diff_options diffopt = { NULL };
+ struct strbuf four_spaces = STRBUF_INIT;
double creation_weight = 0.6;
struct option options[] = {
OPT_SET_INT(0, "no-patches", &diffopt.output_format,
@@ -371,6 +377,9 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
diff_setup(&diffopt);
diffopt.output_format = DIFF_FORMAT_PATCH;
+ diffopt.output_prefix = output_prefix_cb;
+ strbuf_addstr(&four_spaces, " ");
+ diffopt.output_prefix_data = &four_spaces;
argc = parse_options(argc, argv, NULL, options,
builtin_branch_diff_usage, PARSE_OPT_KEEP_UNKNOWN);
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* Re: [PATCH v2 07/18] branch-diff: indent the diffs just like tbdiff
2018-05-04 15:34 ` [PATCH v2 07/18] branch-diff: indent the diffs just like tbdiff Johannes Schindelin
@ 2018-05-06 14:15 ` Martin Ågren
2018-05-07 1:54 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Martin Ågren @ 2018-05-06 14:15 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Git Mailing List, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On 4 May 2018 at 17:34, Johannes Schindelin <johannes.schindelin@gmx.de> wrote:
> @@ -353,6 +358,7 @@ static void output(struct string_list *a, struct string_list *b,
> int cmd_branch_diff(int argc, const char **argv, const char *prefix)
> {
> struct diff_options diffopt = { NULL };
> + struct strbuf four_spaces = STRBUF_INIT;
> double creation_weight = 0.6;
> struct option options[] = {
> OPT_SET_INT(0, "no-patches", &diffopt.output_format,
> @@ -371,6 +377,9 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
>
> diff_setup(&diffopt);
> diffopt.output_format = DIFF_FORMAT_PATCH;
> + diffopt.output_prefix = output_prefix_cb;
> + strbuf_addstr(&four_spaces, " ");
> + diffopt.output_prefix_data = &four_spaces;
>
> argc = parse_options(argc, argv, NULL, options,
> builtin_branch_diff_usage, PARSE_OPT_KEEP_UNKNOWN);
You end up leaking the buffer of `four_spaces`. Granted, that's not a
big memory leak, but still. ;-) This was the only leak that
LeakSanitizer found in v2 when running the new test-script and playing
around with this a bit. This looks really good!
Martin
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 07/18] branch-diff: indent the diffs just like tbdiff
2018-05-06 14:15 ` Martin Ågren
@ 2018-05-07 1:54 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-07 1:54 UTC (permalink / raw)
To: Martin Ågren
Cc: Git Mailing List, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
[-- Attachment #1: Type: text/plain, Size: 1369 bytes --]
Hi Martin,
On Sun, 6 May 2018, Martin Ågren wrote:
> On 4 May 2018 at 17:34, Johannes Schindelin <johannes.schindelin@gmx.de> wrote:
> > @@ -353,6 +358,7 @@ static void output(struct string_list *a, struct string_list *b,
> > int cmd_branch_diff(int argc, const char **argv, const char *prefix)
> > {
> > struct diff_options diffopt = { NULL };
> > + struct strbuf four_spaces = STRBUF_INIT;
> > double creation_weight = 0.6;
> > struct option options[] = {
> > OPT_SET_INT(0, "no-patches", &diffopt.output_format,
> > @@ -371,6 +377,9 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
> >
> > diff_setup(&diffopt);
> > diffopt.output_format = DIFF_FORMAT_PATCH;
> > + diffopt.output_prefix = output_prefix_cb;
> > + strbuf_addstr(&four_spaces, " ");
> > + diffopt.output_prefix_data = &four_spaces;
> >
> > argc = parse_options(argc, argv, NULL, options,
> > builtin_branch_diff_usage, PARSE_OPT_KEEP_UNKNOWN);
>
> You end up leaking the buffer of `four_spaces`. Granted, that's not a
> big memory leak, but still. ;-) This was the only leak that
> LeakSanitizer found in v2 when running the new test-script and playing
> around with this a bit. This looks really good!
Good point. Fixed.
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* [PATCH v2 08/18] branch-diff: suppress the diff headers
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (6 preceding siblings ...)
2018-05-04 15:34 ` [PATCH v2 07/18] branch-diff: indent the diffs just like tbdiff Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-04 15:34 ` [PATCH v2 09/18] branch-diff: adjust the output of the commit pairs Johannes Schindelin
` (13 subsequent siblings)
21 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
When showing the diff between corresponding patches of the two branch
versions, we have to make up a fake filename to run the diff machinery.
That filename does not carry any meaningful information, hence tbdiff
suppresses it. So we should, too.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
builtin/branch-diff.c | 1 +
diff.c | 5 ++++-
diff.h | 1 +
3 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
index 4fc9fd74531..ed520d6229d 100644
--- a/builtin/branch-diff.c
+++ b/builtin/branch-diff.c
@@ -377,6 +377,7 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
diff_setup(&diffopt);
diffopt.output_format = DIFF_FORMAT_PATCH;
+ diffopt.flags.suppress_diff_headers = 1;
diffopt.output_prefix = output_prefix_cb;
strbuf_addstr(&four_spaces, " ");
diffopt.output_prefix_data = &four_spaces;
diff --git a/diff.c b/diff.c
index 1289df4b1f9..f1bda0db3f5 100644
--- a/diff.c
+++ b/diff.c
@@ -3197,13 +3197,16 @@ static void builtin_diff(const char *name_a,
memset(&xpp, 0, sizeof(xpp));
memset(&xecfg, 0, sizeof(xecfg));
memset(&ecbdata, 0, sizeof(ecbdata));
+ if (o->flags.suppress_diff_headers)
+ lbl[0] = NULL;
ecbdata.label_path = lbl;
ecbdata.color_diff = want_color(o->use_color);
ecbdata.ws_rule = whitespace_rule(name_b);
if (ecbdata.ws_rule & WS_BLANK_AT_EOF)
check_blank_at_eof(&mf1, &mf2, &ecbdata);
ecbdata.opt = o;
- ecbdata.header = header.len ? &header : NULL;
+ if (header.len && !o->flags.suppress_diff_headers)
+ ecbdata.header = &header;
xpp.flags = o->xdl_opts;
xpp.anchors = o->anchors;
xpp.anchors_nr = o->anchors_nr;
diff --git a/diff.h b/diff.h
index d29560f822c..0dd6a71af60 100644
--- a/diff.h
+++ b/diff.h
@@ -94,6 +94,7 @@ struct diff_flags {
unsigned funccontext:1;
unsigned default_follow_renames:1;
unsigned stat_with_summary:1;
+ unsigned suppress_diff_headers:1;
};
static inline void diff_flags_or(struct diff_flags *a,
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v2 09/18] branch-diff: adjust the output of the commit pairs
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (7 preceding siblings ...)
2018-05-04 15:34 ` [PATCH v2 08/18] branch-diff: suppress the diff headers Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-04 16:25 ` Elijah Newren
2018-05-04 15:34 ` [PATCH v2 10/18] branch-diff: do not show "function names" in hunk headers Johannes Schindelin
` (12 subsequent siblings)
21 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
This change brings branch-diff yet another step closer to feature parity
with tbdiff: it now shows the oneline, too, and indicates with `=` when
the commits have identical diffs.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
builtin/branch-diff.c | 67 +++++++++++++++++++++++++++++++++++++------
1 file changed, 58 insertions(+), 9 deletions(-)
diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
index ed520d6229d..5b187890bdf 100644
--- a/builtin/branch-diff.c
+++ b/builtin/branch-diff.c
@@ -9,6 +9,8 @@
#include "hungarian.h"
#include "diff.h"
#include "diffcore.h"
+#include "commit.h"
+#include "pretty.h"
static const char * const builtin_branch_diff_usage[] = {
N_("git branch-diff [<options>] <old-base>..<old-tip> <new-base>..<new-tip>"),
@@ -270,9 +272,57 @@ static int get_correspondences(struct string_list *a, struct string_list *b,
return res;
}
-static const char *short_oid(struct patch_util *util)
+static void output_pair_header(struct strbuf *buf,
+ int i, struct patch_util *a_util,
+ int j, struct patch_util *b_util)
{
- return find_unique_abbrev(&util->oid, DEFAULT_ABBREV);
+ static char *dashes;
+ struct object_id *oid = a_util ? &a_util->oid : &b_util->oid;
+ struct commit *commit;
+
+ if (!dashes) {
+ char *p;
+
+ dashes = xstrdup(find_unique_abbrev(oid, DEFAULT_ABBREV));
+ for (p = dashes; *p; p++)
+ *p = '-';
+ }
+
+ strbuf_reset(buf);
+ if (i < 0)
+ strbuf_addf(buf, "-: %s ", dashes);
+ else
+ strbuf_addf(buf, "%d: %s ", i + 1,
+ find_unique_abbrev(&a_util->oid, DEFAULT_ABBREV));
+
+ if (i < 0)
+ strbuf_addch(buf, '>');
+ else if (j < 0)
+ strbuf_addch(buf, '<');
+ else if (strcmp(a_util->patch, b_util->patch))
+ strbuf_addch(buf, '!');
+ else
+ strbuf_addch(buf, '=');
+
+ if (j < 0)
+ strbuf_addf(buf, " -: %s", dashes);
+ else
+ strbuf_addf(buf, " %d: %s", j + 1,
+ find_unique_abbrev(&b_util->oid, DEFAULT_ABBREV));
+
+ commit = lookup_commit_reference(oid);
+ if (commit) {
+ const char *commit_buffer = get_commit_buffer(commit, NULL);
+ const char *subject;
+
+ find_commit_subject(commit_buffer, &subject);
+ strbuf_addch(buf, ' ');
+ format_subject(buf, subject, " ");
+ unuse_commit_buffer(commit, commit_buffer);
+ }
+ strbuf_addch(buf, '\n');
+
+ fwrite(buf->buf, buf->len, 1, stdout);
}
static struct strbuf *output_prefix_cb(struct diff_options *opt, void *data)
@@ -306,6 +356,7 @@ static void patch_diff(const char *a, const char *b,
static void output(struct string_list *a, struct string_list *b,
struct diff_options *diffopt)
{
+ struct strbuf buf = STRBUF_INIT;
int i = 0, j = 0;
/*
@@ -327,25 +378,22 @@ static void output(struct string_list *a, struct string_list *b,
/* Show unmatched LHS commit whose predecessors were shown. */
if (i < a->nr && a_util->matching < 0) {
- printf("%d: %s < -: --------\n",
- i + 1, short_oid(a_util));
+ output_pair_header(&buf, i, a_util, -1, NULL);
i++;
continue;
}
/* Show unmatched RHS commits. */
while (j < b->nr && b_util->matching < 0) {
- printf("-: -------- > %d: %s\n",
- j + 1, short_oid(b_util));
+ output_pair_header(&buf, -1, NULL, j, b_util);
b_util = ++j < b->nr ? b->items[j].util : NULL;
}
/* Show matching LHS/RHS pair. */
if (j < b->nr) {
a_util = a->items[b_util->matching].util;
- printf("%d: %s ! %d: %s\n",
- b_util->matching + 1, short_oid(a_util),
- j + 1, short_oid(b_util));
+ output_pair_header(&buf,
+ b_util->matching, a_util, j, b_util);
if (!(diffopt->output_format & DIFF_FORMAT_NO_OUTPUT))
patch_diff(a->items[b_util->matching].string,
b->items[j].string, diffopt);
@@ -353,6 +401,7 @@ static void output(struct string_list *a, struct string_list *b,
j++;
}
}
+ strbuf_release(&buf);
}
int cmd_branch_diff(int argc, const char **argv, const char *prefix)
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* Re: [PATCH v2 09/18] branch-diff: adjust the output of the commit pairs
2018-05-04 15:34 ` [PATCH v2 09/18] branch-diff: adjust the output of the commit pairs Johannes Schindelin
@ 2018-05-04 16:25 ` Elijah Newren
0 siblings, 0 replies; 387+ messages in thread
From: Elijah Newren @ 2018-05-04 16:25 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Git Mailing List, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Dscho,
On Fri, May 4, 2018 at 8:34 AM, Johannes Schindelin
<johannes.schindelin@gmx.de> wrote:
> This change brings branch-diff yet another step closer to feature parity
> with tbdiff: it now shows the oneline, too, and indicates with `=` when
> the commits have identical diffs.
>
<snip>
> @@ -270,9 +272,57 @@ static int get_correspondences(struct string_list *a, struct string_list *b,
> return res;
> }
>
> -static const char *short_oid(struct patch_util *util)
> +static void output_pair_header(struct strbuf *buf,
> + int i, struct patch_util *a_util,
> + int j, struct patch_util *b_util)
> {
> - return find_unique_abbrev(&util->oid, DEFAULT_ABBREV);
> + static char *dashes;
> + struct object_id *oid = a_util ? &a_util->oid : &b_util->oid;
> + struct commit *commit;
> +
> + if (!dashes) {
> + char *p;
> +
> + dashes = xstrdup(find_unique_abbrev(oid, DEFAULT_ABBREV));
> + for (p = dashes; *p; p++)
> + *p = '-';
> + }
> +
> + strbuf_reset(buf);
> + if (i < 0)
> + strbuf_addf(buf, "-: %s ", dashes);
> + else
> + strbuf_addf(buf, "%d: %s ", i + 1,
One nice thing tbdiff did was to right align patch numbers (which also
helped align other columns in the output). So, for example when there
are more than 9 patches I would see output like:
...
 8: a980de43fd =  8: 362ab315ac directory rename detection: testcases
exploring possibly suboptimal merges
 9: 3633e79ed9 =  9: 792e1371d9 directory rename detection:
miscellaneous testcases to complete coverage
10: e10d07ef40 = 10: a0b0a15103 directory rename detection: tests for
handling overwriting untracked files
11: f6d84b503e = 11: a7a436042a directory rename detection: tests for
handling overwriting dirty files
...
whereas branch-diff here is instead giving output of the form
...
8: a980de43fd = 8: 362ab315ac directory rename detection: testcases
exploring possibly suboptimal merges
9: 3633e79ed9 = 9: 792e1371d9 directory rename detection:
miscellaneous testcases to complete coverage
10: e10d07ef40 = 10: a0b0a15103 directory rename detection: tests
for handling overwriting untracked files
11: f6d84b503e = 11: a7a436042a directory rename detection: tests
for handling overwriting dirty files
...
Not a critical difference, but it'd be nice to match tbdiff here all the same.
> + find_unique_abbrev(&a_util->oid, DEFAULT_ABBREV));
> +
> + if (i < 0)
> + strbuf_addch(buf, '>');
> + else if (j < 0)
> + strbuf_addch(buf, '<');
> + else if (strcmp(a_util->patch, b_util->patch))
> + strbuf_addch(buf, '!');
> + else
> + strbuf_addch(buf, '=');
> +
> + if (j < 0)
> + strbuf_addf(buf, " -: %s", dashes);
> + else
> + strbuf_addf(buf, " %d: %s", j + 1,
Same comment on these last two strbuf_addf's about alignment.
Elijah
^ permalink raw reply [flat|nested] 387+ messages in thread
* [PATCH v2 10/18] branch-diff: do not show "function names" in hunk headers
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (8 preceding siblings ...)
2018-05-04 15:34 ` [PATCH v2 09/18] branch-diff: adjust the output of the commit pairs Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-04 15:34 ` [PATCH v2 11/18] branch-diff: add tests Johannes Schindelin
` (11 subsequent siblings)
21 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
We are comparing complete, formatted commit messages with patches. There
are no function names here, so stop looking for them.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
builtin/branch-diff.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
index 5b187890bdf..89d75c93115 100644
--- a/builtin/branch-diff.c
+++ b/builtin/branch-diff.c
@@ -11,6 +11,7 @@
#include "diffcore.h"
#include "commit.h"
#include "pretty.h"
+#include "userdiff.h"
static const char * const builtin_branch_diff_usage[] = {
N_("git branch-diff [<options>] <old-base>..<old-tip> <new-base>..<new-tip>"),
@@ -330,6 +331,10 @@ static struct strbuf *output_prefix_cb(struct diff_options *opt, void *data)
return data;
}
+static struct userdiff_driver no_func_name = {
+ .funcname = { "$^", 0 }
+};
+
static struct diff_filespec *get_filespec(const char *name, const char *p)
{
struct diff_filespec *spec = alloc_filespec(name);
@@ -339,6 +344,7 @@ static struct diff_filespec *get_filespec(const char *name, const char *p)
spec->size = strlen(p);
spec->should_munmap = 0;
spec->is_stdin = 1;
+ spec->driver = &no_func_name;
return spec;
}
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v2 11/18] branch-diff: add tests
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (9 preceding siblings ...)
2018-05-04 15:34 ` [PATCH v2 10/18] branch-diff: do not show "function names" in hunk headers Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-04 15:34 ` [PATCH v2 12/18] branch-diff: use color for the commit pairs Johannes Schindelin
` (10 subsequent siblings)
21 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Thomas Rast, Junio C Hamano,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
From: Thomas Rast <tr@thomasrast.ch>
These are essentially lifted from https://github.com/trast/tbdiff, with
light touch-ups to account for the new command name.
Apart from renaming `tbdiff` to `branch-diff`, only one test case needed
to be adjusted: 11 - 'changed message'.
The underlying reason it had to be adjusted is that diff generation is
sometimes ambiguous. In this case, a comment line and an empty line are
added, but it is ambiguous whether they were added after the existing
empty line, or whether an empty line and the comment line were added
*before* the existing empty line. And apparently xdiff picks a different
option here than Python's difflib.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
t/.gitattributes | 1 +
t/t7910-branch-diff.sh | 144 ++++++++++
t/t7910/history.export | 604 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 749 insertions(+)
create mode 100755 t/t7910-branch-diff.sh
create mode 100644 t/t7910/history.export
diff --git a/t/.gitattributes b/t/.gitattributes
index 3bd959ae523..af15d5aeedd 100644
--- a/t/.gitattributes
+++ b/t/.gitattributes
@@ -18,5 +18,6 @@ t[0-9][0-9][0-9][0-9]/* -whitespace
/t5515/* eol=lf
/t556x_common eol=lf
/t7500/* eol=lf
+/t7910/* eol=lf
/t8005/*.txt eol=lf
/t9*/*.dump eol=lf
diff --git a/t/t7910-branch-diff.sh b/t/t7910-branch-diff.sh
new file mode 100755
index 00000000000..a7fece88045
--- /dev/null
+++ b/t/t7910-branch-diff.sh
@@ -0,0 +1,144 @@
+#!/bin/sh
+
+test_description='branch-diff tests'
+
+. ./test-lib.sh
+
+# Note that because of git-branch-diff's heuristics, test_commit does more
+# harm than good. We need some real history.
+
+test_expect_success 'setup' '
+ git fast-import < "$TEST_DIRECTORY"/t7910/history.export
+'
+
+test_expect_success 'simple A..B A..C (unmodified)' '
+ git branch-diff --no-color master..topic master..unmodified >actual &&
+ cat >expected <<-EOF &&
+ 1: 4de457d = 1: 35b9b25 s/5/A/
+ 2: fccce22 = 2: de345ab s/4/A/
+ 3: 147e64e = 3: 9af6654 s/11/B/
+ 4: a63e992 = 4: 2901f77 s/12/B/
+ EOF
+ test_cmp expected actual
+'
+
+test_expect_success 'simple B...C (unmodified)' '
+ git branch-diff --no-color topic...unmodified >actual &&
+ # same "expected" as above
+ test_cmp expected actual
+'
+
+test_expect_success 'simple A B C (unmodified)' '
+ git branch-diff --no-color master topic unmodified >actual &&
+ # same "expected" as above
+ test_cmp expected actual
+'
+
+test_expect_success 'trivial reordering' '
+ git branch-diff --no-color master topic reordered >actual &&
+ cat >expected <<-EOF &&
+ 1: 4de457d = 1: aca177a s/5/A/
+ 3: 147e64e = 2: 14ad629 s/11/B/
+ 4: a63e992 = 3: ee58208 s/12/B/
+ 2: fccce22 = 4: 307b27a s/4/A/
+ EOF
+ test_cmp expected actual
+'
+
+test_expect_success 'removed a commit' '
+ git branch-diff --no-color master topic removed >actual &&
+ cat >expected <<-EOF &&
+ 1: 4de457d = 1: 7657159 s/5/A/
+ 2: fccce22 < -: ------- s/4/A/
+ 3: 147e64e = 2: 43d84d3 s/11/B/
+ 4: a63e992 = 3: a740396 s/12/B/
+ EOF
+ test_cmp expected actual
+'
+
+test_expect_success 'added a commit' '
+ git branch-diff --no-color master topic added >actual &&
+ cat >expected <<-EOF &&
+ 1: 4de457d = 1: 2716022 s/5/A/
+ 2: fccce22 = 2: b62accd s/4/A/
+ -: ------- > 3: df46cfa s/6/A/
+ 3: 147e64e = 4: 3e64548 s/11/B/
+ 4: a63e992 = 5: 12b4063 s/12/B/
+ EOF
+ test_cmp expected actual
+'
+
+test_expect_success 'new base, A B C' '
+ git branch-diff --no-color master topic rebased >actual &&
+ cat >expected <<-EOF &&
+ 1: 4de457d = 1: cc9c443 s/5/A/
+ 2: fccce22 = 2: c5d9641 s/4/A/
+ 3: 147e64e = 3: 28cc2b6 s/11/B/
+ 4: a63e992 = 4: 5628ab7 s/12/B/
+ EOF
+ test_cmp expected actual
+'
+
+test_expect_success 'new base, B...C' '
+ # this syntax includes the commits from master!
+ git branch-diff --no-color topic...rebased >actual &&
+ cat >expected <<-EOF &&
+ -: ------- > 1: a31b12e unrelated
+ 1: 4de457d = 2: cc9c443 s/5/A/
+ 2: fccce22 = 3: c5d9641 s/4/A/
+ 3: 147e64e = 4: 28cc2b6 s/11/B/
+ 4: a63e992 = 5: 5628ab7 s/12/B/
+ EOF
+ test_cmp expected actual
+'
+
+test_expect_success 'changed commit' '
+ git branch-diff --no-color topic...changed >actual &&
+ cat >expected <<-EOF &&
+ 1: 4de457d = 1: a4b3333 s/5/A/
+ 2: fccce22 = 2: f51d370 s/4/A/
+ 3: 147e64e ! 3: 0559556 s/11/B/
+ @@ -10,7 +10,7 @@
+ 9
+ 10
+ -11
+ -+B
+ ++BB
+ 12
+ 13
+ 14
+ 4: a63e992 ! 4: d966c5c s/12/B/
+ @@ -8,7 +8,7 @@
+ @@
+ 9
+ 10
+ - B
+ + BB
+ -12
+ +B
+ 13
+ EOF
+ test_cmp expected actual
+'
+
+test_expect_success 'changed message' '
+ git branch-diff --no-color topic...changed-message >actual &&
+ sed s/Z/\ /g >expected <<-EOF &&
+ 1: 4de457d = 1: f686024 s/5/A/
+ 2: fccce22 ! 2: 4ab067d s/4/A/
+ @@ -2,6 +2,8 @@
+ Z
+ Z s/4/A/
+ Z
+ + Also a silly comment here!
+ +
+ Zdiff --git a/file b/file
+ Z--- a/file
+ Z+++ b/file
+ 3: 147e64e = 3: b9cb956 s/11/B/
+ 4: a63e992 = 4: 8add5f1 s/12/B/
+ EOF
+ test_cmp expected actual
+'
+
+test_done
diff --git a/t/t7910/history.export b/t/t7910/history.export
new file mode 100644
index 00000000000..b8ffff0940d
--- /dev/null
+++ b/t/t7910/history.export
@@ -0,0 +1,604 @@
+blob
+mark :1
+data 51
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+
+reset refs/heads/removed
+commit refs/heads/removed
+mark :2
+author Thomas Rast <trast@inf.ethz.ch> 1374424921 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374484724 +0200
+data 8
+initial
+M 100644 :1 file
+
+blob
+mark :3
+data 51
+1
+2
+3
+4
+A
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+
+commit refs/heads/topic
+mark :4
+author Thomas Rast <trast@inf.ethz.ch> 1374485014 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485014 +0200
+data 7
+s/5/A/
+from :2
+M 100644 :3 file
+
+blob
+mark :5
+data 51
+1
+2
+3
+A
+A
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+
+commit refs/heads/topic
+mark :6
+author Thomas Rast <trast@inf.ethz.ch> 1374485024 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485024 +0200
+data 7
+s/4/A/
+from :4
+M 100644 :5 file
+
+blob
+mark :7
+data 50
+1
+2
+3
+A
+A
+6
+7
+8
+9
+10
+B
+12
+13
+14
+15
+16
+17
+18
+19
+20
+
+commit refs/heads/topic
+mark :8
+author Thomas Rast <trast@inf.ethz.ch> 1374485036 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485036 +0200
+data 8
+s/11/B/
+from :6
+M 100644 :7 file
+
+blob
+mark :9
+data 49
+1
+2
+3
+A
+A
+6
+7
+8
+9
+10
+B
+B
+13
+14
+15
+16
+17
+18
+19
+20
+
+commit refs/heads/topic
+mark :10
+author Thomas Rast <trast@inf.ethz.ch> 1374485044 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485044 +0200
+data 8
+s/12/B/
+from :8
+M 100644 :9 file
+
+blob
+mark :11
+data 10
+unrelated
+
+commit refs/heads/master
+mark :12
+author Thomas Rast <trast@inf.ethz.ch> 1374485127 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485127 +0200
+data 10
+unrelated
+from :2
+M 100644 :11 otherfile
+
+commit refs/heads/rebased
+mark :13
+author Thomas Rast <trast@inf.ethz.ch> 1374485014 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485137 +0200
+data 7
+s/5/A/
+from :12
+M 100644 :3 file
+
+commit refs/heads/rebased
+mark :14
+author Thomas Rast <trast@inf.ethz.ch> 1374485024 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485138 +0200
+data 7
+s/4/A/
+from :13
+M 100644 :5 file
+
+commit refs/heads/rebased
+mark :15
+author Thomas Rast <trast@inf.ethz.ch> 1374485036 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485138 +0200
+data 8
+s/11/B/
+from :14
+M 100644 :7 file
+
+commit refs/heads/rebased
+mark :16
+author Thomas Rast <trast@inf.ethz.ch> 1374485044 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485138 +0200
+data 8
+s/12/B/
+from :15
+M 100644 :9 file
+
+commit refs/heads/added
+mark :17
+author Thomas Rast <trast@inf.ethz.ch> 1374485014 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485341 +0200
+data 7
+s/5/A/
+from :2
+M 100644 :3 file
+
+commit refs/heads/added
+mark :18
+author Thomas Rast <trast@inf.ethz.ch> 1374485024 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485341 +0200
+data 7
+s/4/A/
+from :17
+M 100644 :5 file
+
+blob
+mark :19
+data 51
+1
+2
+3
+A
+A
+A
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+
+commit refs/heads/added
+mark :20
+author Thomas Rast <trast@inf.ethz.ch> 1374485186 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485341 +0200
+data 7
+s/6/A/
+from :18
+M 100644 :19 file
+
+blob
+mark :21
+data 50
+1
+2
+3
+A
+A
+A
+7
+8
+9
+10
+B
+12
+13
+14
+15
+16
+17
+18
+19
+20
+
+commit refs/heads/added
+mark :22
+author Thomas Rast <trast@inf.ethz.ch> 1374485036 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485341 +0200
+data 8
+s/11/B/
+from :20
+M 100644 :21 file
+
+blob
+mark :23
+data 49
+1
+2
+3
+A
+A
+A
+7
+8
+9
+10
+B
+B
+13
+14
+15
+16
+17
+18
+19
+20
+
+commit refs/heads/added
+mark :24
+author Thomas Rast <trast@inf.ethz.ch> 1374485044 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485341 +0200
+data 8
+s/12/B/
+from :22
+M 100644 :23 file
+
+commit refs/heads/reordered
+mark :25
+author Thomas Rast <trast@inf.ethz.ch> 1374485014 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485350 +0200
+data 7
+s/5/A/
+from :2
+M 100644 :3 file
+
+blob
+mark :26
+data 50
+1
+2
+3
+4
+A
+6
+7
+8
+9
+10
+B
+12
+13
+14
+15
+16
+17
+18
+19
+20
+
+commit refs/heads/reordered
+mark :27
+author Thomas Rast <trast@inf.ethz.ch> 1374485036 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485350 +0200
+data 8
+s/11/B/
+from :25
+M 100644 :26 file
+
+blob
+mark :28
+data 49
+1
+2
+3
+4
+A
+6
+7
+8
+9
+10
+B
+B
+13
+14
+15
+16
+17
+18
+19
+20
+
+commit refs/heads/reordered
+mark :29
+author Thomas Rast <trast@inf.ethz.ch> 1374485044 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485350 +0200
+data 8
+s/12/B/
+from :27
+M 100644 :28 file
+
+commit refs/heads/reordered
+mark :30
+author Thomas Rast <trast@inf.ethz.ch> 1374485024 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485350 +0200
+data 7
+s/4/A/
+from :29
+M 100644 :9 file
+
+commit refs/heads/changed
+mark :31
+author Thomas Rast <trast@inf.ethz.ch> 1374485014 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485507 +0200
+data 7
+s/5/A/
+from :2
+M 100644 :3 file
+
+commit refs/heads/changed
+mark :32
+author Thomas Rast <trast@inf.ethz.ch> 1374485024 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485507 +0200
+data 7
+s/4/A/
+from :31
+M 100644 :5 file
+
+blob
+mark :33
+data 51
+1
+2
+3
+A
+A
+6
+7
+8
+9
+10
+BB
+12
+13
+14
+15
+16
+17
+18
+19
+20
+
+commit refs/heads/changed
+mark :34
+author Thomas Rast <trast@inf.ethz.ch> 1374485036 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485507 +0200
+data 8
+s/11/B/
+from :32
+M 100644 :33 file
+
+blob
+mark :35
+data 50
+1
+2
+3
+A
+A
+6
+7
+8
+9
+10
+BB
+B
+13
+14
+15
+16
+17
+18
+19
+20
+
+commit refs/heads/changed
+mark :36
+author Thomas Rast <trast@inf.ethz.ch> 1374485044 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485507 +0200
+data 8
+s/12/B/
+from :34
+M 100644 :35 file
+
+commit refs/heads/changed-message
+mark :37
+author Thomas Rast <trast@inf.ethz.ch> 1374485014 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485530 +0200
+data 7
+s/5/A/
+from :2
+M 100644 :3 file
+
+commit refs/heads/changed-message
+mark :38
+author Thomas Rast <trast@inf.ethz.ch> 1374485024 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485530 +0200
+data 35
+s/4/A/
+
+Also a silly comment here!
+from :37
+M 100644 :5 file
+
+commit refs/heads/changed-message
+mark :39
+author Thomas Rast <trast@inf.ethz.ch> 1374485036 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485536 +0200
+data 8
+s/11/B/
+from :38
+M 100644 :7 file
+
+commit refs/heads/changed-message
+mark :40
+author Thomas Rast <trast@inf.ethz.ch> 1374485044 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485536 +0200
+data 8
+s/12/B/
+from :39
+M 100644 :9 file
+
+commit refs/heads/unmodified
+mark :41
+author Thomas Rast <trast@inf.ethz.ch> 1374485014 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485631 +0200
+data 7
+s/5/A/
+from :2
+M 100644 :3 file
+
+commit refs/heads/unmodified
+mark :42
+author Thomas Rast <trast@inf.ethz.ch> 1374485024 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485631 +0200
+data 7
+s/4/A/
+from :41
+M 100644 :5 file
+
+commit refs/heads/unmodified
+mark :43
+author Thomas Rast <trast@inf.ethz.ch> 1374485036 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485632 +0200
+data 8
+s/11/B/
+from :42
+M 100644 :7 file
+
+commit refs/heads/unmodified
+mark :44
+author Thomas Rast <trast@inf.ethz.ch> 1374485044 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374485632 +0200
+data 8
+s/12/B/
+from :43
+M 100644 :9 file
+
+commit refs/heads/removed
+mark :45
+author Thomas Rast <trast@inf.ethz.ch> 1374485014 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374486061 +0200
+data 7
+s/5/A/
+from :2
+M 100644 :3 file
+
+commit refs/heads/removed
+mark :46
+author Thomas Rast <trast@inf.ethz.ch> 1374485036 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374486061 +0200
+data 8
+s/11/B/
+from :45
+M 100644 :26 file
+
+commit refs/heads/removed
+mark :47
+author Thomas Rast <trast@inf.ethz.ch> 1374485044 +0200
+committer Thomas Rast <trast@inf.ethz.ch> 1374486061 +0200
+data 8
+s/12/B/
+from :46
+M 100644 :28 file
+
+reset refs/heads/removed
+from :47
+
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v2 12/18] branch-diff: use color for the commit pairs
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (10 preceding siblings ...)
2018-05-04 15:34 ` [PATCH v2 11/18] branch-diff: add tests Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-05 23:48 ` Todd Zullinger
2018-05-04 15:34 ` [PATCH v2 13/18] color: provide inverted colors, too Johannes Schindelin
` (9 subsequent siblings)
21 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
Arguably the most important part of branch-diff's output is the list of
commits in the two branches, together with their relationships.
For that reason, tbdiff introduced color-coding that is pretty
intuitive, especially for unchanged patches (all dim yellow, like the
first line in `git show`'s output) vs modified patches (old commit is
red, new commit is green). Let's imitate that color scheme.
While at it, also copy tbdiff's change of the fragment color to magenta.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
builtin/branch-diff.c | 49 +++++++++++++++++++++++++++++++------------
1 file changed, 36 insertions(+), 13 deletions(-)
diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
index 89d75c93115..04efd30f0f6 100644
--- a/builtin/branch-diff.c
+++ b/builtin/branch-diff.c
@@ -273,13 +273,19 @@ static int get_correspondences(struct string_list *a, struct string_list *b,
return res;
}
-static void output_pair_header(struct strbuf *buf,
+static void output_pair_header(struct diff_options *diffopt, struct strbuf *buf,
int i, struct patch_util *a_util,
int j, struct patch_util *b_util)
{
static char *dashes;
struct object_id *oid = a_util ? &a_util->oid : &b_util->oid;
struct commit *commit;
+ char status;
+ const char *color_reset = diff_get_color_opt(diffopt, DIFF_RESET);
+ const char *color_old = diff_get_color_opt(diffopt, DIFF_FILE_OLD);
+ const char *color_new = diff_get_color_opt(diffopt, DIFF_FILE_NEW);
+ const char *color_commit = diff_get_color_opt(diffopt, DIFF_COMMIT);
+ const char *color;
if (!dashes) {
char *p;
@@ -289,21 +295,33 @@ static void output_pair_header(struct strbuf *buf,
*p = '-';
}
+ if (j < 0) {
+ color = color_old;
+ status = '<';
+ } else if (i < 0) {
+ color = color_new;
+ status = '>';
+ } else if (strcmp(a_util->patch, b_util->patch)) {
+ color = color_commit;
+ status = '!';
+ } else {
+ color = color_commit;
+ status = '=';
+ }
+
strbuf_reset(buf);
+ strbuf_addstr(buf, status == '!' ? color_old : color);
if (i < 0)
strbuf_addf(buf, "-: %s ", dashes);
else
strbuf_addf(buf, "%d: %s ", i + 1,
find_unique_abbrev(&a_util->oid, DEFAULT_ABBREV));
- if (i < 0)
- strbuf_addch(buf, '>');
- else if (j < 0)
- strbuf_addch(buf, '<');
- else if (strcmp(a_util->patch, b_util->patch))
- strbuf_addch(buf, '!');
- else
- strbuf_addch(buf, '=');
+ if (status == '!')
+ strbuf_addf(buf, "%s%s", color_reset, color);
+ strbuf_addch(buf, status);
+ if (status == '!')
+ strbuf_addf(buf, "%s%s", color_reset, color_new);
if (j < 0)
strbuf_addf(buf, " -: %s", dashes);
@@ -316,12 +334,15 @@ static void output_pair_header(struct strbuf *buf,
const char *commit_buffer = get_commit_buffer(commit, NULL);
const char *subject;
+ if (status == '!')
+ strbuf_addf(buf, "%s%s", color_reset, color);
+
find_commit_subject(commit_buffer, &subject);
strbuf_addch(buf, ' ');
format_subject(buf, subject, " ");
unuse_commit_buffer(commit, commit_buffer);
}
- strbuf_addch(buf, '\n');
+ strbuf_addf(buf, "%s\n", color_reset);
fwrite(buf->buf, buf->len, 1, stdout);
}
@@ -384,21 +405,21 @@ static void output(struct string_list *a, struct string_list *b,
/* Show unmatched LHS commit whose predecessors were shown. */
if (i < a->nr && a_util->matching < 0) {
- output_pair_header(&buf, i, a_util, -1, NULL);
+ output_pair_header(diffopt, &buf, i, a_util, -1, NULL);
i++;
continue;
}
/* Show unmatched RHS commits. */
while (j < b->nr && b_util->matching < 0) {
- output_pair_header(&buf, -1, NULL, j, b_util);
+ output_pair_header(diffopt, &buf, -1, NULL, j, b_util);
b_util = ++j < b->nr ? b->items[j].util : NULL;
}
/* Show matching LHS/RHS pair. */
if (j < b->nr) {
a_util = a->items[b_util->matching].util;
- output_pair_header(&buf,
+ output_pair_header(diffopt, &buf,
b_util->matching, a_util, j, b_util);
if (!(diffopt->output_format & DIFF_FORMAT_NO_OUTPUT))
patch_diff(a->items[b_util->matching].string,
@@ -430,6 +451,8 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
struct string_list branch1 = STRING_LIST_INIT_DUP;
struct string_list branch2 = STRING_LIST_INIT_DUP;
+ git_diff_basic_config("diff.color.frag", "magenta", NULL);
+
diff_setup(&diffopt);
diffopt.output_format = DIFF_FORMAT_PATCH;
diffopt.flags.suppress_diff_headers = 1;
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* Re: [PATCH v2 12/18] branch-diff: use color for the commit pairs
2018-05-04 15:34 ` [PATCH v2 12/18] branch-diff: use color for the commit pairs Johannes Schindelin
@ 2018-05-05 23:48 ` Todd Zullinger
2018-05-07 1:52 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Todd Zullinger @ 2018-05-05 23:48 UTC (permalink / raw)
To: Johannes Schindelin
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Johannes,
As many others have already said, thanks for this series!
I've used tbdiff a bit over the years, but having a builtin
will make it much more convenient (and the speed boost from
a C implementation will be a very nice bonus).
Johannes Schindelin wrote:
> @@ -430,6 +451,8 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
> struct string_list branch1 = STRING_LIST_INIT_DUP;
> struct string_list branch2 = STRING_LIST_INIT_DUP;
>
> + git_diff_basic_config("diff.color.frag", "magenta", NULL);
> +
> diff_setup(&diffopt);
> diffopt.output_format = DIFF_FORMAT_PATCH;
> diffopt.flags.suppress_diff_headers = 1;
Should this also (or only) check color.diff.frag? I thought
that color.diff.* was preferred over diff.color.*, though
that doesn't seem to be entirely true in all parts of the
current codebase.
In testing this series it seems that setting color.diff
options to change the various colors read earlier in this
patch via diff_get_color_opt, as well as the 'frag' slot,
are ignored. Setting them via diff.color.<slot> does work.
The later patch adding a man page documents branch-diff as
using `diff.color.*` and points to git-config(1), but the
config docs only list color.diff.
Is this a bug in the diff_get_color{,_opt}() tooling?
It's certainly not anything you've introduced here, of
course. I just noticed that some custom color.diff settings
I've used weren't picked up by branch-diff, despite your
clear intention to respect colors from the config.
--
Todd
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Abandon the search for Truth; settle for a good fantasy.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 12/18] branch-diff: use color for the commit pairs
2018-05-05 23:48 ` Todd Zullinger
@ 2018-05-07 1:52 ` Johannes Schindelin
2018-05-08 2:10 ` Todd Zullinger
0 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-07 1:52 UTC (permalink / raw)
To: Todd Zullinger
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Todd,
On Sat, 5 May 2018, Todd Zullinger wrote:
> > @@ -430,6 +451,8 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
> > struct string_list branch1 = STRING_LIST_INIT_DUP;
> > struct string_list branch2 = STRING_LIST_INIT_DUP;
> >
> > + git_diff_basic_config("diff.color.frag", "magenta", NULL);
> > +
> > diff_setup(&diffopt);
> > diffopt.output_format = DIFF_FORMAT_PATCH;
> > diffopt.flags.suppress_diff_headers = 1;
>
> Should this also (or only) check color.diff.frag?
This code is not querying diff.color.frag, it is setting it. Without
any way to override it.
Having thought about it longer, and triggered by Peff's suggestion to
decouple the "reverse" part from the actual color, I fixed this by
- *not* setting .frag to magenta,
- using the reverse method also to mark outer *hunk headers* (not only the
outer -/+ markers).
- actually calling git_diff_ui_config()...
> I thought that color.diff.* was preferred over diff.color.*, though
> that doesn't seem to be entirely true in all parts of the current
> codebase.
>
> In testing this series it seems that setting color.diff
> options to change the various colors read earlier in this
> patch via diff_get_color_opt, as well as the 'frag' slot,
> are ignored. Setting them via diff.color.<slot> does work.
In my tests, it did not even work via diff.color.<slot>. But I think I
fixed this (at least my local testing confirms this) by calling
git_diff_ui_config().
> The later patch adding a man page documents branch-diff as
> using `diff.color.*` and points to git-config(1), but the
> config docs only list color.diff.
In the current form (`git branch --diff`), I refrained from going into
*so* much detail ;-) But the gist still holds, and now the code should
support it, too.
The current work in progress can be pulled as `branch-diff` from
https://github.com/dscho/git, if I could ask you to test?
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 12/18] branch-diff: use color for the commit pairs
2018-05-07 1:52 ` Johannes Schindelin
@ 2018-05-08 2:10 ` Todd Zullinger
2018-06-01 8:17 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Todd Zullinger @ 2018-05-08 2:10 UTC (permalink / raw)
To: Johannes Schindelin
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Johannes,
Johannes Schindelin wrote:
> Hi Todd,
>
> On Sat, 5 May 2018, Todd Zullinger wrote:
>
>>> @@ -430,6 +451,8 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
>>> struct string_list branch1 = STRING_LIST_INIT_DUP;
>>> struct string_list branch2 = STRING_LIST_INIT_DUP;
>>>
>>> + git_diff_basic_config("diff.color.frag", "magenta", NULL);
>>> +
>>> diff_setup(&diffopt);
>>> diffopt.output_format = DIFF_FORMAT_PATCH;
>>> diffopt.flags.suppress_diff_headers = 1;
>>
>> Should this also (or only) check color.diff.frag?
>
> This code is not querying diff.color.frag, it is setting it. Without
> any way to override it.
>
> Having thought about it longer, and triggered by Peff's suggestion to
> decouple the "reverse" part from the actual color, I fixed this by
>
> - *not* setting .frag to magenta,
>
> - using the reverse method also to mark outer *hunk headers* (not only the
> outer -/+ markers).
>
> - actually calling git_diff_ui_config()...
Excellent. That seems to work nicely now, respecting the
color.diff.<slot> config.
> The current work in progress can be pulled as `branch-diff` from
> https://github.com/dscho/git, if I could ask you to test?
While the colors and 'branch --diff' usage seem to work
nicely, I found that with 4ac3413cc8 ("branch-diff: left-pad
patch numbers", 2018-05-05), 'git branch' itself is broken.
Running 'git branch' creates a branch named 'branch'.
Calling 'git branch --list' shows only 'branch' as the only
branch.
I didn't look too closely, but I'm guessing that the argv
handling is leaving the 'branch' argument in place where it
should be stripped?
This unsurprisingly breaks a large number of tests. :)
Thanks,
--
Todd
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A common mistake people make when trying to design something
completely foolproof is to underestimate the ingenuity of complete
fools.
-- Douglas Adams
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 12/18] branch-diff: use color for the commit pairs
2018-05-08 2:10 ` Todd Zullinger
@ 2018-06-01 8:17 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-06-01 8:17 UTC (permalink / raw)
To: Todd Zullinger
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Todd,
On Mon, 7 May 2018, Todd Zullinger wrote:
> Johannes Schindelin wrote:
> >
> > On Sat, 5 May 2018, Todd Zullinger wrote:
> >
> >>> @@ -430,6 +451,8 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
> >>> struct string_list branch1 = STRING_LIST_INIT_DUP;
> >>> struct string_list branch2 = STRING_LIST_INIT_DUP;
> >>>
> >>> + git_diff_basic_config("diff.color.frag", "magenta", NULL);
> >>> +
> >>> diff_setup(&diffopt);
> >>> diffopt.output_format = DIFF_FORMAT_PATCH;
> >>> diffopt.flags.suppress_diff_headers = 1;
> >>
> >> Should this also (or only) check color.diff.frag?
> >
> > This code is not querying diff.color.frag, it is setting it. Without
> > any way to override it.
> >
> > Having thought about it longer, and triggered by Peff's suggestion to
> > decouple the "reverse" part from the actual color, I fixed this by
> >
> > - *not* setting .frag to magenta,
> >
> > - using the reverse method also to mark outer *hunk headers* (not only
> > the outer -/+ markers).
> >
> > - actually calling git_diff_ui_config()...
>
> Excellent. That seems to work nicely now, respecting the
> color.diff.<slot> config.
>
> > The current work in progress can be pulled as `branch-diff` from
> > https://github.com/dscho/git, if I could ask you to test?
>
> While the colors and 'branch --diff' usage seem to work
> nicely, I found that with 4ac3413cc8 ("branch-diff: left-pad
> patch numbers", 2018-05-05), 'git branch' itself is broken.
>
> Running 'git branch' creates a branch named 'branch'.
> Calling 'git branch --list' shows only 'branch' as the only
> branch.
>
> I didn't look too closely, but I'm guessing that the argv
> handling is leaving the 'branch' argument in place where it
> should be stripped?
>
> This unsurprisingly breaks a large number of tests. :)
You will be delighted to learn that all of this is now moot, as I renamed
the command to `range-diff`, as this is what the wisdom of the crowd
chose.
Ciao,
Johannes
^ permalink raw reply [flat|nested] 387+ messages in thread
* [PATCH v2 13/18] color: provide inverted colors, too
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (11 preceding siblings ...)
2018-05-04 15:34 ` [PATCH v2 12/18] branch-diff: use color for the commit pairs Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-05 18:29 ` Jeff King
2018-05-04 15:34 ` [PATCH v2 14/18] diff: add an internal option to dual-color diffs of diffs Johannes Schindelin
` (8 subsequent siblings)
21 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
For every regular color, there exists the inverted equivalent where
background and foreground colors are exchanged.
We will use this in the next commit to allow inverting *just* the +/-
signs in a diff.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
color.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/color.h b/color.h
index cd0bcedd084..f0984b09583 100644
--- a/color.h
+++ b/color.h
@@ -36,6 +36,12 @@ struct strbuf;
#define GIT_COLOR_BOLD_BLUE "\033[1;34m"
#define GIT_COLOR_BOLD_MAGENTA "\033[1;35m"
#define GIT_COLOR_BOLD_CYAN "\033[1;36m"
+#define GIT_COLOR_INV_RED "\033[7;31m"
+#define GIT_COLOR_INV_GREEN "\033[7;32m"
+#define GIT_COLOR_INV_YELLOW "\033[7;33m"
+#define GIT_COLOR_INV_BLUE "\033[7;34m"
+#define GIT_COLOR_INV_MAGENTA "\033[7;35m"
+#define GIT_COLOR_INV_CYAN "\033[7;36m"
#define GIT_COLOR_BG_RED "\033[41m"
#define GIT_COLOR_BG_GREEN "\033[42m"
#define GIT_COLOR_BG_YELLOW "\033[43m"
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* Re: [PATCH v2 13/18] color: provide inverted colors, too
2018-05-04 15:34 ` [PATCH v2 13/18] color: provide inverted colors, too Johannes Schindelin
@ 2018-05-05 18:29 ` Jeff King
2018-05-05 22:03 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Jeff King @ 2018-05-05 18:29 UTC (permalink / raw)
To: Johannes Schindelin
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On Fri, May 04, 2018 at 05:34:58PM +0200, Johannes Schindelin wrote:
> For every regular color, there exists the inverted equivalent where
> background and foreground colors are exchanged.
>
> We will use this in the next commit to allow inverting *just* the +/-
> signs in a diff.
There's a "reverse" attribute (which we already parse and support) that
can do this without having to repeat the colors. AFAIK it's well
supported everywhere, but I could be wrong.
I wonder if that would make configuring this slightly more pleasant,
since it saves the user having to define "oldinv" whenever they change
"old".
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 13/18] color: provide inverted colors, too
2018-05-05 18:29 ` Jeff King
@ 2018-05-05 22:03 ` Johannes Schindelin
2018-05-06 6:35 ` Jeff King
0 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-05 22:03 UTC (permalink / raw)
To: Jeff King
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Peff,
On Sat, 5 May 2018, Jeff King wrote:
> On Fri, May 04, 2018 at 05:34:58PM +0200, Johannes Schindelin wrote:
>
> > For every regular color, there exists the inverted equivalent where
> > background and foreground colors are exchanged.
> >
> > We will use this in the next commit to allow inverting *just* the +/-
> > signs in a diff.
>
> There's a "reverse" attribute (which we already parse and support) that
> can do this without having to repeat the colors. AFAIK it's well
> supported everywhere, but I could be wrong.
How would I use that here, though? I need to get the thing via
diff_get_color_opt() which takes a parameter of type `enum color_diff`.
There is no way I can specify `reverse` here, can I?
> I wonder if that would make configuring this slightly more pleasant,
> since it saves the user having to define "oldinv" whenever they change
> "old".
I am all for making the configuration more pleasant. So I hope I can make
use of the `reverse` thing here, without having to introduce a new enum
value.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 13/18] color: provide inverted colors, too
2018-05-05 22:03 ` Johannes Schindelin
@ 2018-05-06 6:35 ` Jeff King
2018-05-06 6:41 ` Jeff King
0 siblings, 1 reply; 387+ messages in thread
From: Jeff King @ 2018-05-06 6:35 UTC (permalink / raw)
To: Johannes Schindelin
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On Sun, May 06, 2018 at 12:03:50AM +0200, Johannes Schindelin wrote:
> > There's a "reverse" attribute (which we already parse and support) that
> > can do this without having to repeat the colors. AFAIK it's well
> > supported everywhere, but I could be wrong.
>
> How would I use that here, though? I need to get the thing via
> diff_get_color_opt() which takes a parameter of type `enum color_diff`.
> There is no way I can specify `reverse` here, can I?
My thinking was that the code would know that coloring the initial "+"
should combine color.diff.new, along with a new tbdiff-specific config
option. So the C equivalent of something like this:
new=$(git config --get-color color.diff.new green)
tbdiff=$(git config --get-color color.tbdiff.new reverse)
reset=$(git config --get-color color.diff.reset reset)
echo "${new}${tbdiff}+${reset}${new}+actual diff content${reset}"
Then if you set color.diff.new to blue, you'll get a reverse-blue "+"
without having to configure anything else.
You can still override the tbdiff coloring with a totally unrelated
color, since it comes after ${new} (so you could set it to purple or
something if you wanted, though obviously a background or attribute from
${new} can still leak through if you have one set). The only downside in
such a case is that the color sequence is slightly longer ("green, no
blue!").
You could also have tbdiff.new and tbdiff.old to allow setting them
independently (but they'd both default to "reverse").
> > I wonder if that would make configuring this slightly more pleasant,
> > since it saves the user having to define "oldinv" whenever they change
> > "old".
>
> I am all for making the configuration more pleasant. So I hope I can make
> use of the `reverse` thing here, without having to introduce a new enum
> value.
I think the new enum (and matching config) has some value in case people
want to override it. But if you don't want to, diff_get_color() is
really just checking want_color() as a convenience. You could do that,
too:
const char *reverse = want_color(opt->use_color) ? GIT_COLOR_REVERSE : "";
You'd have to introduce GIT_COLOR_REVERSE. I don't think we have a
constant for it yet, but it's \x1b[7m.
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 13/18] color: provide inverted colors, too
2018-05-06 6:35 ` Jeff King
@ 2018-05-06 6:41 ` Jeff King
2018-05-07 1:20 ` Johannes Schindelin
2018-05-07 1:35 ` Junio C Hamano
0 siblings, 2 replies; 387+ messages in thread
From: Jeff King @ 2018-05-06 6:41 UTC (permalink / raw)
To: Johannes Schindelin
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On Sun, May 06, 2018 at 02:35:44AM -0400, Jeff King wrote:
> You'd have to introduce GIT_COLOR_REVERSE. I don't think we have a
> constant for it yet, but it's \x1b[7m.
Heh, of course you knew that already, as I just noticed your patch is
using the reverse attribute internally (I had thought at first glance
you were just specifying the background independently).
So really, I guess all I am arguing for is having GIT_COLOR_INV (or
REVERSE) as a constant, and then teaching the code to combine it with
the existing "new" color. It's perfectly OK to have:
\x1b[7m\x1b[36m
instead of:
\x1b[7;36m
It's two extra bytes, but I doubt anybody cares.
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 13/18] color: provide inverted colors, too
2018-05-06 6:41 ` Jeff King
@ 2018-05-07 1:20 ` Johannes Schindelin
2018-05-07 7:37 ` Jeff King
2018-05-07 1:35 ` Junio C Hamano
1 sibling, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-07 1:20 UTC (permalink / raw)
To: Jeff King
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Peff,
On Sun, 6 May 2018, Jeff King wrote:
> On Sun, May 06, 2018 at 02:35:44AM -0400, Jeff King wrote:
>
> > You'd have to introduce GIT_COLOR_REVERSE. I don't think we have a
> > constant for it yet, but it's \x1b[7m.
>
> Heh, of course you knew that already, as I just noticed your patch is
> using the reverse attribute internally (I had thought at first glance
> you were just specifying the background independently).
>
> So really, I guess all I am arguing for is having GIT_COLOR_INV (or
> REVERSE) as a constant, and then teaching the code to combine it with
> the existing "new" color. It's perfectly OK to have:
>
> \x1b[7m\x1b[36m
>
> instead of:
>
> \x1b[7;36m
>
> It's two extra bytes, but I doubt anybody cares.
Yep, I agree that it is a small price to pay for the benefit of simply
using the reverse of diff.color.old (and .new).
While at it, I also changed the hunk header colors: they are *also* simply
the same ones, with the outer one having background and foreground
reversed.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 13/18] color: provide inverted colors, too
2018-05-07 1:20 ` Johannes Schindelin
@ 2018-05-07 7:37 ` Jeff King
0 siblings, 0 replies; 387+ messages in thread
From: Jeff King @ 2018-05-07 7:37 UTC (permalink / raw)
To: Johannes Schindelin
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On Sun, May 06, 2018 at 09:20:46PM -0400, Johannes Schindelin wrote:
> > Heh, of course you knew that already, as I just noticed your patch is
> > using the reverse attribute internally (I had thought at first glance
> > you were just specifying the background independently).
> >
> > So really, I guess all I am arguing for is having GIT_COLOR_INV (or
> > REVERSE) as a constant, and then teaching the code to combine it with
> > the existing "new" color. It's perfectly OK to have:
> >
> > \x1b[7m\x1b[36m
> >
> > instead of:
> >
> > \x1b[7;36m
> >
> > It's two extra bytes, but I doubt anybody cares.
>
> Yep, I agree that it is a small price to pay for the benefit of simply
> using the reverse of diff.color.old (and .new).
>
> While at it, I also changed the hunk header colors: they are *also* simply
> the same ones, with the outer one having background and foreground
> reversed.
That sounds sane.
If we ever did want to care about the number of bytes we output, I
suspect we could "compress" our ANSI terminal outputs by collapsing
adjacent colors into a single one. But IMHO it's not even worth worrying
about that optimization at this point.
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 13/18] color: provide inverted colors, too
2018-05-06 6:41 ` Jeff King
2018-05-07 1:20 ` Johannes Schindelin
@ 2018-05-07 1:35 ` Junio C Hamano
2018-05-07 5:38 ` Johannes Schindelin
2018-05-07 7:40 ` Jeff King
1 sibling, 2 replies; 387+ messages in thread
From: Junio C Hamano @ 2018-05-07 1:35 UTC (permalink / raw)
To: Jeff King
Cc: Johannes Schindelin, git, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Jeff King <peff@peff.net> writes:
> On Sun, May 06, 2018 at 02:35:44AM -0400, Jeff King wrote:
>
>> You'd have to introduce GIT_COLOR_REVERSE. I don't think we have a
>> constant for it yet, but it's \x1b[7m.
>
> Heh, of course you knew that already, as I just noticed your patch is
> using the reverse attribute internally (I had thought at first glance
> you were just specifying the background independently).
I somehow suspected as such, but I also thought so and reacted "what
about us whose terminal is black-on-white unlike most others?",
before looking up what 7 meant ;-)
> So really, I guess all I am arguing for is having GIT_COLOR_INV (or
> REVERSE) as a constant, and then teaching the code to combine it with
> the existing "new" color. It's perfectly OK to have:
>
> \x1b[7m\x1b[36m
>
> instead of:
>
> \x1b[7;36m
>
> It's two extra bytes, but I doubt anybody cares.
I do not think two extra bytes will be missed, but it was not
immediately obvious to me how much flexibility or simplicity we are
gaining by combining values from multiple configuration variables.
With a "letters on a new line is painted with ${new}, in addition,
the leading plus is further annotated with ${tbdiffNew}" (similarly
to "old") scheme, the user can take advantage of the fact that there
is no ${reset} between ${new} and ${tbdiffNew} and set tbdiffNew and
tbdiffOld to a same value (that does not change the color but
changes some other aspect of the appearance, like "reverse" or
"underline"). Since only pre-designed combination can be used (your
example works only because you chose to allow combination by
annotating the leading "+" with ${new}${tbdiffNew}), we'd need to
(1) establish a convention to paint things with similar meanings in
the same color, modifiable by individual command (e.g. you could say
anything new is by default green with "color.new=green", and then
"color.frotz.new=blink" "color.status.new=" "color.diff.new=blue"
would make frotz, status and diff subcommands to show new things in
blinking green, normal green, and blue), and (2) push the codebase
to adopt such color combination as a preferred design pattern if we
want the resulting system to be useful.
I guess you are getting simpler configuration, which is a big plus,
but to make a truly useful combining convention, we'd need to
rethink and find a way to transition existing configurations to the
new world, which may not be feasible.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 13/18] color: provide inverted colors, too
2018-05-07 1:35 ` Junio C Hamano
@ 2018-05-07 5:38 ` Johannes Schindelin
2018-05-07 7:40 ` Jeff King
1 sibling, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-07 5:38 UTC (permalink / raw)
To: Junio C Hamano
Cc: Jeff King, git, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Junio,
On Mon, 7 May 2018, Junio C Hamano wrote:
> Jeff King <peff@peff.net> writes:
>
> > So really, I guess all I am arguing for is having GIT_COLOR_INV (or
> > REVERSE) as a constant, and then teaching the code to combine it with
> > the existing "new" color. It's perfectly OK to have:
> >
> > \x1b[7m\x1b[36m
> >
> > instead of:
> >
> > \x1b[7;36m
> >
> > It's two extra bytes, but I doubt anybody cares.
>
> I do not think two extra bytes will be missed, but it was not
> immediately obvious to me how much flexibility or simplicity we are
> gaining by combining values from multiple configuration variables.
> With a "letters on a new line is painted with ${new}, in addition,
> the leading plus is further annotated with ${tbdiffNew}" (similarly
> to "old") scheme, the user can take advantage of the fact that there
> is no ${reset} between ${new} and ${tbdiffNew} and set tbdiffNew and
> tbdiffOld to a same value (that does not change the color but
> changes some other aspect of the appearance, like "reverse" or
> "underline"). Since only pre-designed combination can be used (your
> example works only because you chose to allow combination by
> annotating the leading "+" with ${new}${tbdiffNew}), we'd need to
> (1) establish a convention to paint things with similar meanings in
> the same color, modifiable by individual command (e.g. you could say
> anything new is by default green with "color.new=green", and then
> "color.frotz.new=blink" "color.status.new=" "color.diff.new=blue"
> would make frotz, status and diff subcommands to show new things in
> blinking green, normal green, and blue), and (2) push the codebase
> to adopt such color combination as a preferred design pattern if we
> want the resulting system to be useful.
>
> I guess you are getting simpler configuration, which is a big plus,
> but to make a truly useful combining convention, we'd need to
> rethink and find a way to transition existing configurations to the
> new world, which may not be feasible.
I really do not like the sound of that much complexity. It strikes me as
yet another instance of Yer Ain't Gonna Need It. In *particular* because
nested diffs are a special thing: you *already* get overwhelmed with
too much information, and adding colors to the fray won't help.
What does help is to keep the colors, so that they can mean the same thing
in inner vs outer diffs, but reverse foreground and background to make the
outer diff "stick out more".
Should my assessment be wrong, I think it'll still be relatively easy to
add support for config settings, *then*, not before we know it is needed.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 13/18] color: provide inverted colors, too
2018-05-07 1:35 ` Junio C Hamano
2018-05-07 5:38 ` Johannes Schindelin
@ 2018-05-07 7:40 ` Jeff King
1 sibling, 0 replies; 387+ messages in thread
From: Jeff King @ 2018-05-07 7:40 UTC (permalink / raw)
To: Junio C Hamano
Cc: Johannes Schindelin, git, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On Mon, May 07, 2018 at 10:35:53AM +0900, Junio C Hamano wrote:
> > So really, I guess all I am arguing for is having GIT_COLOR_INV (or
> > REVERSE) as a constant, and then teaching the code to combine it with
> > the existing "new" color. It's perfectly OK to have:
> >
> > \x1b[7m\x1b[36m
> >
> > instead of:
> >
> > \x1b[7;36m
> >
> > It's two extra bytes, but I doubt anybody cares.
>
> I do not think two extra bytes will be missed, but it was not
> immediately obvious to me how much flexibility or simplicity we are
> gaining by combining values from multiple configuration variables.
My goal was just to let you set color.diff.new to something besides
green without having to also manually set color.tbdiff.new (or whatever
it's called) to match.
> With a "letters on a new line is painted with ${new}, in addition,
> the leading plus is further annotated with ${tbdiffNew}" (similarly
> to "old") scheme, the user can take advantage of the fact that there
> is no ${reset} between ${new} and ${tbdiffNew} and set tbdiffNew and
> tbdiffOld to a same value (that does not change the color but
> changes some other aspect of the appearance, like "reverse" or
> "underline"). Since only pre-designed combination can be used (your
> example works only because you chose to allow combination by
> annotating the leading "+" with ${new}${tbdiffNew}), we'd need to
> (1) establish a convention to paint things with similar meanings in
> the same color, modifiable by individual command (e.g. you could say
> anything new is by default green with "color.new=green", and then
> "color.frotz.new=blink" "color.status.new=" "color.diff.new=blue"
> would make frotz, status and diff subcommands to show new things in
> blinking green, normal green, and blue), and (2) push the codebase
> to adopt such color combination as a preferred design pattern if we
> want the resulting system to be useful.
Right, this is basically making that "new" piggy-backing explicit, but
only for this one case.
> I guess you are getting simpler configuration, which is a big plus,
> but to make a truly useful combining convention, we'd need to
> rethink and find a way to transition existing configurations to the
> new world, which may not be feasible.
Yes, one could probably develop a whole theming system for Git. We've
resisted it so far. :)
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* [PATCH v2 14/18] diff: add an internal option to dual-color diffs of diffs
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (12 preceding siblings ...)
2018-05-04 15:34 ` [PATCH v2 13/18] color: provide inverted colors, too Johannes Schindelin
@ 2018-05-04 15:34 ` Johannes Schindelin
2018-05-04 15:35 ` [PATCH v2 15/18] branch-diff: offer to dual-color the diffs Johannes Schindelin
` (7 subsequent siblings)
21 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:34 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
When diffing diffs, it can be quite daunting to figure out what the heck
is going on, as there are nested +/- signs.
Let's make this easier by adding a flag in diff_options that allows
color-coding the outer diff sign with inverted colors, so that the
preimage and postimage are colored like the diffs they are.
Of course, this really only makes sense when the preimage and postimage
*are* diffs. So let's not expose this flag via a command-line option for
now.
This is a feature that was invented by git-tbdiff, and it will be used
in `branch-diff` in the next commit.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
diff.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++---------
diff.h | 5 ++++-
2 files changed, 59 insertions(+), 11 deletions(-)
diff --git a/diff.c b/diff.c
index f1bda0db3f5..98a41e88620 100644
--- a/diff.c
+++ b/diff.c
@@ -67,6 +67,8 @@ static char diff_colors[][COLOR_MAXLEN] = {
GIT_COLOR_BOLD_YELLOW, /* NEW_MOVED ALTERNATIVE */
GIT_COLOR_FAINT, /* NEW_MOVED_DIM */
GIT_COLOR_FAINT_ITALIC, /* NEW_MOVED_ALTERNATIVE_DIM */
+ GIT_COLOR_INV_RED, /* OLD_INV */
+ GIT_COLOR_INV_GREEN, /* NEW_INV */
};
static NORETURN void die_want_option(const char *option_name)
@@ -108,6 +110,10 @@ static int parse_diff_color_slot(const char *var)
return DIFF_FILE_NEW_MOVED_DIM;
if (!strcasecmp(var, "newmovedalternativedimmed"))
return DIFF_FILE_NEW_MOVED_ALT_DIM;
+ if (!strcasecmp(var, "oldinv"))
+ return DIFF_FILE_OLD_INV;
+ if (!strcasecmp(var, "newinv"))
+ return DIFF_FILE_NEW_INV;
return -1;
}
@@ -577,7 +583,10 @@ static void emit_line_0(struct diff_options *o, const char *set, const char *res
int nofirst;
FILE *file = o->file;
- fputs(diff_line_prefix(o), file);
+ if (first)
+ fputs(diff_line_prefix(o), file);
+ else if (!len)
+ return;
if (len == 0) {
has_trailing_newline = (first == '\n');
@@ -596,7 +605,7 @@ static void emit_line_0(struct diff_options *o, const char *set, const char *res
if (len || !nofirst) {
fputs(set, file);
- if (!nofirst)
+ if (first && !nofirst)
fputc(first, file);
fwrite(line, len, 1, file);
fputs(reset, file);
@@ -970,7 +979,8 @@ static void dim_moved_lines(struct diff_options *o)
static void emit_line_ws_markup(struct diff_options *o,
const char *set, const char *reset,
- const char *line, int len, char sign,
+ const char *line, int len,
+ const char *set_sign, char sign,
unsigned ws_rule, int blank_at_eof)
{
const char *ws = NULL;
@@ -981,14 +991,18 @@ static void emit_line_ws_markup(struct diff_options *o,
ws = NULL;
}
- if (!ws)
+ if (!ws && set_sign == set)
emit_line_0(o, set, reset, sign, line, len);
- else if (blank_at_eof)
+ else if (!ws) {
+ /* Emit just the prefix, then the rest. */
+ emit_line_0(o, set_sign, reset, sign, "", 0);
+ emit_line_0(o, set, reset, 0, line, len);
+ } else if (blank_at_eof)
/* Blank line at EOF - paint '+' as well */
emit_line_0(o, ws, reset, sign, line, len);
else {
/* Emit just the prefix, then the rest. */
- emit_line_0(o, set, reset, sign, "", 0);
+ emit_line_0(o, set_sign, reset, sign, "", 0);
ws_check_emit(line, len, ws_rule,
o->file, set, reset, ws);
}
@@ -998,7 +1012,7 @@ static void emit_diff_symbol_from_struct(struct diff_options *o,
struct emitted_diff_symbol *eds)
{
static const char *nneof = " No newline at end of file\n";
- const char *context, *reset, *set, *meta, *fraginfo;
+ const char *context, *reset, *set, *set_sign, *meta, *fraginfo;
struct strbuf sb = STRBUF_INIT;
enum diff_symbol s = eds->s;
@@ -1038,7 +1052,16 @@ static void emit_diff_symbol_from_struct(struct diff_options *o,
case DIFF_SYMBOL_CONTEXT:
set = diff_get_color_opt(o, DIFF_CONTEXT);
reset = diff_get_color_opt(o, DIFF_RESET);
- emit_line_ws_markup(o, set, reset, line, len, ' ',
+ set_sign = set;
+ if (o->flags.dual_color_diffed_diffs) {
+ char c = !len ? 0 : line[0];
+
+ if (c == '+')
+ set = diff_get_color_opt(o, DIFF_FILE_NEW);
+ else if (c == '-')
+ set = diff_get_color_opt(o, DIFF_FILE_OLD);
+ }
+ emit_line_ws_markup(o, set, reset, line, len, set_sign, ' ',
flags & (DIFF_SYMBOL_CONTENT_WS_MASK), 0);
break;
case DIFF_SYMBOL_PLUS:
@@ -1065,7 +1088,18 @@ static void emit_diff_symbol_from_struct(struct diff_options *o,
set = diff_get_color_opt(o, DIFF_FILE_NEW);
}
reset = diff_get_color_opt(o, DIFF_RESET);
- emit_line_ws_markup(o, set, reset, line, len, '+',
+ if (!o->flags.dual_color_diffed_diffs)
+ set_sign = set;
+ else {
+ char c = !len ? 0 : line[0];
+
+ set_sign = diff_get_color_opt(o, DIFF_FILE_NEW_INV);
+ if (c == '-')
+ set = diff_get_color_opt(o, DIFF_FILE_OLD);
+ else if (c != '+')
+ set = diff_get_color_opt(o, DIFF_CONTEXT);
+ }
+ emit_line_ws_markup(o, set, reset, line, len, set_sign, '+',
flags & DIFF_SYMBOL_CONTENT_WS_MASK,
flags & DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF);
break;
@@ -1093,7 +1127,18 @@ static void emit_diff_symbol_from_struct(struct diff_options *o,
set = diff_get_color_opt(o, DIFF_FILE_OLD);
}
reset = diff_get_color_opt(o, DIFF_RESET);
- emit_line_ws_markup(o, set, reset, line, len, '-',
+ if (!o->flags.dual_color_diffed_diffs)
+ set_sign = set;
+ else {
+ char c = !len ? 0 : line[0];
+
+ set_sign = diff_get_color_opt(o, DIFF_FILE_OLD_INV);
+ if (c == '+')
+ set = diff_get_color_opt(o, DIFF_FILE_NEW);
+ else if (c != '-')
+ set = diff_get_color_opt(o, DIFF_CONTEXT);
+ }
+ emit_line_ws_markup(o, set, reset, line, len, set_sign, '-',
flags & DIFF_SYMBOL_CONTENT_WS_MASK, 0);
break;
case DIFF_SYMBOL_WORDS_PORCELAIN:
diff --git a/diff.h b/diff.h
index 0dd6a71af60..c3e5d27967c 100644
--- a/diff.h
+++ b/diff.h
@@ -95,6 +95,7 @@ struct diff_flags {
unsigned default_follow_renames:1;
unsigned stat_with_summary:1;
unsigned suppress_diff_headers:1;
+ unsigned dual_color_diffed_diffs:1;
};
static inline void diff_flags_or(struct diff_flags *a,
@@ -242,7 +243,9 @@ enum color_diff {
DIFF_FILE_NEW_MOVED = 13,
DIFF_FILE_NEW_MOVED_ALT = 14,
DIFF_FILE_NEW_MOVED_DIM = 15,
- DIFF_FILE_NEW_MOVED_ALT_DIM = 16
+ DIFF_FILE_NEW_MOVED_ALT_DIM = 16,
+ DIFF_FILE_OLD_INV = 17,
+ DIFF_FILE_NEW_INV = 18
};
const char *diff_get_color(int diff_use_color, enum color_diff ix);
#define diff_get_color_opt(o, ix) \
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v2 15/18] branch-diff: offer to dual-color the diffs
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (13 preceding siblings ...)
2018-05-04 15:34 ` [PATCH v2 14/18] diff: add an internal option to dual-color diffs of diffs Johannes Schindelin
@ 2018-05-04 15:35 ` Johannes Schindelin
2018-05-04 15:35 ` [PATCH v2 16/18] branch-diff --dual-color: work around bogus white-space warning Johannes Schindelin
` (6 subsequent siblings)
21 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:35 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
When showing what changed between old and new commits, we show a diff of
the patches. This diff is a diff between diffs, therefore there are
nested +/- signs, and it can be relatively hard to understand what is
going on.
With the --dual-color option, the preimage and the postimage are colored
like the diffs they are, and the *outer* +/- sign is inverted for
clarity.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
builtin/branch-diff.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
index 04efd30f0f6..8a16352e3a1 100644
--- a/builtin/branch-diff.c
+++ b/builtin/branch-diff.c
@@ -435,8 +435,11 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
{
struct diff_options diffopt = { NULL };
struct strbuf four_spaces = STRBUF_INIT;
+ int dual_color = 0;
double creation_weight = 0.6;
struct option options[] = {
+ OPT_BOOL(0, "dual-color", &dual_color,
+ N_("color both diff and diff-between-diffs")),
OPT_SET_INT(0, "no-patches", &diffopt.output_format,
N_("short format (no diffs)"),
DIFF_FORMAT_NO_OUTPUT),
@@ -472,6 +475,11 @@ int cmd_branch_diff(int argc, const char **argv, const char *prefix)
argc = j;
diff_setup_done(&diffopt);
+ if (dual_color) {
+ diffopt.use_color = 1;
+ diffopt.flags.dual_color_diffed_diffs = 1;
+ }
+
if (argc == 2) {
if (!strstr(argv[0], ".."))
warning(_("no .. in range: '%s'"), argv[0]);
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v2 16/18] branch-diff --dual-color: work around bogus white-space warning
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (14 preceding siblings ...)
2018-05-04 15:35 ` [PATCH v2 15/18] branch-diff: offer to dual-color the diffs Johannes Schindelin
@ 2018-05-04 15:35 ` Johannes Schindelin
2018-05-04 15:35 ` [PATCH v2 17/18] branch-diff: add a man page Johannes Schindelin
` (5 subsequent siblings)
21 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:35 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
When displaying a diff of diffs, it is possible that there is an outer
`+` before a context line. That happens when the context changed between
old and new commit. When that context line starts with a tab (after the
space that marks it as context line), our diff machinery spits out a
white-space error (space before tab), but in this case, that is
incorrect.
Work around this by detecting that situation and simply *not* printing
the space in that case.
This is a slightly improper fix because it is conceivable that an
output_prefix might be configured with *just* the right length to let
that tab jump to a different tab stop depending on whether we emit that
space or not.
However, the proper fix would be relatively ugly and intrusive because
it would have to *weaken* the WS_SPACE_BEFORE_TAB option in ws.c.
Besides, we do not expose the --dual-color option in cases other than
the `branch-diff` command, which only uses a hard-coded output_prefix of
four spaces (which misses the problem by one column ;-)).
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
diff.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/diff.c b/diff.c
index 98a41e88620..b98a18fe014 100644
--- a/diff.c
+++ b/diff.c
@@ -1098,6 +1098,12 @@ static void emit_diff_symbol_from_struct(struct diff_options *o,
set = diff_get_color_opt(o, DIFF_FILE_OLD);
else if (c != '+')
set = diff_get_color_opt(o, DIFF_CONTEXT);
+ /* Avoid space-before-tab warning */
+ if (c == ' ' && (len < 2 || line[1] == '\t' ||
+ line[1] == '\r' || line[1] == '\n')) {
+ line++;
+ len--;
+ }
}
emit_line_ws_markup(o, set, reset, line, len, set_sign, '+',
flags & DIFF_SYMBOL_CONTENT_WS_MASK,
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v2 17/18] branch-diff: add a man page
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (15 preceding siblings ...)
2018-05-04 15:35 ` [PATCH v2 16/18] branch-diff --dual-color: work around bogus white-space warning Johannes Schindelin
@ 2018-05-04 15:35 ` Johannes Schindelin
2018-05-04 15:35 ` [PATCH v2 18/18] completion: support branch-diff Johannes Schindelin
` (4 subsequent siblings)
21 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:35 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
This is a heavily butchered version of the README written by Thomas
Rast and Thomas Gummerer, lifted from https://github.com/trast/tbdiff.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
Documentation/git-branch-diff.txt | 239 ++++++++++++++++++++++++++++++
1 file changed, 239 insertions(+)
create mode 100644 Documentation/git-branch-diff.txt
diff --git a/Documentation/git-branch-diff.txt b/Documentation/git-branch-diff.txt
new file mode 100644
index 00000000000..f9e23eaf721
--- /dev/null
+++ b/Documentation/git-branch-diff.txt
@@ -0,0 +1,239 @@
+git-branch-diff(1)
+==================
+
+NAME
+----
+git-branch-diff - Compare two versions of a branch
+
+SYNOPSIS
+--------
+[verse]
+'git branch-diff' [--color=[<when>]] [--no-color] [<diff-options>]
+ [--dual-color] [--no-patches] [--creation-weight=<factor>]
+ ( <range1> <range2> | <rev1>...<rev2> | <base> <rev1> <rev2> )
+
+DESCRIPTION
+-----------
+
+This command shows the differences between two versions of a patch
+series, or more generally, two commit ranges (ignoring merges).
+
+To that end, it first finds pairs of commits from both commit ranges
+that correspond with each other. Two commits are said to correspond when
+the diff between their patches (i.e. the author information, the commit
+message and the commit diff) is reasonably small compared to the
+patches' size. See ``Algorithm`` below for details.
+
+Finally, the list of matching commits is shown in the order of the
+second commit range, with unmatched commits being inserted just after
+all of their ancestors have been shown.
+
+
+OPTIONS
+-------
+--no-patches::
+ Suppress the diffs between commit pairs that were deemed to
+ correspond; only show the pairings.
+
+--dual-color::
+ When the commit diffs differ, recreate the original diffs'
+ coloring, and add outer -/+ diff markers with the *background*
+ being red/green to make it easier to see e.g. when there was a
+ change in what exact lines were added.
+
+--creation-weight=<factor>::
+ Set the creation/deletion cost fudge factor to `<factor>`.
+ Defaults to 0.6. Try a larger value if `git branch-diff`
+ erroneously considers a large change a total rewrite (deletion
+ of one commit and addition of another), and a smaller one in
+ the reverse case. See the ``Algorithm`` section below for an
+ explanation why this is needed.
+
+<range1> <range2>::
+ Compare the commits specified by the two ranges, where
+ `<range1>` is considered an older version of `<range2>`.
+
+<rev1>...<rev2>::
+ Equivalent to passing `<rev2>..<rev1>` and `<rev1>..<rev2>`.
+
+<base> <rev1> <rev2>::
+ Equivalent to passing `<base>..<rev1>` and `<base>..<rev2>`.
+ Note that `<base>` does not need to be the exact branch point
+ of the branches. Example: after rebasing a branch `my-topic`,
+ `git branch-diff my-topic@{u} my-topic@{1} my-topic` would
+ show the differences introduced by the rebase.
+
+`git branch-diff` also accepts the regular diff options (see
+linkgit:git-diff[1]), most notably the `--color=[<when>]` and
+`--no-color` options. These options are used when generating the "diff
+between patches", i.e. to compare the author, commit message and diff of
+corresponding old/new commits. There is currently no means to tweak the
+diff options passed to `git log` when generating those patches.
+
+
+CONFIGURATION
+-------------
+This command uses the `diff.color.*` and `pager.branch-diff` settings
+(the latter is on by default).
+See linkgit:git-config[1].
+
+
+Examples
+--------
+
+When a rebase required merge conflicts to be resolved, compare the changes
+introduced by the rebase directly afterwards using:
+
+------------
+$ git branch-diff @{u} @{1} @
+------------
+
+
+A typical output of `git branch-diff` would look like this:
+
+------------
+-: ------- > 1: 0ddba11 Prepare for the inevitable!
+1: c0debee = 2: cab005e Add a helpful message at the start
+2: f00dba1 ! 3: decafe1 Describe a bug
+ @@ -1,3 +1,3 @@
+ Author: A U Thor <author@example.com>
+
+ -TODO: Describe a bug
+ +Describe a bug
+ @@ -324,5 +324,6 @@
+ This is expected.
+
+ -+What is unexpected is that it will also crash.
+ ++Unexpectedly, it also crashes. This is a bug, and the jury is
+ ++still out there how to fix it best. See ticket #314 for details.
+
+ Contact
+3: bedead < -: ------- TO-UNDO
+------------
+
+In this example, there are 3 old and 3 new commits, where the developer
+removed the 3rd, added a new one before the first two, and modified the
+commit message of the 2nd commit as well as its diff.
+
+When the output goes to a terminal, it is color-coded by default, just
+like regular `git diff`'s output. In addition, the first line (adding a
+commit) is green, the last line (deleting a commit) is red, the second
+line (with a perfect match) is yellow like the commit header of `git
+show`'s output, and the third line colors the old commit red, the new
+one green and the rest like `git show`'s commit header.
+
+The color-coded diff is actually a bit hard to read, though, as it
+colors the entire lines red or green. The line that added "What is
+unexpected" in the old commit, for example, is completely red, even if
+the intent of the old commit was to add something.
+
+To help with that, use the `--dual-color` mode. In this mode, the diff
+of diffs will retain the original diff colors, and prefix the lines with
+-/+ markers that have their *background* red or green, to make it more
+obvious that they describe how the diff itself changed.
+
+
+Algorithm
+---------
+
+The general idea is this: we generate a cost matrix between the commits
+in both commit ranges, then solve the least-cost assignment.
+
+To avoid false positives (e.g. when a patch has been removed, and an
+unrelated patch has been added between two iterations of the same patch
+series), the cost matrix is extended to allow for that, by adding
+fixed-cost entries for wholesale deletes/adds.
+
+Example: Let commits `1--2` be the first iteration of a patch series and
+`A--C` the second iteration. Let's assume that `A` is a cherry-pick of
+`2`, and `C` is a cherry-pick of `1` but with a small modification (say,
+a fixed typo). Visualize the commits as a bipartite graph:
+
+------------
+ 1 A
+
+ 2 B
+
+ C
+------------
+
+We are looking for a "best" explanation of the new series in terms of
+the old one. We can represent an "explanation" as an edge in the graph:
+
+
+------------
+ 1 A
+ /
+ 2 --------' B
+
+ C
+------------
+
+This explanation comes for "free" because there was no change. Similarly
+`C` could be explained using `1`, but that comes at some cost c>0
+because of the modification:
+
+------------
+ 1 ----. A
+ | /
+ 2 ----+---' B
+ |
+ `----- C
+ c>0
+------------
+
+In mathematical terms, what we are looking for is some sort of a minimum
+cost bipartite matching; `1` is matched to `C` at some cost, etc. The
+underlying graph is in fact a complete bipartite graph; the cost we
+associate with every edge is the size of the diff between the two
+commits' patches. To explain also new commits, we introduce dummy nodes
+on both sides:
+
+------------
+ 1 ----. A
+ | /
+ 2 ----+---' B
+ |
+ o `----- C
+ c>0
+ o o
+
+ o o
+------------
+
+The cost of an edge `o--C` is the size of `C`'s diff, modified by a
+fudge factor that should be smaller than 1.0. The cost of an edge `o--o`
+is free. The fudge factor is necessary because even if `1` and `C` have
+nothing in common, they may still share a few empty lines and such,
+possibly making the assignment `1--C`, `o--o` slightly cheaper than
+`1--o`, `o--C` even if `1` and `C` have nothing in common. With the
+fudge factor we require a much larger common part to consider patches as
+corresponding.
+
+The overall time needed to compute this algorithm is the time needed to
+compute n+m commit diffs and then n*m diffs of patches, plus the time
+needed to compute the least-cost assignment between n and m diffs. Git
+uses an implementation of the Jonker-Volgenant algorithm to solve the
+assignment problem, which has cubic runtime complexity. The matching
+found in this case will look like this:
+
+------------
+ 1 ----. A
+ | /
+ 2 ----+---' B
+ .--+-----'
+ o -' `----- C
+ c>0
+ o ---------- o
+
+ o ---------- o
+------------
+
+
+SEE ALSO
+--------
+linkgit:git-log[1]
+
+GIT
+---
+Part of the linkgit:git[1] suite
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v2 18/18] completion: support branch-diff
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (16 preceding siblings ...)
2018-05-04 15:35 ` [PATCH v2 17/18] branch-diff: add a man page Johannes Schindelin
@ 2018-05-04 15:35 ` Johannes Schindelin
2018-05-06 8:24 ` Duy Nguyen
2018-05-04 16:21 ` [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike Elijah Newren
` (3 subsequent siblings)
21 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-04 15:35 UTC (permalink / raw)
To: git
Cc: Johannes Schindelin, Junio C Hamano, Thomas Rast,
Thomas Gummerer, Ævar Arnfjörð Bjarmason,
Ramsay Jones, Stefan Beller, Jacob Keller, Eric Sunshine
Tab completion of `branch-diff` is very convenient, especially given
that the revision arguments that need to be passed to `git branch-diff`
are typically more complex than, say, your grandfather's `git log`
arguments.
Without this patch, we would only complete the `branch-diff` part but
not the options and other arguments.
This of itself may already be slightly disruptive for well-trained
fingers that assume that `git bra<TAB>ori<TAB>mas<TAB>` would expand to
`git branch origin/master`, as we now no longer automatically append a
space after completing `git branch`: this is now ambiguous.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
contrib/completion/git-completion.bash | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 01dd9ff07a2..45addd525ac 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -1496,6 +1496,24 @@ _git_format_patch ()
__git_complete_revlist
}
+__git_branch_diff_options="
+ --no-patches --creation-weight= --dual-color
+"
+
+_git_branch_diff ()
+{
+ case "$cur" in
+ --*)
+ __gitcomp "
+ $__git_branch_diff_options
+ $__git_diff_common_options
+ "
+ return
+ ;;
+ esac
+ __git_complete_revlist
+}
+
_git_fsck ()
{
case "$cur" in
--
2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* Re: [PATCH v2 18/18] completion: support branch-diff
2018-05-04 15:35 ` [PATCH v2 18/18] completion: support branch-diff Johannes Schindelin
@ 2018-05-06 8:24 ` Duy Nguyen
2018-05-07 1:23 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Duy Nguyen @ 2018-05-06 8:24 UTC (permalink / raw)
To: Johannes Schindelin
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On Fri, May 04, 2018 at 05:35:11PM +0200, Johannes Schindelin wrote:
> Tab completion of `branch-diff` is very convenient, especially given
> that the revision arguments that need to be passed to `git branch-diff`
> are typically more complex than, say, your grandfather's `git log`
> arguments.
>
> Without this patch, we would only complete the `branch-diff` part but
> not the options and other arguments.
>
> This of itself may already be slightly disruptive for well-trained
> fingers that assume that `git bra<TAB>ori<TAB>mas<TAB>` would expand to
> `git branch origin/master`, as we now no longer automatically append a
> space after completing `git branch`: this is now ambiguous.
>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
> contrib/completion/git-completion.bash | 18 ++++++++++++++++++
> 1 file changed, 18 insertions(+)
>
> diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
> index 01dd9ff07a2..45addd525ac 100644
> --- a/contrib/completion/git-completion.bash
> +++ b/contrib/completion/git-completion.bash
> @@ -1496,6 +1496,24 @@ _git_format_patch ()
> __git_complete_revlist
> }
>
> +__git_branch_diff_options="
> + --no-patches --creation-weight= --dual-color
> +"
> +
> +_git_branch_diff ()
> +{
> + case "$cur" in
> + --*)
> + __gitcomp "
You should use __gitcomp_builtin so you don't have to maintain
$__git_branch_diff_options here. Something like this
-- 8< --
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 45addd525a..4745631daf 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -1496,18 +1496,11 @@ _git_format_patch ()
__git_complete_revlist
}
-__git_branch_diff_options="
- --no-patches --creation-weight= --dual-color
-"
-
_git_branch_diff ()
{
case "$cur" in
--*)
- __gitcomp "
- $__git_branch_diff_options
- $__git_diff_common_options
- "
+ __gitcomp_builtin branch-diff "$__git_diff_common_options"
return
;;
esac
-- 8< --
> + $__git_branch_diff_options
> + $__git_diff_common_options
> + "
> + return
> + ;;
> + esac
> + __git_complete_revlist
> +}
> +
> _git_fsck ()
> {
> case "$cur" in
> --
> 2.17.0.409.g71698f11835
^ permalink raw reply related [flat|nested] 387+ messages in thread
* Re: [PATCH v2 18/18] completion: support branch-diff
2018-05-06 8:24 ` Duy Nguyen
@ 2018-05-07 1:23 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-07 1:23 UTC (permalink / raw)
To: Duy Nguyen
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Duy,
On Sun, 6 May 2018, Duy Nguyen wrote:
> On Fri, May 04, 2018 at 05:35:11PM +0200, Johannes Schindelin wrote:
> > Tab completion of `branch-diff` is very convenient, especially given
> > that the revision arguments that need to be passed to `git branch-diff`
> > are typically more complex than, say, your grandfather's `git log`
> > arguments.
> >
> > Without this patch, we would only complete the `branch-diff` part but
> > not the options and other arguments.
> >
> > This of itself may already be slightly disruptive for well-trained
> > fingers that assume that `git bra<TAB>ori<TAB>mas<TAB>` would expand to
> > `git branch origin/master`, as we now no longer automatically append a
> > space after completing `git branch`: this is now ambiguous.
> >
> > Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> > ---
> > contrib/completion/git-completion.bash | 18 ++++++++++++++++++
> > 1 file changed, 18 insertions(+)
> >
> > diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
> > index 01dd9ff07a2..45addd525ac 100644
> > --- a/contrib/completion/git-completion.bash
> > +++ b/contrib/completion/git-completion.bash
> > @@ -1496,6 +1496,24 @@ _git_format_patch ()
> > __git_complete_revlist
> > }
> >
> > +__git_branch_diff_options="
> > + --no-patches --creation-weight= --dual-color
> > +"
> > +
> > +_git_branch_diff ()
> > +{
> > + case "$cur" in
> > + --*)
> > + __gitcomp "
>
> You should use __gitcomp_builtin so you don't have to maintain
> $__git_branch_diff_options here. Something like this
>
> -- 8< --
> diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
> index 45addd525a..4745631daf 100644
> --- a/contrib/completion/git-completion.bash
> +++ b/contrib/completion/git-completion.bash
> @@ -1496,18 +1496,11 @@ _git_format_patch ()
> __git_complete_revlist
> }
>
> -__git_branch_diff_options="
> - --no-patches --creation-weight= --dual-color
> -"
> -
> _git_branch_diff ()
> {
> case "$cur" in
> --*)
> - __gitcomp "
> - $__git_branch_diff_options
> - $__git_diff_common_options
> - "
> + __gitcomp_builtin branch-diff "$__git_diff_common_options"
> return
> ;;
> esac
> -- 8< --
Does this really work? I have this instead, for now, and verified that it
works:
-- snipsnap --
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 01dd9ff07a2..c498c053881 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -1205,13 +1205,14 @@ _git_bisect ()
_git_branch ()
{
- local i c=1 only_local_ref="n" has_r="n"
+ local i c=1 only_local_ref="n" has_r="n" diff_mode="n"
while [ $c -lt $cword ]; do
i="${words[c]}"
case "$i" in
-d|--delete|-m|--move) only_local_ref="y" ;;
-r|--remotes) has_r="y" ;;
+ --diff) diff_mode="y" ;;
esac
((c++))
done
@@ -1221,11 +1222,22 @@ _git_branch ()
__git_complete_refs --cur="${cur##--set-upstream-to=}"
;;
--*)
+ if [ $diff_mode = "y" ]; then
+ __gitcomp "
+ --creation-factor= --dual-color
+ $__git_diff_common_options
+ "
+ return
+ fi
__gitcomp_builtin branch "--no-color --no-abbrev
--no-track --no-column
"
;;
*)
+ if [ $diff_mode = "y" ]; then
+ __git_complete_revlist
+ return
+ fi
if [ $only_local_ref = "y" -a $has_r = "n" ]; then
__gitcomp_direct "$(__git_heads "" "$cur" " ")"
else
^ permalink raw reply related [flat|nested] 387+ messages in thread
* Re: [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (17 preceding siblings ...)
2018-05-04 15:35 ` [PATCH v2 18/18] completion: support branch-diff Johannes Schindelin
@ 2018-05-04 16:21 ` Elijah Newren
2018-05-04 16:30 ` Elijah Newren
2018-05-05 20:03 ` Johannes Schindelin
2018-05-06 5:22 ` Junio C Hamano
` (2 subsequent siblings)
21 siblings, 2 replies; 387+ messages in thread
From: Elijah Newren @ 2018-05-04 16:21 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Git Mailing List, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On Fri, May 4, 2018 at 8:34 AM, Johannes Schindelin
<johannes.schindelin@gmx.de> wrote:
> The incredibly useful `git-tbdiff` tool to compare patch series (say, to see
> what changed between two iterations sent to the Git mailing list) is slightly
> less useful for this developer due to the fact that it requires the `hungarian`
> and `numpy` Python packages which are for some reason really hard to build in
> MSYS2. So hard that I even had to give up, because it was simply easier to
> reimplement the whole shebang as a builtin command.
tbdiff is awesome; thanks for bringing it in as a builtin to git.
I've run through a few cases, comparing output of tbdiff and
branch-diff. So far, what I've noted is that they produce largely the
same output except that:
- tbdiff seems to shorten shas to 7 characters, branch-diff is using
10, in git.git at least. (Probably a good change)
- tbdiff aligned output columns better when there were more than 9
patches (I'll comment more on patch 09/18)
- As noted elsewhere in the review of round 1, tbdiff uses difflib
while branch-diff uses xdiff. I found some cases where that mattered,
and in all of them, I either felt like the difference was irrelevant
or that difflib was suboptimal, so this is definitely an improvement
for me.
- branch-diff produces its output faster, and it is automatically
paged. This is really cool.
Also, I don't have bash-completion for either tbdiff or branch-diff.
:-( But I saw some discussion on the v1 patches about how this gets
handled... :-)
Elijah
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike
2018-05-04 16:21 ` [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike Elijah Newren
@ 2018-05-04 16:30 ` Elijah Newren
2018-05-05 20:03 ` Johannes Schindelin
1 sibling, 0 replies; 387+ messages in thread
From: Elijah Newren @ 2018-05-04 16:30 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Git Mailing List, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi,
On Fri, May 4, 2018 at 9:21 AM, Elijah Newren <newren@gmail.com> wrote:
> On Fri, May 4, 2018 at 8:34 AM, Johannes Schindelin
> <johannes.schindelin@gmx.de> wrote:
>> The incredibly useful `git-tbdiff` tool to compare patch series (say, to see
>> what changed between two iterations sent to the Git mailing list) is slightly
>> less useful for this developer due to the fact that it requires the `hungarian`
>> and `numpy` Python packages which are for some reason really hard to build in
>> MSYS2. So hard that I even had to give up, because it was simply easier to
>> reimplement the whole shebang as a builtin command.
>
> tbdiff is awesome; thanks for bringing it in as a builtin to git.
>
> I've run through a few cases, comparing output of tbdiff and
> branch-diff. So far, what I've noted is that they produce largely the
> same output except that:
>
> - tbdiff seems to shorten shas to 7 characters, branch-diff is using
> 10, in git.git at least. (Probably a good change)
Sorry, a quick self-correction here:
tbdiff, when using an actual shortened sha, uses 10 characters. But
when a patch doesn't have a match, tbdiff seems to use seven dashes on
one side in lieu of a shortened sha, whereas branch-diff will use 10
characters whether it has an actual shortened sha or is just putting a
bunch of dashes there. So, this is definitely a good change.
> - tbdiff aligned output columns better when there were more than 9
> patches (I'll comment more on patch 09/18)
> - As noted elsewhere in the review of round 1, tbdiff uses difflib
> while branch-diff uses xdiff. I found some cases where that mattered,
> and in all of them, I either felt like the difference was irrelevant
> or that difflib was suboptimal, so this is definitely an improvement
> for me.
> - branch-diff produces its output faster, and it is automatically
> paged. This is really cool.
>
> Also, I don't have bash-completion for either tbdiff or branch-diff.
> :-( But I saw some discussion on the v1 patches about how this gets
> handled... :-)
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike
2018-05-04 16:21 ` [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike Elijah Newren
2018-05-04 16:30 ` Elijah Newren
@ 2018-05-05 20:03 ` Johannes Schindelin
2018-05-07 17:07 ` Elijah Newren
1 sibling, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-05 20:03 UTC (permalink / raw)
To: Elijah Newren
Cc: Git Mailing List, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Elijah,
On Fri, 4 May 2018, Elijah Newren wrote:
> On Fri, May 4, 2018 at 8:34 AM, Johannes Schindelin
> <johannes.schindelin@gmx.de> wrote:
> > The incredibly useful `git-tbdiff` tool to compare patch series (say, to see
> > what changed between two iterations sent to the Git mailing list) is slightly
> > less useful for this developer due to the fact that it requires the `hungarian`
> > and `numpy` Python packages which are for some reason really hard to build in
> > MSYS2. So hard that I even had to give up, because it was simply easier to
> > reimplement the whole shebang as a builtin command.
>
> tbdiff is awesome; thanks for bringing it in as a builtin to git.
You're welcome.
> I've run through a few cases, comparing output of tbdiff and
> branch-diff. So far, what I've noted is that they produce largely the
> same output except that:
>
> - tbdiff seems to shorten shas to 7 characters, branch-diff is using
> 10, in git.git at least. (Probably a good change)
Yes, it is a good change ;-)
> - tbdiff aligned output columns better when there were more than 9
> patches (I'll comment more on patch 09/18)
I added a new patch to align the patch numbers specifically. I considered
squashing it into 9/18, but decided against it: it will make it easier to
read through the rationale when calling `git annotate` on those lines.
> - As noted elsewhere in the review of round 1, tbdiff uses difflib
> while branch-diff uses xdiff. I found some cases where that mattered,
> and in all of them, I either felt like the difference was irrelevant
> or that difflib was suboptimal, so this is definitely an improvement
> for me.
Indeed. It is more or less ambiguities that get resolved differently.
> - branch-diff produces its output faster, and it is automatically
> paged. This is really cool.
:-)
It was actually the paging that made the most difference for me. The `git
tbdiff` command broke for me as soon as I switched on the pager, as tbdiff
got confused by the decoration (AEvar had put up a PR to fix that, but
that PR has not even so much as been answered in the meantime, so I
thought it'd be a good time to rewrite the entire shebang in C, also
because I could not use tbdiff *at all* on Windows due to its hefty
dependencies).
> Also, I don't have bash-completion for either tbdiff or branch-diff.
> :-( But I saw some discussion on the v1 patches about how this gets
> handled... :-)
Oh? Does 18/18 not work for you?
https://public-inbox.org/git/71698f11835311c103aae565a2a761d10f4676b9.1525448066.git.johannes.schindelin@gmx.de/
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike
2018-05-05 20:03 ` Johannes Schindelin
@ 2018-05-07 17:07 ` Elijah Newren
2018-05-07 17:50 ` SZEDER Gábor
0 siblings, 1 reply; 387+ messages in thread
From: Elijah Newren @ 2018-05-07 17:07 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Git Mailing List, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Dscho,
On Sat, May 5, 2018 at 1:03 PM, Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
> Hi Elijah,
>
> On Fri, 4 May 2018, Elijah Newren wrote:
>
<snip>
>> - tbdiff aligned output columns better when there were more than 9
>> patches (I'll comment more on patch 09/18)
>
> I added a new patch to align the patch numbers specifically. I considered
> squashing it into 9/18, but decided against it: it will make it easier to
> read through the rationale when calling `git annotate` on those lines.
Awesome, thanks.
<snip>
>> Also, I don't have bash-completion for either tbdiff or branch-diff.
>> :-( But I saw some discussion on the v1 patches about how this gets
>> handled... :-)
>
> Oh? Does 18/18 not work for you?
> https://public-inbox.org/git/71698f11835311c103aae565a2a761d10f4676b9.1525448066.git.johannes.schindelin@gmx.de/
It looks like it does work, in part, there were just two issues:
1) I apparently wasn't using all the nice improvements from the
completion script in my locally built git, but was instead still using
the one associated with my system-installed (and much older) git.
(Oops, my bad.)
2) Your completion commands for branch-diff will only complete one
revision range, not two. e.g.
git branch-diff origin/master..my-topic@{2} origin/master..my-top<tab>
won't complete "my-topic" as I'd expect.
Elijah
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike
2018-05-07 17:07 ` Elijah Newren
@ 2018-05-07 17:50 ` SZEDER Gábor
2018-05-07 18:38 ` Elijah Newren
0 siblings, 1 reply; 387+ messages in thread
From: SZEDER Gábor @ 2018-05-07 17:50 UTC (permalink / raw)
To: Elijah Newren
Cc: SZEDER Gábor, Johannes Schindelin, Git Mailing List,
Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
> 2) Your completion commands for branch-diff will only complete one
> revision range, not two. e.g.
> git branch-diff origin/master..my-topic@{2} origin/master..my-top<tab>
> won't complete "my-topic" as I'd expect.
It does complete two revision ranges, but if you want to look at
reflogs, then you must escape the opening curly brace. I'm not sure
why, but apparently after the unescaped '{' Bash thinks that it's a
new command, and doesn't even call our completion functions anymore.
It's not specific to the completion of 'branch-diff', or even to our
completion script. I don't think we can do anything about it.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike
2018-05-07 17:50 ` SZEDER Gábor
@ 2018-05-07 18:38 ` Elijah Newren
0 siblings, 0 replies; 387+ messages in thread
From: Elijah Newren @ 2018-05-07 18:38 UTC (permalink / raw)
To: SZEDER Gábor
Cc: Johannes Schindelin, Git Mailing List, Junio C Hamano,
Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
On Mon, May 7, 2018 at 10:50 AM, SZEDER Gábor <szeder.dev@gmail.com> wrote:
>> 2) Your completion commands for branch-diff will only complete one
>> revision range, not two. e.g.
>> git branch-diff origin/master..my-topic@{2} origin/master..my-top<tab>
>> won't complete "my-topic" as I'd expect.
>
> It does complete two revision ranges, but if you want to look at
> reflogs, then you must escape the opening curly brace. I'm not sure
> why, but apparently after the unescaped '{' Bash thinks that it's a
> new command, and doesn't even call our completion functions anymore.
> It's not specific to the completion of 'branch-diff', or even to our
> completion script. I don't think we can do anything about it.
Ah, indeed. Thanks for the pointer.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (18 preceding siblings ...)
2018-05-04 16:21 ` [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike Elijah Newren
@ 2018-05-06 5:22 ` Junio C Hamano
2018-05-06 12:23 ` Johannes Schindelin
2018-05-06 22:56 ` brian m. carlson
2018-07-03 11:26 ` [PATCH v3 00/20] Add `range-diff`, " Johannes Schindelin via GitGitGadget
21 siblings, 1 reply; 387+ messages in thread
From: Junio C Hamano @ 2018-05-06 5:22 UTC (permalink / raw)
To: Johannes Schindelin
Cc: git, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Johannes Schindelin <johannes.schindelin@gmx.de> writes:
> Johannes Schindelin (17):
> Add a function to solve least-cost assignment problems
> Add a new builtin: branch-diff
Perhaps retitling these to
hungarian: a function to solve least-cost assignment problems
branch-diff: a new builtin to compare iterations of a topic
may serve as good precedents to changes other people may later make
to these files. Especially the second one is already consistent
with the several changes that are listed below ;-)
> branch-diff: first rudimentary implementation
> branch-diff: improve the order of the shown commits
> branch-diff: also show the diff between patches
>...
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike
2018-05-06 5:22 ` Junio C Hamano
@ 2018-05-06 12:23 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-06 12:23 UTC (permalink / raw)
To: Junio C Hamano
Cc: git, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Junio,
On Sun, 6 May 2018, Junio C Hamano wrote:
> Johannes Schindelin <johannes.schindelin@gmx.de> writes:
>
> > Johannes Schindelin (17):
> > Add a function to solve least-cost assignment problems
> > Add a new builtin: branch-diff
>
> Perhaps retitling these to
>
> hungarian: a function to solve least-cost assignment problems
> branch-diff: a new builtin to compare iterations of a topic
>
> may serve as good precedents to changes other people may later make
> to these files. Especially the second one is already consistent
> with the several changes that are listed below ;-)
I like it! They are retitled locally, in preparation for whenever I send
out the next iteration.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (19 preceding siblings ...)
2018-05-06 5:22 ` Junio C Hamano
@ 2018-05-06 22:56 ` brian m. carlson
2018-05-07 2:05 ` Johannes Schindelin
2018-07-03 11:26 ` [PATCH v3 00/20] Add `range-diff`, " Johannes Schindelin via GitGitGadget
21 siblings, 1 reply; 387+ messages in thread
From: brian m. carlson @ 2018-05-06 22:56 UTC (permalink / raw)
To: Johannes Schindelin
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
[-- Attachment #1: Type: text/plain, Size: 861 bytes --]
On Fri, May 04, 2018 at 05:34:27PM +0200, Johannes Schindelin wrote:
> The incredibly useful `git-tbdiff` tool to compare patch series (say, to see
> what changed between two iterations sent to the Git mailing list) is slightly
> less useful for this developer due to the fact that it requires the `hungarian`
> and `numpy` Python packages which are for some reason really hard to build in
> MSYS2. So hard that I even had to give up, because it was simply easier to
> reimplement the whole shebang as a builtin command.
I just want to say thanks for writing this. I use tbdiff extensively at
work and having this built-in and much faster will really help.
I did a once-over of v1 and I'll probably take a look at v2 or v3
(whatever's the latest) later in the week.
--
brian m. carlson: Houston, Texas, US
OpenPGP: https://keybase.io/bk2204
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 867 bytes --]
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v2 00/18] Add `branch-diff`, a `tbdiff` lookalike
2018-05-06 22:56 ` brian m. carlson
@ 2018-05-07 2:05 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-05-07 2:05 UTC (permalink / raw)
To: brian m. carlson
Cc: git, Junio C Hamano, Thomas Rast, Thomas Gummerer,
Ævar Arnfjörð Bjarmason, Ramsay Jones,
Stefan Beller, Jacob Keller, Eric Sunshine
Hi Brian,
On Sun, 6 May 2018, brian m. carlson wrote:
> On Fri, May 04, 2018 at 05:34:27PM +0200, Johannes Schindelin wrote:
> > The incredibly useful `git-tbdiff` tool to compare patch series (say,
> > to see what changed between two iterations sent to the Git mailing
> > list) is slightly less useful for this developer due to the fact that
> > it requires the `hungarian` and `numpy` Python packages which are for
> > some reason really hard to build in MSYS2. So hard that I even had to
> > give up, because it was simply easier to reimplement the whole shebang
> > as a builtin command.
>
> I just want to say thanks for writing this. I use tbdiff extensively at
> work and having this built-in and much faster will really help.
>
> I did a once-over of v1 and I'll probably take a look at v2 or v3
> (whatever's the latest) later in the week.
Thank you so much!
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* [PATCH v3 00/20] Add `range-diff`, a `tbdiff` lookalike
2018-05-04 15:34 ` [PATCH v2 " Johannes Schindelin
` (20 preceding siblings ...)
2018-05-06 22:56 ` brian m. carlson
@ 2018-07-03 11:26 ` Johannes Schindelin via GitGitGadget
2018-04-30 21:54 ` [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems Johannes Schindelin via GitGitGadget
` (20 more replies)
21 siblings, 21 replies; 387+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2018-07-03 11:26 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano
The incredibly useful `git-tbdiff` tool to compare patch series (say, to see what changed between two iterations sent to the Git mailing list) is slightly less useful for this developer due to the fact that it requires the `hungarian` and `numpy` Python packages which are for some reason really hard to build in MSYS2. So hard that I even had to give up, because it was simply easier to reimplement the whole shebang as a builtin command.
The project at https://github.com/trast/tbdiff seems to be dormant, anyway. Funny (and true) story: I looked at the open Pull Requests to see how active that project is, only to find to my surprise that I had submitted one in August 2015, and that it was still unanswered, let alone merged.
While at it, I forward-ported AEvar's patch to force `--decorate=no` because `git -p tbdiff` would fail otherwise.
Side note: I work on implementing branch-diff not only to make life easier for reviewers who have to suffer through v2, v3, ... of my patch series, but also to verify my changes before submitting a new iteration. And also, maybe even more importantly, I plan to use it to verify my merging-rebases of Git for
Windows (for which I previously used to redirect the pre-rebase/post-rebase diffs vs upstream and then compare them using `git diff --no-index`). And of course any interested person can see what changes were necessary e.g. in the merging-rebase of Git for Windows onto v2.17.0 by running a command like:
```sh
base=^{/Start.the.merging-rebase}
tag=v2.17.0.windows.1
pre=$tag$base^2
git branch-diff --dual-color $pre$base..$pre $tag$base..$tag
```
The --dual-color mode will identify the many changes that are solely due to different diff context lines (where otherwise uncolored lines start with a background-colored -/+ marker), i.e. merge conflicts I had to resolve.
Changes since v2:
- Right-aligned the patch numbers in the commit pairs.
- Used ALLOC_ARRAY() in hungarian.c instead of xmalloc(sizeof()*size).
- Changed compute_assignment()'s return type from int to void, as it always succeeds.
- Changed the Hungarian Algorithm to use an integer cost matrix.
- Changed the --creation-weight <double> option to --creation-factor <percent> where <percent> is an integer.
- Retitled 1/19 and 2/19 to better conform with the current conventions, as pointed out (and suggested) by Junio.
- Shut up Coverity, and at the same time avoided passing the unnecessary `i` and `j` parameters to output_pair_header().
- Removed support for the `--no-patches` option: we inherit diff_options' support for `-s` already (and much more).
- Removed the ugly `_INV` enum values, and introduced a beautiful GIT_COLOR_REVERSE instead. This way, whatever the user configured as color.diff.new (or .old) will be used in reverse in the dual color mode.
- Instead of overriding the fragment header color, the dual color mode will now reverse the "outer" fragment headers, too.
- Turned the stand-alone branch-diff command into the `--diff` option of `git branch`. Adjusted pretty much *all* commit messages to account for this. This change should no longer be visible: see below.
- Pretty much re-wrote the completion, to support the new --diff mode of git-branch. See below: it was reverted for range-diff.
- Renamed t7910 to t3206, to be closer to the git-branch tests.
- Ensured that git_diff_ui_config() gets called, and therefore color.diff.* respected.
- Avoided leaking `four_spaces`.
- Fixed a declaration in a for (;;) statement (which Junio had as a fixup! that I almost missed).
- Renamed `branch --diff`, which had been renamed from `branch-diff` (which was picked to avoid re-using `tbdiff`) to `range-diff`.
- Renamed `hungarian.c` and its header to `linear-assignment.c` and `linear-assignment.h`, respectively.
- Made `--dual-color` the default, and changed it to still auto-detect whether color should be used rather than forcing it.
Johannes Schindelin (19):
linear-assignment: a function to solve least-cost assignment problems
Introduce `range-diff` to compare iterations of a topic branch
range-diff: first rudimentary implementation
range-diff: improve the order of the shown commits
range-diff: also show the diff between patches
range-diff: right-trim commit messages
range-diff: indent the diffs just like tbdiff
range-diff: suppress the diff headers
range-diff: adjust the output of the commit pairs
range-diff: do not show "function names" in hunk headers
range-diff: use color for the commit pairs
color: add the meta color GIT_COLOR_REVERSE
diff: add an internal option to dual-color diffs of diffs
range-diff: offer to dual-color the diffs
range-diff --dual-color: work around bogus white-space warning
range-diff: add a man page
completion: support `git range-diff`
range-diff: left-pad patch numbers
range-diff: make --dual-color the default mode
Thomas Rast (1):
range-diff: add tests
.gitignore | 1 +
Documentation/git-range-diff.txt | 238 ++++++++++
Makefile | 3 +
builtin.h | 1 +
builtin/range-diff.c | 104 +++++
color.h | 1 +
command-list.txt | 1 +
contrib/completion/git-completion.bash | 14 +
diff.c | 94 +++-
diff.h | 2 +
git.c | 1 +
linear-assignment.c | 203 +++++++++
linear-assignment.h | 22 +
range-diff.c | 437 ++++++++++++++++++
range-diff.h | 9 +
t/.gitattributes | 1 +
t/t3206-range-diff.sh | 145 ++++++
t/t3206/history.export | 604 +++++++++++++++++++++++++
18 files changed, 1865 insertions(+), 16 deletions(-)
create mode 100644 Documentation/git-range-diff.txt
create mode 100644 builtin/range-diff.c
create mode 100644 linear-assignment.c
create mode 100644 linear-assignment.h
create mode 100644 range-diff.c
create mode 100644 range-diff.h
create mode 100755 t/t3206-range-diff.sh
create mode 100644 t/t3206/history.export
base-commit: e3331758f12da22f4103eec7efe1b5304a9be5e9
Published-As: https://github.com/gitgitgadget/git/releases/tags/pr-1%2Fdscho%2Fbranch-diff-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1/dscho/branch-diff-v3
Pull-Request: https://github.com/gitgitgadget/git/pull/1
Range-diff vs v2:
1: 3f51970cb ! 1: 39272eefc Add a function to solve least-cost assignment problems
@@ -1,11 +1,17 @@
Author: Johannes Schindelin <johannes.schindelin@gmx.de>
- Add a function to solve least-cost assignment problems
+ linear-assignment: a function to solve least-cost assignment problems
- The Jonker-Volgenant algorithm was implemented to answer questions such
- as: given two different versions of a topic branch (or iterations of a
- patch series), what is the best pairing of commits/patches between the
- different versions?
+ The problem solved by the code introduced in this commit goes like this:
+ given two sets of items, and a cost matrix which says how much it
+ "costs" to assign any given item of the first set to any given item of
+ the second, assign all items (except when the sets have different size)
+ in the cheapest way.
+
+ We use the Jonker-Volgenant algorithm to solve the assignment problem to
+ answer questions such as: given two different versions of a topic branch
+ (or iterations of a patch series), what is the best pairing of
+ commits/patches between the different versions?
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
@@ -16,15 +22,15 @@
LIB_OBJS += graph.o
LIB_OBJS += grep.o
LIB_OBJS += hashmap.o
-+LIB_OBJS += hungarian.o
++LIB_OBJS += linear-assignment.o
LIB_OBJS += help.o
LIB_OBJS += hex.o
LIB_OBJS += ident.o
-diff --git a/hungarian.c b/hungarian.c
+diff --git a/linear-assignment.c b/linear-assignment.c
new file mode 100644
--- /dev/null
-+++ b/hungarian.c
++++ b/linear-assignment.c
@@
+/*
+ * Based on: Jonker, R., & Volgenant, A. (1987). <i>A shortest augmenting path
@@ -32,8 +38,7 @@
+ * 38(4), 325-340.
+ */
+#include "cache.h"
-+#include "hungarian.h"
-+#include <float.h>
++#include "linear-assignment.h"
+
+#define COST(column, row) cost[(column) + column_count * (row)]
+
@@ -41,15 +46,16 @@
+ * The parameter `cost` is the cost matrix: the cost to assign column j to row
+ * i is `cost[j + column_count * i].
+ */
-+int compute_assignment(int column_count, int row_count, double *cost,
-+ int *column2row, int *row2column)
++void compute_assignment(int column_count, int row_count, int *cost,
++ int *column2row, int *row2column)
+{
-+ double *v = xmalloc(sizeof(double) * column_count), *d;
++ int *v, *d;
+ int *free_row, free_count = 0, saved_free_count, *pred, *col;
+ int i, j, phase;
+
+ memset(column2row, -1, sizeof(int) * column_count);
+ memset(row2column, -1, sizeof(int) * row_count);
++ ALLOC_ARRAY(v, column_count);
+
+ /* column reduction */
+ for (j = column_count - 1; j >= 0; j--) {
@@ -71,15 +77,15 @@
+ }
+
+ /* reduction transfer */
-+ free_row = xmalloc(sizeof(int) * row_count);
-+ for (int i = 0; i < row_count; i++) {
++ ALLOC_ARRAY(free_row, row_count);
++ for (i = 0; i < row_count; i++) {
+ int j1 = row2column[i];
+ if (j1 == -1)
+ free_row[free_count++] = i;
+ else if (j1 < -1)
+ row2column[i] = -2 - j1;
+ else {
-+ double min = COST(!j1, i) - v[!j1];
++ int min = COST(!j1, i) - v[!j1];
+ for (j = 1; j < column_count; j++)
+ if (j != j1 && min > COST(j, i) - v[j])
+ min = COST(j, i) - v[j];
@@ -91,7 +97,7 @@
+ (column_count < row_count ? row_count - column_count : 0)) {
+ free(v);
+ free(free_row);
-+ return 0;
++ return;
+ }
+
+ /* augmenting row reduction */
@@ -101,15 +107,15 @@
+ saved_free_count = free_count;
+ free_count = 0;
+ while (k < saved_free_count) {
-+ double u1, u2;
++ int u1, u2;
+ int j1 = 0, j2, i0;
+
+ i = free_row[k++];
+ u1 = COST(j1, i) - v[j1];
+ j2 = -1;
-+ u2 = DBL_MAX;
++ u2 = INT_MAX;
+ for (j = 1; j < column_count; j++) {
-+ double c = COST(j, i) - v[j];
++ int c = COST(j, i) - v[j];
+ if (u2 > c) {
+ if (u1 < c) {
+ u2 = c;
@@ -148,12 +154,12 @@
+
+ /* augmentation */
+ saved_free_count = free_count;
-+ d = xmalloc(sizeof(double) * column_count);
-+ pred = xmalloc(sizeof(int) * column_count);
-+ col = xmalloc(sizeof(int) * column_count);
++ ALLOC_ARRAY(d, column_count);
++ ALLOC_ARRAY(pred, column_count);
++ ALLOC_ARRAY(col, column_count);
+ for (free_count = 0; free_count < saved_free_count; free_count++) {
+ int i1 = free_row[free_count], low = 0, up = 0, last, k;
-+ double min, c, u1;
++ int min, c, u1;
+
+ for (j = 0; j < column_count; j++) {
+ d[j] = COST(j, i1) - v[j];
@@ -228,14 +234,12 @@
+ free(d);
+ free(v);
+ free(free_row);
-+
-+ return 0;
+}
-diff --git a/hungarian.h b/hungarian.h
+diff --git a/linear-assignment.h b/linear-assignment.h
new file mode 100644
--- /dev/null
-+++ b/hungarian.h
++++ b/linear-assignment.h
@@
+#ifndef HUNGARIAN_H
+#define HUNGARIAN_H
@@ -252,7 +256,10 @@
+ * assignments (-1 for unassigned, which can happen only if column_count !=
+ * row_count).
+ */
-+int compute_assignment(int column_count, int row_count, double *cost,
-+ int *column2row, int *row2column);
++void compute_assignment(int column_count, int row_count, int *cost,
++ int *column2row, int *row2column);
++
++/* The maximal cost in the cost matrix (to prevent integer overflows). */
++#define COST_MAX (1<<16)
+
+#endif
2: a1ea0320b < -: --------- Add a new builtin: branch-diff
-: --------- > 2: 7f15b26d4 Introduce `range-diff` to compare iterations of a topic branch
3: e530e450e ! 3: 076e1192d branch-diff: first rudimentary implementation
@@ -1,46 +1,117 @@
Author: Johannes Schindelin <johannes.schindelin@gmx.de>
- branch-diff: first rudimentary implementation
+ range-diff: first rudimentary implementation
- At this stage, `git branch-diff` can determine corresponding commits of
- two related commit ranges. This makes use of the recently introduced
+ At this stage, `git range-diff` can determine corresponding commits
+ of two related commit ranges. This makes use of the recently introduced
implementation of the Hungarian algorithm.
The core of this patch is a straight port of the ideas of tbdiff, the
- seemingly dormant project at https://github.com/trast/tbdiff.
+ apparently dormant project at https://github.com/trast/tbdiff.
The output does not at all match `tbdiff`'s output yet, as this patch
really concentrates on getting the patch matching part right.
- Note: due to differences in the diff algorithm (`tbdiff` uses the
- Python module `difflib`, Git uses its xdiff fork), the cost matrix
- calculated by `branch-diff` is different (but very similar) to the one
- calculated by `tbdiff`. Therefore, it is possible that they find
- different matching commits in corner cases (e.g. when a patch was split
- into two patches of roughly equal length).
+ Note: due to differences in the diff algorithm (`tbdiff` uses the Python
+ module `difflib`, Git uses its xdiff fork), the cost matrix calculated
+ by `range-diff` is different (but very similar) to the one calculated
+ by `tbdiff`. Therefore, it is possible that they find different matching
+ commits in corner cases (e.g. when a patch was split into two patches of
+ roughly equal length).
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
-diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
---- a/builtin/branch-diff.c
-+++ b/builtin/branch-diff.c
+diff --git a/Makefile b/Makefile
+--- a/Makefile
++++ b/Makefile
+@@
+ LIB_OBJS += prompt.o
+ LIB_OBJS += protocol.o
+ LIB_OBJS += quote.o
++LIB_OBJS += range-diff.o
+ LIB_OBJS += reachable.o
+ LIB_OBJS += read-cache.o
+ LIB_OBJS += reflog-walk.o
+
+diff --git a/builtin/range-diff.c b/builtin/range-diff.c
+--- a/builtin/range-diff.c
++++ b/builtin/range-diff.c
@@
#include "cache.h"
#include "builtin.h"
#include "parse-options.h"
++#include "range-diff.h"
+
+ static const char * const builtin_range_diff_usage[] = {
+ N_("git range-diff [<options>] <old-base>..<old-tip> <new-base>..<new-tip>"),
+@@
+ N_("Percentage by which creation is weighted")),
+ OPT_END()
+ };
++ int res = 0;
++ struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT;
+
+- argc = parse_options(argc, argv, NULL, options,
+- builtin_range_diff_usage, 0);
++ argc = parse_options(argc, argv, NULL, options, builtin_range_diff_usage,
++ 0);
+
+- return 0;
++ if (argc == 2) {
++ if (!strstr(argv[0], ".."))
++ warning(_("no .. in range: '%s'"), argv[0]);
++ strbuf_addstr(&range1, argv[0]);
++
++ if (!strstr(argv[1], ".."))
++ warning(_("no .. in range: '%s'"), argv[1]);
++ strbuf_addstr(&range2, argv[1]);
++ } else if (argc == 3) {
++ strbuf_addf(&range1, "%s..%s", argv[0], argv[1]);
++ strbuf_addf(&range2, "%s..%s", argv[0], argv[2]);
++ } else if (argc == 1) {
++ const char *b = strstr(argv[0], "..."), *a = argv[0];
++ int a_len;
++
++ if (!b)
++ die(_("single arg format requires a symmetric range"));
++
++ a_len = (int)(b - a);
++ if (!a_len) {
++ a = "HEAD";
++ a_len = strlen(a);
++ }
++ b += 3;
++ if (!*b)
++ b = "HEAD";
++ strbuf_addf(&range1, "%s..%.*s", b, a_len, a);
++ strbuf_addf(&range2, "%.*s..%s", a_len, a, b);
++ } else {
++ error(_("need two commit ranges"));
++ usage_with_options(builtin_range_diff_usage, options);
++ }
++
++ res = show_range_diff(range1.buf, range2.buf, creation_factor);
++
++ strbuf_release(&range1);
++ strbuf_release(&range2);
++
++ return res;
+ }
+
+diff --git a/range-diff.c b/range-diff.c
+new file mode 100644
+--- /dev/null
++++ b/range-diff.c
+@@
++#include "cache.h"
++#include "range-diff.h"
+#include "string-list.h"
+#include "run-command.h"
+#include "argv-array.h"
+#include "hashmap.h"
+#include "xdiff-interface.h"
-+#include "hungarian.h"
-
- static const char * const builtin_branch_diff_usage[] = {
- N_("git branch-diff [<options>] <old-base>..<old-tip> <new-base>..<new-tip>"),
-@@
- return 0;
- }
-
++#include "linear-assignment.h"
++
+struct patch_util {
+ /* For the search for an exact match */
+ struct hashmap_entry e;
@@ -219,16 +290,19 @@
+ return count;
+
+ error(_("failed to generate diff"));
-+ return INT_MAX;
++ return COST_MAX;
+}
+
-+static int get_correspondences(struct string_list *a, struct string_list *b,
-+ double creation_weight)
++static void get_correspondences(struct string_list *a, struct string_list *b,
++ int creation_factor)
+{
+ int n = a->nr + b->nr;
-+ double *cost = xmalloc(sizeof(double) * n * n), c;
-+ int *a2b = xmalloc(sizeof(int) * n), *b2a = xmalloc(sizeof(int) * n);
-+ int i, j, res;
++ int *cost, c, *a2b, *b2a;
++ int i, j;
++
++ ALLOC_ARRAY(cost, st_mult(n, n));
++ ALLOC_ARRAY(a2b, n);
++ ALLOC_ARRAY(b2a, n);
+
+ for (i = 0; i < a->nr; i++) {
+ struct patch_util *a_util = a->items[i].util;
@@ -241,12 +315,12 @@
+ else if (a_util->matching < 0 && b_util->matching < 0)
+ c = diffsize(a_util->diff, b_util->diff);
+ else
-+ c = INT_MAX;
++ c = COST_MAX;
+ cost[i + n * j] = c;
+ }
+
+ c = a_util->matching < 0 ?
-+ a_util->diffsize * creation_weight : INT_MAX;
++ a_util->diffsize * creation_factor / 100 : COST_MAX;
+ for (j = b->nr; j < n; j++)
+ cost[i + n * j] = c;
+ }
@@ -255,7 +329,7 @@
+ struct patch_util *util = b->items[j].util;
+
+ c = util->matching < 0 ?
-+ util->diffsize * creation_weight : INT_MAX;
++ util->diffsize * creation_factor / 100 : COST_MAX;
+ for (i = a->nr; i < n; i++)
+ cost[i + n * j] = c;
+ }
@@ -264,7 +338,7 @@
+ for (j = b->nr; j < n; j++)
+ cost[i + n * j] = 0;
+
-+ res = compute_assignment(n, n, cost, a2b, b2a);
++ compute_assignment(n, n, cost, a2b, b2a);
+
+ for (i = 0; i < a->nr; i++)
+ if (a2b[i] >= 0 && a2b[i] < b->nr) {
@@ -278,8 +352,6 @@
+ free(cost);
+ free(a2b);
+ free(b2a);
-+
-+ return res;
+}
+
+static const char *short_oid(struct patch_util *util)
@@ -314,71 +386,40 @@
+ }
+}
+
- int cmd_branch_diff(int argc, const char **argv, const char *prefix)
- {
- double creation_weight = 0.6;
-@@
- 0, parse_creation_weight },
- OPT_END()
- };
++int show_range_diff(const char *range1, const char *range2,
++ int creation_factor)
++{
+ int res = 0;
-+ struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT;
++
+ struct string_list branch1 = STRING_LIST_INIT_DUP;
+ struct string_list branch2 = STRING_LIST_INIT_DUP;
-
- argc = parse_options(argc, argv, NULL, options,
- builtin_branch_diff_usage, 0);
-
-- return 0;
-+ if (argc == 2) {
-+ if (!strstr(argv[0], ".."))
-+ warning(_("no .. in range: '%s'"), argv[0]);
-+ strbuf_addstr(&range1, argv[0]);
-+
-+ if (!strstr(argv[1], ".."))
-+ warning(_("no .. in range: '%s'"), argv[1]);
-+ strbuf_addstr(&range2, argv[1]);
-+ } else if (argc == 3) {
-+ strbuf_addf(&range1, "%s..%s", argv[0], argv[1]);
-+ strbuf_addf(&range2, "%s..%s", argv[0], argv[2]);
-+ } else if (argc == 1) {
-+ const char *b = strstr(argv[0], "..."), *a = argv[0];
-+ int a_len;
+
-+ if (!b)
-+ die(_("single arg format requires a symmetric range"));
-+
-+ a_len = (int)(b - a);
-+ if (!a_len) {
-+ a = "HEAD";
-+ a_len = strlen(a);
-+ }
-+ b += 3;
-+ if (!*b)
-+ b = "HEAD";
-+ strbuf_addf(&range1, "%s..%.*s", b, a_len, a);
-+ strbuf_addf(&range2, "%.*s..%s", a_len, a, b);
-+ } else {
-+ error(_("need two commit ranges"));
-+ usage_with_options(builtin_branch_diff_usage, options);
-+ }
-+
-+ if (read_patches(range1.buf, &branch1))
-+ res = error(_("could not parse log for '%s'"), range1.buf);
-+ if (!res && read_patches(range2.buf, &branch2))
-+ res = error(_("could not parse log for '%s'"), range2.buf);
++ if (read_patches(range1, &branch1))
++ res = error(_("could not parse log for '%s'"), range1);
++ if (!res && read_patches(range2, &branch2))
++ res = error(_("could not parse log for '%s'"), range2);
+
+ if (!res) {
+ find_exact_matches(&branch1, &branch2);
-+ res = get_correspondences(&branch1, &branch2, creation_weight);
-+ if (!res)
-+ output(&branch1, &branch2);
++ get_correspondences(&branch1, &branch2, creation_factor);
++ output(&branch1, &branch2);
+ }
+
-+ strbuf_release(&range1);
-+ strbuf_release(&range2);
+ string_list_clear(&branch1, 1);
+ string_list_clear(&branch2, 1);
+
-+ return !!res;
- }
++ return res;
++}
+
+diff --git a/range-diff.h b/range-diff.h
+new file mode 100644
+--- /dev/null
++++ b/range-diff.h
+@@
++#ifndef BRANCH_DIFF_H
++#define BRANCH_DIFF_H
++
++int show_range_diff(const char *range1, const char *range2,
++ int creation_factor);
++
++#endif
4: 3032e2709 ! 4: e98489c8c branch-diff: improve the order of the shown commits
@@ -1,13 +1,13 @@
Author: Johannes Schindelin <johannes.schindelin@gmx.de>
- branch-diff: improve the order of the shown commits
+ range-diff: improve the order of the shown commits
- This patch lets branch-diff use the same order as tbdiff.
+ This patch lets `git range-diff` use the same order as tbdiff.
The idea is simple: for left-to-right readers, it is natural to assume
- that the branch-diff is performed between an older vs a newer version of
- the branch. As such, the user is probably more interested in the
- question "where did this come from?" rather than "where did that one
+ that the `git range-diff` is performed between an older vs a newer
+ version of the branch. As such, the user is probably more interested in
+ the question "where did this come from?" rather than "where did that one
go?".
To that end, we list the commits in the order of the second commit range
@@ -16,9 +16,9 @@
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
-diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
---- a/builtin/branch-diff.c
-+++ b/builtin/branch-diff.c
+diff --git a/range-diff.c b/range-diff.c
+--- a/range-diff.c
++++ b/range-diff.c
@@
struct hashmap_entry e;
const char *diff, *patch;
5: 12d9c7977 < -: --------- branch-diff: also show the diff between patches
-: --------- > 5: 935cad180 range-diff: also show the diff between patches
6: 53ee6ba38 ! 6: 93ac1931d branch-diff: right-trim commit messages
@@ -1,6 +1,6 @@
Author: Johannes Schindelin <johannes.schindelin@gmx.de>
- branch-diff: right-trim commit messages
+ range-diff: right-trim commit messages
When comparing commit messages, we need to keep in mind that they are
indented by four spaces. That is, empty lines are no longer empty, but
@@ -9,13 +9,13 @@
Let's just right-trim the lines in the commit message, it's not like
trailing white-space in the commit messages are important enough to care
- about in branch-diff.
+ about in `git range-diff`.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
-diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
---- a/builtin/branch-diff.c
-+++ b/builtin/branch-diff.c
+diff --git a/range-diff.c b/range-diff.c
+--- a/range-diff.c
++++ b/range-diff.c
@@
strbuf_addbuf(&buf, &line);
strbuf_addstr(&buf, "\n\n");
7: c856c460a < -: --------- branch-diff: indent the diffs just like tbdiff
-: --------- > 7: ca5282815 range-diff: indent the diffs just like tbdiff
8: 35a9681a1 ! 8: 80622685f branch-diff: suppress the diff headers
@@ -1,6 +1,6 @@
Author: Johannes Schindelin <johannes.schindelin@gmx.de>
- branch-diff: suppress the diff headers
+ range-diff: suppress the diff headers
When showing the diff between corresponding patches of the two branch
versions, we have to make up a fake filename to run the diff machinery.
@@ -10,9 +10,9 @@
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
-diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
---- a/builtin/branch-diff.c
-+++ b/builtin/branch-diff.c
+diff --git a/builtin/range-diff.c b/builtin/range-diff.c
+--- a/builtin/range-diff.c
++++ b/builtin/range-diff.c
@@
diff_setup(&diffopt);
9: 0e4c8279e ! 9: 6b31cbf72 branch-diff: adjust the output of the commit pairs
@@ -1,33 +1,33 @@
Author: Johannes Schindelin <johannes.schindelin@gmx.de>
- branch-diff: adjust the output of the commit pairs
+ range-diff: adjust the output of the commit pairs
- This change brings branch-diff yet another step closer to feature parity
- with tbdiff: it now shows the oneline, too, and indicates with `=` when
- the commits have identical diffs.
+ This change brings `git range-diff` yet another step closer to
+ feature parity with tbdiff: it now shows the oneline, too, and indicates
+ with `=` when the commits have identical diffs.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
-diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
---- a/builtin/branch-diff.c
-+++ b/builtin/branch-diff.c
+diff --git a/range-diff.c b/range-diff.c
+--- a/range-diff.c
++++ b/range-diff.c
@@
- #include "hungarian.h"
- #include "diff.h"
+ #include "xdiff-interface.h"
+ #include "linear-assignment.h"
#include "diffcore.h"
+#include "commit.h"
+#include "pretty.h"
- static const char * const builtin_branch_diff_usage[] = {
- N_("git branch-diff [<options>] <old-base>..<old-tip> <new-base>..<new-tip>"),
+ struct patch_util {
+ /* For the search for an exact match */
@@
- return res;
+ free(b2a);
}
-static const char *short_oid(struct patch_util *util)
+static void output_pair_header(struct strbuf *buf,
-+ int i, struct patch_util *a_util,
-+ int j, struct patch_util *b_util)
++ struct patch_util *a_util,
++ struct patch_util *b_util)
{
- return find_unique_abbrev(&util->oid, DEFAULT_ABBREV);
+ static char *dashes;
@@ -43,25 +43,25 @@
+ }
+
+ strbuf_reset(buf);
-+ if (i < 0)
++ if (!a_util)
+ strbuf_addf(buf, "-: %s ", dashes);
+ else
-+ strbuf_addf(buf, "%d: %s ", i + 1,
++ strbuf_addf(buf, "%d: %s ", a_util->i + 1,
+ find_unique_abbrev(&a_util->oid, DEFAULT_ABBREV));
+
-+ if (i < 0)
++ if (!a_util)
+ strbuf_addch(buf, '>');
-+ else if (j < 0)
++ else if (!b_util)
+ strbuf_addch(buf, '<');
+ else if (strcmp(a_util->patch, b_util->patch))
+ strbuf_addch(buf, '!');
+ else
+ strbuf_addch(buf, '=');
+
-+ if (j < 0)
++ if (!b_util)
+ strbuf_addf(buf, " -: %s", dashes);
+ else
-+ strbuf_addf(buf, " %d: %s", j + 1,
++ strbuf_addf(buf, " %d: %s", b_util->i + 1,
+ find_unique_abbrev(&b_util->oid, DEFAULT_ABBREV));
+
+ commit = lookup_commit_reference(oid);
@@ -79,7 +79,7 @@
+ fwrite(buf->buf, buf->len, 1, stdout);
}
- static struct strbuf *output_prefix_cb(struct diff_options *opt, void *data)
+ static struct diff_filespec *get_filespec(const char *name, const char *p)
@@
static void output(struct string_list *a, struct string_list *b,
struct diff_options *diffopt)
@@ -94,7 +94,7 @@
if (i < a->nr && a_util->matching < 0) {
- printf("%d: %s < -: --------\n",
- i + 1, short_oid(a_util));
-+ output_pair_header(&buf, i, a_util, -1, NULL);
++ output_pair_header(&buf, a_util, NULL);
i++;
continue;
}
@@ -103,7 +103,7 @@
while (j < b->nr && b_util->matching < 0) {
- printf("-: -------- > %d: %s\n",
- j + 1, short_oid(b_util));
-+ output_pair_header(&buf, -1, NULL, j, b_util);
++ output_pair_header(&buf, NULL, b_util);
b_util = ++j < b->nr ? b->items[j].util : NULL;
}
@@ -113,8 +113,7 @@
- printf("%d: %s ! %d: %s\n",
- b_util->matching + 1, short_oid(a_util),
- j + 1, short_oid(b_util));
-+ output_pair_header(&buf,
-+ b_util->matching, a_util, j, b_util);
++ output_pair_header(&buf, a_util, b_util);
if (!(diffopt->output_format & DIFF_FORMAT_NO_OUTPUT))
patch_diff(a->items[b_util->matching].string,
b->items[j].string, diffopt);
@@ -125,4 +124,4 @@
+ strbuf_release(&buf);
}
- int cmd_branch_diff(int argc, const char **argv, const char *prefix)
+ int show_range_diff(const char *range1, const char *range2,
10: 2695a6abc ! 10: ef997bb8b branch-diff: do not show "function names" in hunk headers
@@ -1,25 +1,25 @@
Author: Johannes Schindelin <johannes.schindelin@gmx.de>
- branch-diff: do not show "function names" in hunk headers
+ range-diff: do not show "function names" in hunk headers
We are comparing complete, formatted commit messages with patches. There
are no function names here, so stop looking for them.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
-diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
---- a/builtin/branch-diff.c
-+++ b/builtin/branch-diff.c
+diff --git a/range-diff.c b/range-diff.c
+--- a/range-diff.c
++++ b/range-diff.c
@@
#include "diffcore.h"
#include "commit.h"
#include "pretty.h"
+#include "userdiff.h"
- static const char * const builtin_branch_diff_usage[] = {
- N_("git branch-diff [<options>] <old-base>..<old-tip> <new-base>..<new-tip>"),
+ struct patch_util {
+ /* For the search for an exact match */
@@
- return data;
+ fwrite(buf->buf, buf->len, 1, stdout);
}
+static struct userdiff_driver no_func_name = {
11: 313beeed3 ! 11: 3d9e5b0ba branch-diff: add tests
@@ -1,11 +1,12 @@
Author: Thomas Rast <tr@thomasrast.ch>
- branch-diff: add tests
+ range-diff: add tests
These are essentially lifted from https://github.com/trast/tbdiff, with
- light touch-ups to account for the new command name.
+ light touch-ups to account for the command now being an option of `git
+ branch`.
- Apart from renaming `tbdiff` to `branch-diff`, only one test case needed
+ Apart from renaming `tbdiff` to `range-diff`, only one test case needed
to be adjusted: 11 - 'changed message'.
The underlying reason it had to be adjusted is that diff generation is
@@ -28,26 +29,27 @@
/t8005/*.txt eol=lf
/t9*/*.dump eol=lf
-diff --git a/t/t7910-branch-diff.sh b/t/t7910-branch-diff.sh
+diff --git a/t/t3206-range-diff.sh b/t/t3206-range-diff.sh
new file mode 100755
--- /dev/null
-+++ b/t/t7910-branch-diff.sh
++++ b/t/t3206-range-diff.sh
@@
+#!/bin/sh
+
-+test_description='branch-diff tests'
++test_description='range-diff tests'
+
+. ./test-lib.sh
+
-+# Note that because of git-branch-diff's heuristics, test_commit does more
++# Note that because of the range-diff's heuristics, test_commit does more
+# harm than good. We need some real history.
+
+test_expect_success 'setup' '
-+ git fast-import < "$TEST_DIRECTORY"/t7910/history.export
++ git fast-import < "$TEST_DIRECTORY"/t3206/history.export
+'
+
+test_expect_success 'simple A..B A..C (unmodified)' '
-+ git branch-diff --no-color master..topic master..unmodified >actual &&
++ git range-diff --no-color master..topic master..unmodified \
++ >actual &&
+ cat >expected <<-EOF &&
+ 1: 4de457d = 1: 35b9b25 s/5/A/
+ 2: fccce22 = 2: de345ab s/4/A/
@@ -58,19 +60,19 @@
+'
+
+test_expect_success 'simple B...C (unmodified)' '
-+ git branch-diff --no-color topic...unmodified >actual &&
++ git range-diff --no-color topic...unmodified >actual &&
+ # same "expected" as above
+ test_cmp expected actual
+'
+
+test_expect_success 'simple A B C (unmodified)' '
-+ git branch-diff --no-color master topic unmodified >actual &&
++ git range-diff --no-color master topic unmodified >actual &&
+ # same "expected" as above
+ test_cmp expected actual
+'
+
+test_expect_success 'trivial reordering' '
-+ git branch-diff --no-color master topic reordered >actual &&
++ git range-diff --no-color master topic reordered >actual &&
+ cat >expected <<-EOF &&
+ 1: 4de457d = 1: aca177a s/5/A/
+ 3: 147e64e = 2: 14ad629 s/11/B/
@@ -81,7 +83,7 @@
+'
+
+test_expect_success 'removed a commit' '
-+ git branch-diff --no-color master topic removed >actual &&
++ git range-diff --no-color master topic removed >actual &&
+ cat >expected <<-EOF &&
+ 1: 4de457d = 1: 7657159 s/5/A/
+ 2: fccce22 < -: ------- s/4/A/
@@ -92,7 +94,7 @@
+'
+
+test_expect_success 'added a commit' '
-+ git branch-diff --no-color master topic added >actual &&
++ git range-diff --no-color master topic added >actual &&
+ cat >expected <<-EOF &&
+ 1: 4de457d = 1: 2716022 s/5/A/
+ 2: fccce22 = 2: b62accd s/4/A/
@@ -104,7 +106,7 @@
+'
+
+test_expect_success 'new base, A B C' '
-+ git branch-diff --no-color master topic rebased >actual &&
++ git range-diff --no-color master topic rebased >actual &&
+ cat >expected <<-EOF &&
+ 1: 4de457d = 1: cc9c443 s/5/A/
+ 2: fccce22 = 2: c5d9641 s/4/A/
@@ -116,7 +118,7 @@
+
+test_expect_success 'new base, B...C' '
+ # this syntax includes the commits from master!
-+ git branch-diff --no-color topic...rebased >actual &&
++ git range-diff --no-color topic...rebased >actual &&
+ cat >expected <<-EOF &&
+ -: ------- > 1: a31b12e unrelated
+ 1: 4de457d = 2: cc9c443 s/5/A/
@@ -128,7 +130,7 @@
+'
+
+test_expect_success 'changed commit' '
-+ git branch-diff --no-color topic...changed >actual &&
++ git range-diff --no-color topic...changed >actual &&
+ cat >expected <<-EOF &&
+ 1: 4de457d = 1: a4b3333 s/5/A/
+ 2: fccce22 = 2: f51d370 s/4/A/
@@ -157,7 +159,7 @@
+'
+
+test_expect_success 'changed message' '
-+ git branch-diff --no-color topic...changed-message >actual &&
++ git range-diff --no-color topic...changed-message >actual &&
+ sed s/Z/\ /g >expected <<-EOF &&
+ 1: 4de457d = 1: f686024 s/5/A/
+ 2: fccce22 ! 2: 4ab067d s/4/A/
@@ -178,10 +180,10 @@
+
+test_done
-diff --git a/t/t7910/history.export b/t/t7910/history.export
+diff --git a/t/t3206/history.export b/t/t3206/history.export
new file mode 100644
--- /dev/null
-+++ b/t/t7910/history.export
++++ b/t/t3206/history.export
@@
+blob
+mark :1
12: ba4791918 ! 12: 7273cc647 branch-diff: use color for the commit pairs
@@ -1,30 +1,28 @@
Author: Johannes Schindelin <johannes.schindelin@gmx.de>
- branch-diff: use color for the commit pairs
+ range-diff: use color for the commit pairs
- Arguably the most important part of branch-diff's output is the list of
- commits in the two branches, together with their relationships.
+ Arguably the most important part of `git range-diff`'s output is the
+ list of commits in the two branches, together with their relationships.
For that reason, tbdiff introduced color-coding that is pretty
intuitive, especially for unchanged patches (all dim yellow, like the
first line in `git show`'s output) vs modified patches (old commit is
red, new commit is green). Let's imitate that color scheme.
- While at it, also copy tbdiff's change of the fragment color to magenta.
-
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
-diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
---- a/builtin/branch-diff.c
-+++ b/builtin/branch-diff.c
+diff --git a/range-diff.c b/range-diff.c
+--- a/range-diff.c
++++ b/range-diff.c
@@
- return res;
+ free(b2a);
}
-static void output_pair_header(struct strbuf *buf,
+static void output_pair_header(struct diff_options *diffopt, struct strbuf *buf,
- int i, struct patch_util *a_util,
- int j, struct patch_util *b_util)
+ struct patch_util *a_util,
+ struct patch_util *b_util)
{
static char *dashes;
struct object_id *oid = a_util ? &a_util->oid : &b_util->oid;
@@ -42,10 +40,10 @@
*p = '-';
}
-+ if (j < 0) {
++ if (!b_util) {
+ color = color_old;
+ status = '<';
-+ } else if (i < 0) {
++ } else if (!a_util) {
+ color = color_new;
+ status = '>';
+ } else if (strcmp(a_util->patch, b_util->patch)) {
@@ -58,15 +56,15 @@
+
strbuf_reset(buf);
+ strbuf_addstr(buf, status == '!' ? color_old : color);
- if (i < 0)
+ if (!a_util)
strbuf_addf(buf, "-: %s ", dashes);
else
- strbuf_addf(buf, "%d: %s ", i + 1,
+ strbuf_addf(buf, "%d: %s ", a_util->i + 1,
find_unique_abbrev(&a_util->oid, DEFAULT_ABBREV));
-- if (i < 0)
+- if (!a_util)
- strbuf_addch(buf, '>');
-- else if (j < 0)
+- else if (!b_util)
- strbuf_addch(buf, '<');
- else if (strcmp(a_util->patch, b_util->patch))
- strbuf_addch(buf, '!');
@@ -78,7 +76,7 @@
+ if (status == '!')
+ strbuf_addf(buf, "%s%s", color_reset, color_new);
- if (j < 0)
+ if (!b_util)
strbuf_addf(buf, " -: %s", dashes);
@@
const char *commit_buffer = get_commit_buffer(commit, NULL);
@@ -101,33 +99,24 @@
/* Show unmatched LHS commit whose predecessors were shown. */
if (i < a->nr && a_util->matching < 0) {
-- output_pair_header(&buf, i, a_util, -1, NULL);
-+ output_pair_header(diffopt, &buf, i, a_util, -1, NULL);
+- output_pair_header(&buf, a_util, NULL);
++ output_pair_header(diffopt, &buf, a_util, NULL);
i++;
continue;
}
/* Show unmatched RHS commits. */
while (j < b->nr && b_util->matching < 0) {
-- output_pair_header(&buf, -1, NULL, j, b_util);
-+ output_pair_header(diffopt, &buf, -1, NULL, j, b_util);
+- output_pair_header(&buf, NULL, b_util);
++ output_pair_header(diffopt, &buf, NULL, b_util);
b_util = ++j < b->nr ? b->items[j].util : NULL;
}
/* Show matching LHS/RHS pair. */
if (j < b->nr) {
a_util = a->items[b_util->matching].util;
-- output_pair_header(&buf,
-+ output_pair_header(diffopt, &buf,
- b_util->matching, a_util, j, b_util);
+- output_pair_header(&buf, a_util, b_util);
++ output_pair_header(diffopt, &buf, a_util, b_util);
if (!(diffopt->output_format & DIFF_FORMAT_NO_OUTPUT))
patch_diff(a->items[b_util->matching].string,
-@@
- struct string_list branch1 = STRING_LIST_INIT_DUP;
- struct string_list branch2 = STRING_LIST_INIT_DUP;
-
-+ git_diff_basic_config("diff.color.frag", "magenta", NULL);
-+
- diff_setup(&diffopt);
- diffopt.output_format = DIFF_FORMAT_PATCH;
- diffopt.flags.suppress_diff_headers = 1;
+ b->items[j].string, diffopt);
13: 1ebbe3595 < -: --------- color: provide inverted colors, too
-: --------- > 13: 96a3073fb color: add the meta color GIT_COLOR_REVERSE
14: ae0ea5dfc ! 14: 6be4baf60 diff: add an internal option to dual-color diffs of diffs
@@ -14,7 +14,8 @@
now.
This is a feature that was invented by git-tbdiff, and it will be used
- in `branch-diff` in the next commit.
+ by `git range-diff` in the next commit, by offering it via a new option:
+ `--dual-color`.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
@@ -22,26 +23,15 @@
--- a/diff.c
+++ b/diff.c
@@
- GIT_COLOR_BOLD_YELLOW, /* NEW_MOVED ALTERNATIVE */
- GIT_COLOR_FAINT, /* NEW_MOVED_DIM */
- GIT_COLOR_FAINT_ITALIC, /* NEW_MOVED_ALTERNATIVE_DIM */
-+ GIT_COLOR_INV_RED, /* OLD_INV */
-+ GIT_COLOR_INV_GREEN, /* NEW_INV */
- };
-
- static NORETURN void die_want_option(const char *option_name)
-@@
- return DIFF_FILE_NEW_MOVED_DIM;
- if (!strcasecmp(var, "newmovedalternativedimmed"))
- return DIFF_FILE_NEW_MOVED_ALT_DIM;
-+ if (!strcasecmp(var, "oldinv"))
-+ return DIFF_FILE_OLD_INV;
-+ if (!strcasecmp(var, "newinv"))
-+ return DIFF_FILE_NEW_INV;
- return -1;
+ ecbdata->blank_at_eof_in_postimage = (at - l2) + 1;
}
-@@
+-static void emit_line_0(struct diff_options *o, const char *set, const char *reset,
++static void emit_line_0(struct diff_options *o,
++ const char *set, unsigned reverse, const char *reset,
+ int first, const char *line, int len)
+ {
+ int has_trailing_newline, has_trailing_carriage_return;
int nofirst;
FILE *file = o->file;
@@ -54,8 +44,11 @@
if (len == 0) {
has_trailing_newline = (first == '\n');
@@
+ }
if (len || !nofirst) {
++ if (reverse && want_color(o->use_color))
++ fputs(GIT_COLOR_REVERSE, file);
fputs(set, file);
- if (!nofirst)
+ if (first && !nofirst)
@@ -63,6 +56,15 @@
fwrite(line, len, 1, file);
fputs(reset, file);
@@
+ static void emit_line(struct diff_options *o, const char *set, const char *reset,
+ const char *line, int len)
+ {
+- emit_line_0(o, set, reset, line[0], line+1, len-1);
++ emit_line_0(o, set, 0, reset, line[0], line+1, len-1);
+ }
+
+ enum diff_symbol {
+@@
static void emit_line_ws_markup(struct diff_options *o,
const char *set, const char *reset,
@@ -77,20 +79,24 @@
}
- if (!ws)
-+ if (!ws && set_sign == set)
- emit_line_0(o, set, reset, sign, line, len);
+- emit_line_0(o, set, reset, sign, line, len);
- else if (blank_at_eof)
++ if (!ws && !set_sign)
++ emit_line_0(o, set, 0, reset, sign, line, len);
+ else if (!ws) {
+ /* Emit just the prefix, then the rest. */
-+ emit_line_0(o, set_sign, reset, sign, "", 0);
-+ emit_line_0(o, set, reset, 0, line, len);
++ emit_line_0(o, set_sign ? set_sign : set, !!set_sign, reset,
++ sign, "", 0);
++ emit_line_0(o, set, 0, reset, 0, line, len);
+ } else if (blank_at_eof)
/* Blank line at EOF - paint '+' as well */
- emit_line_0(o, ws, reset, sign, line, len);
+- emit_line_0(o, ws, reset, sign, line, len);
++ emit_line_0(o, ws, 0, reset, sign, line, len);
else {
/* Emit just the prefix, then the rest. */
- emit_line_0(o, set, reset, sign, "", 0);
-+ emit_line_0(o, set_sign, reset, sign, "", 0);
++ emit_line_0(o, set_sign ? set_sign : set, !!set_sign, reset,
++ sign, "", 0);
ws_check_emit(line, len, ws_rule,
o->file, set, reset, ws);
}
@@ -103,17 +109,28 @@
struct strbuf sb = STRBUF_INIT;
enum diff_symbol s = eds->s;
+@@
+ context = diff_get_color_opt(o, DIFF_CONTEXT);
+ reset = diff_get_color_opt(o, DIFF_RESET);
+ putc('\n', o->file);
+- emit_line_0(o, context, reset, '\\',
++ emit_line_0(o, context, 0, reset, '\\',
+ nneof, strlen(nneof));
+ break;
+ case DIFF_SYMBOL_SUBMODULE_HEADER:
@@
case DIFF_SYMBOL_CONTEXT:
set = diff_get_color_opt(o, DIFF_CONTEXT);
reset = diff_get_color_opt(o, DIFF_RESET);
- emit_line_ws_markup(o, set, reset, line, len, ' ',
-+ set_sign = set;
++ set_sign = NULL;
+ if (o->flags.dual_color_diffed_diffs) {
+ char c = !len ? 0 : line[0];
+
+ if (c == '+')
+ set = diff_get_color_opt(o, DIFF_FILE_NEW);
++ else if (c == '@')
++ set = diff_get_color_opt(o, DIFF_FRAGINFO);
+ else if (c == '-')
+ set = diff_get_color_opt(o, DIFF_FILE_OLD);
+ }
@@ -127,13 +144,15 @@
reset = diff_get_color_opt(o, DIFF_RESET);
- emit_line_ws_markup(o, set, reset, line, len, '+',
+ if (!o->flags.dual_color_diffed_diffs)
-+ set_sign = set;
++ set_sign = NULL;
+ else {
+ char c = !len ? 0 : line[0];
+
-+ set_sign = diff_get_color_opt(o, DIFF_FILE_NEW_INV);
++ set_sign = set;
+ if (c == '-')
+ set = diff_get_color_opt(o, DIFF_FILE_OLD);
++ else if (c == '@')
++ set = diff_get_color_opt(o, DIFF_FRAGINFO);
+ else if (c != '+')
+ set = diff_get_color_opt(o, DIFF_CONTEXT);
+ }
@@ -147,13 +166,15 @@
reset = diff_get_color_opt(o, DIFF_RESET);
- emit_line_ws_markup(o, set, reset, line, len, '-',
+ if (!o->flags.dual_color_diffed_diffs)
-+ set_sign = set;
++ set_sign = NULL;
+ else {
+ char c = !len ? 0 : line[0];
+
-+ set_sign = diff_get_color_opt(o, DIFF_FILE_OLD_INV);
++ set_sign = set;
+ if (c == '+')
+ set = diff_get_color_opt(o, DIFF_FILE_NEW);
++ else if (c == '@')
++ set = diff_get_color_opt(o, DIFF_FRAGINFO);
+ else if (c != '-')
+ set = diff_get_color_opt(o, DIFF_CONTEXT);
+ }
@@ -161,6 +182,23 @@
flags & DIFF_SYMBOL_CONTENT_WS_MASK, 0);
break;
case DIFF_SYMBOL_WORDS_PORCELAIN:
+@@
+ const char *frag = diff_get_color(ecbdata->color_diff, DIFF_FRAGINFO);
+ const char *func = diff_get_color(ecbdata->color_diff, DIFF_FUNCINFO);
+ const char *reset = diff_get_color(ecbdata->color_diff, DIFF_RESET);
++ const char *reverse = ecbdata->color_diff ? GIT_COLOR_REVERSE : "";
+ static const char atat[2] = { '@', '@' };
+ const char *cp, *ep;
+ struct strbuf msgbuf = STRBUF_INIT;
+@@
+ ep += 2; /* skip over @@ */
+
+ /* The hunk header in fraginfo color */
++ if (ecbdata->opt->flags.dual_color_diffed_diffs)
++ strbuf_addstr(&msgbuf, reverse);
+ strbuf_addstr(&msgbuf, frag);
+ strbuf_add(&msgbuf, line, ep - line);
+ strbuf_addstr(&msgbuf, reset);
diff --git a/diff.h b/diff.h
--- a/diff.h
@@ -173,14 +211,3 @@
};
static inline void diff_flags_or(struct diff_flags *a,
-@@
- DIFF_FILE_NEW_MOVED = 13,
- DIFF_FILE_NEW_MOVED_ALT = 14,
- DIFF_FILE_NEW_MOVED_DIM = 15,
-- DIFF_FILE_NEW_MOVED_ALT_DIM = 16
-+ DIFF_FILE_NEW_MOVED_ALT_DIM = 16,
-+ DIFF_FILE_OLD_INV = 17,
-+ DIFF_FILE_NEW_INV = 18
- };
- const char *diff_get_color(int diff_use_color, enum color_diff ix);
- #define diff_get_color_opt(o, ix) \
15: b9be01705 ! 15: 02e13c0c6 branch-diff: offer to dual-color the diffs
@@ -1,6 +1,6 @@
Author: Johannes Schindelin <johannes.schindelin@gmx.de>
- branch-diff: offer to dual-color the diffs
+ range-diff: offer to dual-color the diffs
When showing what changed between old and new commits, we show a diff of
the patches. This diff is a diff between diffs, therefore there are
@@ -13,21 +13,22 @@
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
-diff --git a/builtin/branch-diff.c b/builtin/branch-diff.c
---- a/builtin/branch-diff.c
-+++ b/builtin/branch-diff.c
+diff --git a/builtin/range-diff.c b/builtin/range-diff.c
+--- a/builtin/range-diff.c
++++ b/builtin/range-diff.c
@@
{
+ int creation_factor = 60;
struct diff_options diffopt = { NULL };
- struct strbuf four_spaces = STRBUF_INIT;
+ int dual_color = 0;
- double creation_weight = 0.6;
struct option options[] = {
+ OPT_INTEGER(0, "creation-factor", &creation_factor,
+ N_("Percentage by which creation is weighted")),
+ OPT_BOOL(0, "dual-color", &dual_color,
+ N_("color both diff and diff-between-diffs")),
- OPT_SET_INT(0, "no-patches", &diffopt.output_format,
- N_("short format (no diffs)"),
- DIFF_FORMAT_NO_OUTPUT),
+ OPT_END()
+ };
+ int i, j, res = 0;
@@
argc = j;
diff_setup_done(&diffopt);
16: b99ab186c ! 16: dfa7b1e71 branch-diff --dual-color: work around bogus white-space warning
@@ -1,6 +1,6 @@
Author: Johannes Schindelin <johannes.schindelin@gmx.de>
- branch-diff --dual-color: work around bogus white-space warning
+ range-diff --dual-color: work around bogus white-space warning
When displaying a diff of diffs, it is possible that there is an outer
`+` before a context line. That happens when the context changed between
@@ -20,8 +20,9 @@
However, the proper fix would be relatively ugly and intrusive because
it would have to *weaken* the WS_SPACE_BEFORE_TAB option in ws.c.
Besides, we do not expose the --dual-color option in cases other than
- the `branch-diff` command, which only uses a hard-coded output_prefix of
- four spaces (which misses the problem by one column ;-)).
+ the `git range-diff` command, which only uses a hard-coded
+ output_prefix of four spaces (which misses the problem by one
+ column... ;-)).
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
@@ -29,7 +30,7 @@
--- a/diff.c
+++ b/diff.c
@@
- set = diff_get_color_opt(o, DIFF_FILE_OLD);
+ set = diff_get_color_opt(o, DIFF_FRAGINFO);
else if (c != '+')
set = diff_get_color_opt(o, DIFF_CONTEXT);
+ /* Avoid space-before-tab warning */
17: 950c75377 ! 17: 799da25ef branch-diff: add a man page
@@ -1,29 +1,30 @@
Author: Johannes Schindelin <johannes.schindelin@gmx.de>
- branch-diff: add a man page
+ range-diff: add a man page
- This is a heavily butchered version of the README written by Thomas
- Rast and Thomas Gummerer, lifted from https://github.com/trast/tbdiff.
+ The bulk of this patch consists of a heavily butchered version of
+ tbdiff's README written by Thomas Rast and Thomas Gummerer, lifted from
+ https://github.com/trast/tbdiff.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
-diff --git a/Documentation/git-branch-diff.txt b/Documentation/git-branch-diff.txt
+diff --git a/Documentation/git-range-diff.txt b/Documentation/git-range-diff.txt
new file mode 100644
--- /dev/null
-+++ b/Documentation/git-branch-diff.txt
++++ b/Documentation/git-range-diff.txt
@@
-+git-branch-diff(1)
++git-range-diff(1)
+==================
+
+NAME
+----
-+git-branch-diff - Compare two versions of a branch
++git-range-diff - Compare two commit ranges (e.g. two versions of a branch)
+
+SYNOPSIS
+--------
+[verse]
-+'git branch-diff' [--color=[<when>]] [--no-color] [<diff-options>]
-+ [--dual-color] [--no-patches] [--creation-weight=<weight>]
++'git range-diff' [--color=[<when>]] [--no-color] [<diff-options>]
++ [--dual-color] [--creation-factor=<factor>]
+ ( <range1> <range2> | <rev1>...<rev2> | <base> <rev1> <rev2> )
+
+DESCRIPTION
@@ -45,23 +46,19 @@
+
+OPTIONS
+-------
-+--no-patches::
-+ Suppress the diffs between commit pairs that were deemed to
-+ correspond; only show the pairings.
-+
+--dual-color::
+ When the commit diffs differ, recreate the original diffs'
+ coloring, and add outer -/+ diff markers with the *background*
+ being red/green to make it easier to see e.g. when there was a
+ change in what exact lines were added.
+
-+--creation-weight=<factor>::
-+ Set the creation/deletion cost fudge factor to `<factor>`.
-+ Defaults to 0.6. Try a larger value if `git branch-diff`
-+ erroneously considers a large change a total rewrite (deletion
-+ of one commit and addition of another), and a smaller one in
-+ the reverse case. See the ``Algorithm`` section below for an
-+ explanation why this is needed.
++--creation-factor=<percent>::
++ Set the creation/deletion cost fudge factor to `<percent>`.
++ Defaults to 60. Try a larger value if `git range-diff` erroneously
++ considers a large change a total rewrite (deletion of one commit
++ and addition of another), and a smaller one in the reverse case.
++ See the ``Algorithm`` section below for an explanation why this is
++ needed.
+
+<range1> <range2>::
+ Compare the commits specified by the two ranges, where
@@ -74,10 +71,10 @@
+ Equivalent to passing `<base>..<rev1>` and `<base>..<rev2>`.
+ Note that `<base>` does not need to be the exact branch point
+ of the branches. Example: after rebasing a branch `my-topic`,
-+ `git branch-diff my-topic@{u} my-topic@{1} my-topic` would
++ `git range-diff my-topic@{u} my-topic@{1} my-topic` would
+ show the differences introduced by the rebase.
+
-+`git branch-diff` also accepts the regular diff options (see
++`git range-diff` also accepts the regular diff options (see
+linkgit:git-diff[1]), most notably the `--color=[<when>]` and
+`--no-color` options. These options are used when generating the "diff
+between patches", i.e. to compare the author, commit message and diff of
@@ -87,23 +84,23 @@
+
+CONFIGURATION
+-------------
-+This command uses the `diff.color.*` and `pager.branch-diff` settings
++This command uses the `diff.color.*` and `pager.range-diff` settings
+(the latter is on by default).
+See linkgit:git-config[1].
+
+
-+Examples
++EXAMPLES
+--------
+
+When a rebase required merge conflicts to be resolved, compare the changes
+introduced by the rebase directly afterwards using:
+
+------------
-+$ git branch-diff @{u} @{1} @
++$ git range-diff @{u} @{1} @
+------------
+
+
-+A typical output of `git branch-diff` would look like this:
++A typical output of `git range-diff` would look like this:
+
+------------
+-: ------- > 1: 0ddba11 Prepare for the inevitable!
@@ -216,11 +213,11 @@
+------------
+
+The cost of an edge `o--C` is the size of `C`'s diff, modified by a
-+fudge factor that should be smaller than 1.0. The cost of an edge `o--o`
-+is free. The fudge factor is necessary because even if `1` and `C` have
-+nothing in common, they may still share a few empty lines and such,
-+possibly making the assignment `1--C`, `o--o` slightly cheaper than
-+`1--o`, `o--C` even if `1` and `C` have nothing in common. With the
++fudge factor that should be smaller than 100%. The cost of an edge
++`o--o` is free. The fudge factor is necessary because even if `1` and
++`C` have nothing in common, they may still share a few empty lines and
++such, possibly making the assignment `1--C`, `o--o` slightly cheaper
++than `1--o`, `o--C` even if `1` and `C` have nothing in common. With the
+fudge factor we require a much larger common part to consider patches as
+corresponding.
+
18: 71698f118 < -: --------- completion: support branch-diff
-: --------- > 18: d05b54c60 completion: support `git range-diff`
-: --------- > 19: 144363006 range-diff: left-pad patch numbers
-: --------- > 20: 4a68b95ce range-diff: make --dual-color the default mode
--
gitgitgadget
^ permalink raw reply [flat|nested] 387+ messages in thread
* [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-03 11:26 ` [PATCH v3 00/20] Add `range-diff`, " Johannes Schindelin via GitGitGadget
@ 2018-04-30 21:54 ` Johannes Schindelin via GitGitGadget
2018-07-06 22:43 ` Junio C Hamano
2018-07-11 10:07 ` SZEDER Gábor
2018-05-01 19:42 ` [PATCH v3 02/20] Introduce `range-diff` to compare iterations of a topic branch Johannes Schindelin via GitGitGadget
` (19 subsequent siblings)
20 siblings, 2 replies; 387+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2018-04-30 21:54 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Johannes Schindelin
From: Johannes Schindelin <johannes.schindelin@gmx.de>
The problem solved by the code introduced in this commit goes like this:
given two sets of items, and a cost matrix which says how much it
"costs" to assign any given item of the first set to any given item of
the second, assign all items (except when the sets have different size)
in the cheapest way.
We use the Jonker-Volgenant algorithm to solve the assignment problem to
answer questions such as: given two different versions of a topic branch
(or iterations of a patch series), what is the best pairing of
commits/patches between the different versions?
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
Makefile | 1 +
linear-assignment.c | 203 ++++++++++++++++++++++++++++++++++++++++++++
linear-assignment.h | 22 +++++
3 files changed, 226 insertions(+)
create mode 100644 linear-assignment.c
create mode 100644 linear-assignment.h
diff --git a/Makefile b/Makefile
index 0cb6590f2..c5ba124f1 100644
--- a/Makefile
+++ b/Makefile
@@ -868,6 +868,7 @@ LIB_OBJS += gpg-interface.o
LIB_OBJS += graph.o
LIB_OBJS += grep.o
LIB_OBJS += hashmap.o
+LIB_OBJS += linear-assignment.o
LIB_OBJS += help.o
LIB_OBJS += hex.o
LIB_OBJS += ident.o
diff --git a/linear-assignment.c b/linear-assignment.c
new file mode 100644
index 000000000..0b0344b5f
--- /dev/null
+++ b/linear-assignment.c
@@ -0,0 +1,203 @@
+/*
+ * Based on: Jonker, R., & Volgenant, A. (1987). <i>A shortest augmenting path
+ * algorithm for dense and sparse linear assignment problems</i>. Computing,
+ * 38(4), 325-340.
+ */
+#include "cache.h"
+#include "linear-assignment.h"
+
+#define COST(column, row) cost[(column) + column_count * (row)]
+
+/*
+ * Solve the linear assignment problem described by `cost` using the
+ * shortest augmenting path algorithm of Jonker & Volgenant.
+ *
+ * The parameter `cost` is the cost matrix: the cost to assign column j to row
+ * i is `cost[j + column_count * i]`.
+ *
+ * `column2row` (column_count entries) and `row2column` (row_count entries)
+ * are caller-provided arrays that are overwritten with the result: the row
+ * assigned to each column and the column assigned to each row, with -1
+ * meaning "unassigned".
+ */
+void compute_assignment(int column_count, int row_count, int *cost,
+			int *column2row, int *row2column)
+{
+	int *v, *d;
+	int *free_row, free_count = 0, saved_free_count, *pred, *col;
+	int i, j, phase;
+
+	memset(column2row, -1, sizeof(int) * column_count);
+	memset(row2column, -1, sizeof(int) * row_count);
+
+	/*
+	 * Without rows nothing can be assigned; bail out before the column
+	 * reduction reads row 0 of the (empty) matrix.
+	 */
+	if (row_count < 1)
+		return;
+
+	ALLOC_ARRAY(v, column_count);
+
+	/*
+	 * column reduction: v[j] becomes the minimal cost in column j. The
+	 * column claims the row holding that minimum if the row is still
+	 * unassigned; otherwise the column stays unassigned and the row is
+	 * marked as contested by storing its column as `-2 - column`.
+	 */
+	for (j = column_count - 1; j >= 0; j--) {
+		int i1 = 0;
+
+		for (i = 1; i < row_count; i++)
+			if (COST(j, i1) > COST(j, i))
+				i1 = i;
+		v[j] = COST(j, i1);
+		if (row2column[i1] == -1) {
+			/* row i1 unassigned */
+			row2column[i1] = j;
+			column2row[j] = i1;
+		} else {
+			if (row2column[i1] >= 0)
+				row2column[i1] = -2 - row2column[i1];
+			column2row[j] = -1;
+		}
+	}
+
+	/*
+	 * reduction transfer: collect the unassigned rows in free_row, undo
+	 * the "contested" marking, and for every row that was claimed by
+	 * exactly one column j1, lower v[j1] by the smallest reduced cost
+	 * (cost minus v) the row has in any other column.
+	 */
+	ALLOC_ARRAY(free_row, row_count);
+	for (i = 0; i < row_count; i++) {
+		int j1 = row2column[i];
+		if (j1 == -1)
+			free_row[free_count++] = i;
+		else if (j1 < -1)
+			row2column[i] = -2 - j1;
+		else if (column_count > 1) {
+			/*
+			 * Seed the minimum with a column other than j1: `!j1`
+			 * is column 1 if j1 is column 0, and column 0
+			 * otherwise. That column only exists if there are at
+			 * least two columns; with a single column there is
+			 * nothing to transfer (and reading column 1 would be
+			 * out of bounds).
+			 */
+			int min = COST(!j1, i) - v[!j1];
+			for (j = 1; j < column_count; j++)
+				if (j != j1 && min > COST(j, i) - v[j])
+					min = COST(j, i) - v[j];
+			v[j1] -= min;
+		}
+	}
+
+	/*
+	 * If the only free rows are the surplus ones that cannot get a column
+	 * anyway (more rows than columns), there is nothing left to do.
+	 */
+	if (free_count ==
+	    (column_count < row_count ? row_count - column_count : 0)) {
+		free(v);
+		free(free_row);
+		return;
+	}
+
+	/*
+	 * augmenting row reduction: two passes over the free rows. For each
+	 * free row i, find the smallest reduced cost u1 (in column j1) and the
+	 * second-smallest u2 (in column j2), then assign the row to one of
+	 * these columns, possibly displacing the row i0 that held it.
+	 */
+	for (phase = 0; phase < 2; phase++) {
+		int k = 0;
+
+		saved_free_count = free_count;
+		free_count = 0;
+		while (k < saved_free_count) {
+			int u1, u2;
+			int j1 = 0, j2, i0;
+
+			i = free_row[k++];
+			u1 = COST(j1, i) - v[j1];
+			j2 = -1;
+			u2 = INT_MAX;
+			for (j = 1; j < column_count; j++) {
+				int c = COST(j, i) - v[j];
+				if (u2 > c) {
+					if (u1 < c) {
+						u2 = c;
+						j2 = j;
+					} else {
+						u2 = u1;
+						u1 = c;
+						j2 = j1;
+						j1 = j;
+					}
+				}
+			}
+			if (j2 < 0) {
+				j2 = j1;
+				u2 = u1;
+			}
+
+			i0 = column2row[j1];
+			if (u1 < u2)
+				v[j1] -= u2 - u1;
+			else if (i0 >= 0) {
+				/* no strict winner and j1 is taken: use j2 */
+				j1 = j2;
+				i0 = column2row[j1];
+			}
+
+			if (i0 >= 0) {
+				/*
+				 * Row i0 loses its column: re-process it right
+				 * away if row i won strictly, otherwise leave
+				 * it for the next pass (or the augmentation).
+				 */
+				if (u1 < u2)
+					free_row[--k] = i0;
+				else
+					free_row[free_count++] = i0;
+			}
+			row2column[i] = j1;
+			column2row[j1] = i;
+		}
+	}
+
+	/*
+	 * augmentation: for every row i1 that is still free, search for a
+	 * shortest augmenting path to an unassigned column. d[j] is the
+	 * smallest path cost to column j found so far and pred[j] the row
+	 * preceding column j on that path. col is a permutation of the
+	 * columns: [0, low) have been scanned, [low, up) have d == min and
+	 * are waiting to be scanned, the rest has a larger d.
+	 */
+	saved_free_count = free_count;
+	ALLOC_ARRAY(d, column_count);
+	ALLOC_ARRAY(pred, column_count);
+	ALLOC_ARRAY(col, column_count);
+	for (free_count = 0; free_count < saved_free_count; free_count++) {
+		int i1 = free_row[free_count], low = 0, up = 0, last, k;
+		int min, c, u1;
+
+		for (j = 0; j < column_count; j++) {
+			d[j] = COST(j, i1) - v[j];
+			pred[j] = i1;
+			col[j] = j;
+		}
+
+		j = -1;
+		do {
+			/* gather all columns with the current minimal d */
+			last = low;
+			min = d[col[up++]];
+			for (k = up; k < column_count; k++) {
+				j = col[k];
+				c = d[j];
+				if (c <= min) {
+					if (c < min) {
+						up = low;
+						min = c;
+					}
+					col[k] = col[up];
+					col[up++] = j;
+				}
+			}
+			/*
+			 * If one of them is unassigned, the path ends there.
+			 * j must name that very column, because the
+			 * augmentation below walks back from j.
+			 */
+			for (k = low; k < up; k++) {
+				j = col[k];
+				if (column2row[j] == -1)
+					goto update;
+			}
+
+			/* scan a row */
+			do {
+				int j1 = col[low++];
+
+				i = column2row[j1];
+				u1 = COST(j1, i) - v[j1] - min;
+				for (k = up; k < column_count; k++) {
+					j = col[k];
+					c = COST(j, i) - v[j] - u1;
+					if (c < d[j]) {
+						d[j] = c;
+						pred[j] = i;
+						if (c == min) {
+							if (column2row[j] == -1)
+								goto update;
+							col[k] = col[up];
+							col[up++] = j;
+						}
+					}
+				}
+			} while (low != up);
+		} while (low == up);
+
+update:
+		/* updating of the column prices (v) */
+		for (k = 0; k < last; k++) {
+			int j1 = col[k];
+			v[j1] += d[j1] - min;
+		}
+
+		/*
+		 * augmentation: walk back from the unassigned column j to the
+		 * free row i1 along pred, re-assigning each column on the way
+		 */
+		do {
+			if (j < 0)
+				BUG("negative j: %d", j);
+			i = pred[j];
+			column2row[j] = i;
+			k = j;
+			j = row2column[i];
+			row2column[i] = k;
+		} while (i1 != i);
+	}
+
+	free(col);
+	free(pred);
+	free(d);
+	free(v);
+	free(free_row);
+}
diff --git a/linear-assignment.h b/linear-assignment.h
new file mode 100644
index 000000000..fc4c502c8
--- /dev/null
+++ b/linear-assignment.h
@@ -0,0 +1,22 @@
+#ifndef HUNGARIAN_H
+#define HUNGARIAN_H
+
+/*
+ * Compute an assignment of columns -> rows (and vice versa) such that every
+ * column is assigned to at most one row (and vice versa) minimizing the
+ * overall cost.
+ *
+ * The parameter `cost` is the cost matrix: the cost to assign column j to row
+ * i is `cost[j + column_count * i]`.
+ *
+ * The arrays column2row and row2column will be populated with the respective
+ * assignments (-1 for unassigned, which can happen only if column_count !=
+ * row_count).
+ */
+void compute_assignment(int column_count, int row_count, int *cost,
+ int *column2row, int *row2column);
+
+/* The maximal cost in the cost matrix (to prevent integer overflows). */
+#define COST_MAX (1<<16)
+
+#endif
--
gitgitgadget
^ permalink raw reply related [flat|nested] 387+ messages in thread
* Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-04-30 21:54 ` [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems Johannes Schindelin via GitGitGadget
@ 2018-07-06 22:43 ` Junio C Hamano
2018-07-07 11:34 ` Johannes Schindelin
2018-07-11 10:07 ` SZEDER Gábor
1 sibling, 1 reply; 387+ messages in thread
From: Junio C Hamano @ 2018-07-06 22:43 UTC (permalink / raw)
To: Johannes Schindelin via GitGitGadget; +Cc: git, Johannes Schindelin
"Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
writes:
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> The problem solved by the code introduced in this commit goes like this:
> given two sets of items, and a cost matrix which says how much it
> "costs" to assign any given item of the first set to any given item of
> the second, assign all items (except when the sets have different size)
> in the cheapest way.
>
> We use the Jonker-Volgenant algorithm to solve the assignment problem to
> answer questions such as: given two different versions of a topic branch
> (or iterations of a patch series), what is the best pairing of
> commits/patches between the different versions?
>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
Does the "gitgitgadget" thing lie on the Date: e-mail header?
Postdating the patch with in-body header is fine, but mailbox tools
often use and trust the Date: timestamp when sorting and finding
messages etc. so sending a new patch to add linear-assignment.c that
is different from what was added 9 weeks ago with "Date: Mon, 30 Apr
2018" header can easily cause me to miss that message when I look
for things that happened within the past few weeks, for example.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-06 22:43 ` Junio C Hamano
@ 2018-07-07 11:34 ` Johannes Schindelin
2018-07-07 16:34 ` Junio C Hamano
0 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-07-07 11:34 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Johannes Schindelin via GitGitGadget, git
Hi Junio,
On Fri, 6 Jul 2018, Junio C Hamano wrote:
> "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
> writes:
>
> > From: Johannes Schindelin <johannes.schindelin@gmx.de>
> >
> > The problem solved by the code introduced in this commit goes like this:
> > given two sets of items, and a cost matrix which says how much it
> > "costs" to assign any given item of the first set to any given item of
> > the second, assign all items (except when the sets have different size)
> > in the cheapest way.
> >
> > We use the Jonker-Volgenant algorithm to solve the assignment problem to
> > answer questions such as: given two different versions of a topic branch
> > (or iterations of a patch series), what is the best pairing of
> > commits/patches between the different versions?
> >
> > Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> > ---
>
> Does the "gitgitgadget" thing lie on the Date: e-mail header?
No, GitGitGadget takes the literal output from `git format-patch`, as far
as I can tell. So if at all, it is `format-patch` that is lying.
You can compare the mail's date to the commit date:
https://public-inbox.org/git/39272eefcfe66de3ca1aa2ee43d6626ce558caae.1530617166.git.gitgitgadget@gmail.com/
https://github.com/dscho/git/commit/39272eefcfe66de3ca1aa2ee43d6626ce558caae
(the nice thing about GitGitGadget is that you can rely on its mails to
reflect *precisely* what the commit is like, the user does not have any
opportunity to interfere with the code that generates the mails:
https://github.com/gitgitgadget/gitgitgadget/blob/c4805370f/lib/patch-series.ts#L605-L611).
> Postdating the patch with in-body header is fine, but mailbox tools
> often use and trust the Date: timestamp when sorting and finding
> messages etc. so sending a new patch to add linear-assignment.c that
> is different from what was added 9 weeks ago with "Date: Mon, 30 Apr
> 2018" header can easily cause me to miss that message when I look
> for things that happened within the past few weeks, for example.
Well, isn't it too bad that we use emails to transport commits, then.
Seriously, I have very little sympathy here, as all I am doing is to
automate *the suggested usage* of `git format-patch` and `git send-email`
(the latter of which I cannot even use due to its limitations).
So if you want to see this "fixed", you should think how you want to see
`git format-patch` fixed.
Or maybe you want to write a script that re-orders the patches on top of
the cover letter according to the `[PATCH M/N]` order, to reinstate the
order of the original commits that got somewhat lost via emailing them.
Of course, you could also save yourself a lot of trouble and use Git:
git fetch https://github.com/gitgitgadget/git \
pr-1/dscho/branch-diff-v3
git cherry-pick -s ..FETCH_HEAD
(This is assuming that you insist, as you did in the past, on changing the
base commit from what the original author chose. If you are fine with my
choice, which is the current `master`, then you could save yourself *even
more* trouble by just pulling my branch, and merely signing off on the
merge commit. Which would be totes okay with me.)
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-07 11:34 ` Johannes Schindelin
@ 2018-07-07 16:34 ` Junio C Hamano
2018-07-07 19:27 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Junio C Hamano @ 2018-07-07 16:34 UTC (permalink / raw)
To: Johannes Schindelin; +Cc: Johannes Schindelin via GitGitGadget, git
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>> Does the "gitgitgadget" thing lie on the Date: e-mail header?
>
> No, GitGitGadget takes the literal output from `git format-patch`, as far
> as I can tell. So if at all, it is `format-patch` that is lying.
format-patch faithfully records the fact about the commit that is
made into the patch. How pieces of information should (or should
not) be used depends on the purpose of the application that uses
its output.
I'd suggest to match what send-email does, which is to notice but
use the current date when adding a Date: header. An option to lie
to SMTP servers may be OK but I do not think we want to encourage
such a behaviour by making it the default.
What is missing in the core-git tools is an ability to tell
send-email to optionally add an in-body header to record the author
date of the original. We add an in-body header that records the
real author when it is different from the sender automatically, and
it is OK to have an option to allow doing so (but not encouraged
around here---it is easier to reason about the resulting history for
everybody, perhaps other than the original author, to record the
first time you show the change to the public as the author time).
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-07 16:34 ` Junio C Hamano
@ 2018-07-07 19:27 ` Johannes Schindelin
2018-07-07 22:23 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-07-07 19:27 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Johannes Schindelin via GitGitGadget, git
Hi Junio,
On Sat, 7 Jul 2018, Junio C Hamano wrote:
> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> >> Does the "gitgitgadget" thing lie on the Date: e-mail header?
> >
> > No, GitGitGadget takes the literal output from `git format-patch`, as far
> > as I can tell. So if at all, it is `format-patch` that is lying.
>
> format-patch faithfully records the fact about the commit that is
> made into the patch. How pieces of information should (or should
> not) be used depends on the purpose of the application that uses
> its output.
I guess this is one of the fallouts for abusing the `format-patch|am`
dance for `rebase--am`.
> I'd suggest to match what send-email does, which is to notice but
> use the current date when adding a Date: header. An option to lie
> to SMTP servers may be OK but I do not think we want to encourage
> such a behaviour by making it the default.
I opened a PR to add a TODO:
https://github.com/gitgitgadget/gitgitgadget/pull/15
> What is missing in the core-git tools is an ability to tell
> send-email to optionally add an in-body header to record the author
> date of the original. We add an in-body header that records the
> real author when it is different from the sender automatically, and
> it is OK to have an option to allow doing so (but not encouraged
> around here---it is easier to reason about the resulting history for
> everybody, perhaps other than the original author, to record the
> first time you show the change to the public as the author time).
Pull Request-based workflows keep the original author date all the time.
If that is not desired, we need to do more than paper over it by adjusting
`send-email`.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-07 19:27 ` Johannes Schindelin
@ 2018-07-07 22:23 ` Johannes Schindelin
2018-07-09 22:08 ` refs/notes/amlog problems, was " Johannes Schindelin
2018-07-09 22:23 ` Junio C Hamano
0 siblings, 2 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-07-07 22:23 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Johannes Schindelin via GitGitGadget, git
Hi Junio,
On Sat, 7 Jul 2018, Johannes Schindelin wrote:
> On Sat, 7 Jul 2018, Junio C Hamano wrote:
>
> > Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> >
> > >> Does the "gitgitgadget" thing lie on the Date: e-mail header?
> > >
> > > No, GitGitGadget takes the literal output from `git format-patch`, as far
> > > as I can tell. So if at all, it is `format-patch` that is lying.
> >
> > format-patch faithfully records the fact about the commit that is
> > made into the patch. How pieces of information should (or should
> > not) be used depends on the purpose of the application that uses
> > its output.
>
> I guess this is one of the fallouts for abusing the `format-patch|am`
> dance for `rebase--am`.
Speaking of GitGitGadget: I just encountered a problem with your
`refs/notes/amlog` and I hope you can help me with that.
Concretely, I want GitGitGadget to be able to identify the commit that
corresponds to a given mail that contained a patch (if it ever made it
into `pu`), to automate all kinds of tedious things that I currently have
to perform manually.
And here I hit a block: I am looking for the commit corresponding to
aca087479b35cbcbd7c84c7ca3bcf556133d0548.1530274571.git.gitgitgadget@gmail.com
When I ask `git notes --ref=refs/notes/gitster-amlog show
4cec3986f017d84c8d6a2c4233d2eba4a3ffa60d` (the SHA-1 is the one
corresponding to `Message-Id: <...>` for that mail), it insists on
outputting
5902152ab02291af4454f24a8ccaf2adddefc306
However, I cannot find that commit anywhere.
When I look for the commit in the same manual, tedious way that I want to
automate, I find that it *is* in `pu`, but as
5cf8e064747be2026bb23be37f84f2f0b2a31781
Even curiouser: when I now ask for the commit notes for both of those
SHA-1s, I get back the correct, same Message-Id *for both of them*, which
makes me think that it was recorded correctly, but then overwritten due to
some process I don't understand.
Would you be able to shed light into this?
Thank you,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-07 22:23 ` Johannes Schindelin
@ 2018-07-09 22:08 ` Johannes Schindelin
2018-07-11 16:12 ` Junio C Hamano
2018-07-09 22:23 ` Junio C Hamano
1 sibling, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-07-09 22:08 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Johannes Schindelin via GitGitGadget, git
Hi Junio,
On Sun, 8 Jul 2018, Johannes Schindelin wrote:
> I just encountered a problem with your `refs/notes/amlog` and I hope you
> can help me with that.
>
> Concretely, I want GitGitGadget to be able to identify the commit that
> corresponds to a given mail that contained a patch (if it ever made it
> into `pu`), to automate all kinds of tedious things that I currently have
> to perform manually.
>
> And here I hit a block: I am looking for the commit corresponding to
> aca087479b35cbcbd7c84c7ca3bcf556133d0548.1530274571.git.gitgitgadget@gmail.com
>
> When I ask `git notes --ref=refs/notes/gitster-amlog show
> 4cec3986f017d84c8d6a2c4233d2eba4a3ffa60d` (the SHA-1 is the one
> corresponding to `Message-Id: <...>` for that mail), it insists on
> outputting
>
> 5902152ab02291af4454f24a8ccaf2adddefc306
>
> However, I cannot find that commit anywhere.
>
> When I look for the commit in the same manual, tedious way that I want to
> automate, I find that it *is* in `pu`, but as
>
> 5cf8e064747be2026bb23be37f84f2f0b2a31781
>
> Even curiouser: when I now ask for the commit notes for both of those
> SHA-1s, I get back the correct, same Message-Id *for both of them*, which
> makes me think that it was recorded correctly, but then overwritten due to
> some process I don't understand.
>
> Would you be able to shed light into this?
I think I reconstructed the culprit:
In https://github.com/git/git/commit/a7cddab6e8, your post-applypatch hook
added the note for commit 5902152ab02291af4454f24a8ccaf2adddefc306 that it
was generated from Message-Id:
<aca087479b35cbcbd7c84c7ca3bcf556133d0548.1530274571.git.gitgitgadget@gmail.com>,
and then https://github.com/git/git/commit/ff28c8f9283 added the note to
map that Message-Id back to that commit.
So far, so good!
But then, https://github.com/git/git/commit/81b08c718e9 indicates that you
ran an interactive rebase and amended the commit
5902152ab02291af4454f24a8ccaf2adddefc306 and the result was a new commit
5cf8e064747be2026bb23be37f84f2f0b2a31781 that was then also mapped to that
Message-Id.
And obviously, you lack a post-rewrite hook a la
```sh
refopt=--ref=refs/notes/amlog
while read old new rest
do
mid="$(git notes $refopt show $old 2>/dev/null)" &&
git notes $refopt set -m "$mid" $new
done
```
I was pretty happy to figure that out all on my own, and already on my way
to come up with that post-rewrite hook and a script to parse all of the
commits in refs/notes/amlog whose commit message contains `commit --amend`
to fix those problems, but before starting, I wanted to sanity check the
oldest such commit: https://github.com/git/git/commit/49bc3858e3c
You will be readily able to verify that it maps the commit
73bfebd43e14bcc1502577c0933b6a16ad540b99 to Message-Id:
<20170619175605.27864-3-phillip.wood@talktalk.net>, but that 7c1a3dcf23e
(which corresponds to that Message-Id) maps to
f64760904766db662badf1256923532b9e1a6ebd. So yes, there is the same
problem with this mapping, and we need to fix it.
*However*. Neither https://github.com/git/git/commit/73bfebd43e1 nor
https://github.com/git/git/commit/f6476090476 show any commit!
Does that mean that the patch with that Message-Id never made it into
`master` and was simply dropped and gc'ed at some stage?
Actually, no:
https://public-inbox.org/git/20170619175605.27864-3-phillip.wood@talktalk.net/
corresponds quite clearly to
https://github.com/git/git/commit/1ceb9dfab7e
Now, that commit message was clearly edited by you (I note the capital "A"
in Phillip's "Add" vs your lower-case "a" in "add"), but the patch
quite obviously made it into our code base in its original shape.
So I looked for the commit notes for that commit, but there aren't any!
To summarize, there are two commits recorded for that Message-Id, the
later one not mapped back, and neither is the correct commit that made it
into `master`.
It would be nice to figure out what went wrong there, and how to fix it
for the future (and also to fix up the existing mis-mappings in `amlog`).
However, at this stage I really have not enough information at my hands,
even with as much effort as I spent so far to figure out where my patch
went (which started this bug hunt). Could you kindly spend some time on
that? Otherwise, `amlog` is a lot less useful than it could otherwise be.
Thanks,
Dscho
P.S.: funny side note: it would appear that the rewritten notes all get
the author of the patch author, look e.g. at the author of
https://github.com/git/git/commit/81b08c718e97
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-09 22:08 ` refs/notes/amlog problems, was " Johannes Schindelin
@ 2018-07-11 16:12 ` Junio C Hamano
2018-07-12 15:23 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Junio C Hamano @ 2018-07-11 16:12 UTC (permalink / raw)
To: Johannes Schindelin; +Cc: Johannes Schindelin via GitGitGadget, git
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> To summarize, there are two commits recorded for that Message-Id, the
> later one not mapped back, and neither is the correct commit that made it
> into `master`.
>
> It would be nice to figure out what went wrong there, and how to fix it
> for the future (and also to fix up the existing mis-mappings in `amlog`).
I think what happened is that I used to have post-rewrite, but
because it did not solve the real issue of multiple commits existing
for the same message ID (either because of amending, or because of
running "am" multiple times while looking for the best base to
construct a topic branch for the series that contains it) *and* the
one that will eventually be used in the final history may not be the
last one (e.g. I may "am" twice to see if an older base I use in my
second attempt is a better one than the base I originally used, and
the patches may even apply cleanly to the older history, but may
turn out to need semantic adjustment, at which point I would discard
that second attempt and use the old commit from the first attempt
that built on a newer base), I stopped using it.
The mid-to-commit, for it to be reliable, needs to keep mapping for
all the commits created from a single message, instead of being the
last-one-survives mapping. I just didn't have that much interest
back when I decided it was not worth and dropped the post-rewrite, I
think.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-11 16:12 ` Junio C Hamano
@ 2018-07-12 15:23 ` Johannes Schindelin
2018-07-12 16:59 ` Junio C Hamano
0 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-07-12 15:23 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Johannes Schindelin via GitGitGadget, git
Hi Junio,
On Wed, 11 Jul 2018, Junio C Hamano wrote:
> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> > To summarize, there are two commits recorded for that Message-Id, the
> > later one not mapped back, and neither is the correct commit that made it
> > into `master`.
> >
> > It would be nice to figure out what went wrong there, and how to fix it
> > for the future (and also to fix up the existing mis-mappings in `amlog`).
>
> I think what happened is that I used to have post-rewrite, but
> because it did not solve the real issue of multiple commits existing
> for the same message ID (either because of amending, or because of
> running "am" multiple times while looking for the best base to
> construct a topic branch for the series that contains it) *and* the
> one that will eventually be used in the final history may not be the
> last one (e.g. I may "am" twice to see if an older base I use in my
> second attempt is a better one than the base I originally used, and
> the patches may even apply cleanly to the older history, but may
> turn out to need semantic adjustment, at which point I would discard
> that second attempt and use the old commit from the first attempt
> that built on a newer base), I stopped using it.
>
> The mid-to-commit, for it to be reliable, needs to keep mapping for
> all the commits created from a single message, instead of being the
> last-one-survives mapping. I just didn't have that much interest
> back when I decided it was not worth and dropped the post-rewrite, I
> think.
I would like to ask you to reinstate the post-rewrite hook, as it still
improves the situation over the current one.
Of course, it would be nice to get the automation into a shape where
the mappings in `refs/notes/amlog` of commits that hit `next` are fixed,
if necessary, to stop referring to commits that did not make it into
`next`.
Because the *concept* of `amlog` is quite useful, to put back at least
*some* of the information we lost by transiting Git commits via mails
without any connection to their original commits. It is still the most
annoying thing when I contribute patches myself.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-12 15:23 ` Johannes Schindelin
@ 2018-07-12 16:59 ` Junio C Hamano
2018-07-19 17:06 ` Junio C Hamano
0 siblings, 1 reply; 387+ messages in thread
From: Junio C Hamano @ 2018-07-12 16:59 UTC (permalink / raw)
To: Johannes Schindelin; +Cc: Johannes Schindelin via GitGitGadget, git
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> I would like to ask you to reinstate the post-rewrite hook, as it still
> improves the situation over the current one.
Without post-rewrite I seem to be getting correct amlog entries for
commits created by "git rebase"; does our rebase--am backend still
trigger post-applypatch hook in its "am" phase to apply the patches
created with "format-patch"?
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-12 16:59 ` Junio C Hamano
@ 2018-07-19 17:06 ` Junio C Hamano
2018-07-20 18:51 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Junio C Hamano @ 2018-07-19 17:06 UTC (permalink / raw)
To: Johannes Schindelin; +Cc: Johannes Schindelin via GitGitGadget, git
Junio C Hamano <gitster@pobox.com> writes:
> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
>> I would like to ask you to reinstate the post-rewrite hook, as it still
>> improves the situation over the current one.
>
> Without post-rewrite I seem to be getting correct amlog entries for
> commits created by "git rebase"; does our rebase--am backend still
> trigger post-applypatch hook in its "am" phase to apply the patches
> created with "format-patch"?
That was a wrong line of thought that led to a dead end. format-patch
won't recreate Message-Id to its output from notes/amlog, so even if
the "format-patch --stdout | am" pipeline inside rebase-am triggered
the post-applypatch hook, it would not have a chance to carry the
notes forward that way.
What was really happening was I have
$ git config --list | grep amlog
notes.rewriteref=refs/notes/amlog
and that ought to be sufficient to carry "commit-to-original-msg-id"
entries across rebases. And it seems to correctly work. I however
suspect that "cherry-pick A..B" may lose the notes, but I haven't
checked.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-19 17:06 ` Junio C Hamano
@ 2018-07-20 18:51 ` Johannes Schindelin
2018-07-20 19:34 ` Junio C Hamano
0 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-07-20 18:51 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Johannes Schindelin via GitGitGadget, git
Hi Junio,
On Thu, 19 Jul 2018, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
>
> > Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> >
> >> I would like to ask you to reinstate the post-rewrite hook, as it still
> >> improves the situation over the current one.
> >
> > Without post-rewrite I seem to be getting correct amlog entries for
> > commits created by "git rebase"; do our rebase--am backend still
> > trigger post-applypatch hook in its "am" phase to apply the patches
> > created with "format-patch"?
>
> That was a wrong line of thought that led to a dead end. format-patch
> won't recreate Message-Id to its output from notes/amlog, so even if
> the "format-patch --stdout | am" pipeline inside rebase-am triggered
> the post-applypatch hook, it would not have a chance to carry the
> notes forward that way.
>
> What was really happening was I have
>
> $ git config --list | grep amlog
> notes.rewriteref=refs/notes/amlog
>
> and that ought to be sufficient to carry "commit-to-original-msg-id"
> entries across rebases. And it seems to correctly work. I however
> suspect that "cherry-pick A..B" may lose the notes, but I haven't
> checked.
AFAICT there is at least one scenario where you run `rebase -i`, the notes
get updated, and of course the *reverse mapping* does *not* get updated:
you have a mapping both from commit to Message-Id *and crucially* from
Message-Id to commit. The automatic rewrite of commit notes in `rebase -i`
tackles only the commit notes, obviously, not the reverse.
Hence the post-rewrite hook I think I already suggested at least once in a
previous reply.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-20 18:51 ` Johannes Schindelin
@ 2018-07-20 19:34 ` Junio C Hamano
2018-07-20 21:20 ` Stefan Beller
2018-07-21 21:56 ` Johannes Schindelin
0 siblings, 2 replies; 387+ messages in thread
From: Junio C Hamano @ 2018-07-20 19:34 UTC (permalink / raw)
To: Johannes Schindelin; +Cc: Johannes Schindelin via GitGitGadget, git
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> AFAICT there is at least one scenario where you run `rebase -i`, the notes
> get updated, and of course the *reverse mapping* does *not* get updated:
It turns out that I never had a rewrite hook; the notes.rewriteref
mechanism is the only thing that has been used to maintain amlog.
I've stopped populating the reverse mapping, by the way. The script
that I feed a message from gmane or public-inbox when I need to
learn the set of commits that resulted from the message instead uses
"git grep $message-id notes/amlog". And that is fast enough for my
purpose.
There is no good reason to abuse the notes mechanism to map a random
object-name looking string (i.e. hash result of message id), other
than the ease of "quick access" when somebody is making a lot of
inquiry, but that "database" does not have to be stored in notes.
It certainly does not belong to cycles worth spending by me *while*
I work during the day with various history reshaping tools to record
and/or update the reverse mapping and that is why my post-applypatch
hook no longer has the "reverse map" hack.
It is not like anybody (including me) needs realtime up-to-date
reverse mapping from amlog while I run my "commit --amend", "rebase
-i", etc. and the reverse map is constructable by reversing the
forward map, obviously, with a postprocessing. And I think that is
a reasonable way forward if anybody wants to have a reverse mapping.
The postprocessing can be done either by me before pushing out the
amlog ref, or done by any consumer after fetching the amlog ref from
me. If I did the postprocessing and refuse to use rewrite hook you
wouldn't even know ;-)
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-20 19:34 ` Junio C Hamano
@ 2018-07-20 21:20 ` Stefan Beller
2018-07-20 21:24 ` Junio C Hamano
2018-07-21 21:56 ` Johannes Schindelin
1 sibling, 1 reply; 387+ messages in thread
From: Stefan Beller @ 2018-07-20 21:20 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Johannes Schindelin, gitgitgadget, git
On Fri, Jul 20, 2018 at 12:35 PM Junio C Hamano <gitster@pobox.com> wrote:
> It is not like anybody (including me) needs realtime up-to-date
I thought the same for a long time, but contributing to other projects
showed me that this is not necessarily the case. Having a real time
update, even if it would be just "your patch is labeled 'under discussion'"
is beneficial as I would know where it is "in the system".
In a way I'd compare our contribution process to having an
incredible fine grained paper map. Most of the world moved
on to digital maps, that zoom in on-demand.
(C.f. spelling out "See banned.h for banned functions" in
Documentation/CodingGuidelines is a fine grained detail
that is not relevant for *most* of the contributions, but just
burdens the bearer of the paper map with weight; if this hint
is given dynamically by the compiler or build system at relevant
times, it is much better;
Regarding the real time aspect here, it is also very good
comparison to maps: While I know how to read paper maps
(or offline maps) and how to navigate my way, it sure is easier
to just follow the online up-to-date navigation service, that
tells me what to do. )
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-20 21:20 ` Stefan Beller
@ 2018-07-20 21:24 ` Junio C Hamano
[not found] ` <CAPc5daW-KoyUX3i7M5YbdQC2mFKAmVBS42-XT84hpm30VFcZ1g@mail.gmail.com>
0 siblings, 1 reply; 387+ messages in thread
From: Junio C Hamano @ 2018-07-20 21:24 UTC (permalink / raw)
To: Stefan Beller; +Cc: Johannes Schindelin, gitgitgadget, git
Stefan Beller <sbeller@google.com> writes:
> On Fri, Jul 20, 2018 at 12:35 PM Junio C Hamano <gitster@pobox.com> wrote:
>
>> It is not like anybody (including me) needs realtime up-to-date
>
> I thought the same for a long time, but contributing to other projects
> showed me that this is not necessarily the case. Having a real time
> update, even if it would be just "your patch is labeled 'under discussion'"
> is beneficial as I would know where it is "in the system".
Well, you wouldn't have an access to the up-to-date amlog maintained
by me *UNTIL* I push it out at the end of the day. So by
definition, you do not have real-time access to the up-to-date
state.
And also by definition, you do not *NEED* such an access, because
you won't see newly created or rewritten commits, whose originating
Message-Id is not in the copy of amlog you have (yet), until I push
the day's integration result out *AND* you fetch what I pushed out.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-20 19:34 ` Junio C Hamano
2018-07-20 21:20 ` Stefan Beller
@ 2018-07-21 21:56 ` Johannes Schindelin
2018-07-23 1:25 ` Jeff King
1 sibling, 1 reply; 387+ messages in thread
From: Johannes Schindelin @ 2018-07-21 21:56 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Johannes Schindelin via GitGitGadget, git
Hi Junio,
On Fri, 20 Jul 2018, Junio C Hamano wrote:
> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> > AFAICT there is at least one scenario where you run `rebase -i`, the notes
> > get updated, and of course the *reverse mapping* does *not* get updated:
>
> It turns out that I never had a rewrite hook; the notes.rewriteref
> mechanism is the only thing that has been used to maintain amlog.
>
> I've stopped populating the reverse mapping, by the way.
That's just great. I ask you to make my life easier by keeping the
information correct, and now you just drop it altogether? Just great.
Seriously, I am trying to *improve* something here, because I really do
care about contributors, and how hard we make it on them. I would not have
expected such a backlash against that.
> The script that I feed a message from gmane or public-inbox when I need
> to learn the set of commits that resulted from the message instead uses
> "git grep $message-id notes/amlog". And that is fast enough for my
> purpose.
Awesome. You might want to make sure that Peff stops advertising the amlog
notes, then, though.
> There is no good reason to abuse the notes mechanism to map a random
> object-name looking string (i.e. hash result of message id), other
> than the ease of "quick access" when somebody is making a lot of
> inquiry, but that "database" does not have to be stored in notes.
Right. And it does not have to be stored anywhere, because nobody used it
anyway, right?
Well, I hate to break it to you: I just found a really excellent use case,
and you are making it very, very hard for me. Deliberately so. I don't
know how I deserve that.
> It certainly does not belong to cycles worth spending by me *while*
> I work during the day with various history reshaping tools to record
> and/or update the reverse mapping and that is why my post-applypatch
> hook no longer has the "reverse map" hack.
>
> It is not like anybody (including me) needs realtime up-to-date
> reverse mapping from amlog while I run my "commit --amend", "rebase
> -i", etc. and the reverse map is constructable by reversing the
> forward map, obviously, with a postprocessing. And I think that is
> a reasonable way forward if anybody wants to have a reverse mapping.
> The postprocessing can be done either by me before pushing out the
> amlog ref, or done by any consumer after fetching the amlog ref from
> me. If I did the postprocessing and refuse to use rewrite hook you
> wouldn't even know ;-)
The idea that you publish the amlog notes just for your own use cases,
sounds a bit strange to me.
So to reiterate: the information you have in amlog is useful, if faulty.
Rather than "fixing" it by stopping the useful reverse-mapping, it would
make a ton more sense to instate that post-rewrite hook I already drafted
for you.
Besides, while you spent all of that time to make things harder for me,
you still did not look into the most worrisome of my findings: there are
apparently Message-Id mappings where *none* of the commits returned by
said `git grep` you mentioned above are valid. Not a single one. I will
dig out the mail for you on Monday, because I care that much, where I
provided one example of a Message-Id with two commits that match in amlog,
none of which is actually reachable from any of your public branches, and
I also provided the commit that *actually* corresponds to that Message-Id,
and it is not annotated.
So at least in this case *even you* should have a vested interest in
figuring out what goes wrong because even your own use case is affected by
it.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-21 21:56 ` Johannes Schindelin
@ 2018-07-23 1:25 ` Jeff King
2018-07-24 1:50 ` Junio C Hamano
0 siblings, 1 reply; 387+ messages in thread
From: Jeff King @ 2018-07-23 1:25 UTC (permalink / raw)
To: Johannes Schindelin
Cc: Junio C Hamano, Johannes Schindelin via GitGitGadget, git
On Sat, Jul 21, 2018 at 11:56:06PM +0200, Johannes Schindelin wrote:
> > The script that I feed a message from gmane or public-inbox when I need
> > to learn the set of commits that resulted from the message instead uses
> > "git grep $message-id notes/amlog". And that is fast enough for my
> > purpose.
>
> Awesome. You might want to make sure that Peff stops advertising the amlog
> notes, then, though.
Woah, what did I do now?
> > There is no good reason to abuse the notes mechanism to map a random
> > object-name looking string (i.e. hash result of message id), other
> > than the ease of "quick access" when somebody is making a lot of
> > inquiry, but that "database" does not have to be stored in notes.
>
> Right. And it does not have to be stored anywhere, because nobody used it
> anyway, right?
If I understand the situation correctly, Junio is saying that he will
continue to produce the amlog mapping, and that it contains sufficient
information to produce the reverse mapping (which, as an aside, I did
not even know existed -- I mostly want to go the other way, from digging
in history to a mailing list conversation).
E.g., the script below builds and queries an incremental reverse
mapping.
-- >8 --
#!/usr/bin/perl
#
# Build and query an incremental reverse mapping (Message-Id => commit)
# from the forward mapping (commit => Message-Id) kept in the amlog notes.
#
# Each command-line argument is treated as a Message-Id (without the angle
# brackets); for each one, "<commit> <message-id>" is printed to stdout.
#
# The mapping is cached in a local DB_File database. The notes commit the
# cache was last built from is stored under the key "TIP", so later runs
# only need to walk the notes history added since then.
use strict;
use warnings;
use DB_File;

my $REF = 'refs/notes/amlog';
my $DBFILE = '.git/amlog.rev';

my %h;
my $db = tie %h, 'DB_File', $DBFILE, O_CREAT|O_RDWR, 0644
	or die "unable to open/create $DBFILE: $!";

my $db_tip = $h{TIP};
chomp(my $rev_tip = `git rev-parse $REF`);
# Without this check a failed rev-parse would be stored as the new TIP.
die "unable to resolve $REF\n" if $? != 0 || $rev_tip eq '';

if (!defined $db_tip || $db_tip ne $rev_tip) {
	print STDERR "Updating reverse mapping...\n";
	# using -p here is quick and easy, since we know the
	# shape of the data. Using --raw and cat-file might be less
	# hacky, though.
	my @cmd = (qw(git log --format= --reverse -p), $rev_tip);
	push @cmd, "^$db_tip" if defined $db_tip;
	open(my $fh, "-|", @cmd)
		or die "unable to run @cmd: $!";
	my $commit;
	while (<$fh>) {
		if (m{^\+\+\+ b/([0-9a-f/]+)}) {
			# The path of the note blob names the annotated
			# commit; strip the fan-out slashes to recover it.
			$commit = $1;
			$commit =~ s/[^0-9a-f]//g;
		} elsif (/^\+Message-Id: <(.*)>/i) {
			print STDERR "Imported $commit => $1\n";
			$h{$1} = $commit;
		}
	}
	# Closing a pipe reports the child's exit status; only record the
	# new tip when "git log" succeeded, so that a failed run is redone
	# next time instead of being silently treated as up to date.
	close($fh)
		or die "git log failed: " . ($! ? $! : "exit status $?") . "\n";
	$h{TIP} = $rev_tip;
}

# Unknown Message-Ids print an empty commit column, as before.
printf "%s %s\n", (defined $h{$_} ? $h{$_} : ''), $_ for @ARGV;
-- >8 --
That stores it in a local dbm. But it could also build a git-notes tree
if you really want that.
And if I understand what is being said here:
> > It certainly does not belong to cycles worth spending by me *while*
> > I work during the day with various history reshaping tools to record
> > and/or update the reverse mapping and that is why my post-applypatch
> > hook no longer has the "reverse map" hack.
> >
> > It is not like anybody (including me) needs realtime up-to-date
> > reverse mapping from amlog while I run my "commit --amend", "rebase
> > -i", etc. and the reverse map is constructable by reversing the
> > forward map, obviously, with a postprocessing. And I think that is
> > a reasonable way forward if anybody wants to have a reverse mapping.
> > The postprocessing can be done either by me before pushing out the
> > amlog ref, or done by any consumer after fetching the amlog ref from
> > me. If I did the postprocessing and refuse to use rewrite hook you
> > wouldn't even know ;-)
It is not "I refuse to push out a reverse mapping". It is "I could make
the reverse mapping before push-out, and you would not need to know or
care if I did it all at once, or using a rewrite hook".
Though personally, I do not know if there is much point in pushing it
out, given that receivers can reverse the mapping themselves.
Or is there some argument that there is information in the reverse map
that _cannot_ be generated from the forward map?
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-23 1:25 ` Jeff King
@ 2018-07-24 1:50 ` Junio C Hamano
2018-07-24 9:45 ` Jeff King
0 siblings, 1 reply; 387+ messages in thread
From: Junio C Hamano @ 2018-07-24 1:50 UTC (permalink / raw)
To: Jeff King; +Cc: Johannes Schindelin, Johannes Schindelin via GitGitGadget, git
Jeff King <peff@peff.net> writes:
> If I understand the situation correctly, Junio is saying that he will
> continue to produce the amlog mapping, and that it contains sufficient
> information to produce the reverse mapping (which, as an aside, I did
> not even know existed -- I mostly want to go the other way, from digging
> in history to a mailing list conversation).
Yes, the reverse mapping in amlog was an experiment that did not
work well in the end.
When I use "git am" to make a commit out of a message, a
post-applypatch hook picks up the "Message-Id:" from the original
message and adds a git note to the resulting commit. This is in
line with how the notes are meant to be used. We have a commit
object, and a piece of information that we want to associate with
the commit object, which is not recorded as a part of the commit
object. So we say "git notes add -m 'that piece of info' $commit"
(the message-id happens to be that piece of info in this example).
And with notes.rewriteRef, "git commit --amend" etc. would copy the
piece of info about the original commit to the rewritten commit.
Side Note: there are a few workflow elements I do want to
keep using but they currently *lose* the mapping info. An
obvious one is
$ git checkout -b to/pic master &&
... review in MUA and then ...
$ git am -s mbox &&
... review in tree, attempt to build, tweak, etc.
$ git format-patch --stdout master..to/pic >P &&
$ edit P &&
$ git reset --hard master &&
$ git am P
which is far more versatile and efficient when doing certain
transformations on the series than running "rebase -i" and
reopening and editing the target files of the patches one by
one in each step. But because format-patch does not
generate Message-Id header of the original one out of the
commit, the post-applypatch hook run by "am" at the end of
the steps would not have a chance to record that for the
newly created commit.
For this one, I think I can use "format-patch --notes=amlog"
to produce the patch file and then teach post-applypatch
script to pay attention to the Notes annotation without
changing anything else to record the message id of the
original. Other workflow elements that lose the notes need
to be identified and either a fix implemented or a
workaround found for each of them. For example, I suspect
there is no workaround for "cherry-pick" and it would take a
real fix.
A reverse mapping entry used to get created by post-applypatch to
map the blob that represents the notes text added to the $commit to
another text blob that contains the 40-hex of the commit object.
This is the experiment that did not work well. As none of the later
integrator's work e.g. "commit --amend", "rebase", "cherry-pick",
etc. is about rewriting that blob, notes.rewriteRef mechanism would
not kick in, and that is understandable.
And these (incomplete) reverse mapping entries get in the way to
maintain and correct the forward mapping. When a commit that got
unreachable gets expired, I want "git notes prune" to remove notes
on them, and I do not want to even think about what should happen to
the entries in the notes tree that abuse the mechanism to map blobs
that are otherwise *not* even reachable from the main history.
A much more important task is to make sure that the forward mapping
that annotates individual commits reachable from 'pu' and/or 'master'
is maintained correctly by various tools. From a correctly maintained
forward mapping, it should be straight forward to get a reverse mapping
if needed.
> Though personally, I do not know if there is much point in pushing it
> out, given that receivers can reverse the mapping themselves.
Before this thread, I was planning to construct and publish the
reverse mapping at the end of the day, but do so on a separate notes
ref (see above---the hacky abuse gets in the way of maintaining and
debugging the forward mapping, but a separate notes-ref that only
contains hacks is less worrisome). But I have changed my mind and
decided not to generate or publish one. It is sort of similar to
the way the pack .idx is constructed only by the receiver [*1*].
> Or is there some argument that there is information in the reverse map
> that _cannot_ be generated from the forward map?
I know there is no information loss (after all I was the only one
who ran that experimental hack), but there is one objection that is
still possible, even though I admit that is a weak argument.
If a plumbing "diff-{files,tree,index}" family had a sibling
"diff-notes" to compare two notes-shaped trees while pretending that
the object-name fan-out did not exist (i.e. instead, the trees being
compared is without a subtree and full of 40-hex filenames), then it
would be less cumbersome to incrementally update the reverse mapping
by reading forward mapping with something like:
git diff-notes --raw amlog@{1} amlog
to learn the commits whose notes have changed. But without such a
plumbing, it is cumbersome to do so correctly. "git diff-tree -r"
could serve as a rough substitute, until the note tree grows and get
rebalanced by reorganizing the fan-out, and on the day it happens
the reverse mapper needs to read and discard ghost changes that are
only due to tree reorganizing [*2*].
[Footnotes]
*1* Even if the sender could give one when it creates a .pack, the
receiver would not trust that it matches the corresponding
.pack before using it, and the cost to validate is similar to
the cost to generate.
*2* That makes it less efficient on that day (which hopefully would
happen once in a blue moon) but would not affect correctness.
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: refs/notes/amlog problems, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-24 1:50 ` Junio C Hamano
@ 2018-07-24 9:45 ` Jeff King
0 siblings, 0 replies; 387+ messages in thread
From: Jeff King @ 2018-07-24 9:45 UTC (permalink / raw)
To: Junio C Hamano
Cc: Johannes Schindelin, Johannes Schindelin via GitGitGadget, git
On Mon, Jul 23, 2018 at 06:50:46PM -0700, Junio C Hamano wrote:
> Side Note: there are a few workflow elements I do want to
> keep using but they currently *lose* the mapping info. An
> obvious one is
>
> $ git checkout -b to/pic master &&
> ... review in MUA and then ...
> $ git am -s mbox &&
> ... review in tree, attempt to build, tweak, etc.
> $ git format-patch --stdout master..to/pic >P &&
> $ edit P &&
> $ git reset --hard master &&
> $ git am P
>
> which is far more versatile and efficient when doing certain
> transformations on the series than running "rebase -i" and
> reopening and editing the target files of the patches one by
> one in each step. But because format-patch does not
> generate Message-Id header of the original one out of the
> commit, the post-applypatch hook run by "am" at the end of
> the steps would not have a chance to record that for the
> newly created commit.
>
> For this one, I think I can use "format-patch --notes=amlog"
> to produce the patch file and then teach post-applypatch
> script to pay attention to the Notes annotation without
> changing anything else to record the message id of the
> original.
Yes. I wonder if it would make sense to teach format-patch/am a
micro-format to automatically handle this case. I.e., some
machine-readable way of passing the notes in the email message.
Of course it's easy to design a format that covers the relatively
restricted form of these amlog notes, and much harder to cover the
general case.
> Other workflow elements that lose the notes need
> to be identified and either a fix implemented or a
> workaround found for each of them. For example, I suspect
> there is no workaround for "cherry-pick" and it would take a
> real fix.
I think the existing notes.rewriteRef is probably a good match here. I
can definitely think of notes you wouldn't want to cherry-pick, but I'm
having trouble coming up with an example that should survive a rebase
but not a cherry-pick.
> And these (incomplete) reverse mapping entries get in the way to
> maintain and correct the forward mapping. When a commit that got
> unreachable gets expired, I want "git notes prune" to remove notes
> on them, and I do not want to even think about what should happen to
> the entries in the notes tree that abuse the mechanism to map blobs
> that are otherwise *not* even reachable from the main history.
Right, I think the notes tree is a poor distribution method for that
reason.
> > Though personally, I do not know if there is much point in pushing it
> > out, given that receivers can reverse the mapping themselves.
>
> Before this thread, I was planning to construct and publish the
> reverse mapping at the end of the day, but do so on a separate notes
> ref (see above---the hacky abuse gets in the way of maintaining and
> debugging the forward mapping, but a separate notes-ref that only
> contains hacks is less worrisome). But I have changed my mind and
> decided not to generate or publish one. It is sort of similar to
> the way the pack .idx is constructed only by the receiver [*1*].
Yes, the pack .idx was the same mental model I had when writing my
earlier message.
> > Or is there some argument that there is information in the reverse map
> > that _cannot_ be generated from the forward map?
>
> I know there is no information loss (after all I was the only one
> who ran that experimental hack), but there is one objection that is
> still possible, even though I admit that is a weak argument.
I wondered if you might have a case like this (building as we go):
- message-id M becomes commit X
- we write the forward map X->M
- we write the reverse map M->X
- during a rewrite (e.g., --amend), commit X becomes commit Y
- we write the forward map Y->M
- we write the reverse map M->Y
The difference between that result and an inverted map created at the
end is that we know that M->Y is the final result. Whereas by looking at
the inverted map, we do not know which of M->X and M->Y is correct. In
fact they are _both_ correct. But only one of X and Y would eventually
get merged (both may make it into the repo's of people fetching from you
if we imagine that X is on "pu" and you push between the two steps).
So I think the inverted mapping is not actually one-to-one, and in
either case you'd want to retain all possible matches (pruning only when
a commit is eventually dropped from the forward mapping, which rewritten
things from "pu" would eventually do). And in that case it does not
matter if you generate it incrementally or all at once.
> If a plumbing "diff-{files,tree,index}" family had a sibling
> "diff-notes" to compare two notes-shaped trees while pretending that
> the object-name fan-out did not exist (i.e. instead, the trees being
> compared is without a subtree and full of 40-hex filenames), then it
> would be less cumbersome to incrementally update the reverse mapping
> by reading forward mapping with something like:
>
> git diff-notes --raw amlog@{1} amlog
>
> to learn the commits whose notes have changed. But without such a
> plumbing, it is cumbersome to do so correctly. "git diff-tree -r"
> could serve as a rough substitute, until the note tree grows and get
> rebalanced by reorganizing the fan-out, and on the day it happens
> the reverse mapper needs to read and discard ghost changes that are
> only due to tree reorganizing [*2*].
Yeah. My "log" hackery was trying to do that incremental comparison, but
it did not handle the multiple-commit case (nor did it handle
deletions). I agree an end-point diff is sufficient (and more
efficient).
-Peff
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-07 22:23 ` Johannes Schindelin
2018-07-09 22:08 ` refs/notes/amlog problems, was " Johannes Schindelin
@ 2018-07-09 22:23 ` Junio C Hamano
2018-07-10 10:47 ` refs/notes/amlog woes, was " Johannes Schindelin
1 sibling, 1 reply; 387+ messages in thread
From: Junio C Hamano @ 2018-07-09 22:23 UTC (permalink / raw)
To: Johannes Schindelin; +Cc: Johannes Schindelin via GitGitGadget, git
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> Speaking of GitGitGadget: I just encountered a problem with your
> `refs/notes/amlog` and I hope you can help me with that.
> ...
> When I ask `git notes --ref=refs/notes/gitster-amlog show
> 4cec3986f017d84c8d6a2c4233d2eba4a3ffa60d` (the SHA-1 is the one
> corresponding to `Message-Id: <...>` for that mail), it insists on
> outputting
>
> 5902152ab02291af4454f24a8ccaf2adddefc306
It is not uncommon for me to have to do "am" the same patch twice
when attempting to find the right branch/commit to base a change on,
so the reverse direction that abuses the notes mechanism to map
message id to resulting commits would be unreliable, especially
given that they may need to further go through "rebase -i" or manual
"cherry-pick <range>" depending on the situation.
I am kind of surprised that the message-to-commit mapping still
records any data that is remotely useful (these days, I only use it
to run "show --notes=amlog" for commit-to-message mapping). I do
not think I have anything special when amending the commit, but
amlog notes should be updated in both directions for its entries to
stay correct across amending, I would think.
^ permalink raw reply [flat|nested] 387+ messages in thread
* refs/notes/amlog woes, was Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-09 22:23 ` Junio C Hamano
@ 2018-07-10 10:47 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-07-10 10:47 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Johannes Schindelin via GitGitGadget, git
Hi Junio,
On Mon, 9 Jul 2018, Junio C Hamano wrote:
> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> > Speaking of GitGitGadget: I just encountered a problem with your
> > `refs/notes/amlog` and I hope you can help me with that.
> > ...
> > When I ask `git notes --ref=refs/notes/gitster-amlog show
> > 4cec3986f017d84c8d6a2c4233d2eba4a3ffa60d` (the SHA-1 is the one
> > corresponding to `Message-Id: <...>` for that mail), it insists on
> > outputting
> >
> > 5902152ab02291af4454f24a8ccaf2adddefc306
>
> It is not uncommon for me to have to do "am" the same patch twice
> when attempting to find the right branch/commit to base a change on,
But then the `post-applypatch` hook just kicks in twice, leaving the
correct mapping in place, no?
> so the reverse direction that abuses the notes mechanism to map
> message id to resulting commits would be unreliable, especially
> given that they may need to further go through "rebase -i" or manual
> "cherry-pick <range>" depending on the situation.
We already have a mechanism in place that rewrites notes in `rebase -i`'s
case. Not so sure about `cherry-pick`, but if it is missing, then that is
definitely something we will want to address.
In other words, let's not let shortcomings of our own software dictate
what we record and what we don't record.
This is highly important information that we willfully lose by using the
patch contribution process we are going with. And we *can* at least record
that information.
> I am kind of surprised that the message-to-commit mapping still
> records any data that is remotely useful (these days, I only use it
> to run "show --notes=amlog" for commit-to-message mapping).
Please do understand that this information is the only remotely sane way
to work around the limitations of the mailing list-based approach we use
here.
It costs me a ton of time to figure out these mappings manually, and I
think that others simply are not as tenacious as I am and simply drop the
ball, which is not good for the project.
> I do not think I have anything special when amending the commit, but
> amlog notes should be updated in both directions for its entries to stay
> correct across amending, I would think.
Indeed. See my other mail about the `post-rewrite` hook I suggest you to
install (I did not test this code, of course, but you will probably be
able to validate/fix it without much trouble).
Ciao,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-04-30 21:54 ` [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems Johannes Schindelin via GitGitGadget
2018-07-06 22:43 ` Junio C Hamano
@ 2018-07-11 10:07 ` SZEDER Gábor
2018-07-12 15:11 ` Johannes Schindelin
1 sibling, 1 reply; 387+ messages in thread
From: SZEDER Gábor @ 2018-07-11 10:07 UTC (permalink / raw)
To: Johannes Schindelin via GitGitGadget
Cc: SZEDER Gábor, git, Junio C Hamano, Johannes Schindelin
> diff --git a/linear-assignment.c b/linear-assignment.c
> new file mode 100644
> index 000000000..0b0344b5f
> --- /dev/null
> +++ b/linear-assignment.c
> @@ -0,0 +1,203 @@
> +/*
> + * Based on: Jonker, R., & Volgenant, A. (1987). <i>A shortest augmenting path
> + * algorithm for dense and sparse linear assignment problems</i>. Computing,
> + * 38(4), 325-340.
> + */
> +#include "cache.h"
> +#include "linear-assignment.h"
> +
> +#define COST(column, row) cost[(column) + column_count * (row)]
> +
> +/*
> + * The parameter `cost` is the cost matrix: the cost to assign column j to row
> + * i is `cost[j + column_count * i].
> + */
> +void compute_assignment(int column_count, int row_count, int *cost,
> + int *column2row, int *row2column)
> +{
[...]
> +update:
> + /* updating of the column pieces */
> + for (k = 0; k < last; k++) {
> + int j1 = col[k];
> + v[j1] += d[j1] - min;
> + }
> +
> + /* augmentation */
> + do {
> + if (j < 0)
> + BUG("negative j: %d", j);
> + i = pred[j];
> + column2row[j] = i;
> + k = j;
> + j = row2column[i];
> + row2column[i] = k;
Coccinelle suggests using SWAP(j, row2column[i]) instead of the last
three lines above.
It's more idiomatic, and it avoids (ab)using the 'k' variable
(elsewhere used as loop variable) as a temporary variable.
> + } while (i1 != i);
> + }
> +
> + free(col);
> + free(pred);
> + free(d);
> + free(v);
> + free(free_row);
> +}
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems
2018-07-11 10:07 ` SZEDER Gábor
@ 2018-07-12 15:11 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-07-12 15:11 UTC (permalink / raw)
To: SZEDER Gábor
Cc: Johannes Schindelin via GitGitGadget, git, Junio C Hamano
[-- Attachment #1: Type: text/plain, Size: 1628 bytes --]
Hi Gábor,
On Wed, 11 Jul 2018, SZEDER Gábor wrote:
> > diff --git a/linear-assignment.c b/linear-assignment.c
> > new file mode 100644
> > index 000000000..0b0344b5f
> > --- /dev/null
> > +++ b/linear-assignment.c
> > @@ -0,0 +1,203 @@
> > +/*
> > + * Based on: Jonker, R., & Volgenant, A. (1987). <i>A shortest augmenting path
> > + * algorithm for dense and sparse linear assignment problems</i>. Computing,
> > + * 38(4), 325-340.
> > + */
> > +#include "cache.h"
> > +#include "linear-assignment.h"
> > +
> > +#define COST(column, row) cost[(column) + column_count * (row)]
> > +
> > +/*
> > + * The parameter `cost` is the cost matrix: the cost to assign column j to row
> > + * i is `cost[j + column_count * i].
> > + */
> > +void compute_assignment(int column_count, int row_count, int *cost,
> > + int *column2row, int *row2column)
> > +{
>
> [...]
>
> > +update:
> > + /* updating of the column pieces */
> > + for (k = 0; k < last; k++) {
> > + int j1 = col[k];
> > + v[j1] += d[j1] - min;
> > + }
> > +
> > + /* augmentation */
> > + do {
> > + if (j < 0)
> > + BUG("negative j: %d", j);
> > + i = pred[j];
> > + column2row[j] = i;
> > + k = j;
> > + j = row2column[i];
> > + row2column[i] = k;
>
> Coccinelle suggests using SWAP(j, row2column[i]) instead of the last
> three lines above.
> It's more idiomatic, and it avoids (ab)using the 'k' variable
> (elsewhere used as loop variable) as a temporary variable.
Good point.
I audited the rest of the code in this file, and there are no more swap
operations.
Thanks,
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* [PATCH v3 02/20] Introduce `range-diff` to compare iterations of a topic branch
2018-07-03 11:26 ` [PATCH v3 00/20] Add `range-diff`, " Johannes Schindelin via GitGitGadget
2018-04-30 21:54 ` [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems Johannes Schindelin via GitGitGadget
@ 2018-05-01 19:42 ` Johannes Schindelin via GitGitGadget
2018-05-02 0:34 ` [PATCH v3 03/20] range-diff: first rudimentary implementation Johannes Schindelin via GitGitGadget
` (18 subsequent siblings)
20 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2018-05-01 19:42 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Johannes Schindelin
From: Johannes Schindelin <johannes.schindelin@gmx.de>
This command does not do a whole lot so far, apart from showing a usage
that is oddly similar to that of `git tbdiff`. And for a good reason:
the next commits will turn `range-diff` into a full-blown replacement
for `tbdiff`.
At this point, we ignore tbdiff's color options, as they will all be
implemented later using diff_options.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
.gitignore | 1 +
Makefile | 1 +
builtin.h | 1 +
builtin/range-diff.c | 25 +++++++++++++++++++++++++
command-list.txt | 1 +
git.c | 1 +
6 files changed, 30 insertions(+)
create mode 100644 builtin/range-diff.c
diff --git a/.gitignore b/.gitignore
index 3284a1e9b..cc0ad74b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -113,6 +113,7 @@
/git-pull
/git-push
/git-quiltimport
+/git-range-diff
/git-read-tree
/git-rebase
/git-rebase--am
diff --git a/Makefile b/Makefile
index c5ba124f1..190384cae 100644
--- a/Makefile
+++ b/Makefile
@@ -1059,6 +1059,7 @@ BUILTIN_OBJS += builtin/prune-packed.o
BUILTIN_OBJS += builtin/prune.o
BUILTIN_OBJS += builtin/pull.o
BUILTIN_OBJS += builtin/push.o
+BUILTIN_OBJS += builtin/range-diff.o
BUILTIN_OBJS += builtin/read-tree.o
BUILTIN_OBJS += builtin/rebase--helper.o
BUILTIN_OBJS += builtin/receive-pack.o
diff --git a/builtin.h b/builtin.h
index 0362f1ce2..99206df4b 100644
--- a/builtin.h
+++ b/builtin.h
@@ -201,6 +201,7 @@ extern int cmd_prune(int argc, const char **argv, const char *prefix);
extern int cmd_prune_packed(int argc, const char **argv, const char *prefix);
extern int cmd_pull(int argc, const char **argv, const char *prefix);
extern int cmd_push(int argc, const char **argv, const char *prefix);
+extern int cmd_range_diff(int argc, const char **argv, const char *prefix);
extern int cmd_read_tree(int argc, const char **argv, const char *prefix);
extern int cmd_rebase__helper(int argc, const char **argv, const char *prefix);
extern int cmd_receive_pack(int argc, const char **argv, const char *prefix);
diff --git a/builtin/range-diff.c b/builtin/range-diff.c
new file mode 100644
index 000000000..36788ea4f
--- /dev/null
+++ b/builtin/range-diff.c
@@ -0,0 +1,25 @@
+#include "cache.h"
+#include "builtin.h"
+#include "parse-options.h"
+
+static const char * const builtin_range_diff_usage[] = {
+N_("git range-diff [<options>] <old-base>..<old-tip> <new-base>..<new-tip>"),
+N_("git range-diff [<options>] <old-tip>...<new-tip>"),
+N_("git range-diff [<options>] <base> <old-tip> <new-tip>"),
+NULL
+};
+
+int cmd_range_diff(int argc, const char **argv, const char *prefix)
+{
+ int creation_factor = 60;
+ struct option options[] = {
+ OPT_INTEGER(0, "creation-factor", &creation_factor,
+ N_("Percentage by which creation is weighted")),
+ OPT_END()
+ };
+
+ argc = parse_options(argc, argv, NULL, options,
+ builtin_range_diff_usage, 0);
+
+ return 0;
+}
diff --git a/command-list.txt b/command-list.txt
index e1c26c1bb..a9dda3b8a 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -139,6 +139,7 @@ git-prune-packed plumbingmanipulators
git-pull mainporcelain remote
git-push mainporcelain remote
git-quiltimport foreignscminterface
+git-range-diff mainporcelain
git-read-tree plumbingmanipulators
git-rebase mainporcelain history
git-receive-pack synchelpers
diff --git a/git.c b/git.c
index 9dbe6ffaa..13e37f1e3 100644
--- a/git.c
+++ b/git.c
@@ -517,6 +517,7 @@ static struct cmd_struct commands[] = {
{ "prune-packed", cmd_prune_packed, RUN_SETUP },
{ "pull", cmd_pull, RUN_SETUP | NEED_WORK_TREE },
{ "push", cmd_push, RUN_SETUP },
+ { "range-diff", cmd_range_diff, RUN_SETUP | USE_PAGER },
{ "read-tree", cmd_read_tree, RUN_SETUP | SUPPORT_SUPER_PREFIX},
{ "rebase--helper", cmd_rebase__helper, RUN_SETUP | NEED_WORK_TREE },
{ "receive-pack", cmd_receive_pack },
--
gitgitgadget
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v3 03/20] range-diff: first rudimentary implementation
2018-07-03 11:26 ` [PATCH v3 00/20] Add `range-diff`, " Johannes Schindelin via GitGitGadget
2018-04-30 21:54 ` [PATCH v3 01/20] linear-assignment: a function to solve least-cost assignment problems Johannes Schindelin via GitGitGadget
2018-05-01 19:42 ` [PATCH v3 02/20] Introduce `range-diff` to compare iterations of a topic branch Johannes Schindelin via GitGitGadget
@ 2018-05-02 0:34 ` Johannes Schindelin via GitGitGadget
2018-07-16 6:55 ` Eric Sunshine
2018-05-02 10:22 ` [PATCH v3 04/20] range-diff: improve the order of the shown commits Johannes Schindelin via GitGitGadget
` (17 subsequent siblings)
20 siblings, 1 reply; 387+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2018-05-02 0:34 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Johannes Schindelin
From: Johannes Schindelin <johannes.schindelin@gmx.de>
At this stage, `git range-diff` can determine corresponding commits
of two related commit ranges. This makes use of the recently introduced
implementation of the Hungarian algorithm.
The core of this patch is a straight port of the ideas of tbdiff, the
apparently dormant project at https://github.com/trast/tbdiff.
The output does not at all match `tbdiff`'s output yet, as this patch
really concentrates on getting the patch matching part right.
Note: due to differences in the diff algorithm (`tbdiff` uses the Python
module `difflib`, Git uses its xdiff fork), the cost matrix calculated
by `range-diff` is different (but very similar) to the one calculated
by `tbdiff`. Therefore, it is possible that they find different matching
commits in corner cases (e.g. when a patch was split into two patches of
roughly equal length).
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
Makefile | 1 +
builtin/range-diff.c | 47 ++++++-
range-diff.c | 307 +++++++++++++++++++++++++++++++++++++++++++
range-diff.h | 7 +
4 files changed, 359 insertions(+), 3 deletions(-)
create mode 100644 range-diff.c
create mode 100644 range-diff.h
diff --git a/Makefile b/Makefile
index 190384cae..f20126e11 100644
--- a/Makefile
+++ b/Makefile
@@ -921,6 +921,7 @@ LIB_OBJS += progress.o
LIB_OBJS += prompt.o
LIB_OBJS += protocol.o
LIB_OBJS += quote.o
+LIB_OBJS += range-diff.o
LIB_OBJS += reachable.o
LIB_OBJS += read-cache.o
LIB_OBJS += reflog-walk.o
diff --git a/builtin/range-diff.c b/builtin/range-diff.c
index 36788ea4f..c37a72100 100644
--- a/builtin/range-diff.c
+++ b/builtin/range-diff.c
@@ -1,6 +1,7 @@
#include "cache.h"
#include "builtin.h"
#include "parse-options.h"
+#include "range-diff.h"
static const char * const builtin_range_diff_usage[] = {
N_("git range-diff [<options>] <old-base>..<old-tip> <new-base>..<new-tip>"),
@@ -17,9 +18,49 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix)
N_("Percentage by which creation is weighted")),
OPT_END()
};
+ int res = 0;
+ struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT;
- argc = parse_options(argc, argv, NULL, options,
- builtin_range_diff_usage, 0);
+ argc = parse_options(argc, argv, NULL, options, builtin_range_diff_usage,
+ 0);
- return 0;
+ if (argc == 2) {
+ if (!strstr(argv[0], ".."))
+ warning(_("no .. in range: '%s'"), argv[0]);
+ strbuf_addstr(&range1, argv[0]);
+
+ if (!strstr(argv[1], ".."))
+ warning(_("no .. in range: '%s'"), argv[1]);
+ strbuf_addstr(&range2, argv[1]);
+ } else if (argc == 3) {
+ strbuf_addf(&range1, "%s..%s", argv[0], argv[1]);
+ strbuf_addf(&range2, "%s..%s", argv[0], argv[2]);
+ } else if (argc == 1) {
+ const char *b = strstr(argv[0], "..."), *a = argv[0];
+ int a_len;
+
+ if (!b)
+ die(_("single arg format requires a symmetric range"));
+
+ a_len = (int)(b - a);
+ if (!a_len) {
+ a = "HEAD";
+ a_len = strlen(a);
+ }
+ b += 3;
+ if (!*b)
+ b = "HEAD";
+ strbuf_addf(&range1, "%s..%.*s", b, a_len, a);
+ strbuf_addf(&range2, "%.*s..%s", a_len, a, b);
+ } else {
+ error(_("need two commit ranges"));
+ usage_with_options(builtin_range_diff_usage, options);
+ }
+
+ res = show_range_diff(range1.buf, range2.buf, creation_factor);
+
+ strbuf_release(&range1);
+ strbuf_release(&range2);
+
+ return res;
}
diff --git a/range-diff.c b/range-diff.c
new file mode 100644
index 000000000..c374333a4
--- /dev/null
+++ b/range-diff.c
@@ -0,0 +1,307 @@
+#include "cache.h"
+#include "range-diff.h"
+#include "string-list.h"
+#include "run-command.h"
+#include "argv-array.h"
+#include "hashmap.h"
+#include "xdiff-interface.h"
+#include "linear-assignment.h"
+
+struct patch_util {
+ /* For the search for an exact match */
+ struct hashmap_entry e;
+ const char *diff, *patch;
+
+ int i;
+ int diffsize;
+ size_t diff_offset;
+ /* the index of the matching item in the other branch, or -1 */
+ int matching;
+ struct object_id oid;
+};
+
+/*
+ * Reads the patches into a string list, with the `util` field being populated
+ * as struct patch_util (will need to be free()d).
+ */
+static int read_patches(const char *range, struct string_list *list)
+{
+ struct child_process cp = CHILD_PROCESS_INIT;
+ FILE *in;
+ struct strbuf buf = STRBUF_INIT, line = STRBUF_INIT;
+ struct patch_util *util = NULL;
+ int in_header = 1;
+
+ argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges",
+ "--reverse", "--date-order", "--decorate=no",
+ "--no-abbrev-commit", range,
+ NULL);
+ cp.out = -1;
+ cp.no_stdin = 1;
+ cp.git_cmd = 1;
+
+ if (start_command(&cp))
+ return error_errno(_("could not start `log`"));
+ in = fdopen(cp.out, "r");
+ if (!in) {
+ error_errno(_("could not read `log` output"));
+ finish_command(&cp);
+ return -1;
+ }
+
+ while (strbuf_getline(&line, in) != EOF) {
+ const char *p;
+
+ if (skip_prefix(line.buf, "commit ", &p)) {
+ if (util) {
+ string_list_append(list, buf.buf)->util = util;
+ strbuf_reset(&buf);
+ }
+ util = xcalloc(sizeof(*util), 1);
+ if (get_oid(p, &util->oid)) {
+ error(_("could not parse commit '%s'"), p);
+ free(util);
+ string_list_clear(list, 1);
+ strbuf_release(&buf);
+ strbuf_release(&line);
+ fclose(in);
+ finish_command(&cp);
+ return -1;
+ }
+ util->matching = -1;
+ in_header = 1;
+ continue;
+ }
+
+ if (starts_with(line.buf, "diff --git")) {
+ in_header = 0;
+ strbuf_addch(&buf, '\n');
+ if (!util->diff_offset)
+ util->diff_offset = buf.len;
+ strbuf_addbuf(&buf, &line);
+ } else if (in_header) {
+ if (starts_with(line.buf, "Author: ")) {
+ strbuf_addbuf(&buf, &line);
+ strbuf_addstr(&buf, "\n\n");
+ } else if (starts_with(line.buf, " ")) {
+ strbuf_addbuf(&buf, &line);
+ strbuf_addch(&buf, '\n');
+ }
+ continue;
+ } else if (starts_with(line.buf, "@@ "))
+ strbuf_addstr(&buf, "@@");
+ else if (line.buf[0] && !starts_with(line.buf, "index "))
+ /*
+ * A completely blank (not ' \n', which is context)
+ * line is not valid in a diff. We skip it
+ * silently, because this neatly handles the blank
+ * separator line between commits in git-log
+ * output.
+ */
+ strbuf_addbuf(&buf, &line);
+ else
+ continue;
+
+ strbuf_addch(&buf, '\n');
+ util->diffsize++;
+ }
+ fclose(in);
+ strbuf_release(&line);
+
+ if (util)
+ string_list_append(list, buf.buf)->util = util;
+ strbuf_release(&buf);
+
+ if (finish_command(&cp))
+ return -1;
+
+ return 0;
+}
+
+static int patch_util_cmp(const void *dummy, const struct patch_util *a,
+ const struct patch_util *b, const char *keydata)
+{
+ return strcmp(a->diff, keydata ? keydata : b->diff);
+}
+
+static void find_exact_matches(struct string_list *a, struct string_list *b)
+{
+ struct hashmap map;
+ int i;
+
+ hashmap_init(&map, (hashmap_cmp_fn)patch_util_cmp, NULL, 0);
+
+ /* First, add the patches of a to a hash map */
+ for (i = 0; i < a->nr; i++) {
+ struct patch_util *util = a->items[i].util;
+
+ util->i = i;
+ util->patch = a->items[i].string;
+ util->diff = util->patch + util->diff_offset;
+ hashmap_entry_init(util, strhash(util->diff));
+ hashmap_add(&map, util);
+ }
+
+ /* Now try to find exact matches in b */
+ for (i = 0; i < b->nr; i++) {
+ struct patch_util *util = b->items[i].util, *other;
+
+ util->i = i;
+ util->patch = b->items[i].string;
+ util->diff = util->patch + util->diff_offset;
+ hashmap_entry_init(util, strhash(util->diff));
+ other = hashmap_remove(&map, util, NULL);
+ if (other) {
+ if (other->matching >= 0)
+ BUG("already assigned!");
+
+ other->matching = i;
+ util->matching = other->i;
+ }
+ }
+
+ hashmap_free(&map, 0);
+}
+
+static void diffsize_consume(void *data, char *line, unsigned long len)
+{
+ (*(int *)data)++;
+}
+
+static int diffsize(const char *a, const char *b)
+{
+ xpparam_t pp = { 0 };
+ xdemitconf_t cfg = { 0 };
+ mmfile_t mf1, mf2;
+ int count = 0;
+
+ mf1.ptr = (char *)a;
+ mf1.size = strlen(a);
+ mf2.ptr = (char *)b;
+ mf2.size = strlen(b);
+
+ cfg.ctxlen = 3;
+ if (!xdi_diff_outf(&mf1, &mf2, diffsize_consume, &count, &pp, &cfg))
+ return count;
+
+ error(_("failed to generate diff"));
+ return COST_MAX;
+}
+
+static void get_correspondences(struct string_list *a, struct string_list *b,
+ int creation_factor)
+{
+ int n = a->nr + b->nr;
+ int *cost, c, *a2b, *b2a;
+ int i, j;
+
+ ALLOC_ARRAY(cost, st_mult(n, n));
+ ALLOC_ARRAY(a2b, n);
+ ALLOC_ARRAY(b2a, n);
+
+ for (i = 0; i < a->nr; i++) {
+ struct patch_util *a_util = a->items[i].util;
+
+ for (j = 0; j < b->nr; j++) {
+ struct patch_util *b_util = b->items[j].util;
+
+ if (a_util->matching == j)
+ c = 0;
+ else if (a_util->matching < 0 && b_util->matching < 0)
+ c = diffsize(a_util->diff, b_util->diff);
+ else
+ c = COST_MAX;
+ cost[i + n * j] = c;
+ }
+
+ c = a_util->matching < 0 ?
+ a_util->diffsize * creation_factor / 100 : COST_MAX;
+ for (j = b->nr; j < n; j++)
+ cost[i + n * j] = c;
+ }
+
+ for (j = 0; j < b->nr; j++) {
+ struct patch_util *util = b->items[j].util;
+
+ c = util->matching < 0 ?
+ util->diffsize * creation_factor / 100 : COST_MAX;
+ for (i = a->nr; i < n; i++)
+ cost[i + n * j] = c;
+ }
+
+ for (i = a->nr; i < n; i++)
+ for (j = b->nr; j < n; j++)
+ cost[i + n * j] = 0;
+
+ compute_assignment(n, n, cost, a2b, b2a);
+
+ for (i = 0; i < a->nr; i++)
+ if (a2b[i] >= 0 && a2b[i] < b->nr) {
+ struct patch_util *a_util = a->items[i].util;
+ struct patch_util *b_util = b->items[a2b[i]].util;
+
+ a_util->matching = a2b[i];
+ b_util->matching = i;
+ }
+
+ free(cost);
+ free(a2b);
+ free(b2a);
+}
+
+static const char *short_oid(struct patch_util *util)
+{
+ return find_unique_abbrev(&util->oid, DEFAULT_ABBREV);
+}
+
+static void output(struct string_list *a, struct string_list *b)
+{
+ int i;
+
+ for (i = 0; i < b->nr; i++) {
+ struct patch_util *util = b->items[i].util, *prev;
+
+ if (util->matching < 0)
+ printf("-: -------- > %d: %s\n",
+ i + 1, short_oid(util));
+ else {
+ prev = a->items[util->matching].util;
+ printf("%d: %s ! %d: %s\n",
+ util->matching + 1, short_oid(prev),
+ i + 1, short_oid(util));
+ }
+ }
+
+ for (i = 0; i < a->nr; i++) {
+ struct patch_util *util = a->items[i].util;
+
+ if (util->matching < 0)
+ printf("%d: %s < -: --------\n",
+ i + 1, short_oid(util));
+ }
+}
+
+int show_range_diff(const char *range1, const char *range2,
+ int creation_factor)
+{
+ int res = 0;
+
+ struct string_list branch1 = STRING_LIST_INIT_DUP;
+ struct string_list branch2 = STRING_LIST_INIT_DUP;
+
+ if (read_patches(range1, &branch1))
+ res = error(_("could not parse log for '%s'"), range1);
+ if (!res && read_patches(range2, &branch2))
+ res = error(_("could not parse log for '%s'"), range2);
+
+ if (!res) {
+ find_exact_matches(&branch1, &branch2);
+ get_correspondences(&branch1, &branch2, creation_factor);
+ output(&branch1, &branch2);
+ }
+
+ string_list_clear(&branch1, 1);
+ string_list_clear(&branch2, 1);
+
+ return res;
+}
diff --git a/range-diff.h b/range-diff.h
new file mode 100644
index 000000000..dd30449c4
--- /dev/null
+++ b/range-diff.h
@@ -0,0 +1,7 @@
+#ifndef BRANCH_DIFF_H
+#define BRANCH_DIFF_H
+
+int show_range_diff(const char *range1, const char *range2,
+ int creation_factor);
+
+#endif
--
gitgitgadget
^ permalink raw reply related [flat|nested] 387+ messages in thread
* Re: [PATCH v3 03/20] range-diff: first rudimentary implementation
2018-05-02 0:34 ` [PATCH v3 03/20] range-diff: first rudimentary implementation Johannes Schindelin via GitGitGadget
@ 2018-07-16 6:55 ` Eric Sunshine
2018-07-17 9:53 ` Johannes Schindelin
0 siblings, 1 reply; 387+ messages in thread
From: Eric Sunshine @ 2018-07-16 6:55 UTC (permalink / raw)
To: gitgitgadget; +Cc: Git List, Junio C Hamano, Johannes Schindelin
On Tue, Jul 3, 2018 at 7:27 AM Johannes Schindelin via GitGitGadget
<gitgitgadget@gmail.com> wrote:
> At this stage, `git range-diff` can determine corresponding commits
> of two related commit ranges. This makes use of the recently introduced
> implementation of the Hungarian algorithm.
Did you want s/Hungarian/Jonker-Volgenant/ here? (Not worth a re-roll.)
> The core of this patch is a straight port of the ideas of tbdiff, the
> apparently dormant project at https://github.com/trast/tbdiff.
> [...]
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
> diff --git a/builtin/range-diff.c b/builtin/range-diff.c
> @@ -17,9 +18,49 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix)
> + int res = 0;
> + struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT;
>
> - argc = parse_options(argc, argv, NULL, options,
> - builtin_range_diff_usage, 0);
> + argc = parse_options(argc, argv, NULL, options, builtin_range_diff_usage,
> + 0);
This parse_options() change appears to be merely a re-wrapping of the
line between patches 2 and 3.
> - return 0;
> + if (argc == 2) {
> + if (!strstr(argv[0], ".."))
> + warning(_("no .. in range: '%s'"), argv[0]);
> + strbuf_addstr(&range1, argv[0]);
> +
> + if (!strstr(argv[1], ".."))
> + warning(_("no .. in range: '%s'"), argv[1]);
> + strbuf_addstr(&range2, argv[1]);
Should these die() (like the "..." case below) rather than warning()?
Warning and continuing doesn't seem like intended behavior. When I
test this on git.git and omit the "..", git sits for a long, long
time consuming the CPU. I guess it's git-log'ing pretty much the
entire history.
% GIT_TRACE=1 git range-diff v1 v2
warning: no .. in range: 'v1'
warning: no .. in range: 'v2'
trace: git log --no-color -p --no-merges --reverse \
--date-order --decorate=no --no-abbrev-commit v1
^C
%
> + } else if (argc == 3) {
> + strbuf_addf(&range1, "%s..%s", argv[0], argv[1]);
> + strbuf_addf(&range2, "%s..%s", argv[0], argv[2]);
> + } else if (argc == 1) {
> + const char *b = strstr(argv[0], "..."), *a = argv[0];
> + int a_len;
> +
> + if (!b)
> + die(_("single arg format requires a symmetric range"));
> diff --git a/range-diff.c b/range-diff.c
> @@ -0,0 +1,307 @@
> +static int read_patches(const char *range, struct string_list *list)
> +{
> + while (strbuf_getline(&line, in) != EOF) {
> + if (skip_prefix(line.buf, "commit ", &p)) {
> + [...]
> + in_header = 1;
> + continue;
> + }
> + if (starts_with(line.buf, "diff --git")) {
> + in_header = 0;
> + [...]
> + } else if (in_header) {
> + if (starts_with(line.buf, "Author: ")) {
> + [...]
> + } else if (starts_with(line.buf, " ")) {
> + [...]
> + }
> + continue;
> + } else if (starts_with(line.buf, "@@ "))
> + strbuf_addstr(&buf, "@@");
> + else if (line.buf[0] && !starts_with(line.buf, "index "))
> + /*
> + * A completely blank (not ' \n', which is context)
> + * line is not valid in a diff. We skip it
> + * silently, because this neatly handles the blank
> + * separator line between commits in git-log
> + * output.
> + */
> + strbuf_addbuf(&buf, &line);
This comment had me confused for a bit since it doesn't seem to agree
with the 'then' part of the 'if', but rather applies more to the
'else'. Had it been split into two parts (one for 'then' and one for
'else'), it might have been easier to digest. That is, something like:
else if (line.buf[0] && !starts_with(..., "index "))
/* A line we wish to keep. */
strbuf_addbuf(...);
else
/*
* A completely blank line between commits or
* one in which we are otherwise not interested.
*/
continue;
or something. Structuring it a bit differently might have helped, as well:
else if (!line.buf[0])
/* A completely blank line between commits. */
continue;
else if (starts_with(..., "index "))
/* A line in which we are not interested. */
continue;
else
strbuf_addbuf(&buf, &line);
Not at all worth a re-roll.
> + else
> + continue;
> + if (util)
> + string_list_append(list, buf.buf)->util = util;
So, the parser is grabbing each commit and shoving all the
"interesting" information about the commit in a 'patch_util'. It grabs
the OID, author, the commit message (indented), the "diff --git",
"+++", "---" lines (but ignores "index" line), "@@" lines (but
ignoring the gunk after "@@"), and all context and patch lines.
Looks good.
> + strbuf_release(&buf);
> +
> + if (finish_command(&cp))
> + return -1;
> +
> + return 0;
> +}
^ permalink raw reply [flat|nested] 387+ messages in thread
* Re: [PATCH v3 03/20] range-diff: first rudimentary implementation
2018-07-16 6:55 ` Eric Sunshine
@ 2018-07-17 9:53 ` Johannes Schindelin
0 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin @ 2018-07-17 9:53 UTC (permalink / raw)
To: Eric Sunshine; +Cc: gitgitgadget, Git List, Junio C Hamano
Hi Eric,
On Mon, 16 Jul 2018, Eric Sunshine wrote:
> On Tue, Jul 3, 2018 at 7:27 AM Johannes Schindelin via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
> > At this stage, `git range-diff` can determine corresponding commits
> > of two related commit ranges. This makes use of the recently introduced
> > implementation of the Hungarian algorithm.
>
> Did you want s/Hungarian/Jonker-Volgenant/ here? (Not worth a re-roll.)
It is worth a new iteration, and I'd rather say "linear assignment" than
either Hungarian or Jonker-Volgenant. Thanks for pointing this out.
> > The core of this patch is a straight port of the ideas of tbdiff, the
> > apparently dormant project at https://github.com/trast/tbdiff.
> > [...]
> > Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> > ---
> > diff --git a/builtin/range-diff.c b/builtin/range-diff.c
> > @@ -17,9 +18,49 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix)
> > + int res = 0;
> > + struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT;
> >
> > - argc = parse_options(argc, argv, NULL, options,
> > - builtin_range_diff_usage, 0);
> > + argc = parse_options(argc, argv, NULL, options, builtin_range_diff_usage,
> > + 0);
>
> This parse_options() change appears to be merely a re-wrapping of the
> line between patches 2 and 3.
True, and it is a bad change because it makes the line longer than 80
columns.
Fixed.
> > - return 0;
> > + if (argc == 2) {
> > + if (!strstr(argv[0], ".."))
> > + warning(_("no .. in range: '%s'"), argv[0]);
> > + strbuf_addstr(&range1, argv[0]);
> > +
> > + if (!strstr(argv[1], ".."))
> > + warning(_("no .. in range: '%s'"), argv[1]);
> > + strbuf_addstr(&range2, argv[1]);
>
> Should these die() (like the "..." case below) rather than warning()?
> Warning and continuing doesn't seem like intended behavior. When I
> test this on git.git and omit the "..", git sits for a long, long
> time consuming the CPU. I guess it's git-log'ing pretty much the
> entire history.
I had to go back to `git-tbdiff.py` to see how it handles this, and you
are right: it should die().
Fixed.
(Technically, it is conceivable that some user wants to compare two
independent commit histories, e.g. when a repository was imported from a
different SCM two times, independently. I guess when that happens, we can
always implement a `range-diff --root <tip1> <tip2>` or some such.)
> % GIT_TRACE=1 git range-diff v1 v2
> warning: no .. in range: 'v1'
> warning: no .. in range: 'v2'
> trace: git log --no-color -p --no-merges --reverse \
> --date-order --decorate=no --no-abbrev-commit v1
> ^C
> %
>
> > + } else if (argc == 3) {
> > + strbuf_addf(&range1, "%s..%s", argv[0], argv[1]);
> > + strbuf_addf(&range2, "%s..%s", argv[0], argv[2]);
> > + } else if (argc == 1) {
> > + const char *b = strstr(argv[0], "..."), *a = argv[0];
> > + int a_len;
> > +
> > + if (!b)
> > + die(_("single arg format requires a symmetric range"));
> > diff --git a/range-diff.c b/range-diff.c
> > @@ -0,0 +1,307 @@
> > +static int read_patches(const char *range, struct string_list *list)
> > +{
> > + while (strbuf_getline(&line, in) != EOF) {
> > + if (skip_prefix(line.buf, "commit ", &p)) {
> > + [...]
> > + in_header = 1;
> > + continue;
> > + }
> > + if (starts_with(line.buf, "diff --git")) {
> > + in_header = 0;
> > + [...]
> > + } else if (in_header) {
> > + if (starts_with(line.buf, "Author: ")) {
> > + [...]
> > + } else if (starts_with(line.buf, " ")) {
> > + [...]
> > + }
> > + continue;
> > + } else if (starts_with(line.buf, "@@ "))
> > + strbuf_addstr(&buf, "@@");
> > + else if (line.buf[0] && !starts_with(line.buf, "index "))
> > + /*
> > + * A completely blank (not ' \n', which is context)
> > + * line is not valid in a diff. We skip it
> > + * silently, because this neatly handles the blank
> > + * separator line between commits in git-log
> > + * output.
> > + */
> > + strbuf_addbuf(&buf, &line);
>
> This comment had me confused for a bit since it doesn't seem to agree
> with the 'then' part of the 'if', but rather applies more to the
> 'else'. Had it been split into two parts (one for 'then' and one for
> 'else'), it might have been easier to digest. That is, something like:
>
> else if (line.buf[0] && !starts_with(..., "index "))
> /* A line we wish to keep. */
> strbuf_addbuf(...);
> else
> /*
> * A completely blank line between commits or
> * one in which we are otherwise not interested.
> */
> continue;
>
> or something. Structuring it a bit differently might have helped, as well:
>
> else if (!line.buf[0])
> /* A completely blank line between commits. */
> continue;
> else if (starts_with(..., "index "))
> /* A line in which we are not interested. */
> continue;
> else
> strbuf_addbuf(&buf, &line);
I like this much better, too.
> Not at all worth a re-roll.
I'll have to send a new iteration anyway, after digging into ws.c, I
think.
Also: I had this idea that dimming the "old" diff would make a ton of
sense, so I want to try that.
> > + else
> > + continue;
> > + if (util)
> > + string_list_append(list, buf.buf)->util = util;
>
> So, the parser is grabbing each commit and shoving all the
> "interesting" information about the commit in a 'patch_util'. It grabs
> the OID, author, the commit message (indented), the "diff --git",
> "+++", "---" lines (but ignores "index" line), "@@" lines (but
> ignoring the gunk after "@@"), and all context and patch lines.
>
> Looks good.
Correct.
> > + strbuf_release(&buf);
> > +
> > + if (finish_command(&cp))
> > + return -1;
> > +
> > + return 0;
> > +}
Thank you for your suggestions!
Dscho
^ permalink raw reply [flat|nested] 387+ messages in thread
* [PATCH v3 04/20] range-diff: improve the order of the shown commits
2018-07-03 11:26 ` [PATCH v3 00/20] Add `range-diff`, " Johannes Schindelin via GitGitGadget
` (2 preceding siblings ...)
2018-05-02 0:34 ` [PATCH v3 03/20] range-diff: first rudimentary implementation Johannes Schindelin via GitGitGadget
@ 2018-05-02 10:22 ` Johannes Schindelin via GitGitGadget
2018-05-02 14:49 ` [PATCH v3 06/20] range-diff: right-trim commit messages Johannes Schindelin via GitGitGadget
` (16 subsequent siblings)
20 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2018-05-02 10:22 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Johannes Schindelin
From: Johannes Schindelin <johannes.schindelin@gmx.de>
This patch lets `git range-diff` use the same order as tbdiff.
The idea is simple: for left-to-right readers, it is natural to assume
that the `git range-diff` is performed between an older vs a newer
version of the branch. As such, the user is probably more interested in
the question "where did this come from?" rather than "where did that one
go?".
To that end, we list the commits in the order of the second commit range
("the newer version"), inserting the unmatched commits of the first
commit range as soon as all their predecessors have been shown.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
range-diff.c | 59 +++++++++++++++++++++++++++++++++++-----------------
1 file changed, 40 insertions(+), 19 deletions(-)
diff --git a/range-diff.c b/range-diff.c
index c374333a4..e71cf0ba7 100644
--- a/range-diff.c
+++ b/range-diff.c
@@ -12,7 +12,7 @@ struct patch_util {
struct hashmap_entry e;
const char *diff, *patch;
- int i;
+ int i, shown;
int diffsize;
size_t diff_offset;
/* the index of the matching item in the other branch, or -1 */
@@ -256,28 +256,49 @@ static const char *short_oid(struct patch_util *util)
static void output(struct string_list *a, struct string_list *b)
{
- int i;
-
- for (i = 0; i < b->nr; i++) {
- struct patch_util *util = b->items[i].util, *prev;
+ int i = 0, j = 0;
+
+ /*
+ * We assume the user is really more interested in the second argument
+ * ("newer" version). To that end, we print the output in the order of
+ * the RHS (the `b` parameter). To put the LHS (the `a` parameter)
+ * commits that are no longer in the RHS into a good place, we place
+ * them once we have shown all of their predecessors in the LHS.
+ */
+
+ while (i < a->nr || j < b->nr) {
+ struct patch_util *a_util, *b_util;
+ a_util = i < a->nr ? a->items[i].util : NULL;
+ b_util = j < b->nr ? b->items[j].util : NULL;
+
+ /* Skip all the already-shown commits from the LHS. */
+ while (i < a->nr && a_util->shown)
+ a_util = ++i < a->nr ? a->items[i].util : NULL;
+
+ /* Show unmatched LHS commit whose predecessors were shown. */
+ if (i < a->nr && a_util->matching < 0) {
+ printf("%d: %s < -: --------\n",
+ i + 1, short_oid(a_util));
+ i++;
+ continue;
+ }
- if (util->matching < 0)
+ /* Show unmatched RHS commits. */
+ while (j < b->nr && b_util->matching < 0) {
printf("-: -------- > %d: %s\n",
- i + 1, short_oid(util));
- else {
- prev = a->items[util->matching].util;
- printf("%d: %s ! %d: %s\n",
- util->matching + 1, short_oid(prev),
- i + 1, short_oid(util));
+ j + 1, short_oid(b_util));
+ b_util = ++j < b->nr ? b->items[j].util : NULL;
}
- }
-
- for (i = 0; i < a->nr; i++) {
- struct patch_util *util = a->items[i].util;
- if (util->matching < 0)
- printf("%d: %s < -: --------\n",
- i + 1, short_oid(util));
+ /* Show matching LHS/RHS pair. */
+ if (j < b->nr) {
+ a_util = a->items[b_util->matching].util;
+ printf("%d: %s ! %d: %s\n",
+ b_util->matching + 1, short_oid(a_util),
+ j + 1, short_oid(b_util));
+ a_util->shown = 1;
+ j++;
+ }
}
}
--
gitgitgadget
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v3 06/20] range-diff: right-trim commit messages
2018-07-03 11:26 ` [PATCH v3 00/20] Add `range-diff`, " Johannes Schindelin via GitGitGadget
` (3 preceding siblings ...)
2018-05-02 10:22 ` [PATCH v3 04/20] range-diff: improve the order of the shown commits Johannes Schindelin via GitGitGadget
@ 2018-05-02 14:49 ` Johannes Schindelin via GitGitGadget
2018-05-02 14:52 ` [PATCH v3 07/20] range-diff: indent the diffs just like tbdiff Johannes Schindelin via GitGitGadget
` (15 subsequent siblings)
20 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2018-05-02 14:49 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Johannes Schindelin
From: Johannes Schindelin <johannes.schindelin@gmx.de>
When comparing commit messages, we need to keep in mind that they are
indented by four spaces. That is, empty lines are no longer empty, but
have "trailing whitespace". When displaying them in color, that results
in those nagging red lines.
Let's just right-trim the lines in the commit message, it's not like
trailing white-space in the commit messages is important enough to care
about in `git range-diff`.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
range-diff.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/range-diff.c b/range-diff.c
index 530f2fc32..8d3b96455 100644
--- a/range-diff.c
+++ b/range-diff.c
@@ -85,6 +85,7 @@ static int read_patches(const char *range, struct string_list *list)
strbuf_addbuf(&buf, &line);
strbuf_addstr(&buf, "\n\n");
} else if (starts_with(line.buf, " ")) {
+ strbuf_rtrim(&line);
strbuf_addbuf(&buf, &line);
strbuf_addch(&buf, '\n');
}
--
gitgitgadget
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v3 07/20] range-diff: indent the diffs just like tbdiff
2018-07-03 11:26 ` [PATCH v3 00/20] Add `range-diff`, " Johannes Schindelin via GitGitGadget
` (4 preceding siblings ...)
2018-05-02 14:49 ` [PATCH v3 06/20] range-diff: right-trim commit messages Johannes Schindelin via GitGitGadget
@ 2018-05-02 14:52 ` Johannes Schindelin via GitGitGadget
2018-05-02 14:53 ` [PATCH v3 08/20] range-diff: suppress the diff headers Johannes Schindelin via GitGitGadget
` (14 subsequent siblings)
20 siblings, 0 replies; 387+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2018-05-02 14:52 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Johannes Schindelin
From: Johannes Schindelin <johannes.schindelin@gmx.de>
The main information in the `range-diff` view comes from the list of
matching and non-matching commits, the diffs are additional information.
Indenting them helps with the reading flow.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
builtin/range-diff.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/builtin/range-diff.c b/builtin/range-diff.c
index 5f12bbfa9..660e1f961 100644
--- a/builtin/range-diff.c
+++ b/builtin/range-diff.c
@@ -11,6 +11,11 @@ N_("git range-diff [<options>] <base> <old-tip> <new-tip>"),
NULL
};
+static struct strbuf *output_prefix_cb(struct diff_options *opt, void *data)
+{
+ return data;
+}
+
int cmd_range_diff(int argc, const char **argv, const char *prefix)
{
int creation_factor = 60;
@@ -21,12 +26,16 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix)
OPT_END()
};
int i, j, res = 0;
+ struct strbuf four_spaces = STRBUF_INIT;
struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT;
git_config(git_diff_ui_config, NULL);
diff_setup(&diffopt);
diffopt.output_format = DIFF_FORMAT_PATCH;
+ diffopt.output_prefix = output_prefix_cb;
+ strbuf_addstr(&four_spaces, " ");
+ diffopt.output_prefix_data = &four_spaces;
argc = parse_options(argc, argv, NULL, options,
builtin_range_diff_usage, PARSE_OPT_KEEP_UNKNOWN);
@@ -78,6 +87,7 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix)
strbuf_release(&range1);
strbuf_release(&range2);
+ strbuf_release(&four_spaces);
return res;
}
--
gitgitgadget
^ permalink raw reply related [flat|nested] 387+ messages in thread
* [PATCH v3 08/20] range-diff: suppress the diff headers
2018-07-03 11:26 ` [PATCH v3 00/20] Add `range-d