user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH 0/3] more solver fixes
@ 2020-01-04  3:34 Eric Wong
  2020-01-04  3:34 ` [PATCH 1/3] xt/solver.t: real-world regression tests Eric Wong
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Eric Wong @ 2020-01-04  3:34 UTC (permalink / raw)
  To: meta

Eric Wong (3):
  xt/solver.t: real-world regression tests
  solver: do not enforce order on extended headers
  solver: minor cleanups to diff extraction

 MANIFEST                     |  1 +
 lib/PublicInbox/SolverGit.pm | 41 +++++++++++++-------------
 xt/solver.t                  | 57 ++++++++++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+), 21 deletions(-)
 create mode 100644 xt/solver.t

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/3] xt/solver.t: real-world regression tests
  2020-01-04  3:34 [PATCH 0/3] more solver fixes Eric Wong
@ 2020-01-04  3:34 ` Eric Wong
  2020-01-04  3:34 ` [PATCH 2/3] solver: do not enforce order on extended headers Eric Wong
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2020-01-04  3:34 UTC (permalink / raw)
  To: meta

There's a lot of test cases which we should probably
make self-contained at some point, but right now it's
easier to just mark them off in a maintainer test.
---
 MANIFEST    |  1 +
 xt/solver.t | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 xt/solver.t

diff --git a/MANIFEST b/MANIFEST
index 4d922f97..845fee7b 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -301,3 +301,4 @@ xt/nntpd-validate.t
 xt/perf-msgview.t
 xt/perf-nntpd.t
 xt/perf-threading.t
+xt/solver.t
diff --git a/xt/solver.t b/xt/solver.t
new file mode 100644
index 00000000..238abecd
--- /dev/null
+++ b/xt/solver.t
@@ -0,0 +1,57 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Config; # this relies on PI_CONFIG // ~/.public-inbox/config
+use PublicInbox::WWW;
+my @psgi = qw(HTTP::Request::Common Plack::Test URI::Escape Plack::Builder);
+require_mods(qw(DBD::SQLite Search::Xapian), @psgi);
+use_ok($_) for @psgi;
+my $cfg = PublicInbox::Config->new;
+my $www = PublicInbox::WWW->new($cfg);
+my $app = sub {
+	my $env = shift;
+	$env->{'psgi.errors'} = \*STDERR;
+	$www->call($env);
+};
+
+# TODO: convert these to self-contained test cases
+my $todo = {
+	'git' => [
+		# 'eebf7a8/s/?b=t%2Ftest-lib.sh', TODO
+		'eb580ca513/s/?b=remote-odb.c',
+		'776fa90f7f/s/?b=contrib/git-jump/git-jump',
+		'5cd8845/s/?b=submodule.c',
+		'81c1164ae5/s/?b=builtin/log.c',
+		'6aa8857a11/s/?b=protocol.c', # TODO: i/, w/ instead of a/ b/
+		'96f1c7f/s/', # TODO: b=contrib/completion/git-completion.bash
+		'b76f2c0/s/?b=po/zh_CN.po',
+	],
+};
+
+my ($ibx, $urls);
+my $client = sub {
+	my ($cb) = @_;
+	for (@$urls) {
+		my $url = "/$ibx/$_";
+		my $res = $cb->(GET($url));
+		is($res->code, 200, $url);
+		next if $res->code == 200;
+		# diag $res->content;
+		diag "$url failed";
+	}
+};
+
+while (($ibx, $urls) = each %$todo) {
+	SKIP: {
+		if (!$cfg->lookup_name($ibx)) {
+			skip("$ibx not configured", scalar(@$urls));
+		}
+		test_psgi($app, $client);
+	}
+}
+
+done_testing();
+1;

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] solver: do not enforce order on extended headers
  2020-01-04  3:34 [PATCH 0/3] more solver fixes Eric Wong
  2020-01-04  3:34 ` [PATCH 1/3] xt/solver.t: real-world regression tests Eric Wong
@ 2020-01-04  3:34 ` Eric Wong
  2020-01-04  3:34 ` [PATCH 3/3] solver: minor cleanups to diff extraction Eric Wong
  2020-01-04  4:19 ` [PATCH 4/3] solver: allow literal '\r' character in diff lines Eric Wong
  3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2020-01-04  3:34 UTC (permalink / raw)
  To: meta

This is needed to work with patches with many renames,
such as what makes "git/eebf7a8/s/?b=t%2Ftest-lib.sh"
---
 lib/PublicInbox/SolverGit.pm | 23 ++++++++++++-----------
 xt/solver.t                  |  2 +-
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index a78360fd..b12cd9d2 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -112,6 +112,8 @@ sub extract_diff ($$) {
 	}
 
 	state $LF = qr!\r?\n!;
+	state $ANY = qr![^\r\n]+!;
+	state $MODE = '100644|120000|100755';
 	state $FN = qr!(?:("?[^/\n]+/[^\r\n]+)|/dev/null)!;
 
 	$s =~ m!( # $1 start header lines we save for debugging:
@@ -124,17 +126,16 @@ sub extract_diff ($$) {
 		# try to get the pre-and-post filenames as $2 and $3
 		(?:^diff\x20--git\x20$FN\x20$FN$LF)
 
-		# old mode $4
-		(?:^old mode\x20(100644|120000|100755)$LF)?
-
-		# ignore other info
-		(?:^(?:copy|rename|deleted|dissimilarity|similarity).*$LF)?
-
-		# new mode (possibly new file) ($5)
-		(?:^new\x20(?:file\x20)?mode\x20(100644|120000|100755)$LF)?
-
-		# ignore other info
-		(?:^(?:copy|rename|deleted|dissimilarity|similarity).*$LF)?
+		(?:^(?: # pass all this to git-apply:
+			# old mode $4
+			(?:old\x20mode\x20($MODE))
+			|
+			# new mode (possibly new file) ($5)
+			(?:new\x20(?:file\x20)?mode\x20($MODE))
+			|
+			(?:(?:copy|rename|deleted|
+				dissimilarity|similarity)$ANY)
+		)$LF)*
 
 		)? # end of optional stuff, everything below is required
 
diff --git a/xt/solver.t b/xt/solver.t
index 238abecd..e9f24e7f 100644
--- a/xt/solver.t
+++ b/xt/solver.t
@@ -20,7 +20,7 @@ my $app = sub {
 # TODO: convert these to self-contained test cases
 my $todo = {
 	'git' => [
-		# 'eebf7a8/s/?b=t%2Ftest-lib.sh', TODO
+		'eebf7a8/s/?b=t%2Ftest-lib.sh',
 		'eb580ca513/s/?b=remote-odb.c',
 		'776fa90f7f/s/?b=contrib/git-jump/git-jump',
 		'5cd8845/s/?b=submodule.c',

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/3] solver: minor cleanups to diff extraction
  2020-01-04  3:34 [PATCH 0/3] more solver fixes Eric Wong
  2020-01-04  3:34 ` [PATCH 1/3] xt/solver.t: real-world regression tests Eric Wong
  2020-01-04  3:34 ` [PATCH 2/3] solver: do not enforce order on extended headers Eric Wong
@ 2020-01-04  3:34 ` Eric Wong
  2020-01-04  4:19 ` [PATCH 4/3] solver: allow literal '\r' character in diff lines Eric Wong
  3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2020-01-04  3:34 UTC (permalink / raw)
  To: meta

Initialize the $di hashref at use to make it more obvious it's
a local variable.  We can also use the :utf8 IO layer via
open+print to save ourselves the trouble of converting the UTF-8
patch to an octet stream.
---
 lib/PublicInbox/SolverGit.pm | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index b12cd9d2..eab8459b 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -102,7 +102,6 @@ sub extract_diff ($$) {
 	my $ct = $part->content_type || 'text/plain';
 	my ($s, undef) = msg_part_text($part, $ct);
 	defined $s or return;
-	my $di = {};
 
 	# Email::MIME::Encodings forces QP to be CRLF upon decoding,
 	# change it back to LF:
@@ -159,12 +158,14 @@ sub extract_diff ($$) {
 		(?:^(?:[\@\+\x20\-\\][^\r\n]*|)$LF)+
 	)!smx or return;
 
-	my $hdr_lines = $1;
+	my $di = {
+		hdr_lines => $1,
+		oid_a => $6,
+		oid_b => $7,
+		mode_a => $5 // $8 // $4, # new (file) // unchanged // old
+	};
 	my $path_a = $2 // $10;
 	my $path_b = $3 // $11;
-	$di->{oid_a} = $6;
-	$di->{oid_b} = $7;
-	$di->{mode_a} = $5 // $8 // $4; # new (file) // unchanged // old
 	my $patch = $9;
 
 	# don't care for leading 'a/' and 'b/'
@@ -179,19 +180,16 @@ sub extract_diff ($$) {
 	$di->{path_a} = join('/', @a);
 	$di->{path_b} = join('/', @b);
 
-	utf8::encode($hdr_lines);
-	utf8::encode($patch);
 	my $path = ++$self->{tot};
 	$di->{n} = $path;
-	open(my $tmp, '>', $self->{tmp}->dirname . "/$path") or
+	open(my $tmp, '>:utf8', $self->{tmp}->dirname . "/$path") or
 		die "open(tmp): $!";
-	print $tmp $hdr_lines, $patch or die "print(tmp): $!";
+	print $tmp $di->{hdr_lines}, $patch or die "print(tmp): $!";
 	close $tmp or die "close(tmp): $!";
 
 	# for debugging/diagnostics:
 	$di->{ibx} = $ibx;
 	$di->{smsg} = $smsg;
-	$di->{hdr_lines} = $hdr_lines;
 
 	push @$diffs, $di;
 }

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 4/3] solver: allow literal '\r' character in diff lines
  2020-01-04  3:34 [PATCH 0/3] more solver fixes Eric Wong
                   ` (2 preceding siblings ...)
  2020-01-04  3:34 ` [PATCH 3/3] solver: minor cleanups to diff extraction Eric Wong
@ 2020-01-04  4:19 ` Eric Wong
  3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2020-01-04  4:19 UTC (permalink / raw)
  To: meta

While filenames are escaped, the actual diff contents may
contain an unescaped "\r" carriage return byte not in front
of the "\n" line feed.  So just allow "\r" to appear in the
middle of a line.
---
 lib/PublicInbox/SolverGit.pm | 2 +-
 xt/solver.t                  | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index eab8459b..5ac27988 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -155,7 +155,7 @@ sub extract_diff ($$) {
 		# the meat of the diff, including "^\\No newline ..."
 		# We also allow for totally blank lines w/o leading spaces,
 		# because git-apply(1) handles that case, too
-		(?:^(?:[\@\+\x20\-\\][^\r\n]*|)$LF)+
+		(?:^(?:[\@\+\x20\-\\][^\n]*|)$LF)+
 	)!smx or return;
 
 	my $di = {
diff --git a/xt/solver.t b/xt/solver.t
index e9f24e7f..4ff57fe7 100644
--- a/xt/solver.t
+++ b/xt/solver.t
@@ -20,6 +20,8 @@ my $app = sub {
 # TODO: convert these to self-contained test cases
 my $todo = {
 	'git' => [
+		'9e9048b02bd04d287461543d85db0bb715b89f8c'
+			.'/s/?b=t%2Ft3420%2Fremove-ids.sed',
 		'eebf7a8/s/?b=t%2Ftest-lib.sh',
 		'eb580ca513/s/?b=remote-odb.c',
 		'776fa90f7f/s/?b=contrib/git-jump/git-jump',

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-01-04  4:19 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-04  3:34 [PATCH 0/3] more solver fixes Eric Wong
2020-01-04  3:34 ` [PATCH 1/3] xt/solver.t: real-world regression tests Eric Wong
2020-01-04  3:34 ` [PATCH 2/3] solver: do not enforce order on extended headers Eric Wong
2020-01-04  3:34 ` [PATCH 3/3] solver: minor cleanups to diff extraction Eric Wong
2020-01-04  4:19 ` [PATCH 4/3] solver: allow literal '\r' character in diff lines Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).