git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH/RFC] Export file attachements in git-remote-mediawiki
@ 2012-06-04 20:49 NGUYEN Kim Thuat
  2012-06-04 21:34 ` Matthieu Moy
  0 siblings, 1 reply; 6+ messages in thread
From: NGUYEN Kim Thuat @ 2012-06-04 20:49 UTC (permalink / raw)
  To: git
  Cc: nguyenkimthuat, Pavel Volek, NGUYEN Kim Thuat,
	ROUCHER IGLESIAS Javier, Matthieu Moy

From: nguyenkimthuat <nguyenkimthuat@gmail.com>

The current version of git-remote-mediawiki supports only import and export of pages; it does not support import and export of file attachments, which are also exposed by the MediaWiki API. This patch adds the functionality to export file attachments from the local Git repository using the MediaWiki API.
For the moment, I removed the line "use encoding 'utf8'" because it is broken, especially when reading a binary file, for example a .pdf or .jpg.

Signed-off-by: Pavel Volek <Pavel.Volek@ensimag.imag.fr>
Signed-off-by: NGUYEN Kim Thuat <Kim-Thuat.Nguyen@ensimag.imag.fr>
Signed-off-by: ROUCHER IGLESIAS Javier <roucherj@ensimag.imag.fr>
Signed-off-by: Matthieu Moy <Matthieu.Moy@imag.fr>
---
 contrib/mw-to-git/git-remote-mediawiki | 90 ++++++++++++++++++++++++++++++++--
 1 file changed, 87 insertions(+), 3 deletions(-)

diff --git a/contrib/mw-to-git/git-remote-mediawiki b/contrib/mw-to-git/git-remote-mediawiki
index c18bfa1..830e2d0 100755
--- a/contrib/mw-to-git/git-remote-mediawiki
+++ b/contrib/mw-to-git/git-remote-mediawiki
@@ -36,7 +36,6 @@
 use strict;
 use MediaWiki::API;
 use DateTime::Format::ISO8601;
-use encoding 'utf8';
 
 # use encoding 'utf8' doesn't change STDERROR
 # but we're going to output UTF-8 filenames to STDERR
@@ -114,6 +113,9 @@ $wiki_name =~ s/[^\/]*:\/\///;
 # and '@' sign, to avoid author like MWUser@HTTPUser@host.com
 $wiki_name =~ s/^.*@//;
 
+# Get the list of file extensions supported by the current version of mediawiki 
+my @list_file_extensions = get_file_extensions();
+
 # Commands parser
 my $entry;
 my @cmd;
@@ -275,6 +277,13 @@ sub run_git {
 	return $res;
 }
 
+sub run_git_raw {
+        open(my $g,"-|","git " . $_[0]);   
+        my $r = do { local $/; <$g> };
+        close($g);
+
+        return $r;
+}
 
 sub get_last_local_revision {
 	# Get note regarding last mediawiki revision
@@ -642,8 +651,14 @@ sub mw_push_file {
 	my $old_sha1 = $diff_info_split[2];
 	my $page_created = ($old_sha1 eq NULL_SHA1);
 	my $page_deleted = ($new_sha1 eq NULL_SHA1);
+	my $file_deleted = ($new_sha1 eq NULL_SHA1);
 	$complete_file_name = mediawiki_clean_filename($complete_file_name);
 
+	my %hashFiles = map {$_ => 1}@list_file_extensions;
+	my $path = "File:".$complete_file_name;
+	my @extensions = split(/\./,$complete_file_name);
+	my $extension = pop(@extensions);
+
 	if (substr($complete_file_name,-3) eq ".mw") {
 		my $title = substr($complete_file_name,0,-3);
 
@@ -686,8 +701,34 @@ sub mw_push_file {
 		}
 		$newrevid = $result->{edit}->{newrevid};
 		print STDERR "Pushed file: $new_sha1 - $title\n";
-	} else {
-		print STDERR "$complete_file_name not a mediawiki file (Not pushable on this version of git-remote-mediawiki).\n"
+	} elsif (exists($hashFiles{$extension})) {
+		# Deleting and uploading a file require the priviledge of the user
+		if ($file_deleted) {
+			mw_connect_maybe();
+			my $res = $mediawiki->edit( {
+			action => 'delete',
+			title => $path,
+			reason => $summary } )
+			|| die $mediawiki-> {error}->{code} . ':' . $mediawiki->{error}->{details};
+
+		} else {
+			my $content = run_git_raw("cat-file blob $new_sha1");
+			mw_connect_maybe();
+				$mediawiki->{config}->{upload_url} = "$url/index.php/Special:Upload";
+
+				$mediawiki->upload( {
+				title => $complete_file_name,
+				summary => $summary,
+				data => $content,
+				ignorewarnings=>1
+                                  }, {
+					skip_encoding => 1 # Helps with names with accentuated characters
+			} ) || die $mediawiki-> {error}->{code} . ':' . $mediawiki->{error}->{details};
+			$newrevid = get_reviId_filepage();
+			print STDERR "Pushed file: $new_sha1 - $complete_file_name\n";
+			 }
+	else {
+		print STDERR "$complete_file_name is not supported on this version of Mediawiki.\n"
 	}
 	return ($newrevid, "ok");
 }
@@ -825,3 +866,46 @@ sub mw_push_revision {
 	print STDOUT "ok $remote\n";
 	return 1;
 }
+
+sub get_reviId_filepage() {
+	mw_connect_maybe();
+
+	my $max_rev_num_file = 0;
+
+	my @list_file_pages = get_mw_media_pages();
+
+	foreach my $file_page (@list_file_pages) {
+	my $id = $file_page->{pageid};
+        
+	my $query = {
+		action => 'query',
+		prop => 'revisions',
+		rvprop => 'ids',
+		pageids => $id,
+		};
+
+		my $result = $mediawiki->api($query);
+
+		my $lastrev = pop(@{$result->{query}->{pages}->{$id}->{revisions}});
+	}
+}
+
+sub get_file_extensions {
+	mw_connect_maybe();
+
+	my $query = {
+	action => 'query',
+	meta => 'siteinfo',
+	siprop => 'fileextensions'
+	};
+
+	my $result = $mediawiki->api($query);
+
+	my @file_extensions = map $_->{ext},@{$result->{query}->{fileextensions}};
+
+	return @file_extensions;
+}
+
+
+
+
-- 
1.7.10.2.552.gaa3bb87

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH/RFC] Export file attachements in git-remote-mediawiki
  2012-06-04 20:49 [PATCH/RFC] Export file attachements in git-remote-mediawiki NGUYEN Kim Thuat
@ 2012-06-04 21:34 ` Matthieu Moy
  2012-06-05 17:00   ` nguyenki
  2012-06-05 17:11   ` nguyenki
  0 siblings, 2 replies; 6+ messages in thread
From: Matthieu Moy @ 2012-06-04 21:34 UTC (permalink / raw)
  To: NGUYEN Kim Thuat
  Cc: git, nguyenkimthuat, Pavel Volek, ROUCHER IGLESIAS Javier

NGUYEN Kim Thuat <kim-thuat.nguyen@ensimag.imag.fr> writes:

> +# Get the list of file extensions supported by the current version of mediawiki 
> +my @list_file_extensions = get_file_extensions();

You should do it only on demand (like $mediawiki is created lazily).

> +        open(my $g,"-|","git " . $_[0]);   

Space after ",".

> @@ -642,8 +651,14 @@ sub mw_push_file {
>  	my $old_sha1 = $diff_info_split[2];
>  	my $page_created = ($old_sha1 eq NULL_SHA1);
>  	my $page_deleted = ($new_sha1 eq NULL_SHA1);
> +	my $file_deleted = ($new_sha1 eq NULL_SHA1);

This line looks suspiciously similar to the previous one. Do you need
another variable for the same value?

> +	my @extensions = split(/\./,$complete_file_name);

Space after "," (many more instances after).

> -	} else {
> -		print STDERR "$complete_file_name not a mediawiki file (Not pushable on this version of git-remote-mediawiki).\n"
> +	} elsif (exists($hashFiles{$extension})) {
> +		# Deleting and uploading a file require the priviledge of the user
> +		if ($file_deleted) {
> +			mw_connect_maybe();
> +			my $res = $mediawiki->edit( {
> +			action => 'delete',
> +			title => $path,
> +			reason => $summary } )

Indent the body of {} please.

> +		} else {
> +			my $content = run_git_raw("cat-file blob $new_sha1");
> +			mw_connect_maybe();
> +				$mediawiki->{config}->{upload_url} = "$url/index.php/Special:Upload";

Broken indentation.

Does this work on wiki configured in foreign languages, like french that
has Spécial:Téléverser instead?

> +			} ) || die $mediawiki-> {error}->{code} . ':' . $mediawiki->{error}->{details};
> +			$newrevid = get_reviId_filepage();

This queries the wiki to get the last revision id. The existing code (to
deal with page) could get this from the response of the API to the edit
request, like this:

		$newrevid = $result->{edit}->{newrevid};

Your version is much more inefficient (many requests each time you
upload a file), and has a race condition (what happens if someone else
creates a revision at the same time?). Isn't there a better way?

> +			print STDERR "Pushed file: $new_sha1 - $complete_file_name\n";
> +			 }

Broken indentation.

> +	else {
> +		print STDERR "$complete_file_name is not supported on this version of Mediawiki.\n"

It's not a matter of version, it's a matter of configuration.

> +sub get_reviId_filepage() {

Strange name (two consecutive i?). If this function fetches the last
wiki revision, why not call it get_last_mw_revid or something like this?

> +	mw_connect_maybe();
> +
> +	my $max_rev_num_file = 0;
> +
> +	my @list_file_pages = get_mw_media_pages();
> +
> +	foreach my $file_page (@list_file_pages) {
> +	my $id = $file_page->{pageid};

Broken indentation.

> +	my $query = {
> +		action => 'query',
> +		prop => 'revisions',
> +		rvprop => 'ids',
> +		pageids => $id,
> +		};
> +
> +		my $result = $mediawiki->api($query);

Broken indentation.

> +sub get_file_extensions {
> +	mw_connect_maybe();
> +
> +	my $query = {
> +	action => 'query',
> +	meta => 'siteinfo',
> +	siprop => 'fileextensions'
> +	};

Broken indentation.

-- 
Matthieu Moy
http://www-verimag.imag.fr/~moy/

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH/RFC] Export file attachements in git-remote-mediawiki
  2012-06-04 21:34 ` Matthieu Moy
@ 2012-06-05 17:00   ` nguyenki
  2012-06-05 17:05     ` Matthieu Moy
  2012-06-05 17:11   ` nguyenki
  1 sibling, 1 reply; 6+ messages in thread
From: nguyenki @ 2012-06-05 17:00 UTC (permalink / raw)
  To: Matthieu.Moy; +Cc: git, roucherj, Pavel.Volek

On Mon, 04 Jun 2012 23:34:29 +0200, Matthieu Moy wrote:
> NGUYEN Kim Thuat <kim-thuat.nguyen@ensimag.imag.fr> writes:
>
>> +# Get the list of file extensions supported by the current version 
>> of mediawiki
>> +my @list_file_extensions = get_file_extensions();
>
> You should do it only on demand (like $mediawiki is created lazily).
Yes, I changed the code in this part; it now looks like:

@@ -113,9 +113,6 @@ $wiki_name =~ s/[^\/]*:\/\///;
  # and '@' sign, to avoid author like MWUser@HTTPUser@host.com
  $wiki_name =~ s/^.*@//;

-# Get the list of file extensions supported by the current version of 
mediawiki
-my @list_file_extensions = get_file_extensions();
-
  # Commands parser
  my $entry;
  my @cmd;
@@ -654,7 +651,7 @@ sub mw_push_file {
  	my $file_deleted = ($new_sha1 eq NULL_SHA1);
  	$complete_file_name = mediawiki_clean_filename($complete_file_name);

-	my %hashFiles = map {$_ => 1}@list_file_extensions;
+	my %hashFiles = get_file_extensions_maybe($complete_file_name);
  	my $path = "File:".$complete_file_name;
  	my @extensions = split(/\./,$complete_file_name);
  	my $extension = pop(@extensions);

-sub get_file_extensions {
-	mw_connect_maybe();
-
-	my $query = {
-	action => 'query',
-	meta => 'siteinfo',
-	siprop => 'fileextensions'
-	};
+sub get_file_extensions_maybe {
+	my $file_name = shift;
+	my $est_mw_page = substr($file_name,-3) eq ".mw";
+	if(!$est_mw_page) {
+		mw_connect_maybe();

-	my $result = $mediawiki->api($query);
+		my $query = {
+			action => 'query',
+			meta => 'siteinfo',
+			siprop => 'fileextensions'
+			};

-	my @file_extensions = map 
$_->{ext},@{$result->{query}->{fileextensions}};
+		my $result = $mediawiki->api($query);
+		my @file_extensions = map 
$_->{ext},@{$result->{query}->{fileextensions}};
+		my %hashFile = map {$_ => 1}@file_extensions;

-	return @file_extensions;
+		return %hashFile;
+	} else {
+		return ;
+	}
  }

  Now, the function will list the file extensions on demand.


>> @@ -642,8 +651,14 @@ sub mw_push_file {
>>  	my $old_sha1 = $diff_info_split[2];
>>  	my $page_created = ($old_sha1 eq NULL_SHA1);
>>  	my $page_deleted = ($new_sha1 eq NULL_SHA1);
>> +	my $file_deleted = ($new_sha1 eq NULL_SHA1);
>
> This line looks suspiciously similar to the previous one. Do you need
> another variable for the same value?
Yes, it's true. I just want the code to be more visible. Because, when 
we delete a file attachment, it's not a page wiki.

> Does this work on wiki configured in foreign languages, like french 
> that
> has Spécial:Téléverser instead?
>

>> +	else {
>> +		print STDERR "$complete_file_name is not supported on this 
>> version of Mediawiki.\n"
>
> It's not a matter of version, it's a matter of configuration.
What do you think if i change it like:
         else {
	print STDERR "$complete_file_name is not a permitted file type. Check 
your configuration for more information\n"

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH/RFC] Export file attachements in git-remote-mediawiki
  2012-06-05 17:00   ` nguyenki
@ 2012-06-05 17:05     ` Matthieu Moy
  2012-06-06 22:25       ` nguyenki
  0 siblings, 1 reply; 6+ messages in thread
From: Matthieu Moy @ 2012-06-05 17:05 UTC (permalink / raw)
  To: nguyenki; +Cc: git, roucherj, Pavel.Volek

nguyenki <nguyenki@ensibm.imag.fr> writes:

>>> @@ -642,8 +651,14 @@ sub mw_push_file {
>>>  	my $old_sha1 = $diff_info_split[2];
>>>  	my $page_created = ($old_sha1 eq NULL_SHA1);
>>>  	my $page_deleted = ($new_sha1 eq NULL_SHA1);
>>> +	my $file_deleted = ($new_sha1 eq NULL_SHA1);
>>
>> This line looks suspiciously similar to the previous one. Do you need
>> another variable for the same value?
> Yes, it's true. I just want the code to be more visible. Because, when
> we delete a file attachment, it's not a page wiki.

I still don't see a reason to have two variables for the same thing. If
$page_deleted is not a good name, then rename the variable to
something more general (e.g. $path_deleted ?)

>>> +	else {
>>> +		print STDERR "$complete_file_name is not supported on
>>> this version of Mediawiki.\n"
>>
>> It's not a matter of version, it's a matter of configuration.
> What do you think if i change it like:
>         else {
> 	print STDERR "$complete_file_name is not a permitted file
> type. Check your configuration for more information\n"

That's better, but it doesn't tell the user which configuration to
check (Git's one, or the wiki's one).

-- 
Matthieu Moy
http://www-verimag.imag.fr/~moy/

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH/RFC] Export file attachements in git-remote-mediawiki
  2012-06-04 21:34 ` Matthieu Moy
  2012-06-05 17:00   ` nguyenki
@ 2012-06-05 17:11   ` nguyenki
  1 sibling, 0 replies; 6+ messages in thread
From: nguyenki @ 2012-06-05 17:11 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

On Mon, 04 Jun 2012 23:34:29 +0200, Matthieu Moy wrote:
> NGUYEN Kim Thuat <kim-thuat.nguyen@ensimag.imag.fr> writes:
>

>> +		} else {
>> +			my $content = run_git_raw("cat-file blob $new_sha1");
>> +			mw_connect_maybe();
>> +				$mediawiki->{config}->{upload_url} = 
>> "$url/index.php/Special:Upload";
>
> Broken indentation.
>
> Does this work on wiki configured in foreign languages, like french 
> that
> has Spécial:Téléverser instead?
>

[[Special:Upload]] will work on any wiki, since the English names (known
internally as the "canonical" names) will work regardless of the wiki's
language.

For example, if you go to http://fr.wikipedia.org/wiki/Special:Upload,
you'll see that it redirects automatically.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH/RFC] Export file attachements in git-remote-mediawiki
  2012-06-05 17:05     ` Matthieu Moy
@ 2012-06-06 22:25       ` nguyenki
  0 siblings, 0 replies; 6+ messages in thread
From: nguyenki @ 2012-06-06 22:25 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

On Tue, 05 Jun 2012 19:05:44 +0200, Matthieu Moy wrote:
> nguyenki <nguyenki@ensibm.imag.fr> writes:
>
>>>> @@ -642,8 +651,14 @@ sub mw_push_file {
>>>>  	my $old_sha1 = $diff_info_split[2];
>>>>  	my $page_created = ($old_sha1 eq NULL_SHA1);
>>>>  	my $page_deleted = ($new_sha1 eq NULL_SHA1);
>>>> +	my $file_deleted = ($new_sha1 eq NULL_SHA1);
>>>
>>> This line looks suspiciously similar to the previous one. Do you 
>>> need
>>> another variable for the same value?
>> Yes, it's true. I just want the code to be more visible. Because, 
>> when
>> we delete a file attachment, it's not a page wiki.
>
> I still don't see a reason to have two variables for the same thing. 
> If
> $page_deleted is not a good name, then rename the variable to
> something more general (e.g. $path_deleted ?)
>

>>>> +	else {
>>>> +		print STDERR "$complete_file_name is not supported on
>>>> this version of Mediawiki.\n"
>>>
>>> It's not a matter of version, it's a matter of configuration.
>> What do you think if i change it like:
>>         else {
>> 	print STDERR "$complete_file_name is not a permitted file
>> type. Check your configuration for more information\n"
>
> That's better, but it doesn't tell the user which configuration to
> check (Git's one, or the wiki's one).
Thanks for your advice. I changed the code following your suggestions;
it now looks like:
@@ -36,6 +36,7 @@
  use strict;
  use MediaWiki::API;
  use DateTime::Format::ISO8601;
+use encoding 'utf8';

  # use encoding 'utf8' doesn't change STDERROR
  # but we're going to output UTF-8 filenames to STDERR
@@ -275,7 +276,8 @@ sub run_git {
  }

  sub run_git_raw {
-        open(my $g,"-|","git " . $_[0]);
+	no encoding 'utf8';
+        open(my $g, "-|:utf8", "git " . $_[0]);
          my $r = do { local $/; <$g> };
          close($g);

@@ -648,7 +650,6 @@ sub mw_push_file {
  	my $old_sha1 = $diff_info_split[2];
  	my $page_created = ($old_sha1 eq NULL_SHA1);
  	my $page_deleted = ($new_sha1 eq NULL_SHA1);
-	my $file_deleted = ($new_sha1 eq NULL_SHA1);
  	$complete_file_name = mediawiki_clean_filename($complete_file_name);

  	my %hashFiles = get_file_extensions_maybe($complete_file_name);
@@ -700,28 +701,29 @@ sub mw_push_file {
  		print STDERR "Pushed file: $new_sha1 - $title\n";
  	} elsif (exists($hashFiles{$extension})) {
  		# Deleting and uploading a file require the priviledge of the user
-		if ($file_deleted) {
+		if ($page_deleted) {
  			mw_connect_maybe();
  			my $res = $mediawiki->edit( {
-			action => 'delete',
-			title => $path,
-			reason => $summary } )
+				action => 'delete',
+				title => $path,
+				reason => $summary } )
  			|| die $mediawiki-> {error}->{code} . ':' . 
$mediawiki->{error}->{details};

  		} else {
  			my $content = run_git_raw("cat-file blob $new_sha1");
  			mw_connect_maybe();
-				$mediawiki->{config}->{upload_url} = 
"$url/index.php/Special:Upload";
+			$mediawiki->{config}->{upload_url} = 
"$url/index.php/Special:Upload";

-				$mediawiki->upload( {
+			$mediawiki->upload( {
  				title => $complete_file_name,
  				summary => $summary,
  				data => $content,
  				ignorewarnings=>1
-                                  }, {
-					skip_encoding => 1 # Helps with names with accentuated characters
-			} ) || die $mediawiki-> {error}->{code} . ':' . 
$mediawiki->{error}->{details};
-			$newrevid = get_reviId_filepage();
+					}, {
+						skip_encoding => 1 # Helps with names with accentuated 
characters
+					} ) || die $mediawiki-> {error}->{code} . ':' . 
$mediawiki->{error}->{details};
+			my $last_file_page = $mediawiki->get_page({title =>$path});
+			$newrevid = $last_file_page->{revid};
  			print STDERR "Pushed file: $new_sha1 - $complete_file_name\n";
  			 }
  	else {
@@ -864,29 +866,6 @@ sub mw_push_revision {
  	return 1;
  }

-sub get_reviId_filepage() {
-	mw_connect_maybe();
-
-	my $max_rev_num_file = 0;
-
-	my @list_file_pages = get_mw_media_pages();
-
-	foreach my $file_page (@list_file_pages) {
-	my $id = $file_page->{pageid};
-
-	my $query = {
-		action => 'query',
-		prop => 'revisions',
-		rvprop => 'ids',
-		pageids => $id,
-		};
-
-	my $result = $mediawiki->api($query);
-
-	my $lastrev = pop(@{$result->{query}->{pages}->{$id}->{revisions}});
-	}
-}
-

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-06-06 22:25 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-04 20:49 [PATCH/RFC] Export file attachements in git-remote-mediawiki NGUYEN Kim Thuat
2012-06-04 21:34 ` Matthieu Moy
2012-06-05 17:00   ` nguyenki
2012-06-05 17:05     ` Matthieu Moy
2012-06-06 22:25       ` nguyenki
2012-06-05 17:11   ` nguyenki

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).