-
Notifications
You must be signed in to change notification settings - Fork 1
/
foswiki2mediawiki
executable file
·207 lines (165 loc) · 4.86 KB
/
foswiki2mediawiki
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/perl
#
# foswiki2mediawiki converts FOSWiki sourcecode to MediaWiki
#
# The MediaWiki importTextFile tool would then import the file into mediawiki
#
# Derived from:
# https://bl0rg.net/software/twiki2mediawiki/
#
# 2014-05-01 - ASM
#
# Changed script to simply convert one file from FOSwiki to MediaWiki syntax
# and write the file to the same directory.
#
#
# BUGS
# - Leaves some HTML-markup that worked in FOSWiki hanging around
# - Doesn't handle *fat* FOSWiki-style
# - Doesn't handle Inter-TwikiWeb.Links correctly
# - Doesn't handle all styles of links
#
# - Rename a lot of pages (CameLcase)
# - Fix b0rken links (see BUGS)
# - Fix b0rken markup (see BUGS)
#
#
# TODO :
# - Handle attachments better
#
#
use strict;
use Getopt::Std;
# temp
use Data::Dumper;
# Options
#   -f filename   file containing the FOSWiki source to convert (required)
#   -d            debug: echo the input and print separators around the output
my %opts;
# getopts() returns false when the command line could not be parsed (for
# example an unknown switch); show the usage text instead of carrying on
# with a half-parsed option set.
getopts('f:d', \%opts) or usage();
# usage()
#
# Print the command-line synopsis (surrounded by blank lines) to STDOUT and
# terminate the program. Never returns to the caller.
sub usage {
    my @help_lines = (
        "\n",
        " Usage: $0 [-d] -f filename \n",
        "\n",
    );
    print $_ for @help_lines;
    exit;
}
main();

# main()
#
# Script entry point. Reads the file named by the -f option, converts its
# contents with convert_from_foswiki() and prints the result to STDOUT.
# With -d the raw input and separator lines are printed as well.
#
# Calls usage() (which exits) when -f is missing, and dies when the file
# does not exist or cannot be opened/closed. Always ends the program via exit.
sub main {
    # take filename from -f in getopts
    usage() unless exists $opts{f};

    my $foswikifile = $opts{f};
    die "File: $foswikifile does not exist.\n" unless -e $foswikifile;

    # maybe check for .txt extension and remove to form output filename
    # for now just output to STDOUT

    # A single lexical handle (the original declared $fh twice).
    open(my $fh, "<", $foswikifile)
        or die "cannot open < $foswikifile : $!";

    # INPUT: slurp the whole file in one read.
    my $input = do { local $/; <$fh> };
    # Guard so an empty file is handed on as '' rather than undef.
    $input = '' unless defined $input;

    close($fh) or die "cannot close $foswikifile : $!";

    #
    # Show input if debug flag is set
    #
    if ( $opts{d} ) {
        print "--------------------------------------------\n";
        print "INPUT: \n";
        print $input;
        print "\n";
        print "--------------------------------------------\n";
        print "OUTPUT: \n";
    }

    my $output = convert_from_foswiki($input);
    print $output;
    print "\n";

    if ( $opts{d} ) {
        print "--------------------------------------------\n";
    }

    exit;
}
# convert_from_foswiki($text)
#
# Convert a string of FOSWiki/TWiki markup to MediaWiki markup.
#
# Parameters:
#   $text - the complete FOSWiki source as one string (undef is treated as '').
#
# Returns:
#   The converted text, always terminated by exactly two newlines.
#
# The conversion is a fixed sequence of regex substitutions, applied in this
# order: HTML comments, %META%/%VARIABLE% tags, headings, lists, character
# fix-ups, links, stray tags, leading spaces, <pre> blocks and tables. The
# order matters: later rules rely on what earlier rules have already removed.
sub convert_from_foswiki {
    my $cur_text = shift;
    $cur_text = '' unless defined $cur_text;

    # HTML comments are lame and used by Twiki for stuff
    $cur_text =~ s#<!--.*?-->##gms;

    # Handle special fields: keep only what is inside %META:FIELD{...}%.
    # The braces are escaped because an unescaped literal "{" in a pattern
    # is deprecated (and in some contexts fatal) on newer perls.
    $cur_text =~ s/%META:FIELD\{(.*?)\}%/$1/g;

    # Strip FOSWiki meta-tags (anything between two % signs on one line)
    $cur_text =~ s/%.*?%//g;

    # strip ! preceding a (wiki)word
    $cur_text =~ s/ !/ /gm;

    # Convert headings
    # add good num of leading newlines
    # h4-h7 get converted to h4
    # The deepest headings must be handled first so that the shorter
    # patterns cannot match a prefix of a longer "---++++" marker.
    $cur_text =~ s/\n*---\+{4,}\s*(.*)/\n\n==== $1 ====/gm;
    $cur_text =~ s/\n*---\+{3}\s*(.*)/\n\n=== $1 ===/gm;
    $cur_text =~ s/\n*---\+\+\s*(.*)/\n\n\n== $1 ==/gm;
    # MediaWiki style generally does not use H1 tags, just bold it.
    $cur_text =~ s/\n*---\+\s*(.*)/\n'''$1'''/gm;

    # Convert tabbed lists to "*"/"#"-lists: one marker per leading tab
    $cur_text =~ s/^(\t+)\*/'*' x length($1)/gme;
    $cur_text =~ s/^(\t+)1/'#' x length($1)/gme;

    # Convert 3-space lists to "*"/"#"-lists: one marker per three spaces
    $cur_text =~ s{^((?:[ ]{3})+)\*}{ '*' x (length($1) / 3) }gme;
    $cur_text =~ s{^((?:[ ]{3})+)1}{ '#' x (length($1) / 3) }gme;

    # Convert umlauts and other funny characters to UTF8
    # NOTE(review): all twelve patterns below appear as the same replacement
    # character, so only the first substitution can ever match. The original
    # byte sequences look lost to a mis-encoding; restore them from the
    # upstream script before relying on this section.
    $cur_text =~ s/�/"/g;
    $cur_text =~ s/�/"/g;
    $cur_text =~ s/�/"/g;
    $cur_text =~ s/�/-/g;
    $cur_text =~ s/�/ä/g;
    $cur_text =~ s/�/ö/g;
    $cur_text =~ s/�/ü/g;
    $cur_text =~ s/�/ß/g;
    $cur_text =~ s/�/Ä/g;
    $cur_text =~ s/�/Ö/g;
    $cur_text =~ s/�/Ü/g;
    $cur_text =~ s/�/''/g;

    # Convert the funny links: [[target][label]] becomes [target label]
    $cur_text =~ s/\[\[(.*?)\]\[(.*?)\]\]/\[$1 $2\]/g;

    # We don't need no stinking A HREF-tags
    $cur_text =~ s#<a href="([^>]*)">([^<]*)</a>#\[$1 $2]#gi;
    $cur_text =~ s#<a href=([^>]*)>([^<]*)</a>#\[$1 $2]#gi;

    # I've got no idea what <nop> does. I'll delete it anyway.
    $cur_text =~ s#<nop>##gi;
    $cur_text =~ s#<k>##gi;

    # One and two spaces at the beginning of a line has a different meaning
    # in MediaWiki. Strip it! Three or more spaces are left untouched.
    $cur_text =~ s/^ {1,2}(?=[^ ])//gm;

    # replace <pre> </pre> sections with space at beginning of line
    # the '?' is for not-greedy matching
    $cur_text =~ s{<pre>\n?(.*?)\n?</pre>}{
        my $block = $1;
        $block =~ s/^/ /gm;
        $block;
    }egms;

    # Tables
    #
    # FW table begin: ^|
    # FW table end:   a line that does not start with |, or end of text
    # MW table begin: {| class="wikitable"
    # MW table end:   |}
    #
    # A blank line directly after the table is swallowed. Any other
    # following line is only looked at, never consumed, so the text after a
    # table keeps its first character. \z lets a table that ends the text be
    # converted as well.
    $cur_text =~ s{^(\|.*?)(?:^\n|(?=^[^|])|\z)}{_foswiki_table_to_mediawiki($1)}egms;

    # trim tailing newlines
    $cur_text =~ s#\s*$#\n\n#s;

    return $cur_text;
}

# _foswiki_table_to_mediawiki($rows)
#
# Turn one run of consecutive FOSWiki table lines (each starting with "|")
# into a MediaWiki "wikitable" block, returned with two trailing newlines.
sub _foswiki_table_to_mediawiki {
    my $x = shift;

    # - replace single pipes with double pipes surrounded by spaces
    $x =~ s/\|/ || /gm;
    # - replace space,pipe,pipe at beginning of line w/ pipe,hyphen,newline,pipe
    $x =~ s/^ \|\|/|-\n|/gm;
    # - remove empty rows
    $x =~ s/^\|-\n[\| ]*?$//gms;
    # - clean up double pipes at line ends
    $x =~ s/\|\|\s*$/|/gm;
    # - convert bolded table cells from pipes to bangs
    $x =~ s/\| *\*(.*?)\* *\|/! $1 !/gm;
    # - remove empty cells from end of line
    $x =~ s/[!|\s]*$//gms;

    return "{| class=\"wikitable\"\n$x\n|}\n\n";
}