#!/usr/bin/perl -Tw
# Simple conversion filter for Liferea (and possibly other programs)
# that downloads content for offline viewing during feed updates.
#
# Pigeon <pigeon at pigeond dot net>
#
# http://pigeond.net/
#
# Latest version of this script can be found at:
# git://pigeond.net/offline_filter/
#
# GPLv2
#
#
# What it currently does:
# - Looks for matching patterns (e.g. img src URLs).
# - Downloads each match using wget if it is not already cached.
# - Replaces the original src in the feed with the local downloaded copy.
#
# Limitations:
# - Downloads all matched elements (img) one by one.
# - Will not redownload a file if it has been changed remotely.
#
# TODO:
# - Doesn't handle elements that span multiple lines
#
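# Example invocation (a sketch; the file names here are hypothetical).
# Liferea's per-feed "conversion filter" option runs the script the
# same way, feeding the downloaded feed to stdin and reading the
# rewritten feed from stdout:
#
#   ./offline_filter.pl < feed.xml > feed_offline.xml
#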
use strict;
use Digest::SHA1 qw(sha1_hex);
use HTML::Entities;
use File::Path qw(make_path);
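# Taint mode (-T) hygiene: drop environment variables that could affect
# subprocesses, and untaint $HOME via a regex capture before building
# paths from it.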
delete @ENV{qw(IFS CDPATH ENV BASH_ENV PATH)};
my ($HOME) = ($ENV{'HOME'} =~ /([a-zA-Z0-9\/\.\_]+)/);
my $SAVE_PATH = "${HOME}/.liferea_1.4/cache/saves";
my $WGET = '/usr/bin/wget';
my $WGET_TIMEOUT = 5;
my @BLACKLIST_RE = (
);
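# Example entries (hypothetical hosts) to skip certain sources, e.g.:
#   my @BLACKLIST_RE = (qr{\bads\.example\.com\b}i);
# Matching img srcs are left pointing at the original remote URL.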
my $debug = 0;
sub url_fetch {
    my ($url) = @_;
    my $outfile = $SAVE_PATH.'/'.sha1_hex($url);
    # keep the original file extension, if any, so viewers can guess the type
    if ($url =~ m/(\.[a-zA-Z]{3,4})$/) {
        $outfile .= lc($1);
    }
    if (-f $outfile) {
        if ($debug) {
            print(STDERR "Skipping [${url}], already in [${outfile}]\n");
        }
    } else {
        $url = decode_entities($url);
        # use the directory one level up from the file as the referer
        my $referer = $url;
        $referer =~ s![^/]*?$!!;
        my @args = ($WGET, '-O', $outfile, '-T', $WGET_TIMEOUT);
        if (!$debug) {
            push(@args, '-q');
        }
        push(@args, "--referer=${referer}");
        push(@args, $url);
        if ($debug) {
            print(STDERR "Running [".join(' ', @args)."]\n");
        }
        if (system(@args) != 0) {
            if ($debug) {
                print(STDERR "Fetch failed for [${url}]\n");
            }
            unlink($outfile);
            $outfile = undef;
        }
    }
    return $outfile;
}
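# For example (hypothetical URL, hash abbreviated), fetching
#   http://example.com/images/logo.png
# caches it as ${SAVE_PATH}/<sha1-of-url>.png, and later runs reuse the
# file instead of downloading it again.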
sub img_func {
    my ($matched, $src) = @_;
    if ($src ne '') {
        foreach my $u (@BLACKLIST_RE) {
            return $matched if ($src =~ $u);
        }
        my $outfile = url_fetch($src);
        if ($outfile) {
            $matched =~ s!\Q${src}\E!file://${outfile}!g;
        }
    }
    return $matched;
}
my %handler = (
    # save <img> images for offline viewing; match both raw and
    # entity-escaped markup as it appears in feeds
    qr{(?:<|&lt;)img .*?src=(?:"|'|&quot;)?(http://.*?)(?:>|&gt;|"|'|&quot;|\s)}i =>
        'img_func($&, $1)',
);
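# The s///gee in the main loop evaluates the replacement twice: the
# first /e yields the handler string 'img_func($&, $1)', the second
# evaluates that string as code, so each match is replaced by
# img_func's return value. For instance (hypothetical URL), a feed
# line containing
#   <img src="http://example.com/pic.jpg">
# comes out as
#   <img src="file://${SAVE_PATH}/<sha1-of-url>.jpg">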
my @lines = <>;
# create the cache directory; make_path avoids the shell, whose PATH
# was deleted above for taint safety
make_path($SAVE_PATH);
foreach my $l (@lines) {
    foreach my $regex (keys(%handler)) {
        $l =~ s/$regex/$handler{$regex}/gee;
    }
    print($l);
}
exit(0);