-
Notifications
You must be signed in to change notification settings - Fork 0
/
mailwww.py
executable file
·222 lines (187 loc) · 7.55 KB
/
mailwww.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# kate: space-indent on; tab-width 4; indent-width 4;
""" @package docstring
Cronjob emailer script
Reads an HTML page from a Web server and sends it through email
@author Gabriele Tozzi <[email protected]>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import sys
import re
import logging
import posixpath
from optparse import OptionParser
import urllib, urlparse
from HTMLParser import HTMLParser
import smtplib
from email.Utils import COMMASPACE, formatdate, make_msgid
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
class Main:
    """ Command line application: fetches a Web page and mails it as HTML """

    NAME = 'mailwww'
    VERSION = '0.5'

    # TCP port used for the SMTP connection (the command line only selects the host)
    SMTP_PORT = 25

    # Matches <meta http-equiv="Content-Type" content="..."> and captures the
    # content attribute. The closing slash is optional so that both the XHTML
    # (<meta ... />) and the plain HTML (<meta ...>) spellings are recognised.
    META_CTYPE_RE = re.compile(
        r'<meta\s+http-equiv=(?:"|\')Content-Type(?:"|\')\s+content=(?:"|\')([^\'"]*)(?:"|\')\s*/?>',
        re.I | re.M)

    def run(self):
        """ Main entry point

        Parses the command line, downloads the page, embeds the linked style
        sheets (unless disabled) and sends the result by email.
        """
        options, url, dest = self.__parseCommandLine()

        # NOTE(review): progress messages are logged at INFO level, so they are
        # shown even without --verbose; confirm this is the intended default.
        logging.basicConfig(level=logging.DEBUG if options.verbose else logging.INFO)

        html = self.__fetchPage(url, options.http_user, options.http_pass)
        if not options.nocss:
            html = self.__embedCss(html, url)

        cc = [options.cc] if options.cc else []
        msg = self.__buildMessage(html, options.sender, options.subject,
            dest, cc, options.multiple)
        self.__sendMessage(msg, options, dest, cc)

    def __parseCommandLine(self):
        """ Reads options and positional arguments from sys.argv

        Exits through OptionParser.error() when fewer than two positional
        arguments (the URL and at least one recipient) are given.

        @return tuple (options, url, dest) where dest is the recipient list
        """
        usage = "%prog [options] <url> <address> [<address2>] [<address...>]"
        optparser = OptionParser(usage=usage, version=self.NAME + ' ' + self.VERSION)
        optparser.add_option("--http-user", dest="http_user",
            help="Username for HTTP POST authentication")
        optparser.add_option("--http-pass", dest="http_pass",
            help="Password for HTTP POST authentication")
        optparser.add_option("-s", "--smtp", dest="smtp",
            help="SMTP server address. Default: localhost",
            default='localhost')
        optparser.add_option("--smtp-user", dest="smtp_user",
            help="Username for SMTP authentication")
        optparser.add_option("--smtp-pass", dest="smtp_pass",
            help="Password for SMTP authentication")
        optparser.add_option("-c", "--cc", dest="cc",
            help="Carbon Copy recipient")
        optparser.add_option("-f", "--from", dest="sender",
            help="eMail sender. Default: emailer@localhost",
            default="emailer@localhost")
        optparser.add_option("-j", "--subject", dest="subject",
            help="eMail Subject. Default: MailWWW Autogenerated Mail",
            default="MailWWW Autogenerated Mail")
        optparser.add_option("-n", "--no-css", dest="nocss",
            help="Disable embedding of linked Style Sheets",
            default=False, action="store_true")
        optparser.add_option("-m", "--multiple", dest="multiple",
            help="Send multiple emails: one for each recipient (Cc field is ignored)",
            default=False, action="store_true")
        optparser.add_option("-v", "--verbose", dest="verbose",
            help="Show progress information",
            default=False, action="store_true")
        (options, args) = optparser.parse_args()

        # First positional argument is the URL, all the others are recipients
        if len(args) < 2:
            optparser.error("invalid number of arguments")
        return options, args[0], args[1:]

    def __fetchPage(self, url, http_user=None, http_pass=None):
        """ Downloads the page and returns its content as unicode

        When a user name or a password is given, they are sent as a form POST
        (fields: username, password, login) instead of issuing a plain GET.

        @param url Address of the page to retrieve
        @param http_user User name for the POST login, or None
        @param http_pass Password for the POST login, or None
        @return The decoded page content
        """
        logging.info('Fetching url %s', url)
        data = None
        if http_user or http_pass:
            # Use POST authentication
            data = urllib.urlencode({'username': http_user, 'password': http_pass, 'login': True})
        f = urllib.urlopen(url, data)
        try:
            raw = f.read()
            # A response may come without a Content-Type header at all
            header_ctype = f.headers.get('content-type', '')
        finally:
            # Release the connection even when reading fails
            f.close()
        return self.__decodeHtml(raw, header_ctype)

    def __decodeHtml(self, raw, header_ctype):
        """ Decodes the raw page using the best charset information available

        The charset declared in the page's Content-Type meta tag is tried
        first, then the one from the HTTP Content-Type header and finally
        utf-8. Encodings unknown to Python are skipped. Undecodable bytes are
        replaced instead of raising.

        @param raw Page content as read from the server
        @param header_ctype Value of the HTTP Content-Type header (may be empty)
        @return The decoded page content
        """
        candidates = []
        match = self.META_CTYPE_RE.search(raw)
        if match:
            candidates.append(self.__parseEncoding(match.group(1)))
        candidates.append(self.__parseEncoding(header_ctype))
        # Last resort: always a valid codec, so the loop below always returns
        candidates.append('utf-8')

        for encoding in candidates:
            try:
                html = unicode(raw, encoding, errors='replace')
            except LookupError:
                # Unknown codec name: fall through to the next candidate
                continue
            logging.info('Detected charset: %s', encoding)
            return html

    def __embedCss(self, html, url):
        """ Replaces linked style sheets with inline <style> elements

        @param html Decoded page content
        @param url Address the page was fetched from
        @return The page content with the replacements applied
        """
        logging.info('Fetching Style Sheets...')
        lister = CSSLister(url)
        lister.feed(html)
        lister.close()
        for search, replace in lister.get_replacements():
            # Only the first occurrence: each collected tag maps to one replacement
            html = html.replace(search, replace, 1)
        return html

    def __buildMessage(self, html, sender, subject, dest, cc, multiple):
        """ Builds the MIME message carrying the page as an HTML part

        In "multiple" mode the To and Cc headers are left out: To is filled in
        per recipient at send time and Cc is ignored.

        @param html Decoded page content
        @param sender Address for the From header
        @param subject Text for the Subject header
        @param dest List of recipient addresses
        @param cc List of carbon copy addresses
        @param multiple True when one mail per recipient will be sent
        @return The MIMEMultipart message
        """
        msg = MIMEMultipart()
        msg['Date'] = formatdate(localtime=True)
        msg['Message-ID'] = make_msgid('emailer')
        msg['Subject'] = subject
        msg['From'] = sender
        if cc and not multiple:
            msg['Cc'] = ', '.join(cc)
        msg.preamble = 'This is a multi-part message in MIME format.'
        # The body is always sent as utf-8, whatever the source encoding was
        msg.attach(MIMEText(html.encode('utf-8'), 'html', 'utf-8'))
        if not multiple:
            msg['To'] = ', '.join(dest)
        return msg

    def __sendMessage(self, msg, options, dest, cc):
        """ Delivers the message through the configured SMTP server

        @param msg Message built by __buildMessage
        @param options Parsed command line options (smtp, smtp_user,
                       smtp_pass, sender and multiple are read)
        @param dest List of recipient addresses
        @param cc List of carbon copy addresses
        """
        smtp = smtplib.SMTP()
        smtp.connect(options.smtp, self.SMTP_PORT)
        try:
            if options.smtp_user:
                smtp.login(options.smtp_user, options.smtp_pass)
            if options.multiple:
                for d in dest:
                    # Assigning a header appends to the message, so the
                    # previous recipient must be removed first
                    del msg['To']
                    msg['To'] = d
                    logging.info('Sending mail to: %s', d)
                    smtp.sendmail(options.sender, d, msg.as_string())
            else:
                logging.info('Sending mail to: %s, Cc: %s', dest, cc)
                smtp.sendmail(options.sender, dest + cc, msg.as_string())
        finally:
            # Always end the session; if the server already dropped the
            # connection there is nothing left to close and the original
            # error (if any) must not be masked
            try:
                smtp.quit()
            except smtplib.SMTPServerDisconnected:
                pass

    def __parseEncoding(self, encstr, default='utf-8'):
        """ Extracts the charset name from a Content-Type style string

        Accepts values such as 'text/html; charset=utf-8', tolerating quotes
        around the charset, parameters following it and any letter case for
        the "charset=" keyword. A string without "charset=" is returned as is
        when it does not look like a MIME type (contains no '/').

        @param encstr Content-Type value or bare encoding name (may be empty or None)
        @param default Value returned when no charset can be extracted
        @return The charset name, or default
        """
        if not encstr:
            return default
        marker = 'charset='
        # Use the last occurrence, matched case-insensitively
        pos = encstr.lower().rfind(marker)
        value = encstr[pos + len(marker):] if pos >= 0 else encstr
        # Drop any following parameter, then surrounding blanks and quotes
        value = value.split(';', 1)[0].strip().strip('\'"').strip()
        if not value or '/' in value:
            return default
        return value
class CSSLister(HTMLParser):
    """ HTML parser collecting the style sheets linked by a page

    Every <link rel="stylesheet" href="..."> tag met while parsing is
    downloaded immediately and recorded together with the original tag text,
    so that the caller can swap the tag for an inline <style> element.
    """

    def __init__(self, baseurl):
        """ Creates the parser

        @param baseurl Address of the page being parsed; relative style sheet
                       references are resolved against it
        """
        self.__baseurl = baseurl
        # HTMLParser.__init__ calls reset(), which initialises the result list
        HTMLParser.__init__(self)
        self.__log = logging.getLogger('css')

    def reset(self):
        """ Clears the collected replacements along with the parser state """
        self.__repl = []
        HTMLParser.reset(self)

    def _resolve(self, href):
        """ Returns the absolute URL for a style sheet reference

        Relative, root-relative ("/x.css") and already absolute references
        are all handled.

        @param href Value of the link tag's href attribute
        @return The absolute URL
        """
        return urlparse.urljoin(self.__baseurl, href)

    def handle_starttag(self, tag, attrs):
        """ Fetches the style sheet when the tag is a style sheet link

        Tags whose rel attribute also carries "alternate", or which lack a
        usable href, are ignored.

        @param tag Lower-cased tag name
        @param attrs List of (name, value) attribute pairs
        """
        if tag != 'link':
            return

        rel_tokens = []
        href = None
        for name, value in attrs:
            if name == 'rel' and value:
                # rel is a space separated, case-insensitive token list
                rel_tokens.extend(value.lower().split())
            elif name == 'href' and href is None:
                href = value
        if 'stylesheet' not in rel_tokens or 'alternate' in rel_tokens:
            return
        if not href:
            # Nothing to download: a valueless href would otherwise crash
            return

        # Go get the CSS
        url = self._resolve(href)
        self.__log.info('Fetching CSS %s', url)
        c = urllib.urlopen(url)
        try:
            raw = c.read()
        finally:
            c.close()
        # Decode before use: the result is spliced into unicode HTML, where
        # raw non-ASCII bytes would trigger an implicit (failing) ASCII decode.
        # NOTE(review): utf-8 is assumed; the style sheet's own charset
        # declaration is not inspected.
        css = "<style>\n" + raw.decode('utf-8', 'replace') + "</style>\n"
        self.__repl.append((self.get_starttag_text(), css))

    def handle_endtag(self, data):
        """ End tags carry no information of interest """
        pass

    def get_replacements(self):
        """ Returns the collected replacements

        @return List of (original tag text, inline style element) tuples, in
                document order
        """
        return self.__repl
if __name__ == '__main__':
    # Script entry point: run the application once, then report success
    # to the caller through the exit status.
    Main().run()
    sys.exit(0)