-
Notifications
You must be signed in to change notification settings - Fork 33
/
Copy pathfeatures.py
39 lines (28 loc) · 933 Bytes
/
features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def numwords(emailtext):
    """Return the number of whitespace-separated words in *emailtext*.

    Any run of whitespace (spaces, tabs, newlines) separates words, so
    consecutive spaces do not produce phantom empty words and an empty or
    whitespace-only text counts as 0 words.
    """
    # split() with no argument collapses whitespace runs and drops empty
    # tokens; split(" ") would return [''] for an empty string.
    return len(emailtext.split())
def has_html(emailtext):
    """Return 1 if the substring "html" occurs in *emailtext*, else 0.

    The check is case-insensitive: the text is lowercased before searching.
    """
    lowered = emailtext.lower()
    if "html" in lowered:
        return 1
    return 0
def num_link(emailtext):
    """Return how many times the substring "http" occurs in *emailtext*.

    This is a cheap proxy for the number of links, not URL parsing: each
    non-overlapping occurrence of "http" counts once, so "https://..."
    counts as one. Matching is case-insensitive so that "HTTP://..." is
    counted as well.
    """
    # Lowercase first so upper- and mixed-case schemes are not missed.
    return emailtext.lower().count('http')
# NOTE(review): presumably further candidate spam words, not counted below: join, free, buy, start, click, discount
def spammy_words(emailtext):
    """Return the total number of tokens in *emailtext* that are listed words.

    The text is split on any whitespace (spaces, tabs, newlines) and each
    token is compared exactly and case-sensitively against the word list, so
    "Money" or "money," does not match "money".
    """
    # 'recieve' is kept exactly as spelled in the original list.
    # NOTE(review): presumably deliberate, to match a common misspelling - confirm.
    spam_words = ('helvetica', 'new', 'money', 'e-mail', 'recieve', 'business')
    # split() rather than split(" "), so a listed word next to a newline or
    # tab is still its own token.
    tokens = emailtext.split()
    return sum(tokens.count(word) for word in spam_words)
def not_spammy_words(emailtext):
    """Return the total number of tokens in *emailtext* that are listed words.

    The list here is 'email', 'people', 'time' and 'please'. The text is
    split on any whitespace (spaces, tabs, newlines) and each token is
    compared exactly and case-sensitively, so "Please" or "time." does not
    match.
    """
    ham_words = ('email', 'people', 'time', 'please')
    # split() rather than split(" "), so a listed word next to a newline or
    # tab is still its own token.
    tokens = emailtext.split()
    return sum(tokens.count(word) for word in ham_words)
def all_caps(emailtext):
    """Return 1 if *emailtext* is written entirely in upper case, else 0.

    Non-letter characters (digits, spaces, punctuation) are ignored, but the
    text must contain at least one upper-case letter: an empty string or a
    text with no letters at all yields 0 rather than being reported as
    "all caps".
    """
    # Unchanged by upper() means there are no lower-case letters.
    no_lowercase = emailtext == emailtext.upper()
    # Changed by lower() means there is at least one upper-case letter.
    has_uppercase = emailtext != emailtext.lower()
    return 1 if no_lowercase and has_uppercase else 0
def cap_ratio(emailtext):
    """Return the ratio of upper-case to lower-case letters in *emailtext*.

    Only cased letters are counted; digits, whitespace and punctuation
    contribute to neither side. When the text contains no lower-case
    letters the denominator is clamped to 1, so the result stays finite:
    an empty or letter-free text gives 0.0 and an all-caps text gives its
    upper-case letter count.

    Returns:
        float: upper-case count divided by max(lower-case count, 1).
    """
    uppers = sum(1 for ch in emailtext if ch.isupper())
    lowers = sum(1 for ch in emailtext if ch.islower())
    # Clamp the denominator to avoid ZeroDivisionError on texts with no
    # lower-case letters; float() keeps true division on any Python version.
    return float(uppers) / max(lowers, 1)