-
Notifications
You must be signed in to change notification settings - Fork 1
/
extractnames1.py
66 lines (60 loc) · 1.72 KB
/
extractnames1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# This is a Python script for extracting information from HTML files.
import os
def open_file():
    """Process the numbered HTML files 1.html, 2.html, ... until one is missing.

    Each file is opened in turn, its problem links are extracted with
    ``find_data`` and appended to the saved list with ``save_problems``.
    A file is deleted only after its links have been saved; a file in which
    no links were found is left on disk and skipped.

    The loop ends at the first number whose file does not exist.  Any other
    error (for example a failure while saving) propagates to the caller
    instead of being silently treated as "no more files".
    """
    count = 1
    while True:
        filename = str(count) + ".html"
        print("trying to open" + filename)
        try:
            # errors="replace" keeps a stray undecodable byte from aborting
            # the run; the link text being searched for is plain ASCII.
            filedata = open(filename, "r", encoding="utf-8", errors="replace")
        except FileNotFoundError:
            # The first missing number marks the end of the sequence.
            break
        # The context manager guarantees the handle is released before the
        # file is removed, even if find_data raises.
        with filedata:
            problem_list = find_data(filedata)
        if problem_list:
            save_problems(problem_list)
            os.remove(filename)
        count += 1
def find_data(filedata):
    """Return the problem links found on the first matching line of a file.

    Args:
        filedata: An open text file (or any iterable of lines that has a
            ``close`` method).  It is always closed before returning.

    Returns:
        The list produced by ``extract_data`` for the first line that
        contains the MoonBoard problem heading link, or an empty list when
        no line matches.
    """
    marker = '<h3><a href="https://www.moonboard.com/Problems/View'
    try:
        for line in filedata:
            if marker in line:
                # Only the first matching line is used.
                return extract_data(line)
        # No match: return an empty list so callers can iterate the result.
        return []
    finally:
        filedata.close()
def extract_data(problem_line):
    """Return every double-quoted fragment of a line that is a problem link.

    The line is split on double quotes, and each piece containing
    ``https://www.moonboard.com/Problems`` is kept, in order of appearance.

    Args:
        problem_line: One line of HTML text.

    Returns:
        A list of the matching fragments; empty when there are none.
    """
    marker = 'https://www.moonboard.com/Problems'
    return [piece for piece in problem_line.split('"') if marker in piece]
def saved_problems():
    """Return the entries already stored in ``problem_list.txt``.

    Returns:
        A list with one string per non-blank line of the file, with the
        line terminators removed so entries can be compared directly with
        freshly extracted links.  An empty list is returned when the file
        does not exist yet.
    """
    try:
        with open("problem_list.txt", "r") as save_file:
            stripped = (line.rstrip("\r\n") for line in save_file)
            # Blank lines are not entries; skip them.
            return [entry for entry in stripped if entry]
    except FileNotFoundError:
        # Nothing has been saved yet.
        return []
def save_problems(save_list):
    """Append the entries of ``save_list`` not yet in ``problem_list.txt``.

    Entries are compared with the stored lines after their line terminators
    are removed, and an entry repeated inside ``save_list`` is written only
    once.  Each new entry is written followed by ``"\\r\\n"``.

    Args:
        save_list: Iterable of strings to store.  ``None`` or an empty
            list is accepted and results in no write.
    """
    if not save_list:
        return
    try:
        previous = saved_problems()
    except FileNotFoundError:
        # First run: there is no list file yet, so nothing is saved so far.
        previous = []
    # Stored lines may still carry their newline; strip it so membership
    # tests against bare entries can succeed.
    seen = {entry.rstrip("\r\n") for entry in previous}
    # newline="" disables newline translation, so the explicit "\r\n" is
    # written as-is on every platform (text mode on Windows would otherwise
    # turn it into "\r\r\n").
    with open("problem_list.txt", "a", newline="") as save_file:
        for item in save_list:
            if item not in seen:
                save_file.write(item + "\r\n")
                seen.add(item)
def main():
    """Run the extraction over the numbered HTML files."""
    open_file()


# Guard the entry point so importing this module does not start processing
# (and deleting) files as a side effect.
if __name__ == "__main__":
    main()