Skip to content

Commit

Permalink
Add archive count to UI and memgator timeout control
Browse files Browse the repository at this point in the history
  • Loading branch information
machawk1 committed Dec 15, 2017
1 parent 6d94b21 commit 76a6d4d
Showing 1 changed file with 41 additions and 14 deletions.
55 changes: 41 additions & 14 deletions bundledApps/WAIL.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@

try:
with open (infoPlistPath, "r") as myfile:
data=myfile.read()
m = re.search(r"<key>CFBundleShortVersionString</key>\n\t<string>(.*)</string>",
data)
data = myfile.read()
vsXML = r"<key>CFBundleShortVersionString</key>\n\t<string>(.*)</string>"
m = re.search(vsXML, data)
WAIL_VERSION = m.groups()[0].strip()
except:
print('User likely has the binary in the wrong location.')
Expand All @@ -98,9 +98,10 @@
msg_waybackNotStarted_body = "Launch Wayback and re-check?"
msg_uriNotInArchives = "The URL is not yet in the archives."
msg_uriInArchives_title = "This page has been archived!"
msg_uriInArchives_body = ("This URL is currently in the archives!"
msg_uriInArchives_body = ("This URL is currently in your archive!"
" Hit the \"View Archive\" Button")
msg_wrongLocation_body = "WAIL must reside in your Applications directory. Move it there then relaunch. \n* Current Location: "
msg_wrongLocation_body = ("WAIL must reside in your Applications directory. "
"Move it there then relaunch. \n* Current Location: ")
msg_wrongLocation_title = "Wrong Location"
msg_noJavaRuntime = "No Java runtime present, requesting install."
msg_fetchingMementos = "Fetching memento count..."
Expand Down Expand Up @@ -331,17 +332,36 @@ def __init__(self, parent):

thread.start_new_thread(self.fetchMementos,())
self.uri.Bind(wx.EVT_KEY_UP, self.uriChanged) # Call memgator on URI change

def setMementoCount(self, count):


def getHosts(self, tm):
matches = re.findall(r'\<(.*)\>; rel=.*memento\"', tm)

This comment has been minimized.

Copy link
@ibnesayeed

ibnesayeed Dec 15, 2017

Contributor

RegEx is known to be slow, especially when run on a long text with wildcards in the pattern. Also, this is not a generic pattern that can be run against any valid TM; instead, it makes some assumptions about how MemGator returns responses. If that fact is to be exploited, then there are other, more performant approaches that would involve traversing the response line by line (using CDXJ could be helpful, but is not necessary here). Ref: #282

This comment has been minimized.

Copy link
@machawk1

machawk1 Dec 15, 2017

Author Owner

has some assumptions on how MemGator returns responses

Indeed, and it uses an instance of MemGator included with the package, so the output will be as expected.

If that fact is to be exploited

I am aware that the regex is naive. This implementation was step 1 of (1) making it work, then (2) making it work efficiently. I am open to optimizations but would prefer a more reliable solution. For example, if there's a relation value after "memento", this URI-M counting scheme would be inaccurate.


hosts = {}
for match in matches:
host = urlparse(match).netloc
if host not in hosts:
hosts[host] = 1
else:
hosts[host] += 1
return hosts

This comment has been minimized.

Copy link
@ibnesayeed

ibnesayeed Dec 15, 2017

Contributor

If you are only interested in counting unique archive hosts, then just create a set and add each host name to it (the set takes care of uniqueness by itself). If you do want a dictionary with each host name as a key and its corresponding memento count as the value, then use the official Counter dict (as noted in #282); that way you can avoid the if/else.



def setMementoCount(self, mCount, aCount=''):
ui_mementoCountMessage_pos = (105, 85)
ui_mementoCountMessage_size = (150, 20)
if hasattr(self, 'mementoStatus'):
self.mementoStatus.Destroy()
self.mementoStatusPublicArchives.Destroy()

memCountMsg = ''
if count:
memCountMsg = str(count) + ' mementos available'
if mCount:
plurality = 's'
if aCount == 1:
plurality = ''
memCountMsg = '{0} mementos available from {1} archive{2}'.format(
mCount, aCount, plurality
)
else:
memCountMsg = msg_fetchingMementos

Expand All @@ -350,8 +370,6 @@ def setMementoCount(self, count):
pos=ui_mementoCountMessage_pos,
size=ui_mementoCountMessage_size)

#italicFont = self.mementoStatus.GetFont().SetStyle(wx.ITALIC)
#self.mementoStatus.SetFont(italicFont)

self.mementoStatusPublicArchives = \
wx.StaticText(self, -1,label="Public archives: ",
Expand All @@ -366,14 +384,21 @@ def setMessage(self, msg):
def fetchMementos(self):
# TODO: Use CDXJ for counting the mementos
currentURIValue = self.uri.GetValue()
out = check_output([memGatorPath, "-a", archivesJSON, currentURIValue])
out = check_output([memGatorPath, "-a", archivesJSON,
'--restimeout', '0m3s',
'--hdrtimeout', '3s',
'--contimeout', '3s',
currentURIValue])
print('MEMGATOR checking {0}'.format(currentURIValue))

# TODO: bug, on Gogo internet MemGator cannot hit aggregator, which
# results in 0 mementos, for which MemGator throws exception

mCount = out.count("memento")
self.setMementoCount(mCount) # UI not updated on Windows
aCount = len(self.getHosts(out))

self.setMementoCount(mCount, aCount) # UI not updated on Windows

print('MEMGATOR counted {0} {1}'.format(currentURIValue, mCount))
# TODO: cache the TM

Expand Down Expand Up @@ -540,7 +565,9 @@ def checkIfURLIsInArchive(self, button):
wx.MessageBox(msg_uriNotInArchives,"Checking for " + self.uri.GetValue())
else:
mb = wx.MessageBox(msg_uriInArchives_body,msg_uriInArchives_title)
mb.AddButton(wx.Button(self, -1, buttonLabel_mementoCountInfo, pos=(10,85), size=(25,15)))
b = wx.Button(self, -1, buttonLabel_mementoCountInfo, pos=(10,85),
size=(25,15))
mb.AddButton(b) # Will not work in wxPython >4

def viewArchiveInBrowser(self, button):
if Wayback().accessible():
Expand Down

0 comments on commit 76a6d4d

Please sign in to comment.