Skip to content

Commit

Permalink
Merge pull request #37 from ugodiggi/fully_cached_performance_improv
Browse files Browse the repository at this point in the history
Speed up the best-case scenario of dependency resolution.
  • Loading branch information
wickman committed Jan 30, 2015
2 parents 3cd0564 + b660f1f commit 258fc73
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 7 deletions.
7 changes: 3 additions & 4 deletions pex/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def crawl(self, link_or_links, follow_links=False):
def execute():
while not converged.is_set():
try:
link = queue.get(timeout=0.1)
link = queue.get(timeout=0.01)
except Empty:
continue
if link not in seen:
Expand Down Expand Up @@ -134,7 +134,6 @@ def execute():
queue.join()
converged.set()

for worker in workers:
worker.join()

# We deliberately do not join the worker threads, since they are no longer of
# any use to us.
return links
10 changes: 9 additions & 1 deletion pex/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from .compatibility import string as compatible_string
from .compatibility import PY3
from .util import Memoizer

if PY3:
import urllib.parse as urlparse
Expand Down Expand Up @@ -50,10 +51,17 @@ def wrap_iterable(cls, url_or_urls):
def _normalize(cls, filename):
return 'file://' + os.path.realpath(os.path.expanduser(filename))

# A cache for the result of from_filename
_FROM_FILENAME_CACHE = Memoizer()

@classmethod
def from_filename(cls, filename):
    """Return a :class:`Link` wrapping the local filename.

    Results are memoized in ``cls._FROM_FILENAME_CACHE`` keyed by the
    ``filename`` argument exactly as passed in, so repeated calls with the
    same argument skip the ``_normalize`` step and return the same object.

    :param filename: Path of a local file.
    :returns: The :class:`Link` for the normalized ``file://`` URL.
    """
    # The cache is consulted first; the stale unconditional return that
    # preceded this lookup made the memoization unreachable.
    result = cls._FROM_FILENAME_CACHE.get(filename)
    if result is None:
        result = cls(cls._normalize(filename))
        cls._FROM_FILENAME_CACHE.store(filename, result)
    return result

def __init__(self, url):
"""Construct a :class:`Link` from a url.
Expand Down
15 changes: 13 additions & 2 deletions pex/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .link import Link
from .pep425 import PEP425, PEP425Extras
from .platforms import Platform
from .util import Memoizer


class Package(Link):
Expand All @@ -22,6 +23,9 @@ class InvalidPackage(Error): pass
# The registry of concrete implementations
_REGISTRY = set()

# The cache of packages that we have already constructed.
_HREF_TO_PACKAGE_CACHE = Memoizer()

@classmethod
def register(cls, package_type):
"""Register a concrete implementation of a Package to be recognized by pex."""
Expand All @@ -37,12 +41,19 @@ def from_href(cls, href, **kw):
:type href: string
:returns: A Package object if a valid concrete implementation exists, otherwise None.
"""
href = Link.wrap(href)
package = cls._HREF_TO_PACKAGE_CACHE.get(href)
if package is not None:
return package
link_href = Link.wrap(href)
for package_type in cls._REGISTRY:
try:
return package_type(href.url, **kw)
package = package_type(link_href.url, **kw)
break
except package_type.InvalidPackage:
continue
if package is not None:
cls._HREF_TO_PACKAGE_CACHE.store(href, package)
return package

@property
def name(self):
Expand Down
17 changes: 17 additions & 0 deletions pex/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import shutil
import uuid
from hashlib import sha1
from threading import Lock

from pkg_resources import find_distributions

Expand Down Expand Up @@ -145,3 +146,19 @@ def cache_distribution(cls, zf, source, target_dir):
dist = DistributionHelper.distribution_from_path(target_dir)
assert dist is not None, 'Failed to cache distribution %s' % source
return dist


class Memoizer(object):
    """A thread-safe key/value store for memoizing the results of a computation.

    Every read and write of the underlying dict happens while holding a
    single lock.  ``get`` and ``store`` are locked individually: a caller
    that does ``get`` followed by ``store`` is not atomic as a whole, so two
    threads may both compute a value for the same key, and the later
    ``store`` wins.
    """

    def __init__(self):
        # Maps key -> memoized value; only touched while holding _lock.
        self._data = {}
        self._lock = Lock()

    def get(self, key, default=None):
        """Return the value stored for ``key``, or ``default`` if there is none.

        With the default ``default=None`` a stored ``None`` cannot be told
        apart from a missing key; pass a distinct sentinel if that matters.
        """
        with self._lock:
            return self._data.get(key, default)

    def store(self, key, value):
        """Associate ``value`` with ``key``, replacing any previous value."""
        with self._lock:
            self._data[key] = value

0 comments on commit 258fc73

Please sign in to comment.