From 8b5af07fa15ca6195df6f2e1292d2ecead56782a Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Thu, 25 Mar 2021 20:21:32 -0400 Subject: [PATCH 01/12] (joe) couple of performance changes related to choosing the best package candidate --- .gitignore | 3 +++ news/9749.perf.rst | 1 + src/pip/_internal/index/package_finder.py | 14 +++++++------- src/pip/_internal/models/wheel.py | 13 ++++++++++++- 4 files changed, 23 insertions(+), 8 deletions(-) create mode 100644 news/9749.perf.rst diff --git a/.gitignore b/.gitignore index dc6244855fe..da9a31ab521 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,6 @@ tests/data/common_wheels/ # Mac .DS_Store + +# Profiling related artifacts +*.prof diff --git a/news/9749.perf.rst b/news/9749.perf.rst new file mode 100644 index 00000000000..2c3a7bfec37 --- /dev/null +++ b/news/9749.perf.rst @@ -0,0 +1 @@ +* A couple performance related edits to package finding. diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index b826690fa5f..dac547b7da6 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -434,6 +434,7 @@ def __init__( self._project_name = project_name self._specifier = specifier self._supported_tags = supported_tags + self._supported_tag_to_idx = {tag: idx for idx, tag in enumerate(supported_tags)} def get_applicable_candidates( self, @@ -470,7 +471,6 @@ def get_applicable_candidates( hashes=self._hashes, project_name=self._project_name, ) - return sorted(filtered_applicable_candidates, key=self._sort_key) def _sort_key(self, candidate): @@ -512,14 +512,15 @@ def _sort_key(self, candidate): if link.is_wheel: # can raise InvalidWheelFilename wheel = Wheel(link.filename) - if not wheel.supported(valid_tags): + if self._prefer_binary: + binary_preference = 1 + try: + pri = -(wheel.support_index_min_fast(valid_tags, self._supported_tag_to_idx)) + except ValueError: raise UnsupportedWheel( "{} is not a supported wheel for this platform. It " "can't be sorted.".format(wheel.filename) ) - if self._prefer_binary: - binary_preference = 1 - pri = -(wheel.support_index_min(valid_tags)) if wheel.build_tag is not None: match = re.match(r'^(\d+)(.*)$', wheel.build_tag) build_tag_groups = match.groups() @@ -544,8 +545,7 @@ def sort_best_candidate( """ if not candidates: return None - best_candidate = max(candidates, key=self._sort_key) - return best_candidate + return max(candidates, key=self._sort_key) def compute_best_candidate( self, diff --git a/src/pip/_internal/models/wheel.py b/src/pip/_internal/models/wheel.py index 708bff33067..bfcef288249 100644 --- a/src/pip/_internal/models/wheel.py +++ b/src/pip/_internal/models/wheel.py @@ -66,10 +66,21 @@ def support_index_min(self, tags): """ return min(tags.index(tag) for tag in self.file_tags if tag in tags) + def support_index_min_fast(self, tags, tag_to_idx): + return min(tag_to_idx[tag] for tag in self.file_tags if tag in tag_to_idx) + def supported(self, tags): # type: (List[Tag]) -> bool """Return whether the wheel is compatible with one of the given tags. :param tags: the PEP 425 tags to check the wheel against. """ - return not self.file_tags.isdisjoint(tags) + # not disjoint means has some overlap + # tags is a list (and long) + # file tags is a set (and short) + # print("len(tags)", len(tags), type(tags)) + # print("len(file_tags)", len(self.file_tags), type(self.file_tags)) + try: + return bool(self.file_tags & tags) + except TypeError: + return bool(self.file_tags & set(tags)) From a5860e591fa5873f28f2dbefa1f3869158247160 Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Wed, 31 Mar 2021 18:37:46 -0400 Subject: [PATCH 02/12] (joe) news should point to the pr and use a valid news type? --- news/{9749.perf.rst => 9748.bugfix.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename news/{9749.perf.rst => 9748.bugfix.rst} (100%) diff --git a/news/9749.perf.rst b/news/9748.bugfix.rst similarity index 100% rename from news/9749.perf.rst rename to news/9748.bugfix.rst From dfa808ebccfddcc7955e1fef56c19f3207f5803b Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Wed, 31 Mar 2021 18:54:52 -0400 Subject: [PATCH 03/12] (joe) linty business --- src/pip/_internal/index/package_finder.py | 8 ++++++-- src/pip/_internal/models/wheel.py | 18 +++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index dac547b7da6..9fd35d069c3 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -434,7 +434,9 @@ def __init__( self._project_name = project_name self._specifier = specifier self._supported_tags = supported_tags - self._supported_tag_to_idx = {tag: idx for idx, tag in enumerate(supported_tags)} + self._supported_tag_to_idx = { + tag: idx for idx, tag in enumerate(supported_tags) + } def get_applicable_candidates( self, @@ -515,7 +517,9 @@ def _sort_key(self, candidate): if self._prefer_binary: binary_preference = 1 try: - pri = -(wheel.support_index_min_fast(valid_tags, self._supported_tag_to_idx)) + pri = -(wheel.support_index_min_fast( + valid_tags, self._supported_tag_to_idx + )) except ValueError: raise UnsupportedWheel( "{} is not a supported wheel for this platform. It " diff --git a/src/pip/_internal/models/wheel.py b/src/pip/_internal/models/wheel.py index bfcef288249..9e52c9a9d54 100644 --- a/src/pip/_internal/models/wheel.py +++ b/src/pip/_internal/models/wheel.py @@ -2,7 +2,7 @@ name that have meaning. """ import re -from typing import List +from typing import Any, Dict, List, Set, Union from pip._vendor.packaging.tags import Tag @@ -67,20 +67,16 @@ def support_index_min(self, tags): return min(tags.index(tag) for tag in self.file_tags if tag in tags) def support_index_min_fast(self, tags, tag_to_idx): + # type: (List[Tag], Dict[Tag, int]) -> int return min(tag_to_idx[tag] for tag in self.file_tags if tag in tag_to_idx) def supported(self, tags): - # type: (List[Tag]) -> bool + # type: (Union[List[Tag],Dict[Tag, Any],Set[Tag]]) -> bool """Return whether the wheel is compatible with one of the given tags. :param tags: the PEP 425 tags to check the wheel against. """ - # not disjoint means has some overlap - # tags is a list (and long) - # file tags is a set (and short) - # print("len(tags)", len(tags), type(tags)) - # print("len(file_tags)", len(self.file_tags), type(self.file_tags)) - try: - return bool(self.file_tags & tags) - except TypeError: - return bool(self.file_tags & set(tags)) + for itag in self.file_tags: + if itag in tags: + return True + return False From de1cf13c52ecea4505a29fee3511068cdb931292 Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Fri, 2 Apr 2021 14:47:05 -0400 Subject: [PATCH 04/12] (joe) update news, bugfix => feature and better descriptor --- news/9748.bugfix.rst | 1 - news/9748.feature.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 news/9748.bugfix.rst create mode 100644 news/9748.feature.rst diff --git a/news/9748.bugfix.rst b/news/9748.bugfix.rst deleted file mode 100644 index 2c3a7bfec37..00000000000 --- a/news/9748.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -* A couple performance related edits to package finding. diff --git a/news/9748.feature.rst b/news/9748.feature.rst new file mode 100644 index 00000000000..df9aaaf34ba --- /dev/null +++ b/news/9748.feature.rst @@ -0,0 +1 @@ +* Improve performance when picking the best file from indexes during `pip install` From 750b424cf76f9ccf95265d6af00bba11aaa09776 Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Fri, 2 Apr 2021 15:16:00 -0400 Subject: [PATCH 05/12] (joe) some renames, and take a wild stab at docstring formatting --- src/pip/_internal/index/package_finder.py | 6 +++--- src/pip/_internal/models/wheel.py | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index 9fd35d069c3..817d2930070 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -434,7 +434,7 @@ def __init__( self._project_name = project_name self._specifier = specifier self._supported_tags = supported_tags - self._supported_tag_to_idx = { + self._wheel_tag_preferences = { tag: idx for idx, tag in enumerate(supported_tags) } @@ -517,8 +517,8 @@ def _sort_key(self, candidate): if self._prefer_binary: binary_preference = 1 try: - pri = -(wheel.support_index_min_fast( - valid_tags, self._supported_tag_to_idx + pri = -(wheel.find_most_preferred_tag( + valid_tags, self._wheel_tag_preferences )) except ValueError: raise UnsupportedWheel( diff --git a/src/pip/_internal/models/wheel.py b/src/pip/_internal/models/wheel.py index 9e52c9a9d54..dad50a10aac 100644 --- a/src/pip/_internal/models/wheel.py +++ b/src/pip/_internal/models/wheel.py @@ -66,9 +66,23 @@ def support_index_min(self, tags): """ return min(tags.index(tag) for tag in self.file_tags if tag in tags) - def support_index_min_fast(self, tags, tag_to_idx): + def find_most_preferred_tag(self, tags, tag_to_priority): # type: (List[Tag], Dict[Tag, int]) -> int - return min(tag_to_idx[tag] for tag in self.file_tags if tag in tag_to_idx) + """Return the priority of the most preferred tag that one of the wheel's file + tag combinations acheives in the given list of supported tags using the given + tag_to_priority mapping, where lower priorities are more-preferred. + + This is used in place of support_index_min in some cases in order to avoid + an expensive linear scan of a large list of tags. + + :param tags: the PEP 425 tags to check the wheel against. + :param tag_to_priority: a mapping from tag to priority of that tag, where + lower is more preferred. + + :raises ValueError: If none of the wheel's file tags match one of + the supported tags. + """ + return min(tag_to_priority[tag] for tag in self.file_tags if tag in tag_to_priority) def supported(self, tags): # type: (Union[List[Tag],Dict[Tag, Any],Set[Tag]]) -> bool From 87e328326dddbaf9294df3909a50ad95dfa585e5 Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Fri, 2 Apr 2021 15:57:20 -0400 Subject: [PATCH 06/12] Update news/9748.feature.rst Co-authored-by: Tzu-ping Chung --- news/9748.feature.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/news/9748.feature.rst b/news/9748.feature.rst index df9aaaf34ba..cb4a1cdede2 100644 --- a/news/9748.feature.rst +++ b/news/9748.feature.rst @@ -1 +1 @@ -* Improve performance when picking the best file from indexes during `pip install` +Improve performance when picking the best file from indexes during `pip install`. From 9072275c02b44c53dd1091e81c3648e092d9638a Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Fri, 2 Apr 2021 16:00:38 -0400 Subject: [PATCH 07/12] Update src/pip/_internal/models/wheel.py Co-authored-by: Tzu-ping Chung --- src/pip/_internal/models/wheel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pip/_internal/models/wheel.py b/src/pip/_internal/models/wheel.py index dad50a10aac..25207b754f9 100644 --- a/src/pip/_internal/models/wheel.py +++ b/src/pip/_internal/models/wheel.py @@ -85,7 +85,7 @@ def find_most_preferred_tag(self, tags, tag_to_priority): return min(tag_to_priority[tag] for tag in self.file_tags if tag in tag_to_priority) def supported(self, tags): - # type: (Union[List[Tag],Dict[Tag, Any],Set[Tag]]) -> bool + # type: (Iterable[Tag]) -> bool """Return whether the wheel is compatible with one of the given tags. :param tags: the PEP 425 tags to check the wheel against. From db7af45cce08174149247d116e8dc19fe59eec51 Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Fri, 2 Apr 2021 16:01:13 -0400 Subject: [PATCH 08/12] (joe) explain why we're constructing a dict from tag to index in list --- src/pip/_internal/index/package_finder.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index 817d2930070..f33270ecb36 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -434,6 +434,9 @@ def __init__( self._project_name = project_name self._specifier = specifier self._supported_tags = supported_tags + # Since the index of the tag in the _supported_tags list is used + # as a priority, precompute a map from tag to index/priority to be + # used in wheel.find_most_preferred_tag. self._wheel_tag_preferences = { tag: idx for idx, tag in enumerate(supported_tags) } From c689a8096ef787d02d9a4da771ad8bcd9e22e79b Mon Sep 17 00:00:00 2001 From: Joe Bylund Date: Sat, 3 Apr 2021 07:46:47 -0400 Subject: [PATCH 09/12] (joe) undo a couple changes that shouldn't have been included --- src/pip/_internal/index/package_finder.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index f33270ecb36..cd2c4683128 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -476,6 +476,7 @@ def get_applicable_candidates( hashes=self._hashes, project_name=self._project_name, ) + return sorted(filtered_applicable_candidates, key=self._sort_key) def _sort_key(self, candidate): @@ -517,8 +518,6 @@ def _sort_key(self, candidate): if link.is_wheel: # can raise InvalidWheelFilename wheel = Wheel(link.filename) - if self._prefer_binary: - binary_preference = 1 try: pri = -(wheel.find_most_preferred_tag( valid_tags, self._wheel_tag_preferences @@ -528,6 +527,8 @@ def _sort_key(self, candidate): "{} is not a supported wheel for this platform. It " "can't be sorted.".format(wheel.filename) ) + if self._prefer_binary: + binary_preference = 1 if wheel.build_tag is not None: match = re.match(r'^(\d+)(.*)$', wheel.build_tag) build_tag_groups = match.groups() @@ -552,7 +553,9 @@ def sort_best_candidate( """ if not candidates: return None - return max(candidates, key=self._sort_key) + best_candidate = max(candidates, key=self._sort_key) + return best_candidate + def compute_best_candidate( self, From 16e1fd277937f583dffc58a5b3c2b7aff4f9604d Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Sat, 3 Apr 2021 07:47:39 -0400 Subject: [PATCH 10/12] Update src/pip/_internal/models/wheel.py Co-authored-by: Tzu-ping Chung --- src/pip/_internal/models/wheel.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/pip/_internal/models/wheel.py b/src/pip/_internal/models/wheel.py index 25207b754f9..05424e2b468 100644 --- a/src/pip/_internal/models/wheel.py +++ b/src/pip/_internal/models/wheel.py @@ -90,7 +90,4 @@ def supported(self, tags): :param tags: the PEP 425 tags to check the wheel against. """ - for itag in self.file_tags: - if itag in tags: - return True - return False + return not self.file_tags.isdisjoint(tags) From 208cb26b5c617b1be3f83e3646da80bf6660fc71 Mon Sep 17 00:00:00 2001 From: Joe Bylund Date: Sat, 3 Apr 2021 07:50:05 -0400 Subject: [PATCH 11/12] (joe) whitespace is the worst --- src/pip/_internal/index/package_finder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index cd2c4683128..b6716e675eb 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -556,7 +556,6 @@ def sort_best_candidate( best_candidate = max(candidates, key=self._sort_key) return best_candidate - def compute_best_candidate( self, candidates, # type: List[InstallationCandidate] From 0d538ed4fb1013426ea94dace4324a49cf4f0e15 Mon Sep 17 00:00:00 2001 From: Joe Bylund Date: Sat, 3 Apr 2021 07:52:00 -0400 Subject: [PATCH 12/12] (joe) remove now unused imports --- src/pip/_internal/models/wheel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pip/_internal/models/wheel.py b/src/pip/_internal/models/wheel.py index 05424e2b468..c206d13cb7a 100644 --- a/src/pip/_internal/models/wheel.py +++ b/src/pip/_internal/models/wheel.py @@ -2,7 +2,7 @@ name that have meaning. """ import re -from typing import Any, Dict, List, Set, Union +from typing import Dict, List from pip._vendor.packaging.tags import Tag