-
Notifications
You must be signed in to change notification settings - Fork 16
/
postgres.py
7619 lines (6575 loc) · 321 KB
/
postgres.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
# thoth-storages
# Copyright(C) 2019, 2020 Francesco Murdaca, Fridolin Pokorny
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""An SQL database for storing Thoth data."""
import functools
import re
import logging
import json
import os
import itertools
import weakref
import ssdeep
from decimal import Decimal
from typing import List
from typing import Set
from typing import Tuple
from typing import Optional
from typing import FrozenSet
from typing import Dict
from typing import Union
from typing import Any
from collections import deque
from contextlib import contextmanager
from datetime import datetime
import attr
from packaging.specifiers import SpecifierSet
from packaging.version import parse as parse_version
from sqlalchemy import create_engine
from sqlalchemy import desc
from sqlalchemy import func
from sqlalchemy import exists
from sqlalchemy import and_
from sqlalchemy import tuple_
from sqlalchemy import or_
from sqlalchemy.orm import Query
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm.session import Session
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm.exc import NoResultFound
from thoth.python import PackageVersion
from thoth.python import Pipfile
from thoth.python import PipfileLock
from thoth.common.helpers import format_datetime
from thoth.common.helpers import datetime2datetime_str
from thoth.common.helpers import normalize_os_version
from thoth.common import OpenShift
from thoth.common import map_os_name
from thoth.common.enums import ThothAdviserIntegrationEnum
from .models_base import BaseExtension
from .models import AdviserRun
from .models import ALL_MAIN_MODELS
from .models import CVE
from .models import CVETimestamp
from .models import DebDependency
from .models import DebPackageVersion
from .models import DependencyMonkeyRun
from .models import EcosystemSolver
from .models import ExternalHardwareInformation
from .models import ExternalPythonRequirements
from .models import ExternalPythonRequirementsLock
from .models import ExternalPythonSoftwareStack
from .models import ExternalSoftwareEnvironment
from .models import HardwareInformation
from .models import InspectionRun
from .models import ImportPackage
from .models import KebechetGithubAppInstallations
from .models import PackageExtractRun
from .models import ProvenanceCheckerRun
from .models import PythonArtifact
from .models import PythonFileDigest
from .models import PythonInterpreter
from .models import PythonPackageIndex
from .models import PythonPackageMetadata
from .models import PythonPackageMetadataClassifier
from .models import PythonPackageMetadataDistutils
from .models import PythonPackageMetadataPlatform
from .models import PythonPackageMetadataProjectUrl
from .models import PythonPackageMetadataProvidesExtra
from .models import PythonPackageMetadataRequiresExternal
from .models import PythonPackageMetadataSupportedPlatform
from .models import PythonPackageRequirement
from .models import PythonPackageVersion
from .models import PythonPackageVersionEntity
from .models import PythonPackageVersionEntityRule
from .models import PythonPackageVersionEntityRulesAssociation
from .models import PythonRequirements
from .models import PythonRequirementsLock
from .models import PythonSoftwareStack
from .models import RPMPackageVersion
from .models import RPMRequirement
from .models import SecurityIndicatorAggregatedRun
from .models import SoftwareEnvironment
from .models import VersionedSymbol
from .models import Advised
from .models import DebDepends
from .models import DebPreDepends
from .models import DebReplaces
from .models import DependsOn
from .models import DetectedSymbol
from .models import FoundDeb
from .models import FoundImportPackage
from .models import FoundPythonFile
from .models import FoundPythonInterpreter
from .models import FoundRPM
from .models import HasArtifact
from .models import HasExternalPythonRequirements
from .models import HasExternalPythonRequirementsLock
from .models import HasPythonRequirements
from .models import HasPythonRequirementsLock
from .models import HasMetadataClassifier
from .models import HasMetadataDistutils
from .models import HasMetadataPlatform
from .models import HasMetadataProjectUrl
from .models import HasMetadataProvidesExtra
from .models import HasMetadataRequiresExternal
from .models import HasMetadataSupportedPlatform
from .models import HasSymbol
from .models import HasUnresolved
from .models import HasVulnerability
from .models import Identified
from .models import PythonDependencyMonkeyRequirements
from .models import RequiresSymbol
from .models import RPMRequires
from .models import SIAggregated
from .models import Solved
from .models import ALL_RELATION_MODELS
from .models_performance import PERFORMANCE_MODEL_BY_NAME, ALL_PERFORMANCE_MODELS
from .models_performance import PERFORMANCE_MODELS_ML_FRAMEWORKS
from .sql_base import SQLBase
from .models_base import Base
from .postgres_utils import database_exists
from .postgres_utils import create_database
from .query_result_base import PythonQueryResult
from .enums import EnvironmentTypeEnum
from .enums import SoftwareStackTypeEnum
from .enums import InspectionSyncStateEnum
from .enums import MetadataDistutilsTypeEnum
from .enums import QuerySortTypeEnum
from .enums import PlatformEnum
from .enums import KebechetManagerEnum
from ..analyses import AnalysisResultsStore
from ..dependency_monkey_reports import DependencyMonkeyReportsStore
from ..provenance import ProvenanceResultsStore
from ..solvers import SolverResultsStore
from ..advisers import AdvisersResultsStore
from ..exceptions import NotFoundError
from ..exceptions import PythonIndexNotRegistered
from ..exceptions import PerformanceIndicatorNotRegistered
from ..exceptions import PythonIndexNotProvided
from ..exceptions import SolverNotRun
from ..exceptions import NotConnected
from ..exceptions import AlreadyConnected
from ..exceptions import DatabaseNotInitialized
from ..exceptions import DistutilsKeyNotKnown
from ..exceptions import SortTypeQueryError
from ..exceptions import CudaVersionDoesNotMatch
from ..ceph import CephStore
# The names of these environment variables are intentionally long - adjust
# them only if you know what you are doing.
#
# Each constant is the ``maxsize`` of the LRU cache of the correspondingly
# named adapter method; values are read once, at import time.
_HAS_PYTHON_SOLVER_ERROR_CACHE_SIZE = int(os.getenv("THOTH_STORAGE_HAS_PYTHON_SOLVER_ERROR_CACHE_SIZE", 4096))
_GET_PYTHON_PACKAGE_VERSION_RECORDS_CACHE_SIZE = int(
    os.getenv("THOTH_STORAGE_GET_PYTHON_PACKAGE_VERSION_RECORDS_CACHE_SIZE", 16384)
)
_GET_DEPENDS_ON_CACHE_SIZE = int(os.getenv("THOTH_STORAGE_GET_DEPENDS_ON_CACHE_SIZE", 8192))
_GET_PYTHON_CVE_RECORDS_ALL_CACHE_SIZE = int(os.getenv("THOTH_STORAGE_GET_PYTHON_CVE_RECORDS_ALL_CACHE_SIZE", 4096))
_GET_PYTHON_PACKAGE_REQUIRED_SYMBOLS_CACHE_SIZE = int(
    os.getenv("THOTH_STORAGE_GET_PYTHON_PACKAGE_REQUIRED_SYMBOLS_CACHE_SIZE", 4096)
)
_GET_PYTHON_ENVIRONMENT_MARKER_CACHE_SIZE = int(os.getenv("THOTH_GET_PYTHON_ENVIRONMENT_MARKER_CACHE_SIZE", 4096))
_GET_S2I_ANALYZED_IMAGE_SYMBOLS = int(os.getenv("THOTH_S2I_ANALYZED_IMAGE_SYMBOLS_CACHE_SIZE", 1))
# This cache used to be sized by the environment-marker variable (copy-paste),
# so it could not be tuned on its own. A dedicated variable is now consulted
# first; the old variable remains the fallback to keep existing deployments
# behaving exactly as before.
_GET_SI_AGGREGATED_PYTHON_PACKAGE_VERSION_CACHE_SIZE = int(
    os.getenv(
        "THOTH_GET_SI_AGGREGATED_PYTHON_PACKAGE_VERSION_CACHE_SIZE",
        os.getenv("THOTH_GET_PYTHON_ENVIRONMENT_MARKER_CACHE_SIZE", 4096),
    )
)
_GET_PYTHON_PYTHON_PACKAGE_VERSION_SOLVER_RULES_CACHE_SIZE = int(
    os.getenv("THOTH_GET_PYTHON_PYTHON_PACKAGE_VERSION_SOLVER_RULES_CACHE_SIZE", 4096)
)
_GET_RPM_PACKAGE_VERSION_CACHE_SIZE = int(os.getenv("THOTH_GET_RPM_PACKAGE_VERSION_CACHE_SIZE", 1))
_GET_PYTHON_PACKAGE_VERSION_CACHE_SIZE = int(os.getenv("THOTH_GET_PYTHON_PACKAGE_VERSION_CACHE_SIZE", 1))

_LOGGER = logging.getLogger(__name__)
def lru_cache(*lru_args, **lru_kwargs):
    """Create an LRU cache decorator usable on methods.

    On the first call of a decorated method on an instance, a closure wrapped
    in ``functools.lru_cache`` is built. The closure is stored on the instance
    under the method's name, so later lookups on that instance reach it
    directly, and it is also appended to ``self._CACHED_METHODS``.

    Based on:
    https://stackoverflow.com/questions/33672412/python-functools-lru-cache-with-class-methods-release-object
    """
    # XXX: possibly move to another module to make it available for the whole Thoth

    def decorator(func):
        @functools.wraps(func)
        def wrapped_func(self, *args, **kwargs):
            # The memoized closure lives on the instance itself; capturing the
            # instance strongly here would create a cycle keeping it alive.
            instance_ref = weakref.ref(self)

            def call_on_instance(*call_args, **call_kwargs):
                return func(instance_ref(), *call_args, **call_kwargs)

            memoized = functools.lru_cache(*lru_args, **lru_kwargs)(call_on_instance)
            memoized = functools.update_wrapper(memoized, func)

            setattr(self, func.__name__, memoized)
            self._CACHED_METHODS.append(memoized)
            return memoized(*args, **kwargs)

        return wrapped_func

    return decorator
@attr.s()
class GraphDatabase(SQLBase):
    """A SQL database adapter providing graph-like operations on top of SQL queries."""

    # Declarative base of the models (presumably consumed by SQLBase - TODO confirm).
    _DECLARATIVE_BASE = Base
    # Default value of the ``count`` argument of the listing methods below.
    DEFAULT_COUNT = 100

    # Per metadata key: a relation model, a metadata model and a third item that is
    # either one string or a list of strings (presumably attribute name(s) on the
    # metadata model - verify against the code that consumes this map).
    _MULTI_VALUE_KEY_PYTHON_PACKAGE_METADATA_MAP = {
        "classifier": [HasMetadataClassifier, PythonPackageMetadataClassifier, "classifier"],
        "platform": [HasMetadataPlatform, PythonPackageMetadataPlatform, "platform"],
        "supported_platform": [
            HasMetadataSupportedPlatform,
            PythonPackageMetadataSupportedPlatform,
            "supported_platform",
        ],
        "requires_external": [HasMetadataRequiresExternal, PythonPackageMetadataRequiresExternal, "requires_external"],
        "project_url": [HasMetadataProjectUrl, PythonPackageMetadataProjectUrl, ["label", "url"]],
        "provides_extra": [HasMetadataProvidesExtra, PythonPackageMetadataProvidesExtra, "optional_feature"],
    }

    # Cached callables appended by the module-level ``lru_cache`` decorator.
    # NOTE(review): this is a class-level list, so it is shared by all instances.
    _CACHED_METHODS = []
def __del__(self) -> None:
"""Destruct adapter object."""
if int(bool(os.getenv("THOTH_STORAGES_LOG_STATS", 0))):
stats = self.stats()
_LOGGER.info("Graph adapter statistics:\n%s", json.dumps(stats, indent=2))
@staticmethod
def construct_connection_string() -> str:
"""Construct a connection string needed to connect to database."""
connection_string = (
f"postgresql+psycopg2://"
f"{os.getenv('KNOWLEDGE_GRAPH_USER', 'postgres')}:{os.getenv('KNOWLEDGE_GRAPH_PASSWORD', 'postgres')}"
f"@{os.getenv('KNOWLEDGE_GRAPH_HOST', 'localhost')}:{os.getenv('KNOWLEDGE_GRAPH_PORT', 5432)}"
f"/{os.getenv('KNOWLEDGE_GRAPH_DATABASE', 'postgres')}"
)
if bool(int(os.getenv("KNOWLEDGE_GRAPH_SSL_DISABLED", 0))):
connection_string += "?sslmode=disable"
return connection_string
@contextmanager
def _session_scope(self) -> Session:
    """Handle session commit and rollback.

    Yield a new session created by ``self._sessionmaker``. The session is
    committed when the ``with`` body finishes without raising; on any
    exception it is rolled back and the exception is re-raised. The session
    is closed in both cases.
    """
    session = self._sessionmaker()
    try:
        yield session
        # Reached only when the with-body did not raise.
        session.commit()
    except Exception:
        # Covers errors raised by the with-body as well as by commit() itself.
        session.rollback()
        raise
    finally:
        session.close()
def connect(self) -> None:
    """Connect to the database.

    Creates the engine and the session factory. When the database exists, the
    schema revision is compared with the library head revision; a mismatch
    or an uninitialized database is only logged, it does not fail the call.

    Raises:
        AlreadyConnected: if the adapter is already connected.
        Exception: any error raised while creating the engine or the session
            factory is re-raised after the adapter state has been reset.
    """
    if self.is_connected():
        raise AlreadyConnected("Cannot connect, the adapter is already connected")

    echo = bool(int(os.getenv("THOTH_STORAGES_DEBUG_QUERIES", 0)))
    try:
        self._engine = create_engine(self.construct_connection_string(), echo=echo)
        self._sessionmaker = sessionmaker(bind=self._engine)
    except Exception as engine_exc:
        _LOGGER.warning("Failed to create engine: %s", str(engine_exc))
        # Drop engine and session in case of any connection issues so is_connected behaves correctly.
        if self._engine:
            try:
                self._engine.dispose()
            except Exception as exc:
                # Best effort only - the original error is re-raised below.
                _LOGGER.warning("Failed to dispose engine: %s", str(exc))

        self._engine = None
        self._sessionmaker = None
        raise

    if not database_exists(self._engine.url):
        _LOGGER.warning("The database has not been created yet, no check for schema version is performed")
        return

    try:
        # is_schema_up2date() already warns about a revision mismatch, so the
        # success message is emitted only when the schema really is in sync
        # (the condition used to be inverted).
        if self.is_schema_up2date():
            _LOGGER.debug("Database adapter connected, database is initialized")
    except DatabaseNotInitialized as exc:
        _LOGGER.warning("Database is not ready to receive or query data: %s", str(exc))
@staticmethod
def _get_alembic_configuration():
    """Create the alembic configuration pointing to files shipped in the ``data`` directory of thoth.storages."""
    import thoth.storages
    from alembic import config

    # Both the ini file and the migration scripts live in <package>/data.
    data_dir = os.path.join(os.path.dirname(thoth.storages.__file__), "data")

    alembic_cfg = config.Config(os.path.join(data_dir, "alembic.ini"))
    alembic_cfg.attributes["configure_logger"] = False
    alembic_cfg.set_section_option("alembic", "script_location", os.path.join(data_dir, "alembic"))
    return alembic_cfg
def initialize_schema(self):
    """Create the database if needed and upgrade its schema to the alembic head revision.

    Raises:
        NotConnected: if the adapter has not been connected yet.
    """
    from alembic import command

    if not self.is_connected():
        raise NotConnected("Cannot initialize schema: the adapter is not connected yet")

    database_missing = not database_exists(self._engine.url)
    if database_missing:
        _LOGGER.info("The database has not been created yet, it will be created now...")
        create_database(self._engine.url)

    configuration = self._get_alembic_configuration()
    # Overwrite URL based on deployment configuration.
    configuration.set_main_option("sqlalchemy.url", self.construct_connection_string())
    command.upgrade(configuration, "head")
def drop_all(self):
    """Drop everything stored in the database, including the alembic version table."""
    super().drop_all()

    # Without the version table alembic migrations can be re-run from scratch next time.
    drop_version_table = "DROP TABLE alembic_version;"
    self._engine.execute(drop_version_table)
def _get_script_directory_revisions(self):
    """Return the alembic script directory built from the adapter's alembic configuration."""
    from alembic import script

    return script.ScriptDirectory.from_config(self._get_alembic_configuration())
def get_script_alembic_version_head(self) -> str:
"""Get alembic version head from alembic folder scripts."""
directory = self._get_script_directory_revisions()
head_revision = directory.get_current_head()
return head_revision
def get_table_alembic_version_head(self) -> str:
    """Get alembic version head from database table.

    Returns:
        The first column of the first row of the ``alembic_version`` table.

    Raises:
        DatabaseNotInitialized: if the ``alembic_version`` table holds no row.
    """
    query = "SELECT * FROM alembic_version"
    with self._session_scope() as session:
        result = session.execute(query).fetchone()

    if result is None:
        # fetchone() returns None on an empty table; indexing it would fail
        # with an unrelated TypeError. connect() already handles
        # DatabaseNotInitialized, so report the condition with that type.
        raise DatabaseNotInitialized("Database is not initialized yet: no alembic revision is recorded")

    return result[0]
def is_schema_up2date(self) -> bool:
"""Check if the current schema is up2date with the one configured on database side."""
database_head = self.get_table_alembic_version_head()
script_head = self.get_script_alembic_version_head()
is_up2date = database_head == script_head
if not is_up2date:
_LOGGER.warning(
"The database schema is not in sync with library head revision, the current library revision "
"head: %r, database head: %r",
script_head,
database_head,
)
return is_up2date
def get_alembic_version_count_all(self) -> int:
    """Count the rows of the alembic version table (1 expected)."""
    with self._session_scope() as session:
        row = session.execute("SELECT COUNT(*) FROM alembic_version").fetchone()
        return row[0]
def get_last_solver_datetime(
    self, os_name: Optional[str] = None, os_version: Optional[str] = None, python_version: Optional[str] = None
) -> datetime:
    """Return the most recent datetime among solver runs synced in the database.

    When any of ``os_name``, ``os_version`` or ``python_version`` is given, only
    runs of solvers matching the provided values are considered.
    """
    with self._session_scope() as session:
        query = session.query(Solved.datetime)

        if os_name or os_version or python_version:
            query = query.filter(Solved.ecosystem_solver_id == EcosystemSolver.id)

        optional_filters = (
            (EcosystemSolver.os_name, os_name),
            (EcosystemSolver.os_version, os_version),
            (EcosystemSolver.python_version, python_version),
        )
        for column, value in optional_filters:
            if value is not None:
                query = query.filter(column == value)

        return max([row[0] for row in query.all()])
def get_last_analysis_datetime(self) -> datetime:
    """Return the most recent datetime among container image analyses synced in the database."""
    with self._session_scope() as session:
        rows = session.query(PackageExtractRun.datetime).all()
        return max([row[0] for row in rows])
@staticmethod
def normalize_python_package_name(package_name: str) -> str:
    """Return the package name normalized by ``PackageVersion`` (PEP-0503)."""
    normalized_name = PackageVersion.normalize_python_package_name(package_name)
    return normalized_name
@staticmethod
def normalize_python_package_version(package_version: str) -> str:
    """Return the package version normalized by ``PackageVersion`` (PEP-440)."""
    normalized_version = PackageVersion.normalize_python_package_version(package_version)
    return normalized_version
@staticmethod
def normalize_python_index_url(index_url: Optional[str]) -> Optional[str]:
"""Map python index url."""
if index_url == "https://pypi.python.org/simple":
return "https://pypi.org/simple"
return index_url
def get_analysis_metadata(self, analysis_document_id: str) -> Dict[str, Any]:
    """Return metadata recorded for the analysis with the given document id.

    Raises:
        NotFoundError: if no record exists for ``analysis_document_id``.

    Examples:
    >>> from thoth.storages import GraphDatabase
    >>> graph = GraphDatabase()
    >>> graph.get_analysis_metadata('package-extract-2ef02c9cea8b1ef7')
    {
        'analysis_datetime': datetime(2019, 10, 7, 18, 57, 22, 658131),
        'analysis_document_id': 'package-extract-2ef02c9cea8b1ef7',
        'package_extract_name': 'thoth-package-extract',
        'package_extract_version': '1.0.1'
    }
    """
    with self._session_scope() as session:
        record = (
            session.query(PackageExtractRun)
            .filter(PackageExtractRun.analysis_document_id == analysis_document_id)
            .with_entities(
                PackageExtractRun.datetime,
                PackageExtractRun.analysis_document_id,
                PackageExtractRun.package_extract_name,
                PackageExtractRun.package_extract_version,
            )
            .first()
        )

        if record is None:
            raise NotFoundError(f"No records found for analysis with id {analysis_document_id!r}")

        # Keys follow the order of the selected columns.
        keys = (
            "analysis_datetime",
            "analysis_document_id",
            "package_extract_name",
            "package_extract_version",
        )
        return {key: record[position] for position, key in enumerate(keys)}
def _do_software_environment_listing(
    self, start_offset: int, count: Optional[int], is_external: bool, environment_type: str
) -> List[str]:
    """List names of software environments of the given type.

    The external model is queried when ``is_external`` is set; ``start_offset``
    and ``count`` are applied as query offset and limit.
    """
    model = ExternalSoftwareEnvironment if is_external else SoftwareEnvironment

    with self._session_scope() as session:
        query = session.query(model.environment_name).filter(model.environment_type == environment_type)
        rows = query.offset(start_offset).limit(count).all()
        return [row[0] for row in rows]
def get_run_software_environment_all(
    self, start_offset: int = 0, count: Optional[int] = DEFAULT_COUNT, is_external: bool = False
) -> List[str]:
    """List names of software environments of the runtime type.

    Examples:
    >>> from thoth.storages import GraphDatabase
    >>> graph = GraphDatabase()
    >>> graph.get_run_software_environment_all()
    ['quay.io/thoth-station/thoth-pylint:v0.7.0-ubi8']
    """
    runtime_type = EnvironmentTypeEnum.RUNTIME.value
    return self._do_software_environment_listing(start_offset, count, is_external, runtime_type)
def get_build_software_environment_all(
    self, start_offset: int = 0, count: Optional[int] = DEFAULT_COUNT
) -> List[str]:
    """List names of software environments of the buildtime type.

    Examples:
    >>> from thoth.storages import GraphDatabase
    >>> graph = GraphDatabase()
    >>> graph.get_build_software_environment_all()
    ['quay.io/thoth-station/thoth-pylint:v0.7.0-ubi8']
    """
    buildtime_type = EnvironmentTypeEnum.BUILDTIME.value
    # We do not have external/user software environment which is build environment yet.
    is_external = False
    return self._do_software_environment_listing(start_offset, count, is_external, buildtime_type)
def _do_software_environment_analyses_listing(
    self,
    software_environment_name: str,
    start_offset: int,
    count: Optional[int],
    convert_datetime: bool,
    is_external: bool,
    environment_type: str,
) -> List[dict]:
    """List analyses recorded for the named software environment of the given type.

    Each item carries the analysis datetime (passed through ``format_datetime``
    when ``convert_datetime`` is set), the analysis document id and the name
    and version of package-extract.
    """
    model = ExternalSoftwareEnvironment if is_external else SoftwareEnvironment

    with self._session_scope() as session:
        rows = (
            session.query(model)
            .filter(model.environment_type == environment_type)
            .filter(model.environment_name == software_environment_name)
            .join(PackageExtractRun)
            .with_entities(
                PackageExtractRun.datetime,
                PackageExtractRun.analysis_document_id,
                PackageExtractRun.package_extract_name,
                PackageExtractRun.package_extract_version,
            )
            .offset(start_offset)
            .limit(count)
            .all()
        )

        return [
            {
                "analysis_datetime": format_datetime(row[0]) if convert_datetime else row[0],
                "analysis_document_id": row[1],
                "package_extract_name": row[2],
                "package_extract_version": row[3],
            }
            for row in rows
        ]
def get_run_software_environment_analyses_all(
    self,
    run_software_environment_name: str,
    start_offset: int = 0,
    count: Optional[int] = DEFAULT_COUNT,
    convert_datetime: bool = True,
    is_external: bool = False,
) -> List[dict]:
    """List analyses available for the given runtime software environment.

    Examples:
    >>> from thoth.storages import GraphDatabase
    >>> graph = GraphDatabase()
    >>> graph.get_run_software_environment_analyses_all('quay.io/thoth-station/thoth-pylint:v0.7.0-ubi8')
    [{
        'analysis_datetime': datetime(2019, 10, 7, 18, 57, 22, 658131),
        'analysis_document_id': 'package-extract-2ef02c9cea8b1ef7',
        'package_extract_name': 'thoth-package-extract',
        'package_extract_version': '1.0.1'
    }]
    """
    listing_options = {
        "start_offset": start_offset,
        "count": count,
        "is_external": is_external,
        "convert_datetime": convert_datetime,
        "environment_type": EnvironmentTypeEnum.RUNTIME.value,
    }
    return self._do_software_environment_analyses_listing(run_software_environment_name, **listing_options)
def get_build_software_environment_analyses_all(
    self,
    build_software_environment_name: str,
    start_offset: int = 0,
    count: Optional[int] = DEFAULT_COUNT,
    convert_datetime: bool = True,
    is_external: bool = False,
) -> List[dict]:
    """List analyses available for the given buildtime software environment."""
    listing_options = {
        "start_offset": start_offset,
        "count": count,
        "is_external": is_external,
        "convert_datetime": convert_datetime,
        "environment_type": EnvironmentTypeEnum.BUILDTIME.value,
    }
    return self._do_software_environment_analyses_listing(build_software_environment_name, **listing_options)
def python_package_version_exists(
    self,
    package_name: str,
    package_version: str,
    index_url: Optional[str] = None,
    solver_name: Optional[str] = None,
) -> bool:
    """Tell whether the given Python package version is present in the database.

    With ``solver_name`` provided, only records matching the OS name, OS version
    and Python version parsed from the solver name are considered. With
    ``index_url`` provided, only records from that index are considered.
    """
    package_name = self.normalize_python_package_name(package_name)
    package_version = self.normalize_python_package_version(package_version)
    index_url = self.normalize_python_index_url(index_url)

    with self._session_scope() as session:
        query = session.query(PythonPackageVersion).filter(
            PythonPackageVersion.package_name == package_name,
            PythonPackageVersion.package_version == package_version,
        )

        if solver_name:
            solver_info = OpenShift.parse_python_solver_name(solver_name)
            solver_os_name = map_os_name(solver_info["os_name"])
            solver_os_version = solver_info["os_version"]
            solver_python_version = solver_info["python_version"]
            query = query.filter(
                PythonPackageVersion.os_name == solver_os_name,
                PythonPackageVersion.os_version == solver_os_version,
                PythonPackageVersion.python_version == solver_python_version,
            )

        if index_url:
            query = query.join(PythonPackageIndex).filter(PythonPackageIndex.url == index_url)

        matching_records = query.count()
        return matching_records > 0
def python_package_exists(self, package_name: str) -> bool:
    """Check if the given Python package exists regardless of version.

    The package name is normalized before the lookup.

    Returns:
        True if at least one ``PythonPackageVersion`` record carries the
        normalized package name, False otherwise.
    """
    package_name = self.normalize_python_package_name(package_name)
    with self._session_scope() as session:
        # Select from the same model that is filtered. The query used to select
        # from PythonPackageVersionEntity while filtering on PythonPackageVersion,
        # which made the database count an unconstrained cross join of both tables.
        # EXISTS also lets the database stop at the first matching row.
        # NOTE(review): if the lookup was meant to cover the entity table instead,
        # filter on that model - confirm the intent.
        return session.query(
            session.query(PythonPackageVersion)
            .filter(PythonPackageVersion.package_name == package_name)
            .exists()
        ).scalar()
def solved_software_environment_exists(self, os_name: str, os_version: str, python_version: str) -> bool:
    """Tell whether any Python package record exists for the given software environment."""
    os_name = map_os_name(os_name)
    os_version = normalize_os_version(os_name, os_version)

    with self._session_scope() as session:
        matching_packages = session.query(PythonPackageVersion).filter(
            PythonPackageVersion.os_name == os_name,
            PythonPackageVersion.os_version == os_version,
            PythonPackageVersion.python_version == python_version,
        )
        return session.query(matching_packages.exists()).scalar()
def get_solved_python_package_versions_software_environment_all(self) -> List[Dict[str, str]]:
    """List distinct OS name, OS version and Python version combinations of registered solvers."""
    with self._session_scope() as session:
        rows = (
            session.query(EcosystemSolver)
            .with_entities(EcosystemSolver.os_name, EcosystemSolver.os_version, EcosystemSolver.python_version)
            .distinct()
            .all()
        )

        environments = []
        for row in rows:
            environments.append({"os_name": row[0], "os_version": row[1], "python_version": row[2]})
        return environments
@lru_cache(maxsize=_HAS_PYTHON_SOLVER_ERROR_CACHE_SIZE)
def has_python_solver_error(
    self,
    package_name: str,
    package_version: str,
    index_url: str,
    *,
    os_name: Union[str, None],
    os_version: Union[str, None],
    python_version: Union[str, None],
) -> bool:
    """Return the error flag of the most recent solver record for the given package.

    Environment parameters passed as None are not used for filtering.

    Raises:
        NotFoundError: if no matching record is found.
    """
    package_name = self.normalize_python_package_name(package_name)
    package_version = self.normalize_python_package_version(package_version)
    os_name = map_os_name(os_name)
    os_version = normalize_os_version(os_name, os_version)
    index_url = self.normalize_python_index_url(index_url)

    with self._session_scope() as session:
        query = session.query(PythonPackageVersion).filter(
            PythonPackageVersion.package_name == package_name,
            PythonPackageVersion.package_version == package_version,
        )

        environment_filters = (
            (PythonPackageVersion.os_name, os_name),
            (PythonPackageVersion.os_version, os_version),
            (PythonPackageVersion.python_version, python_version),
        )
        for column, value in environment_filters:
            if value is not None:
                query = query.filter(column == value)

        # Newest solver record first, only its error flag is needed.
        latest = (
            query.join(PythonPackageIndex)
            .filter(PythonPackageIndex.url == index_url)
            .join(Solved)
            .order_by(desc(Solved.datetime))
            .with_entities(Solved.error)
            .first()
        )

        if latest is None:
            raise NotFoundError(
                f"No package record found for {package_name!r} in version {package_version!r} "
                f"from {index_url!r}, OS name is {os_name!r}:{os_version!r} with Python version {python_version!r}"
            )

        return latest[0]
@staticmethod
def _count_per_package(result: Union[List, Dict[str, Any]]) -> Dict[Tuple[str, str, str], int]:
"""Format Query result to count per package."""
query_result = {}
for item in result:
if (item[0], item[1], item[2]) not in query_result:
query_result[(item[0], item[1], item[2])] = item[3]
else:
query_result[(item[0], item[1], item[2])] += item[3]
return query_result
@staticmethod
def _count_per_index(result: Union[List, Dict[str, Any]], index_url: str) -> Dict[str, Dict[Tuple[str, str], int]]:
"""Format Query result to count per index."""
index_url = GraphDatabase.normalize_python_index_url(index_url)
query_result = {index_url: {}}
for item in result:
if item[2] == index_url:
if (item[0], item[1]) not in query_result[index_url].keys():
query_result[index_url][(item[0], item[1])] = item[3]
else:
query_result[index_url][(item[0], item[1])] += item[3]
return query_result
@staticmethod
def _count_per_version(
result: Union[List, Dict[str, Any]],
) -> Dict[str, Dict[str, int]]:
"""Format Query result to count per version."""
query_result = {}
for item in result:
if item[1] not in query_result:
query_result[item[1]] = {}
query_result[item[1]][item[2]] = item[3]
else:
if item[2] not in query_result[item[1]]:
query_result[item[1]][item[2]] = item[3]
else:
query_result[item[1]][item[2]] += item[3]
return query_result
@staticmethod
def _group_by_package_name(
result: Union[List, Dict[str, Any]],
) -> Dict[str, List[Tuple[str, str]]]:
"""Format Query result to group by package name."""
query_result = {}
for item in result:
if item[0] not in query_result:
query_result[item[0]] = []
query_result[item[0]].append((item[1], item[2]))
return query_result
# Solved Python Packages
def get_solved_python_packages_all(
    self,
    *,
    start_offset: int = 0,
    count: Optional[int] = DEFAULT_COUNT,
    os_name: Optional[str] = None,
    os_version: Optional[str] = None,
    python_version: Optional[str] = None,
    distinct: bool = False,
) -> List[Tuple[str, str]]:
    """List solved Python packages together with their index.

    The OS name and version are normalized, then the call is delegated to
    ``get_python_packages_all`` with all arguments passed by keyword.

    Examples:
    >>> from thoth.storages import GraphDatabase
    >>> graph = GraphDatabase()
    >>> graph.get_solved_python_packages_all()
    [('regex', 'https://pypi.org/simple'), ('tensorflow', 'https://pypi.org/simple')]
    """
    os_name = map_os_name(os_name)
    os_version = normalize_os_version(os_name, os_version)

    # The instance is forwarded explicitly, by keyword, as the target is looked up on the class.
    return self.__class__.get_python_packages_all(
        self=self,
        start_offset=start_offset,
        count=count,
        os_name=os_name,
        os_version=os_version,
        python_version=python_version,
        distinct=distinct,
    )
def _construct_solved_python_packages_query(
    self,
    session: Session,
    *,
    os_name: Optional[str] = None,
    os_version: Optional[str] = None,
    python_version: Optional[str] = None,
) -> Query:
    """Build the query used by the solved Python packages getters; it is not executed here.

    ``session`` and the three filters are forwarded by keyword to
    ``_construct_python_packages_query`` looked up on the instance's class.
    """
    # ``self`` is deliberately not forwarded: the target is called as a static method.
    return self.__class__._construct_python_packages_query(
        session=session,
        os_name=os_name,
        os_version=os_version,
        python_version=python_version,
    )
def get_solved_python_packages_count_all(
    self,
    *,
    os_name: Optional[str] = None,
    os_version: Optional[str] = None,
    python_version: Optional[str] = None,
    distinct: bool = False,
) -> int:
    """Count the solved Python package versions stored in Thoth Database.

    ``os_name`` and ``os_version`` are normalized with ``map_os_name`` and
    ``normalize_os_version`` before the query is built. With ``distinct``
    set, ``distinct()`` is applied to the query before counting.
    """
    mapped_os_name = map_os_name(os_name)
    normalized_os_version = normalize_os_version(mapped_os_name, os_version)

    with self._session_scope() as session:
        query = self._construct_solved_python_packages_query(
            session,
            os_name=mapped_os_name,
            os_version=normalized_os_version,
            python_version=python_version,
        )
        return (query.distinct() if distinct else query).count()
def get_solved_python_packages_all_versions(
    self,
    *,
    start_offset: int = 0,
    count: Optional[int] = DEFAULT_COUNT,
    os_name: Optional[str] = None,
    os_version: Optional[str] = None,
    python_version: Optional[str] = None,
    distinct: bool = False,
) -> Dict[str, List[Tuple[str, str]]]:
    """Retrieve solved Python package versions per package in Thoth Database.

    ``os_name`` and ``os_version`` are normalized with ``map_os_name`` and
    ``normalize_os_version`` before the query is built. ``start_offset`` and
    ``count`` are applied to the query as offset and limit; with ``distinct``
    set, ``distinct()`` is applied as well.

    Returns:
        A mapping from the first field of each row to the list of
        ``(second field, third field)`` pairs, in row order.

    Examples:
    >>> from thoth.storages import GraphDatabase
    >>> graph = GraphDatabase()
    >>> graph.get_solved_python_packages_all_versions()
    {'absl-py': [('0.1.10', 'https://pypi.org/simple'), ('0.2.1', 'https://pypi.org/simple')]}
    """
    os_name = map_os_name(os_name)
    os_version = normalize_os_version(os_name, os_version)

    with self._session_scope() as session:
        query = self._construct_solved_python_packages_query(
            session, os_name=os_name, os_version=os_version, python_version=python_version
        )
        query = query.offset(start_offset).limit(count)

        if distinct:
            query = query.distinct()

        # Reuse the shared grouping helper instead of repeating its loop inline.
        return self._group_by_package_name(query.all())
def get_solved_python_package_version_environments_all(
    self,
    package_name: str,
    package_version: str,
    index_url: str,
    *,
    start_offset: int = 0,
    count: Optional[int] = DEFAULT_COUNT,
    distinct: bool = False,
) -> List[Dict[str, str]]:
    """Retrieve all the environments that were used to solve the given package.

    The package name, version and index URL are normalized first. Matching
    ``PythonPackageVersion`` records, joined with ``PythonPackageIndex`` on
    the index URL, are then paged with ``start_offset``/``count`` and each
    one is reported as a dict with the keys ``os_name``, ``os_version`` and
    ``python_version``.

    Examples:
    >>> from thoth.storages import GraphDatabase
    >>> graph = GraphDatabase()
    >>> graph.get_solved_python_package_version_environments_all(
    ...     package_name="flask", package_version="2.0.2", index_url="https://pypi.org/simple")
    [
        {"os_name": "rhel", "os_version": "8", "python_version": "3.8"},
        {"os_name": "fedora", "os_version": "35", "python_version": "3.9"},
    ]
    """
    package_name = self.normalize_python_package_name(package_name)
    package_version = self.normalize_python_package_version(package_version)
    index_url = self.normalize_python_index_url(index_url)

    with self._session_scope() as session:
        query = session.query(PythonPackageVersion)
        query = query.filter(
            PythonPackageVersion.package_name == package_name,
            PythonPackageVersion.package_version == package_version,
        )
        query = query.join(PythonPackageIndex)
        query = query.filter(PythonPackageIndex.url == index_url)
        query = query.offset(start_offset).limit(count)

        if distinct:
            query = query.distinct()

        # Only the three environment columns are selected, in this order.
        rows = query.with_entities(
            PythonPackageVersion.os_name,
            PythonPackageVersion.os_version,
            PythonPackageVersion.python_version,
        ).all()

        environments = []
        for row in rows:
            environments.append({"os_name": row[0], "os_version": row[1], "python_version": row[2]})
        return environments
def get_solved_python_package_versions_count(
    self,
    *,
    start_offset: int = 0,
    count: Optional[int] = DEFAULT_COUNT,
    os_name: Optional[str] = None,
    os_version: Optional[str] = None,
    python_version: Optional[str] = None,
    distinct: bool = False,
) -> Dict[Tuple[str, str, str], int]:
    """Retrieve number of Python Package (package_name, package_version, index_url) solved in Thoth Database.

    ``os_name`` is passed through ``map_os_name`` and ``os_version`` through
    ``normalize_os_version``; every argument is then handed over, by keyword,
    to ``get_python_package_versions_count`` looked up on the instance's class.

    Examples:
    >>> from thoth.storages import GraphDatabase
    >>> graph = GraphDatabase()
    >>> graph.get_solved_python_package_versions_count()
    {('absl-py', '0.1.10', 'https://pypi.org/simple'): 1, ('absl-py', '0.2.1', 'https://pypi.org/simple'): 1}
    """
    mapped_os_name = map_os_name(os_name)
    normalized_os_version = normalize_os_version(mapped_os_name, os_version)
    # The instance is passed explicitly as the ``self`` keyword because the
    # target is fetched from the class, not from the instance.
    return self.__class__.get_python_package_versions_count(
        self=self,
        start_offset=start_offset,
        count=count,
        os_name=mapped_os_name,
        os_version=normalized_os_version,
        python_version=python_version,
        distinct=distinct,
    )
def get_solved_python_package_versions_count_per_index(
self,
index_url: str,
*,
start_offset: int = 0,
count: Optional[int] = DEFAULT_COUNT,
os_name: Optional[str] = None,
os_version: Optional[str] = None,
python_version: Optional[str] = None,
distinct: bool = False,
) -> Dict[str, Dict[Tuple[str, str], int]]:
"""Retrieve number of solved Python package versions per index url in Thoth Database.
Examples: