From 041a37762386c305f5fc44edabf1c3344506e99f Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Sat, 8 Sep 2018 12:03:12 +0200 Subject: [PATCH 001/360] Basics of data importer for database change. Can import dbSNP. --- backend/db.py | 474 +++++++++--------- backend/settings.py | 15 +- scripts/add_picture_to_db.py | 2 +- scripts/importer/data_importer/__init__.py | 0 .../importer/data_importer/data_importer.py | 119 +++++ .../importer/data_importer/dbsnp_importer.py | 113 +++++ scripts/importer/importer.py | 74 +++ scripts/manage.sh | 36 ++ 8 files changed, 586 insertions(+), 247 deletions(-) mode change 100644 => 100755 scripts/add_picture_to_db.py create mode 100644 scripts/importer/data_importer/__init__.py create mode 100644 scripts/importer/data_importer/data_importer.py create mode 100644 scripts/importer/data_importer/dbsnp_importer.py create mode 100755 scripts/importer/importer.py create mode 100755 scripts/manage.sh diff --git a/backend/db.py b/backend/db.py index ed6d600eb..8be3a56f4 100644 --- a/backend/db.py +++ b/backend/db.py @@ -1,28 +1,27 @@ -from peewee import ( - BlobField, - CharField, - DateTimeField, - Field, - FloatField, - ForeignKeyField, - IntegerField, - Model, - MySQLDatabase, - PrimaryKeyField, - TextField, - fn, - ) -import logging -import settings - -database = MySQLDatabase( - settings.mysql_schema, - host=settings.mysql_host, - user=settings.mysql_user, - password=settings.mysql_passwd, - port=settings.mysql_port - ) +#!/usr/bin/env python3 +import settings +from peewee import (BigIntegerField, + BlobField, + CharField, + DateTimeField, + IntegerField, + Field, + FloatField, + ForeignKeyField, + Model, + PostgresqlDatabase, + PrimaryKeyField, + SQL, + TextField, + ) +from playhouse.postgres_ext import ArrayField, BinaryJSONField + +database = PostgresqlDatabase( settings.psql_name, + user = settings.psql_user, + password = settings.psql_pass, + host = settings.psql_host, + port = settings.psql_port) class BaseModel(Model): class Meta: @@ -32,8 +31,8 @@ class Meta: class EnumField(Field): db_field = 'string' # The same as for CharField - def __init__(self, values=None, *args, **kwargs): - self.values = values or [] + def __init__(self, choices=None, *args, **kwargs): + self.values = choices or [] super().__init__(*args, **kwargs) def db_value(self, value): @@ -46,57 +45,139 @@ def python_value(self, value): raise ValueError("Illegal value for '{}'".format(self.db_column)) return value +### +# Reference Tables +## + +class DbSNP_version(BaseModel): + """ + dbSNP datasets are very large, and some reference sets can use the same set, + which is why they are given their own header-table. + """ + class Meta: + db_table = 'dbsnp_versions' + schema = 'data' + + version_id = CharField() + + +class DbSNP(BaseModel): + class Meta: + db_table = 'dbsnp' + schema = 'data' + + version = ForeignKeyField(DbSNP_version, related_name="variants") + rsid = BigIntegerField() + chrom = CharField(max_length=10) + pos = IntegerField() + + +class ReferenceSet(BaseModel): + """ + The gencode, ensembl, dbNSFP and omim data are combined to fill out the + Gene, Transcript and Feature tables. DbSNP data is separate, and may be + shared between reference sets, so it uses a foreign key instead. 
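+
+    A minimal creation sketch (hypothetical values, borrowed from the
+    importer defaults; assumes the dbSNP version row already exists):
+
+        dbsnp, _ = DbSNP_version.get_or_create(version_id='b150_GRCh37p13')
+        ref = ReferenceSet.create(dbsnp_version=dbsnp,
+                                  gencode_version='19',
+                                  ensembl_version='homo_sapiens_core_75_37',
+                                  dbnsfp_version='2.9.3',
+                                  omim_version='omim_info.txt.gz')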
+ """ + class Meta: + db_table = 'reference_sets' + schema = 'data' + + dbsnp_version = ForeignKeyField(DbSNP_version, db_column="dbsnp_version", related_name="references") + name = CharField(db_column="reference_name", null=True) + ensembl_version = CharField() + gencode_version = CharField() + dbnsfp_version = CharField() + omim_version = CharField() + + +class Gene(BaseModel): + class Meta: + db_table = 'genes' + schema = 'data' + + reference_set = ForeignKeyField(ReferenceSet, db_column="reference_set", related_name="genes") + gene_id = CharField(unique=True, max_length=15) + name = CharField(db_column="gene_name", null=True) + full_name = CharField(null=True) + other_names = ArrayField(CharField, null=True) + canonical_transcript = CharField(null=True, max_length=15) + chrom = CharField(max_length=10) + start = IntegerField(db_column="start_pos") + stop = IntegerField(db_column="stop_pos") + strand = EnumField(choices=['+','-']) + + +class Transcript(BaseModel): + class Meta: + db_table = 'transcripts' + schema = 'data' + + transcript_id = CharField(max_length=15) + gene = ForeignKeyField(Gene, db_column="gene", related_name="transcripts") + chrom = CharField(max_length=10) + start = IntegerField(db_column="start_pos") + stop = IntegerField(db_column="stop_pos") + strand = EnumField(choices = ['+', '-']) -class User(BaseModel): - user = PrimaryKeyField(db_column='user_pk') - name = CharField(null=True) - email = CharField(unique=True) - identity = CharField(unique=True) - identity_type = EnumField(null=False, values=['google', 'elixir']) - affiliation = CharField(null=True) - country = CharField(null=True) - - def is_admin(self, dataset): - return DatasetAccess.select().where( - DatasetAccess.dataset == dataset, - DatasetAccess.user == self, - DatasetAccess.is_admin - ).count() - - def has_access(self, dataset): - return DatasetAccessCurrent.select().where( - DatasetAccessCurrent.dataset == dataset, - DatasetAccessCurrent.user == self, - ).count() - - def has_requested_access(self, dataset): - return DatasetAccessPending.select().where( - DatasetAccessPending.dataset == dataset, - DatasetAccessPending.user == self - ).count() +class Feature(BaseModel): + class Meta: + db_table = 'features' + schema = 'data' + + gene = ForeignKeyField(Gene, db_column="gene", related_name='exons') + transcript = ForeignKeyField(Transcript, db_column="transcript", related_name='transcripts') + chrom = CharField(max_length=10) + start = IntegerField(db_column="start_pos") + stop = IntegerField(db_column="stop_pos") + strand = EnumField(choices = ['+', '-']) + feature_type = CharField() + +### +# Study and Dataset fields +## + +class Collection(BaseModel): + """ + A collection is a source of data which can be sampled into a SampleSet. + """ class Meta: - db_table = 'user' + db_table = 'collections' + schema = 'data' + + name = CharField(null = True) + ethnicity = CharField(null = True) class Study(BaseModel): - study = PrimaryKeyField(db_column='study_pk') + """ + A study is a scientific study with a PI and a description, and may include + one or more datasets. 
+ """ + class Meta: + db_table = 'studies' + schema = 'data' + pi_name = CharField() pi_email = CharField() contact_name = CharField() contact_email = CharField() title = CharField() - description = TextField(null=True) + description = TextField(db_column="study_description", null=True) publication_date = DateTimeField() ref_doi = CharField(null=True) - class Meta: - db_table = 'study' - class Dataset(BaseModel): - dataset = PrimaryKeyField(db_column='dataset_pk') - study = ForeignKeyField(db_column='study_pk', rel_model=Study, to_field='study', related_name='datasets') + """ + A dataset is part of a study, and usually include a certain population. + Most studies only have a single dataset, but multiple are allowed. + """ + class Meta: + db_table = 'datasets' + schema = 'data' + + study = ForeignKeyField(Study, related_name='datasets') short_name = CharField() full_name = CharField() browser_uri = CharField(null=True) @@ -106,7 +187,6 @@ class Dataset(BaseModel): seq_tech = CharField(null=True) seq_center = CharField(null=True) dataset_size = IntegerField() - mongodb_collection = CharField(null=False) def has_image(self): try: @@ -115,15 +195,26 @@ def has_image(self): except DatasetLogo.DoesNotExist: return False + +class SampleSet(BaseModel): class Meta: - db_table = 'dataset' + db_table = 'sample_sets' + schema = 'data' + + dataset = ForeignKeyField(Dataset, related_name='sample_sets') + collection = ForeignKeyField(Collection, related_name='sample_sets') + sample_size = IntegerField() + phenotype = CharField(null=True) class DatasetVersion(BaseModel): - dataset_version = PrimaryKeyField(db_column='dataset_version_pk') - dataset = ForeignKeyField(db_column='dataset_pk', rel_model=Dataset, to_field='dataset', related_name='versions') - version = CharField() - description = TextField() + class Meta: + db_table = 'dataset_versions' + schema = 'data' + + dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='versions') + version = CharField(db_column="dataset_version") + description = TextField(db_column="dataset_description") terms = TextField() var_call_ref = CharField(null=True) available_from = DateTimeField() @@ -131,199 +222,94 @@ class DatasetVersion(BaseModel): data_contact_name = CharField(null=True) data_contact_link = CharField(null=True) - class Meta: - db_table = 'dataset_version' - - -class Collection(BaseModel): - collection = PrimaryKeyField(db_column = 'collection_pk') - name = CharField(null = True) - ethnicity = CharField(null = True) - - class Meta: - db_table = 'collection' - - -class SampleSet(BaseModel): - sample_set = PrimaryKeyField(db_column='sample_set_pk') - dataset = ForeignKeyField(db_column='dataset_pk', rel_model=Dataset, to_field='dataset', related_name='sample_sets') - collection = ForeignKeyField(db_column='collection_pk', rel_model=Collection, to_field='collection', related_name='sample_sets') - sample_size = IntegerField() - phenotype = CharField(null=True) +class DatasetFile(BaseModel): class Meta: - db_table = 'sample_set' - + db_table = 'dataset_files' + schema = 'data' -class DatasetFile(BaseModel): - dataset_file = PrimaryKeyField(db_column='dataset_file_pk') - dataset_version = ForeignKeyField(db_column='dataset_version_pk', rel_model=DatasetVersion, to_field='dataset_version', related_name='files') - name = CharField() + dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version", related_name='files') + name = CharField(db_column="basename") uri = CharField() bytes = IntegerField() - class Meta: - db_table = 
'dataset_file' - - -class UserAccessLog(BaseModel): - user_access_log = PrimaryKeyField(db_column='user_access_log_pk') - user = ForeignKeyField(db_column='user_pk', rel_model=User, to_field='user', related_name='access_logs') - dataset = ForeignKeyField(db_column='dataset_pk', rel_model=Dataset, to_field='dataset', related_name='access_logs') - action = EnumField(null=True, values=['access_requested','access_granted','access_revoked','private_link']) - ts = DateTimeField() - - class Meta: - db_table = 'user_access_log' - - -class UserConsentLog(BaseModel): - user_consent_log = PrimaryKeyField(db_column='user_access_log_pk') - user = ForeignKeyField(db_column='user_pk', rel_model=User, to_field='user', related_name='consent_logs') - dataset_version = ForeignKeyField(db_column='dataset_version_pk', rel_model=DatasetVersion, to_field='dataset_version', related_name='consent_logs') - ts = DateTimeField() - - class Meta: - db_table = 'user_consent_log' - - -class UserDownloadLog(BaseModel): - user_download_log = PrimaryKeyField(db_column='user_download_log_pk') - user = ForeignKeyField(db_column='user_pk', rel_model=User, to_field='user', related_name='download_logs') - dataset_file = ForeignKeyField(db_column='dataset_file_pk', rel_model=DatasetFile, to_field='dataset_file', related_name='download_logs') - ts = DateTimeField() - - class Meta: - db_table = 'user_download_log' - - -class DatasetAccess(BaseModel): - dataset_access = PrimaryKeyField(db_column='dataset_access_pk') - dataset = ForeignKeyField(db_column='dataset_pk', rel_model=Dataset, to_field='dataset', related_name='access') - user = ForeignKeyField(db_column='user_pk', rel_model=User, to_field='user', related_name='access') - wants_newsletter = IntegerField(null=True) - is_admin = IntegerField(null=True) - - class Meta: - db_table = 'dataset_access' - - -class DatasetAccessCurrent(DatasetAccess): - dataset = ForeignKeyField(db_column='dataset_pk', rel_model=Dataset, to_field='dataset', related_name='access_current') - user = ForeignKeyField(db_column='user_pk', rel_model=User, to_field='user', related_name='access_current') - has_access = IntegerField() - access_requested = DateTimeField() - - class Meta: - db_table = 'dataset_access_current' - - -class DatasetAccessPending(DatasetAccess): - dataset = ForeignKeyField(db_column='dataset_pk', rel_model=Dataset, to_field='dataset', related_name='access_pending') - user = ForeignKeyField(db_column='user_pk', rel_model=User, to_field='user', related_name='access_pending') - has_access = IntegerField() - access_requested = DateTimeField() +class DatasetLogo(BaseModel): class Meta: - db_table = 'dataset_access_pending' - + db_table = 'dataset_logos' + schema = 'data' -class DatasetLogo(BaseModel): - dataset_logo = PrimaryKeyField(db_column='dataset_logo_pk') - dataset = ForeignKeyField(db_column='dataset_pk', rel_model=Dataset, to_field='dataset', related_name='logo') + dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='logo') mimetype = CharField() - data = BlobField() - - class Meta: - db_table = 'dataset_logo' + data = BlobField(db_column="bytes") -class Linkhash(BaseModel): - linkhash = PrimaryKeyField(db_column='linkhash_pk') - dataset_version = ForeignKeyField(db_column='dataset_version_pk', rel_model=DatasetVersion, to_field='dataset_version', related_name='link_hashes') - user = ForeignKeyField(db_column='user_pk', rel_model=User, to_field='user', related_name='link_hashes') - hash = CharField() - expires_on = DateTimeField() +### +# Variant and coverage data 
fields +## +class Variant(BaseModel): class Meta: - db_table = 'linkhash' - + db_table = "variants" + schema = 'data' + + dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version", related_name="variants") + rsid = IntegerField() + chrom = CharField(max_length=10) + pos = IntegerField() + ref = CharField() + alt = CharField() + site_quality = FloatField() + genes = ArrayField(CharField) + transcripts = ArrayField(CharField) + orig_alt_alleles = ArrayField(CharField) + hom_count = IntegerField() + allele_freq = FloatField() + filter_string = CharField() + variant_id = CharField() + allele_count = IntegerField() + allele_num = IntegerField() + quality_metrics = BinaryJSONField() + vep_annotations = BinaryJSONField() + +class Coverage(BaseModel): + """ + Coverage statistics are pre-calculated for each variant for a given + dataset. -class DatasetVersionCurrent(DatasetVersion): - dataset = ForeignKeyField(db_column='dataset_pk', rel_model=Dataset, to_field='dataset', related_name='current_version') + The fields show the fraction of a population that reaches the + mapping coverages given by the variable names. + ex. cov20 = 0.994 means that 99.4% of the population had at a mapping + coverage of at least 20 in this position. + """ class Meta: - db_table = 'dataset_version_current' - - -class SFTPUser(BaseModel): - sftp_user = PrimaryKeyField(db_column='sftp_user_pk') - user = ForeignKeyField(db_column='user_pk', rel_model=User, to_field='user', related_name='sftp_user') - user_uid = IntegerField(unique=True) - user_name = CharField(null=False) - password_hash = CharField(null=False) - account_expires = DateTimeField(null=False) - + db_table = "coverage" + schema = 'data' + + dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version") + chrom = CharField(max_length=10) + pos = IntegerField() + mean = FloatField() + median = FloatField() + chrom = CharField(max_length=10) + cov1 = FloatField() + cov5 = FloatField() + cov10 = FloatField() + cov15 = FloatField() + cov20 = FloatField() + cov25 = FloatField() + cov30 = FloatField() + cov50 = FloatField() + cov100 = FloatField() + + +class Metrics(BaseModel): class Meta: - db_table = 'sftp_user' - - -def get_next_free_uid(): - """ - Returns the next free uid >= 10000, and higher than the current uid's - from the sftp_user table in the database. 
- """ - default = 10000 - next_uid = default - try: - current_max_uid = SFTPUser.select(fn.MAX(SFTPUser.user_uid)).get().user_uid - if current_max_uid: - next_uid = current_max_uid+1 - except SFTPUser.DoesNotExist: - pass - - return next_uid + db_table = "metrics" + schema = 'data' - -def get_admin_datasets(user): - return DatasetAccess.select().where( DatasetAccess.user == user, DatasetAccess.is_admin) - - -def get_dataset(dataset): - dataset = Dataset.select().where( Dataset.short_name == dataset).get() - return dataset - - -def get_dataset_version(dataset, version=None): - if version: - try: - dataset_version = (DatasetVersion - .select(DatasetVersion, Dataset) - .join(Dataset) - .where(DatasetVersion.version == version, - Dataset.short_name == dataset)).get() - except DatasetVersion.DoesNotExist: - logging.error("get_dataset_version({}, {}): ".format(dataset, version) + - "cannot retrieve dataset version") - return - else: - try: - dataset_version = (DatasetVersionCurrent - .select(DatasetVersionCurrent, Dataset) - .join(Dataset) - .where(Dataset.short_name == dataset)).get() - except DatasetVersionCurrent.DoesNotExist: - logging.error("get_dataset_version({}, version=None): ".format(dataset) + - "cannot retrieve dataset version") - return - return dataset_version - - -def build_dict_from_row(row): - d = {} - for field in row._meta.sorted_fields: #pylint: disable=protected-access - column = field.db_column - if column.endswith("_pk"): - continue - d[column] = getattr(row, column) - return d + dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version") + metric = CharField() + mids = ArrayField(IntegerField) + hist = ArrayField(IntegerField) diff --git a/backend/settings.py b/backend/settings.py index 577b93ef5..7d579275b 100644 --- a/backend/settings.py +++ b/backend/settings.py @@ -1,9 +1,13 @@ + +import os import json try: - json_settings_fh = open("settings.json") + current_dir = os.path.dirname(os.path.realpath(__file__)) + json_settings_fh = open(os.path.join(current_dir, "settings.json")) except FileNotFoundError: - json_settings_fh = open("../settings.json") + parent_dir = os.path.join(current_dir, os.pardir) + json_settings_fh = open(os.path.join(parent_dir, "settings.json")) json_settings = json.load(json_settings_fh) json_settings_fh.close() @@ -31,6 +35,13 @@ mongo_password = json_settings["mongoPassword"] mongo_databases = json_settings["mongoDatabases"] +# PostgreSQL settings +psql_host = json_settings["postgresHost"] +psql_port = json_settings["postgresPort"] +psql_name = json_settings["postgresName"] +psql_user = json_settings["postgresUser"] +psql_pass = json_settings["postgresPass"] + # e-mail config mail_server = json_settings["mailServer"] from_address = json_settings["fromAddress"] diff --git a/scripts/add_picture_to_db.py b/scripts/add_picture_to_db.py old mode 100644 new mode 100755 index cf768f738..819c11732 --- a/scripts/add_picture_to_db.py +++ b/scripts/add_picture_to_db.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import argparse import re import sys diff --git a/scripts/importer/data_importer/__init__.py b/scripts/importer/data_importer/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/scripts/importer/data_importer/data_importer.py b/scripts/importer/data_importer/data_importer.py new file mode 100644 index 000000000..c0d37b730 --- /dev/null +++ b/scripts/importer/data_importer/data_importer.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 + +import os +import sys +import gzip +import time +import 
logging +import MySQLdb +import urllib.request + +class DataImporter( object ): + + BLOCKSIZE = 1024 + + def __init__(self, download_dir, chrom = None, batch_size = 5000): + self.download_dir = download_dir + self.chrom = chrom + self.batch_size = batch_size + self.in_file = None + + def _connect(self, host, user, passwd, database): + try: + logging.info("Connecting to database {}".format(database)) + db = MySQLdb.connect(host=host, + user=user, + passwd=passwd, + db =database) + return db.cursor() + except MySQLdb.Error as e: + logging.error("Error connecting: {}".format(e)) + + def _download(self, base_url, version = None): + """ + Download a file into the download_dir. + """ + url = base_url.format(version = version) + filename = os.path.join(self.download_dir, url.split("/")[-1]) + if not os.path.exists(self.download_dir): + os.makedirs(self.download_dir) + try: + os.stat(filename) + logging.info("Found file: {}, not downloading".format(filename)) + return filename + except FileNotFoundError: + pass + + request = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'}) + response = urllib.request.urlopen(request) + filesize = None + if 'Content-length' in response.headers.keys(): + filesize = int(response.headers['Content-length']) + else: + logging.info("response lacks content-length header, but will still download.") + downloaded = 0 + logging.info("Downloading file {}".format(url)) + if filesize: + self._print_progress_bar() + with open(filename, 'wb') as out: + block = response.read(DataImporter.BLOCKSIZE) + last_progress = 0 + while block: + downloaded += len(block) + if logging.getLogger().getEffectiveLevel() < 30 and filesize: + progress = downloaded / filesize + while progress -last_progress > 0.01: + last_progress += 0.01 + self._tick() + out.write(block) + block = response.read(DataImporter.BLOCKSIZE) + response.close() + if logging.getLogger().getEffectiveLevel() < 30 and filesize: + self._tick(True) + sys.stderr.write("=\n") + return filename + + def _download_and_open(self, base_url, version = None): + """ + Downloads a file and returns an open file handle + """ + filename = self._download(base_url, version) + return self._open(filename) + + def _open(self, filename): + try: + logging.info("Opening file {}".format(filename)) + return gzip.open(filename,'rb') if filename.endswith(".gz") else open(filename) + except IOError as e: + logging.error("IOERROR: {}".format(e)) + + def _print_progress_bar(self): + if logging.getLogger().getEffectiveLevel() < 30: + sys.stderr.write("".join(["{:<10}".format(i) for i in range(0,101,10)]) + "\n") + sys.stderr.write("| ------- "*10 + "|\n") + + def _tick(self, finished = False): + """ + Prints a single progress tick, and a newline if finished is True. 
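+
+        One tick corresponds to a one percent step on the 0-100 ruler
+        drawn by _print_progress_bar().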
+ """ + sys.stderr.write("=") + if finished: + sys.stderr.write("\n") + sys.stderr.flush() + + def _time_format(self, seconds): + h, rem = divmod(seconds, 3600) + mins, secs = divmod(rem, 60) + retval = "" + if h: + retval += "{:d} hours, ".format(int(h)) + if mins: + retval += "{:d} mins, ".format(int(mins)) + retval += "{:3.1f} secs".format(secs) + return retval + + def _time_since(self, start): + return self._time_format(time.time() - start) + + def _time_to(self, start, progress = 0.01): + return self._time_format( (time.time() - start)/progress ) diff --git a/scripts/importer/data_importer/dbsnp_importer.py b/scripts/importer/data_importer/dbsnp_importer.py new file mode 100644 index 000000000..0d67c9b97 --- /dev/null +++ b/scripts/importer/data_importer/dbsnp_importer.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 + +import os +import time +import logging +import db +from .data_importer import DataImporter + +class DbSNPImporter( DataImporter ): + """ + Downloads and imports a dbSNP-dataset into the swefreq database. + """ + + URL=("ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_{a.version}" + "_{a.reference}/database/data/organism_data/{a.version}_" + "SNPChrPosOnRef_{a.number}.bcp.gz") + + def __init__(self, version, reference, download_dir="", chrom=None, batch_size=5000): + super().__init__(download_dir, chrom, batch_size) + self.version = version + self.reference = reference + self.number = 105 if reference.startswith("GRCh37") else 108 + self.total = None + + def count_entries(self): + start = time.time() + self.total = 0 + logging.info("Counting valid lines in file (for progress bar)") + for line in self.in_file: + line = line.decode('ascii').strip() + if line.startswith("#"): + continue + + if line.count("\t") != 2: + continue + + if self.chrom and not line.split("\t")[1] == self.chrom: + continue + + self.total += 1 + self.in_file.rewind() + logging.info("Found {} lines in {}".format(self.total, self._time_since(start))) + + def prepare_data(self): + url = DbSNPImporter.URL.format(a=self) + filename = url.split("/")[-1] + try: + os.stat( os.path.join(self.download_dir, filename) ) + self.in_file = self._open( os.path.join(self.download_dir, filename) ) + except FileNotFoundError: + self.in_file = self._download_and_open(url) + + def prepare_version(self): + version_id = "{a.version}_{a.reference}".format(a=self) + dbsnp_version, created = db.DbSNP_version.get_or_create(version_id = version_id) + if created: + logging.info("Created dbsnp_version '{}'".format(version_id)) + else: + logging.info("dbsnp_version '{}' already in database".format(version_id)) + return dbsnp_version + + def start_import(self): + """ + dbsnp-file header is 'rsid', 'chrom', 'position' + """ + dbsnp_version = self.prepare_version() + + start = time.time() + last_progress = 0.0 + logging.info("Inserting dbSNP data into database.") + + counter = 0 + batch = [] + with db.database.atomic(): + for line in self.in_file: + line = line.decode('ascii').strip() + if line.startswith("#"): + continue + + try: + rsid, chrom, position = line.split("\t")[:3] + except ValueError: + # we don't care for incomplete entries + continue + + if self.chrom and not chrom == self.chrom: + continue + + batch += [{ 'version':dbsnp_version, + 'rsid':rsid, + 'chrom':chrom, + 'pos':position}] + counter += 1 + + if self.total != None: + progress = counter / self.total + while progress > last_progress + 0.01: + if not last_progress: + logging.info("Estimated time to completion: {}".format(self._time_to(start, progress))) + if 
self.total != None: + self._print_progress_bar() + self._tick() + last_progress += 0.01 + + if len(batch) >= self.batch_size: + db.DbSNP.insert_many(batch).execute() + batch = [] + db.database.commit() + if batch: + db.DbSNP.insert_many(batch) + if self.total != None: + self._tick(True) + logging.info("Inserted {} valid lines in {}".format(counter, self._time_since(start))) diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py new file mode 100755 index 000000000..5951c6950 --- /dev/null +++ b/scripts/importer/importer.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +""" +Swefreq data importer. +""" + +from data_importer.dbsnp_importer import DbSNPImporter + +if __name__ == '__main__': + + import os + import argparse + import logging + + parser = argparse.ArgumentParser( description = __doc__ , + formatter_class = argparse.ArgumentDefaultsHelpFormatter ) + + parser.add_argument("--batch_size", type=int, default=5000, + help = ("Where batch insertion is possible, use this number of" + " inserts per batch.")) + parser.add_argument("--limit_chrom", default=None, + help = "Limit chromosome to insert into the database.") + parser.add_argument("--data_dir", default=os.path.join(os.path.dirname(__file__), + "downloaded_files"), + help = "Default directory to download and look for files.") + + # Reference versions + parser.add_argument("--add_reference", action="store_true", + help = "Insert new reference set.") + + parser.add_argument("--gencode_version", default=19, type=int, + help = "Gencode version to download and use.") + parser.add_argument("--ensembl_version", default="homo_sapiens_core_75_37", + help = "Ensembl database to connect to.") + parser.add_argument("--dbnsfp_version", default="2.9.3", + help = "dbNSFP version to download and use.") + parser.add_argument("--dbsnp_version", default="b150", + help = "dbSNP version to download and use.") + parser.add_argument("--dbsnp_reference", default="GRCh37p13", + help = "Which reference the dbSNP should be aligned to.") + + # Actions + parser.add_argument("--add_dbsnp", action="store_true", + help = "Adds a new dbSNP version to the database.") + + # Logging and verbosity + parser.add_argument("--disable_progress", action="store_true", + help="Do not show progress bars.") + parser.add_argument("-v", "--verbose", action = "count", default = 3, + help="Increase output Verbosity.") + parser.add_argument("-q", "--quiet", action = "count", default = 0, + help="Decrease output Verbosity.") + + args = parser.parse_args() + + logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s", level = (5-args.verbose+args.quiet)*10, datefmt="%H:%M:%S") + + if args.add_dbsnp: + logging.info("Adding new dbSNP version") + logging.info(" - dbSNP version: {}".format(args.dbsnp_version)) + logging.info(" - dbSNP reference: {}".format(args.dbsnp_reference)) + + importer = DbSNPImporter(args.dbsnp_version, args.dbsnp_reference, + args.data_dir, args.limit_chrom, args.batch_size) + importer.prepare_data() + if not args.disable_progress: + importer.count_entries() + importer.start_import() + + if args.add_reference: + logging.info("Adding a new reference set using these sources:") + logging.info(" - Gencode: {}".format(args.gencode_version)) + logging.info(" - Ensembl: {}".format(args.ensembl_version)) + logging.info(" - dbNSFP: {}".format(args.dbnsfp_version)) + logging.info(" - dbSNP: {}".format(args.dbsnp_version)) diff --git a/scripts/manage.sh b/scripts/manage.sh new file mode 100755 index 000000000..773d57a4d --- /dev/null +++ 
b/scripts/manage.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# Some variables +COMMANDS=(import add_picture) +PRINT_HELP=$# +SCRIPT_DIR="$(cd $(dirname ${BASH_SOURCE[0]}) && pwd)" +export PYTHONPATH="${PYTHONPATH}:${SCRIPT_DIR}/../backend" + +# Figure out if -h/--help goes to this script or to the command +for arg in $@ +do + for command in ${COMMANDS[@]}; do [[ "$arg" == "$command" ]] && break 2; done + [[ "$arg" == "-h" ]] || [[ "$arg" == "--help" ]] && PRINT_HELP="0" +done + +if [[ "$PRINT_HELP" == "0" ]] +then + cat <<-USAGE +USAGE: $0 [command] + +Valid commands are: + + import Import data into the database. + add_picture Add a picture into the database + +Use $0 [command] -h or --help to get help on a specific command. +USAGE +fi + +while (( "$#" )) +do + arg="$1" + shift + [[ "$arg" == "import" ]] && ${SCRIPT_DIR}/importer/importer.py $@ && break + [[ "$arg" == "add_picture" ]] && ${SCRIPT_DIR}/add_picture_to_db.py $@ && break +done From 6975d9f92350c40ee6bebd9a3d73182e2c1105a3 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Sat, 8 Sep 2018 16:03:18 +0200 Subject: [PATCH 002/360] Some changes to how arguments are passed. --- .../importer/data_importer/data_importer.py | 15 ++++++------ .../importer/data_importer/dbsnp_importer.py | 23 ++++++++++--------- scripts/importer/importer.py | 3 +-- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/scripts/importer/data_importer/data_importer.py b/scripts/importer/data_importer/data_importer.py index c0d37b730..e8b1f58bc 100644 --- a/scripts/importer/data_importer/data_importer.py +++ b/scripts/importer/data_importer/data_importer.py @@ -12,10 +12,11 @@ class DataImporter( object ): BLOCKSIZE = 1024 - def __init__(self, download_dir, chrom = None, batch_size = 5000): - self.download_dir = download_dir - self.chrom = chrom - self.batch_size = batch_size + def __init__(self, settings): + self.download_dir = settings.data_dir + self.chrom = settings.limit_chrom + self.batch_size = settings.batch_size + self.progress_bar = not settings.disable_progress self.in_file = None def _connect(self, host, user, passwd, database): @@ -53,14 +54,14 @@ def _download(self, base_url, version = None): logging.info("response lacks content-length header, but will still download.") downloaded = 0 logging.info("Downloading file {}".format(url)) - if filesize: + if filesize and self.progress_bar: self._print_progress_bar() with open(filename, 'wb') as out: block = response.read(DataImporter.BLOCKSIZE) last_progress = 0 while block: downloaded += len(block) - if logging.getLogger().getEffectiveLevel() < 30 and filesize: + if self.progress_bar and logging.getLogger().getEffectiveLevel() < 30 and filesize: progress = downloaded / filesize while progress -last_progress > 0.01: last_progress += 0.01 @@ -68,7 +69,7 @@ def _download(self, base_url, version = None): out.write(block) block = response.read(DataImporter.BLOCKSIZE) response.close() - if logging.getLogger().getEffectiveLevel() < 30 and filesize: + if self.progress_bar and logging.getLogger().getEffectiveLevel() < 30 and filesize: self._tick(True) sys.stderr.write("=\n") return filename diff --git a/scripts/importer/data_importer/dbsnp_importer.py b/scripts/importer/data_importer/dbsnp_importer.py index 0d67c9b97..16258987a 100644 --- a/scripts/importer/data_importer/dbsnp_importer.py +++ b/scripts/importer/data_importer/dbsnp_importer.py @@ -11,15 +11,16 @@ class DbSNPImporter( DataImporter ): Downloads and imports a dbSNP-dataset into the swefreq database. 
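+
+    The input is an NCBI SNPChrPosOnRef dump: tab-separated lines of
+    rsid, chromosome and position. A typical invocation, using the
+    flags and defaults defined in importer.py, would be:
+
+        ./importer.py --add_dbsnp --dbsnp_version b150 \
+                      --dbsnp_reference GRCh37p13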
""" - URL=("ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_{a.version}" - "_{a.reference}/database/data/organism_data/{a.version}_" - "SNPChrPosOnRef_{a.number}.bcp.gz") - - def __init__(self, version, reference, download_dir="", chrom=None, batch_size=5000): - super().__init__(download_dir, chrom, batch_size) - self.version = version - self.reference = reference - self.number = 105 if reference.startswith("GRCh37") else 108 + URL=("ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_{a.dbsnp_version}" + "_{a.dbsnp_reference}/database/data/organism_data/{a.dbsnp_version}_" + "SNPChrPosOnRef_{a.dbsnp_number}.bcp.gz") + + def __init__(self, settings): + super().__init__(settings) + self.settings = settings + self.settings.dbsnp_number = 105 + if settings.dbsnp_reference.startswith("GRCh37"): + self.settings.dbsnp_number = 108 self.total = None def count_entries(self): @@ -42,7 +43,7 @@ def count_entries(self): logging.info("Found {} lines in {}".format(self.total, self._time_since(start))) def prepare_data(self): - url = DbSNPImporter.URL.format(a=self) + url = DbSNPImporter.URL.format(a=self.settings) filename = url.split("/")[-1] try: os.stat( os.path.join(self.download_dir, filename) ) @@ -51,7 +52,7 @@ def prepare_data(self): self.in_file = self._download_and_open(url) def prepare_version(self): - version_id = "{a.version}_{a.reference}".format(a=self) + version_id = "{a.dbsnp_version}_{a.dbsnp_reference}".format(a=self.settings) dbsnp_version, created = db.DbSNP_version.get_or_create(version_id = version_id) if created: logging.info("Created dbsnp_version '{}'".format(version_id)) diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index 5951c6950..d7ad8006b 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -59,8 +59,7 @@ logging.info(" - dbSNP version: {}".format(args.dbsnp_version)) logging.info(" - dbSNP reference: {}".format(args.dbsnp_reference)) - importer = DbSNPImporter(args.dbsnp_version, args.dbsnp_reference, - args.data_dir, args.limit_chrom, args.batch_size) + importer = DbSNPImporter(args) importer.prepare_data() if not args.disable_progress: importer.count_entries() From bb36ec7f835ecb8914593127481f1ee1dfbf6e54 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Mon, 10 Sep 2018 11:58:58 +0200 Subject: [PATCH 003/360] Adds reference set importer to importer, as well as bug fixes to dbsnp importer. 
--- backend/db.py | 2 + .../importer/data_importer/data_importer.py | 1 + .../importer/data_importer/dbsnp_importer.py | 7 +- .../data_importer/reference_set_importer.py | 415 ++++++++++++++++++ scripts/importer/importer.py | 14 + scripts/manage.sh | 61 +-- 6 files changed, 469 insertions(+), 31 deletions(-) create mode 100644 scripts/importer/data_importer/reference_set_importer.py diff --git a/backend/db.py b/backend/db.py index 8be3a56f4..ad21508f5 100644 --- a/backend/db.py +++ b/backend/db.py @@ -114,6 +114,8 @@ class Meta: transcript_id = CharField(max_length=15) gene = ForeignKeyField(Gene, db_column="gene", related_name="transcripts") + mim_gene_accession = IntegerField() + mim_annotation = CharField() chrom = CharField(max_length=10) start = IntegerField(db_column="start_pos") stop = IntegerField(db_column="stop_pos") diff --git a/scripts/importer/data_importer/data_importer.py b/scripts/importer/data_importer/data_importer.py index e8b1f58bc..bda4cc988 100644 --- a/scripts/importer/data_importer/data_importer.py +++ b/scripts/importer/data_importer/data_importer.py @@ -13,6 +13,7 @@ class DataImporter( object ): BLOCKSIZE = 1024 def __init__(self, settings): + self.settings = settings self.download_dir = settings.data_dir self.chrom = settings.limit_chrom self.batch_size = settings.batch_size diff --git a/scripts/importer/data_importer/dbsnp_importer.py b/scripts/importer/data_importer/dbsnp_importer.py index 16258987a..664f73adc 100644 --- a/scripts/importer/data_importer/dbsnp_importer.py +++ b/scripts/importer/data_importer/dbsnp_importer.py @@ -17,9 +17,8 @@ class DbSNPImporter( DataImporter ): def __init__(self, settings): super().__init__(settings) - self.settings = settings self.settings.dbsnp_number = 105 - if settings.dbsnp_reference.startswith("GRCh37"): + if settings.dbsnp_reference.startswith("GRCh38"): self.settings.dbsnp_number = 108 self.total = None @@ -32,10 +31,10 @@ def count_entries(self): if line.startswith("#"): continue - if line.count("\t") != 2: + if line.count("\t") < 2: continue - if self.chrom and not line.split("\t")[1] == self.chrom: + if self.chrom and not line.split("\t")[1] == str(self.chrom): continue self.total += 1 diff --git a/scripts/importer/data_importer/reference_set_importer.py b/scripts/importer/data_importer/reference_set_importer.py new file mode 100644 index 000000000..86f8cfe1a --- /dev/null +++ b/scripts/importer/data_importer/reference_set_importer.py @@ -0,0 +1,415 @@ +#!/usr/bin/env python3 + +import os +import gzip +import time +import shutil +import logging +import zipfile +import db + +from .data_importer import DataImporter + +class ReferenceSetImporter( DataImporter ): + + GENCODE = "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_{a.gencode_version}/gencode.v{a.gencode_version}.annotation.gtf.gz" + DBNSFP = "ftp://dbnsfp:dbnsfp@dbnsfp.softgenetics.com/dbNSFPv{a.dbnsfp_version}.zip" + ENSEMBL = ("ensembldb.ensembl.org", "anonymous", "") + + def __init__(self, settings): + super().__init__(settings) + + # counters for statistics and progress + self.numbers = {'genes':None, + 'transcripts':None, + 'features':None} + self.counters = {'genes':0, + 'transcripts':0, + 'features':0} + + # dictionaries to hold data while processing + self.genes = [] + self.transcripts = [] + self.features = [] + + # file handlers for later + self.gencode = None + self.dbnsfp = None + self.omim = None + self.ensembl = None + + def _insert_features(self): + logging.info("Inserting features into database") + start = time.time() + 
self._print_progress_bar() + last_progress = 0 + batch = [] + with db.database.atomic(): + for i, feature in enumerate(self.features): + batch += [{'gene':self.gene_db_ids[feature['gene_id']], + 'transcript':self.transcript_db_ids[feature['transcript_id']], + 'chrom':feature['chrom'], + 'start':feature['start'], + 'stop':feature['stop'], + 'strand':feature['strand'], + 'feature_type':feature['feature_type']}] + + if len(batch) % self.batch_size == 0: + db.Feature.insert_many(batch).execute() + batch = [] + + progress = i / len(self.features) + while progress - last_progress > 0.01: + last_progress += 0.01 + self._tick() + if len(batch): + db.Feature.insert_many(batch).execute() + batch = [] + self._tick(True) + + logging.info("Features inserted in {}".format( self._time_since(start) )) + + def _insert_genes(self): + logging.info("Inserting genes into database") + start = time.time() + self.gene_db_ids = {} + self._print_progress_bar() + last_progress = 0 + for i, gene in enumerate(self.genes): + # As far as I know I can't batch insert these and still get the id's back + + db_gene = db.Gene( reference_set = self.db_reference, + gene_id = gene['gene_id'], + name = gene['name'], + full_name = gene.get('full_name', None), + other_names = gene.get('other_names', None), + canonical_transcript = gene.get('canonical_transcript', None), + chrom = gene['chrom'], + start = gene['start'], + end = gene['stop'], + strand = gene['strand'] + ) + db_gene.save() + self.gene_db_ids[gene['gene_id']] = db_gene.id + + progress = i / len(self.genes) + while progress - last_progress > 0.01: + last_progress += 0.01 + self._tick() + self._tick(True) + + logging.info("Genes inserted in {}".format( self._time_since(start) )) + + def _insert_reference(self): + logging.info("Getting dbSNP version id") + version_id = "{a.dbsnp_version}_{a.dbsnp_reference}".format(a=self.settings) + dbsnp_version, created = db.DbSNP_version.get_or_create(version_id = version_id) + if created: + logging.info("Created dbsnp_version '{}'".format(version_id)) + else: + logging.info("Using dbsnp_version '{}'".format(version_id)) + + logging.info("inserting reference header") + self.db_reference = db.ReferenceSet(name = None, + ensembl_version = self.settings.ensembl_version, + gencode_version = self.settings.gencode_version, + dbnsfp_version = self.settings.dbnsfp_version, + omim_version = self.settings.omim_file, + dbsnp_version = dbsnp_version.id) + self.db_reference.save() + logging.info("Reference {} created".format(self.db_reference.id)) + + def _insert_transcripts(self): + logging.info("Inserting transcripts into database") + start = time.time() + + self.transcript_db_ids = {} + self._print_progress_bar() + last_progress = 0 + for i, transcript in enumerate(self.transcripts): + db_transcript = db.Transcript( transcript_id = transcript['transcript_id'], + gene = self.gene_db_ids[transcript['gene_id']], + mim_annotation = transcript.get('mim_annotation', None), + mim_gene_accession = transcript.get('mim_gene_accession', None), + chrom = transcript['chrom'], + start = transcript['start'], + stop = transcript['stop'], + strand = transcript['strand'] + ) + db_transcript.save() + self.transcript_db_ids[transcript['transcript_id']] = db_transcript.id + + progress = i / len(self.transcripts) + while progress - last_progress > 0.01: + last_progress += 0.01 + self._tick() + self._tick(True) + + logging.info("Transcripts inserted in {}".format( self._time_since(start) )) + + def _open_dbnsfp(self): + """ + Downloads (if needed) and opens the given 
dbNSFP file. + + Only a small part, 'dbNSFP2.9_gene' of the ~13Gb file is needed, but in + order to get it we have to download the entire file, extract the part + that we want, and then discard the dbNSFP package. + """ + logging.info("----- Opening dbNSFP file -----") + url = ReferenceSetImporter.DBNSFP.format(a=self.settings) + filename = url.split("/")[-1] + dbnsfp_file = "dbNSFP2.9_gene" + dbnsfp_path = os.path.join( self.download_dir, dbnsfp_file ) + dbnsfp_gzip = "{}.gz".format(dbnsfp_path) + try: + os.stat( dbnsfp_gzip ) + except FileNotFoundError: + try: + package_file = os.path.join( self.download_dir, filename) + os.stat( package_file ) + except FileNotFoundError: + self._download( url ) + logging.info("extracting {} from {}".format(dbnsfp_file, package_file)) + package = zipfile.ZipFile(package_file) + package.extract(dbnsfp_file, self.download_dir) + logging.info("gzipping {}".format(dbnsfp_file)) + with open( dbnsfp_path, 'rb') as f_in: + with gzip.open( dbnsfp_gzip, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + logging.info("removing non-zipped file and package file.") + os.remove(dbnsfp_path) + os.remove(package_file) + self.dbnsfp = self._open( dbnsfp_gzip ) + + def _open_ensembl(self): + """ + Connects to the given ensembl database. + """ + logging.info("----- Opening ensembl database connection -----") + self.ensembl = self._connect(*(ReferenceSetImporter.ENSEMBL + (self.settings.ensembl_version,))) + + def _open_gencode(self): + """ + Downloads (if needed) and opens the given gencode file + """ + logging.info("----- Opening gencode file -----") + url = ReferenceSetImporter.GENCODE.format(a=self.settings) + filename = url.split("/")[-1] + try: + os.stat( os.path.join(self.download_dir, filename) ) + self.gencode = self._open( os.path.join(self.download_dir, filename) ) + except FileNotFoundError: + self.gencode = self._download_and_open(url) + + def _open_omim(self): + """ + We can't download OMIM files, so we just open the given OMIM file + """ + logging.info("----- Opening OMIM file -----") + self.omim = self._open( self.settings.omim_file ) + + def _read_dbnsfp(self): + start = time.time() + header = None + logging.info("Adding dbNSFP annotation") + + dbnsfp_cache = {} + for line in self.dbnsfp: + raw = line.decode('utf8').strip().split("\t") + if not header: + header = raw + if header: + continue + + values = {} + for i, value in enumerate(raw): + values[header[i]] = value + + dbnsfp_cache[ values['Ensembl_gene'] ] = { + 'other_names': values['Gene_other_names'], + 'full_name': values['Gene_full_name'] + } + for i, gene in enumerate(self.genes): + if gene['gene_id'] in dbnsfp_cache: + for key, item in dbnsfp_cache[gene['gene_id']].items(): + if item in ['', '.']: + item = None + self.genes[i][key] = item + + logging.info("dbNSFP information added in {}.".format( self._time_since(start) )) + + def _read_ensembl(self): + """ + Reads the ensembl information into the gene dictionary + """ + query = """SELECT g.stable_id, + t.stable_id + FROM gene g + JOIN transcript t + ON (g.canonical_transcript_id = t.transcript_id) + """ + start = time.time() + + canonical_dict = {} + logging.info("Pre-fetching all canonical transcripts") + self.ensembl.execute(query) + for transcript in self.ensembl.fetchall(): + canonical_dict[transcript[0]] = transcript[1] + + last_progress = 0.0 + if self.numbers['genes'] != None: + self._print_progress_bar() + + for i, gene in enumerate(self.genes): + if gene['gene_id'] in canonical_dict: + self.genes[i]['canonical_transcript'] = 
canonical_dict[gene['gene_id']] + + self.counters['genes'] += 1 + if self.numbers['genes'] != None: + progress = i / self.numbers['genes'] + while progress - last_progress > 0.01: + last_progress += 0.01 + self._tick() + if self.numbers['genes'] != None: + self._tick(True) + logging.info("Canonical transcript information from ensembl added in {}.".format( self._time_since(start) )) + + def _read_omim(self): + start = time.time() + logging.info("Adding OMIM annotations") + + cache = {} + header = None + for line in self.omim: + raw = line.decode('utf8').strip().split("\t") + if not header: + header = [r.strip() for r in raw] + if header: + continue + + values = {} + for i, value in enumerate(raw): + values[header[i]] = value + + if 'MIM Gene Description' not in values: + continue + + if 'Ensembl Transcript ID' in cache: + logging.warning(("The Ensembl Transcript ID '{}' was found twice" + " in the OMIM file. this was not planned for.")) + cache[values['Ensembl Transcript ID']] = \ + {'mim_gene_accession':int(values['MIM Gene Accession']), + 'mim_annotation':values['MIM Gene Description'].strip().capitalize(), + } + + counter = 0 + empty = {'mim_gene_accession':None, 'mim_annotation':None} + for i, transcript in enumerate(self.transcripts): + if transcript['transcript_id'] in cache: + self.transcripts[i].update(cache[transcript['transcript_id']]) + if counter == 0: + print(self.transcripts[i]) + counter += 1 + else: + self.transcripts[i].update(empty) + + logging.info("OMIM information added in {}.".format( self._time_since(start) )) + + def count_entries(self): + logging.info("Counting features in gencode file (for progress bar)") + start = time.time() + self.numbers['genes'] = 0 + self.numbers['transcripts'] = 0 + self.numbers['features'] = 0 + for row in self.gencode: + raw = row.decode('ascii').strip() + if raw[0] == "#": + continue + values = raw.split("\t") + if len(values) < 2: + continue + + if self.chrom and values[0][3:] not in self.chrom: + continue + + if values[2] == 'gene': + self.numbers['genes'] += 1 + elif values[2] == 'transcript': + self.numbers['transcripts'] += 1 + elif values[2] in ['CDS', 'exon', 'UTR']: + self.numbers['features'] += 1 + + self.gencode.rewind() + logging.info("Parsed file in {:3.1f}s".format(time.time()-start)) + logging.info("Genes : {}".format(self.numbers['genes'])) + logging.info("Transcripts: {}".format(self.numbers['transcripts'])) + logging.info("Features : {}".format(self.numbers['features'])) + + def prepare_data(self): + self._open_gencode() + self._open_dbnsfp() + self._open_omim() + self._open_ensembl() + + def start_import(self): + start = time.time() + logging.info("Reading gencode data into buffers.") + last_progress = 0.0 + if self.numbers['genes'] != None: + self._print_progress_bar() + for line in self.gencode: + line = line.decode('ascii').strip() + if line.startswith("#"): + continue + try: + values = line.split("\t") + + if self.chrom and values[0][3:] not in self.chrom: + continue + + info = dict(x.strip().split() for x in values[8].split(';') if x != '') + info = {k: v.strip('"') for k, v in info.items()} + + data = {'chrom':values[0][3:], + 'start':int(values[3]) + 1, # bed files are 0-indexed + 'stop':int(values[4]) + 1, + 'strand':values[6], + 'gene_id':info['gene_id'].split('.')[0]} + + # only progress for genes to keep it simple + if self.numbers['genes'] != None: + progress = self.counters['genes'] / self.numbers['genes'] + while progress - last_progress > 0.01: + last_progress += 0.01 + self._tick() + if values[2] == 
'gene': + data['name'] = info['gene_name'] + self.genes += [data] + self.counters['genes'] += 1 + continue + + data['transcript_id'] = info['transcript_id'].split('.')[0] + if values[2] == 'transcript': + self.transcripts += [data] + self.counters['transcripts'] += 1 + continue + + if values[2] in ['exon', 'CDS', 'UTR']: + data['feature_type'] = values[2] + self.features += [data] + self.counters['features'] += 1 + continue + + except Exception as e: + logging.error("{}".format(e)) + break + if self.numbers['genes'] != None: + self._tick(True) + logging.info("Gencode data read into buffers in {}.".format( self._time_since(start) )) + self._read_ensembl() + self._read_dbnsfp() + self._read_omim() + self._insert_reference() + self._insert_genes() + self._insert_transcripts() + self._insert_features() diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index d7ad8006b..cfdaad0a9 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -4,6 +4,7 @@ """ from data_importer.dbsnp_importer import DbSNPImporter +from data_importer.reference_set_importer import ReferenceSetImporter if __name__ == '__main__': @@ -38,6 +39,13 @@ parser.add_argument("--dbsnp_reference", default="GRCh37p13", help = "Which reference the dbSNP should be aligned to.") + # omim file, since we can't download or version them + parser.add_argument("--omim_file", default=os.path.join(os.path.dirname(__file__), + "downloaded_files", + "omim_info.txt.gz"), + help = "OMIM annotation file.") + + # Actions parser.add_argument("--add_dbsnp", action="store_true", help = "Adds a new dbSNP version to the database.") @@ -71,3 +79,9 @@ logging.info(" - Ensembl: {}".format(args.ensembl_version)) logging.info(" - dbNSFP: {}".format(args.dbnsfp_version)) logging.info(" - dbSNP: {}".format(args.dbsnp_version)) + + importer = ReferenceSetImporter(args) + importer.prepare_data() + if not args.disable_progress: + importer.count_entries() + importer.start_import() \ No newline at end of file diff --git a/scripts/manage.sh b/scripts/manage.sh index 773d57a4d..164769436 100755 --- a/scripts/manage.sh +++ b/scripts/manage.sh @@ -1,36 +1,43 @@ #!/usr/bin/env bash # Some variables -COMMANDS=(import add_picture) -PRINT_HELP=$# -SCRIPT_DIR="$(cd $(dirname ${BASH_SOURCE[0]}) && pwd)" -export PYTHONPATH="${PYTHONPATH}:${SCRIPT_DIR}/../backend" - -# Figure out if -h/--help goes to this script or to the command -for arg in $@ -do - for command in ${COMMANDS[@]}; do [[ "$arg" == "$command" ]] && break 2; done - [[ "$arg" == "-h" ]] || [[ "$arg" == "--help" ]] && PRINT_HELP="0" -done - -if [[ "$PRINT_HELP" == "0" ]] -then - cat <<-USAGE -USAGE: $0 [command] +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +export PYTHONPATH=${PYTHONPATH:+"$PYTHONPATH:"}"$SCRIPT_DIR/../backend" + +do_help () { + cat <&2 + do_help >&2 + exit 1 +esac From 98660998dc68dbf6f07462ee726a9edef11cdbdb Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Fri, 14 Sep 2018 21:11:53 +0200 Subject: [PATCH 004/360] Adds importer for old database data tables. 
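old_db.py mirrors the legacy MySQL schema so its rows can be read with
peewee, and old_db_importer.py copies collections, studies and datasets
into the new PostgreSQL schema. Rows are looked up with get() and only
inserted when missing, and an id_map records how old primary keys map
to new ids so that foreign keys can be rewired. When several reference
sets exist, the importer asks interactively which one each dataset
should use.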
--- backend/db.py | 12 +- scripts/importer/data_importer/old_db.py | 303 ++++++++++++++++++ .../importer/data_importer/old_db_importer.py | 226 +++++++++++++ .../data_importer/reference_set_importer.py | 47 ++- scripts/importer/importer.py | 17 +- 5 files changed, 579 insertions(+), 26 deletions(-) create mode 100644 scripts/importer/data_importer/old_db.py create mode 100644 scripts/importer/data_importer/old_db_importer.py diff --git a/backend/db.py b/backend/db.py index ad21508f5..48ea11d43 100644 --- a/backend/db.py +++ b/backend/db.py @@ -147,7 +147,7 @@ class Meta: db_table = 'collections' schema = 'data' - name = CharField(null = True) + name = CharField(db_column="study_name", null = True) ethnicity = CharField(null = True) @@ -179,7 +179,8 @@ class Meta: db_table = 'datasets' schema = 'data' - study = ForeignKeyField(Study, related_name='datasets') + study = ForeignKeyField(Study, db_column="study", related_name='datasets') + reference_set = ForeignKeyField(ReferenceSet, db_column="reference_set", related_name='datasets') short_name = CharField() full_name = CharField() browser_uri = CharField(null=True) @@ -203,8 +204,8 @@ class Meta: db_table = 'sample_sets' schema = 'data' - dataset = ForeignKeyField(Dataset, related_name='sample_sets') - collection = ForeignKeyField(Collection, related_name='sample_sets') + dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='sample_sets') + collection = ForeignKeyField(Collection, db_column="collection", related_name='sample_sets') sample_size = IntegerField() phenotype = CharField(null=True) @@ -233,7 +234,7 @@ class Meta: dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version", related_name='files') name = CharField(db_column="basename") uri = CharField() - bytes = IntegerField() + file_size = IntegerField() class DatasetLogo(BaseModel): @@ -274,6 +275,7 @@ class Meta: quality_metrics = BinaryJSONField() vep_annotations = BinaryJSONField() + class Coverage(BaseModel): """ Coverage statistics are pre-calculated for each variant for a given diff --git a/scripts/importer/data_importer/old_db.py b/scripts/importer/data_importer/old_db.py new file mode 100644 index 000000000..ba9870bb8 --- /dev/null +++ b/scripts/importer/data_importer/old_db.py @@ -0,0 +1,303 @@ +from peewee import ( + BlobField, + CharField, + DateTimeField, + Field, + FloatField, + ForeignKeyField, + IntegerField, + Model, + MySQLDatabase, + PrimaryKeyField, + TextField, + fn, + ) +import settings + +mysql_database = MySQLDatabase( + settings.mysql_schema, + host=settings.mysql_host, + user=settings.mysql_user, + password=settings.mysql_passwd, + port=settings.mysql_port + ) + + +class MySQLModel(Model): + class Meta: + database = mysql_database + + +class EnumField(Field): + db_field = 'string' # The same as for CharField + + def __init__(self, values=None, *args, **kwargs): + self.values = values or [] + super().__init__(*args, **kwargs) + + def db_value(self, value): + if value not in self.values: + raise ValueError("Illegal value for '{}'".format(self.db_column)) + return value + + def python_value(self, value): + if value not in self.values: + raise ValueError("Illegal value for '{}'".format(self.db_column)) + return value + + +class User(MySQLModel): + user = PrimaryKeyField(db_column='user_pk') + name = CharField(null=True) + email = CharField(unique=True) + identity = CharField(unique=True) + identity_type = EnumField(null=False, values=['google', 'elixir']) + affiliation = CharField(null=True) + country = CharField(null=True) + 
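+    # The three helpers below return row counts from the access tables;
+    # callers treat the counts as booleans.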
+ def is_admin(self, dataset): + return DatasetAccess.select().where( + DatasetAccess.dataset == dataset, + DatasetAccess.user == self, + DatasetAccess.is_admin + ).count() + + def has_access(self, dataset): + return DatasetAccessCurrent.select().where( + DatasetAccessCurrent.dataset == dataset, + DatasetAccessCurrent.user == self, + ).count() + + def has_requested_access(self, dataset): + return DatasetAccessPending.select().where( + DatasetAccessPending.dataset == dataset, + DatasetAccessPending.user == self + ).count() + + class Meta: + db_table = 'user' + + +class Study(MySQLModel): + study = PrimaryKeyField(db_column='study_pk') + pi_name = CharField() + pi_email = CharField() + contact_name = CharField() + contact_email = CharField() + title = CharField() + description = TextField(null=True) + publication_date = DateTimeField() + ref_doi = CharField(null=True) + + class Meta: + db_table = 'study' + + +class Dataset(MySQLModel): + dataset = PrimaryKeyField(db_column='dataset_pk') + study = ForeignKeyField(Study, db_column='study_pk', to_field='study', related_name='datasets') + short_name = CharField() + full_name = CharField() + browser_uri = CharField(null=True) + beacon_uri = CharField(null=True) + avg_seq_depth = FloatField(null=True) + seq_type = CharField(null=True) + seq_tech = CharField(null=True) + seq_center = CharField(null=True) + dataset_size = IntegerField() + mongodb_collection = CharField(null=False) + + def has_image(self): + try: + DatasetLogo.get(DatasetLogo.dataset == self) + return True + except DatasetLogo.DoesNotExist: + return False + + class Meta: + db_table = 'dataset' + + +class DatasetVersion(MySQLModel): + dataset_version = PrimaryKeyField(db_column='dataset_version_pk') + dataset = ForeignKeyField(Dataset, db_column='dataset_pk', to_field='dataset', related_name='versions') + version = CharField() + description = TextField() + terms = TextField() + var_call_ref = CharField(null=True) + available_from = DateTimeField() + ref_doi = CharField(null=True) + data_contact_name = CharField(null=True) + data_contact_link = CharField(null=True) + + class Meta: + db_table = 'dataset_version' + + +class Collection(MySQLModel): + collection = PrimaryKeyField(db_column = 'collection_pk') + name = CharField(null = True) + ethnicity = CharField(null = True) + + class Meta: + db_table = 'collection' + + +class SampleSet(MySQLModel): + sample_set = PrimaryKeyField(db_column='sample_set_pk') + dataset = ForeignKeyField(Dataset, db_column='dataset_pk', to_field='dataset', related_name='sample_sets') + collection = ForeignKeyField(Collection, db_column='collection_pk', to_field='collection', related_name='sample_sets') + sample_size = IntegerField() + phenotype = CharField(null=True) + + class Meta: + db_table = 'sample_set' + + +class DatasetFile(MySQLModel): + dataset_file = PrimaryKeyField(db_column='dataset_file_pk') + dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version_pk', to_field='dataset_version', related_name='files') + name = CharField() + uri = CharField() + bytes = IntegerField() + + class Meta: + db_table = 'dataset_file' + + +class UserAccessLog(MySQLModel): + user_access_log = PrimaryKeyField(db_column='user_access_log_pk') + user = ForeignKeyField(User, db_column='user_pk', to_field='user', related_name='access_logs') + dataset = ForeignKeyField(Dataset, db_column='dataset_pk', to_field='dataset', related_name='access_logs') + action = EnumField(null=True, values=['access_requested','access_granted','access_revoked','private_link']) + 
ts = DateTimeField() + + class Meta: + db_table = 'user_access_log' + + +class UserConsentLog(MySQLModel): + user_consent_log = PrimaryKeyField(db_column='user_access_log_pk') + user = ForeignKeyField(User, db_column='user_pk', to_field='user', related_name='consent_logs') + dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version_pk', to_field='dataset_version', related_name='consent_logs') + ts = DateTimeField() + + class Meta: + db_table = 'user_consent_log' + + +class UserDownloadLog(MySQLModel): + user_download_log = PrimaryKeyField(db_column='user_download_log_pk') + user = ForeignKeyField(User, db_column='user_pk', to_field='user', related_name='download_logs') + dataset_file = ForeignKeyField(DatasetFile, db_column='dataset_file_pk', to_field='dataset_file', related_name='download_logs') + ts = DateTimeField() + + class Meta: + db_table = 'user_download_log' + + +class DatasetAccess(MySQLModel): + dataset_access = PrimaryKeyField(db_column='dataset_access_pk') + dataset = ForeignKeyField(Dataset, db_column='dataset_pk', to_field='dataset', related_name='access') + user = ForeignKeyField(User, db_column='user_pk', to_field='user', related_name='access') + wants_newsletter = IntegerField(null=True) + is_admin = IntegerField(null=True) + + class Meta: + db_table = 'dataset_access' + + +class DatasetAccessCurrent(DatasetAccess): + dataset = ForeignKeyField(Dataset, db_column='dataset_pk', to_field='dataset', related_name='access_current') + user = ForeignKeyField(User, db_column='user_pk', to_field='user', related_name='access_current') + has_access = IntegerField() + access_requested = DateTimeField() + + class Meta: + db_table = 'dataset_access_current' + + +class DatasetAccessPending(DatasetAccess): + dataset = ForeignKeyField(Dataset, db_column='dataset_pk', to_field='dataset', related_name='access_pending') + user = ForeignKeyField(User, db_column='user_pk', to_field='user', related_name='access_pending') + has_access = IntegerField() + access_requested = DateTimeField() + + class Meta: + db_table = 'dataset_access_pending' + + +class DatasetLogo(MySQLModel): + dataset_logo = PrimaryKeyField(db_column='dataset_logo_pk') + dataset = ForeignKeyField(Dataset, db_column='dataset_pk', to_field='dataset', related_name='logo') + mimetype = CharField() + data = BlobField() + + class Meta: + db_table = 'dataset_logo' + + +class Linkhash(MySQLModel): + linkhash = PrimaryKeyField(db_column='linkhash_pk') + dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version_pk', to_field='dataset_version', related_name='link_hashes') + user = ForeignKeyField(User, db_column='user_pk', to_field='user', related_name='link_hashes') + hash = CharField() + expires_on = DateTimeField() + + class Meta: + db_table = 'linkhash' + + +class DatasetVersionCurrent(DatasetVersion): + dataset = ForeignKeyField(Dataset, db_column='dataset_pk', to_field='dataset', related_name='current_version') + + class Meta: + db_table = 'dataset_version_current' + + +class SFTPUser(MySQLModel): + sftp_user = PrimaryKeyField(db_column='sftp_user_pk') + user = ForeignKeyField(User, db_column='user_pk', to_field='user', related_name='sftp_user') + user_uid = IntegerField(unique=True) + user_name = CharField(null=False) + password_hash = CharField(null=False) + account_expires = DateTimeField(null=False) + + class Meta: + db_table = 'sftp_user' + + +def get_next_free_uid(): + """ + Returns the next free uid >= 10000, and higher than the current uid's + from the sftp_user table in the database. 
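+
+    Example (hypothetical state): if sftp_user already holds uids 10000 and
+    10001, the next call returns 10002; if the table is empty, the default
+    of 10000 is returned.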
+ """ + default = 10000 + next_uid = default + try: + current_max_uid = SFTPUser.select(fn.MAX(SFTPUser.user_uid)).get().user_uid + if current_max_uid: + next_uid = current_max_uid+1 + except SFTPUser.DoesNotExist: + pass + + return next_uid + + +def get_admin_datasets(user): + return DatasetAccess.select().where( DatasetAccess.user == user, DatasetAccess.is_admin) + + +def get_dataset(dataset): + dataset = Dataset.select().where( Dataset.short_name == dataset).get() + return dataset + + +def build_dict_from_row(row): + d = {} + for field in row._meta.sorted_fields: #pylint: disable=protected-access + column = field.db_column + if column.endswith("_pk"): + continue + d[column] = getattr(row, column) + return d diff --git a/scripts/importer/data_importer/old_db_importer.py b/scripts/importer/data_importer/old_db_importer.py new file mode 100644 index 000000000..4b4ac1af7 --- /dev/null +++ b/scripts/importer/data_importer/old_db_importer.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python3 + +import sys +import time +import logging +import db +from peewee import OperationalError, InterfaceError +from . import old_db + +from .data_importer import DataImporter + +class OldDbImporter( DataImporter ): + + def __init__(self, settings): + super().__init__(settings) + self.reference_sets = [] + self.id_map = {'collection':{}, + 'study':{}, + 'dataset':{}, + 'dataset_version':{}, + } + + def _select_reference_set(self): + if len(self.reference_sets) == 1: + logging.info(("Only one reference set is available, " + "will default to this set for all datasets")) + return self.reference_sets[0].id + else: + print("Select a reference set to use with this dataset") + retval = -1 + reflen = len(self.reference_sets)-1 + while retval not in [r.id for r in self.reference_sets]: + for reference_set in self.reference_sets: + print(" {} : {}".format(reference_set.id, reference_set.name)) + try: + retval = int(input("Please select a reference: ")) + except ValueError: + print("Please select a number between 0 and {}".format(reflen)) + return retval + + def _move_collections(self): + logging.info("Moving Collections") + for collection in old_db.Collection.select(): + logging.info(" - Moving '{}'".format(collection.name)) + + try: + new_id = db.Collection.get(name = collection.name, + ethnicity = collection.ethnicity).id + except db.Collection.DoesNotExist: + new_id = (db.Collection + .insert(name = collection.name, + ethnicity = collection.ethnicity) + .execute()) + + self.id_map['collection'][collection.collection] = new_id + + def _move_studies(self): + logging.info("Moving Studies") + for study in old_db.Study.select(): + logging.info(" - Moving '{}'".format(study.title)) + + try: + new_id = db.Study.get(pi_name = study.pi_name, + pi_email = study.pi_email, + contact_name = study.contact_name, + contact_email = study.contact_email, + title = study.title, + description = study.description, + publication_date = study.publication_date, + ref_doi = study.ref_doi).id + except db.Study.DoesNotExist: + new_id = (db.Study + .insert(pi_name = study.pi_name, + pi_email = study.pi_email, + contact_name = study.contact_name, + contact_email = study.contact_email, + title = study.title, + description = study.description, + publication_date = study.publication_date, + ref_doi = study.ref_doi) + .execute()) + + self.id_map['study'][study.study] = new_id + + def _move_datasets(self): + logging.info("Moving Datasets") + for dataset in old_db.Dataset.select(): + logging.info(" - Moving '{}'".format(dataset.short_name)) + study_ref_id = 
self.id_map['study'][dataset.study.study] + try: + # short_name is unique, so we only really need to check that. + new_id = db.Dataset.get(study = study_ref_id, + short_name = dataset.short_name).id + except db.Dataset.DoesNotExist: + target_reference_id = self._select_reference_set() + new_id = (db.Dataset + .insert(study = study_ref_id, + reference_set = target_reference_id, + short_name = dataset.short_name, + full_name = dataset.full_name, + browser_uri = dataset.browser_uri, + beacon_uri = dataset.beacon_uri, + avg_seq_depth = dataset.avg_seq_depth, + seq_type = dataset.seq_type, + seq_tech = dataset.seq_tech, + seq_center = dataset.seq_center, + dataset_size = dataset.dataset_size) + .execute()) + + self.id_map['dataset'][dataset.dataset] = new_id + + def _move_dataset_logos(self): + logging.info("Moving Dataset Logos") + for dataset_file in old_db.DatasetLogo.select(): + logging.info(" - Moving '{}'".format(dataset_file.mimetype)) + dataset_ref_id = self.id_map['dataset'][dataset_file.dataset.dataset] + try: + db.DatasetLogo.get(dataset = dataset_ref_id, + mimetype = dataset_file.mimetype, + data = dataset_file.data) + except db.DatasetLogo.DoesNotExist: + db.DatasetLogo.insert(dataset = dataset_ref_id, + mimetype = dataset_file.mimetype, + data = dataset_file.data).execute() + + def _move_dataset_versions(self): + logging.info("Moving Dataset Versions") + for dataset_version in old_db.DatasetVersion.select(): + logging.info(" - Moving '{}:{}'".format(dataset_version.dataset.short_name, dataset_version.version)) + dataset_ref_id = self.id_map['dataset'][dataset_version.dataset.dataset] + try: + new_id = db.DatasetVersion.get(dataset = dataset_ref_id, + version = dataset_version.version, + description = dataset_version.description, + terms = dataset_version.terms, + var_call_ref = dataset_version.var_call_ref, + available_from = dataset_version.available_from, + ref_doi = dataset_version.ref_doi, + data_contact_name = dataset_version.data_contact_name, + data_contact_link = dataset_version.data_contact_link).id + except db.DatasetVersion.DoesNotExist: + new_id = (db.DatasetVersion + .insert(dataset = dataset_ref_id, + version = dataset_version.version, + description = dataset_version.description, + terms = dataset_version.terms, + var_call_ref = dataset_version.var_call_ref, + available_from = dataset_version.available_from, + ref_doi = dataset_version.ref_doi, + data_contact_name = dataset_version.data_contact_name, + data_contact_link = dataset_version.data_contact_link) + .execute()) + + self.id_map['dataset_version'][dataset_version.dataset_version] = new_id + + def _move_dataset_files(self): + logging.info("Moving Dataset Files") + for dataset_file in old_db.DatasetFile.select(): + logging.info(" - Moving '{}'".format(dataset_file.name)) + dataset_version_ref_id = self.id_map['dataset_version'][dataset_file.dataset_version.dataset_version] + try: + db.DatasetFile.get(dataset_version = dataset_version_ref_id, + name = dataset_file.name, + uri = dataset_file.uri, + file_size = dataset_file.bytes) + except db.DatasetFile.DoesNotExist: + db.DatasetFile.insert(dataset_version = dataset_version_ref_id, + name = dataset_file.name, + uri = dataset_file.uri, + file_size = dataset_file.bytes).execute() + + def _move_sample_sets(self): + logging.info("Moving Sample Sets") + for sample_set in old_db.SampleSet.select(): + logging.info(" - Moving '{}'".format(sample_set.phenotype)) + dataset_ref_id = self.id_map['dataset'][sample_set.dataset.dataset] + collection_ref_id = 
self.id_map['collection'][sample_set.collection.collection] + try: + db.SampleSet.get(dataset = dataset_ref_id, + collection = collection_ref_id, + sample_size = sample_set.sample_size, + phenotype = sample_set.phenotype) + except db.SampleSet.DoesNotExist: + db.SampleSet.insert(dataset = dataset_ref_id, + collection = collection_ref_id, + sample_size = sample_set.sample_size, + phenotype = sample_set.phenotype).execute() + + def _move_database(self): + self._move_collections() + self._move_studies() + self._move_datasets() + self._move_dataset_logos() + self._move_dataset_versions() + self._move_sample_sets() + self._move_dataset_files() + + def prepare_data(self): + """ + Connects to the old and new databases. + """ + logging.info("Checking connection to old database") + try: + old_db.Collection.get() + except OperationalError: + logging.error("Could not connect to old database") + sys.exit(1) + logging.info("Checking connection to new database") + try: + db.ReferenceSet.get() + for reference_set in db.ReferenceSet.select(): + self.reference_sets += [reference_set] + except db.ReferenceSet.DoesNotExist: + logging.error(("Connection works, but no reference sets are available." + "use '--add_reference' to add a new reference set and" + "Then use this tool again.")) + sys.exit(1) + except (OperationalError, InterfaceError): + logging.error("Could not connect to new database") + sys.exit(1) + + def start_import(self): + start = time.time() + self._move_database() + + logging.info("Moved data in {}".format(self._time_since(start))) diff --git a/scripts/importer/data_importer/reference_set_importer.py b/scripts/importer/data_importer/reference_set_importer.py index 86f8cfe1a..67abba338 100644 --- a/scripts/importer/data_importer/reference_set_importer.py +++ b/scripts/importer/data_importer/reference_set_importer.py @@ -1,13 +1,14 @@ #!/usr/bin/env python3 import os +import re import gzip import time import shutil import logging import zipfile import db - +from peewee import IntegrityError from .data_importer import DataImporter class ReferenceSetImporter( DataImporter ): @@ -78,18 +79,25 @@ def _insert_genes(self): for i, gene in enumerate(self.genes): # As far as I know I can't batch insert these and still get the id's back - db_gene = db.Gene( reference_set = self.db_reference, - gene_id = gene['gene_id'], - name = gene['name'], - full_name = gene.get('full_name', None), - other_names = gene.get('other_names', None), - canonical_transcript = gene.get('canonical_transcript', None), - chrom = gene['chrom'], - start = gene['start'], - end = gene['stop'], - strand = gene['strand'] - ) - db_gene.save() + try: + db_gene = db.Gene( reference_set = self.db_reference, + gene_id = gene['gene_id'], + name = gene['name'], + full_name = gene.get('full_name', None), + other_names = gene.get('other_names', None), + canonical_transcript = gene.get('canonical_transcript', None), + chrom = gene['chrom'], + start = gene['start'], + end = gene['stop'], + strand = gene['strand'] + ) + db_gene.save() + except IntegrityError as e: + print("\n") + logging.warning("Ignoring ") + print("{}:{}".format(type(e),e)) + import sys + sys.exit(0) self.gene_db_ids[gene['gene_id']] = db_gene.id progress = i / len(self.genes) @@ -109,12 +117,13 @@ def _insert_reference(self): else: logging.info("Using dbsnp_version '{}'".format(version_id)) + omim_filename = self.settings.omim_file.split("/")[-1] logging.info("inserting reference header") self.db_reference = db.ReferenceSet(name = None, ensembl_version = 
self.settings.ensembl_version, gencode_version = self.settings.gencode_version, dbnsfp_version = self.settings.dbnsfp_version, - omim_version = self.settings.omim_file, + omim_version = omim_filename, dbsnp_version = dbsnp_version.id) self.db_reference.save() logging.info("Reference {} created".format(self.db_reference.id)) @@ -158,7 +167,13 @@ def _open_dbnsfp(self): logging.info("----- Opening dbNSFP file -----") url = ReferenceSetImporter.DBNSFP.format(a=self.settings) filename = url.split("/")[-1] - dbnsfp_file = "dbNSFP2.9_gene" + match = re.match("^\d+.\d+", self.settings.dbnsfp_version) + if match: + dbnsfp_gene_version = match.group(0) + else: + raise ValueError("Cannot parse dbNSFP version") + dbnsfp_file = "dbNSFP{}_gene".format(dbnsfp_gene_version) + logging.info("Using dbNSFP gene file: {}".format(dbnsfp_file)) dbnsfp_path = os.path.join( self.download_dir, dbnsfp_file ) dbnsfp_gzip = "{}.gz".format(dbnsfp_path) try: @@ -307,8 +322,6 @@ def _read_omim(self): for i, transcript in enumerate(self.transcripts): if transcript['transcript_id'] in cache: self.transcripts[i].update(cache[transcript['transcript_id']]) - if counter == 0: - print(self.transcripts[i]) counter += 1 else: self.transcripts[i].update(empty) diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index cfdaad0a9..99451a0cb 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -5,6 +5,7 @@ from data_importer.dbsnp_importer import DbSNPImporter from data_importer.reference_set_importer import ReferenceSetImporter +from data_importer.old_db_importer import OldDbImporter if __name__ == '__main__': @@ -25,9 +26,6 @@ help = "Default directory to download and look for files.") # Reference versions - parser.add_argument("--add_reference", action="store_true", - help = "Insert new reference set.") - parser.add_argument("--gencode_version", default=19, type=int, help = "Gencode version to download and use.") parser.add_argument("--ensembl_version", default="homo_sapiens_core_75_37", @@ -47,8 +45,14 @@ # Actions + parser.add_argument("--add_reference", action="store_true", + help = "Insert new reference set.") parser.add_argument("--add_dbsnp", action="store_true", help = "Adds a new dbSNP version to the database.") + parser.add_argument("--move_studies", action="store_true", + help = ("Moves studies and datasets from an old database" + " to a new one.")) + # Logging and verbosity parser.add_argument("--disable_progress", action="store_true", @@ -84,4 +88,9 @@ importer.prepare_data() if not args.disable_progress: importer.count_entries() - importer.start_import() \ No newline at end of file + importer.start_import() + + if args.move_studies: + importer = OldDbImporter(args) + importer.prepare_data() + importer.start_import() From a585601741f8fa25005716a3e3f03f95bbf6d6ca Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Fri, 14 Sep 2018 21:14:55 +0200 Subject: [PATCH 005/360] removes accidentally committed debugging code. 
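
The removed block below wrapped gene creation in a try/except that dumped the
IntegrityError and called sys.exit(0). If duplicate gene_ids ever need to be
skipped rather than aborting the import, something along these lines could be
used instead (a sketch only, reusing the names from _insert_genes() and the
module-level logging import; not part of this patch):

    from peewee import IntegrityError

    for i, gene in enumerate(self.genes):
        try:
            db_gene = db.Gene(reference_set = self.db_reference,
                              gene_id = gene['gene_id'],
                              name = gene['name'],
                              chrom = gene['chrom'],
                              start = gene['start'],
                              end = gene['stop'],
                              strand = gene['strand'])
            db_gene.save()
        except IntegrityError:
            # Log the duplicate gene_id and move on instead of terminating
            # the whole import.
            logging.warning("gene '%s' already exists, skipping", gene['gene_id'])
            continue
        self.gene_db_ids[gene['gene_id']] = db_gene.id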
--- .../data_importer/reference_set_importer.py | 31 +++++++------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/scripts/importer/data_importer/reference_set_importer.py b/scripts/importer/data_importer/reference_set_importer.py index 67abba338..22b0791af 100644 --- a/scripts/importer/data_importer/reference_set_importer.py +++ b/scripts/importer/data_importer/reference_set_importer.py @@ -79,25 +79,18 @@ def _insert_genes(self): for i, gene in enumerate(self.genes): # As far as I know I can't batch insert these and still get the id's back - try: - db_gene = db.Gene( reference_set = self.db_reference, - gene_id = gene['gene_id'], - name = gene['name'], - full_name = gene.get('full_name', None), - other_names = gene.get('other_names', None), - canonical_transcript = gene.get('canonical_transcript', None), - chrom = gene['chrom'], - start = gene['start'], - end = gene['stop'], - strand = gene['strand'] - ) - db_gene.save() - except IntegrityError as e: - print("\n") - logging.warning("Ignoring ") - print("{}:{}".format(type(e),e)) - import sys - sys.exit(0) + db_gene = db.Gene( reference_set = self.db_reference, + gene_id = gene['gene_id'], + name = gene['name'], + full_name = gene.get('full_name', None), + other_names = gene.get('other_names', None), + canonical_transcript = gene.get('canonical_transcript', None), + chrom = gene['chrom'], + start = gene['start'], + end = gene['stop'], + strand = gene['strand'] + ) + db_gene.save() self.gene_db_ids[gene['gene_id']] = db_gene.id progress = i / len(self.genes) From 150a636ec10caf5b7e8377a9b0c4dad6eee60aee Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Sat, 15 Sep 2018 21:53:44 +0200 Subject: [PATCH 006/360] Adds support to move user fields. --- backend/db.py | 72 +++++++++ scripts/importer/data_importer/old_db.py | 2 +- .../importer/data_importer/old_db_importer.py | 147 ++++++++++++++++-- scripts/importer/importer.py | 8 + 4 files changed, 217 insertions(+), 12 deletions(-) diff --git a/backend/db.py b/backend/db.py index 48ea11d43..4e74b65c9 100644 --- a/backend/db.py +++ b/backend/db.py @@ -3,6 +3,7 @@ import settings from peewee import (BigIntegerField, BlobField, + BooleanField, CharField, DateTimeField, IntegerField, @@ -317,3 +318,74 @@ class Meta: metric = CharField() mids = ArrayField(IntegerField) hist = ArrayField(IntegerField) + +class Users(BaseModel): + class Meta: + db_table = "users" + schema = 'users' + + name = CharField(db_column="username", null=True) + email = CharField(unique=True) + identity = CharField(unique=True) + identity_type = EnumField(null=False, choices=['google', 'elixir']) + affiliation = CharField(null=True) + country = CharField(null=True) + +class SFTPUsers(BaseModel): + class Meta: + db_table = "sftp_users" + schema = 'users' + + user = ForeignKeyField(Users, related_name='sftp_user') + user_uid = IntegerField(unique=True) + user_name = CharField(null=False) + password_hash = CharField(null=False) + account_expires = DateTimeField(null=False) + +class UserAccessLog(BaseModel): + class Meta: + db_table = "user_access_log" + schema = 'users' + + user = ForeignKeyField(Users, related_name='access_logs') + dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access_logs') + action = EnumField(null=True, choices=['access_granted','access_revoked','access_requested','private_link']) + ts = DateTimeField() + +class UserConsentLog(BaseModel): + class Meta: + db_table = "user_consent_log" + schema = 'users' + + user = ForeignKeyField(Users, related_name='consent_logs') 
+ dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version', related_name='consent_logs') + ts = DateTimeField() + +class UserDownloadLog(BaseModel): + class Meta: + db_table = "user_download_log" + schema = 'users' + + user = ForeignKeyField(Users, related_name='download_logs') + dataset_file = ForeignKeyField(DatasetFile, db_column='dataset_file', related_name='download_logs') + ts = DateTimeField() + +class DatasetAccess(BaseModel): + class Meta: + db_table = "dataset_access" + schema = 'users' + + dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access') + user = ForeignKeyField(Users, related_name='access') + wants_newsletter = BooleanField(null=True) + is_admin = BooleanField(null=True) + +class Linkhash(BaseModel): + class Meta: + db_table = "linkhash" + schema = 'users' + + dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version', related_name='link_hashes') + user = ForeignKeyField(Users, related_name='link_hashes') + hash = CharField() + expires_on = DateTimeField() diff --git a/scripts/importer/data_importer/old_db.py b/scripts/importer/data_importer/old_db.py index ba9870bb8..226276b0d 100644 --- a/scripts/importer/data_importer/old_db.py +++ b/scripts/importer/data_importer/old_db.py @@ -177,7 +177,7 @@ class Meta: class UserConsentLog(MySQLModel): - user_consent_log = PrimaryKeyField(db_column='user_access_log_pk') + user_consent_log = PrimaryKeyField(db_column='user_consent_log_pk') user = ForeignKeyField(User, db_column='user_pk', to_field='user', related_name='consent_logs') dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version_pk', to_field='dataset_version', related_name='consent_logs') ts = DateTimeField() diff --git a/scripts/importer/data_importer/old_db_importer.py b/scripts/importer/data_importer/old_db_importer.py index 4b4ac1af7..09a8da272 100644 --- a/scripts/importer/data_importer/old_db_importer.py +++ b/scripts/importer/data_importer/old_db_importer.py @@ -18,6 +18,8 @@ def __init__(self, settings): 'study':{}, 'dataset':{}, 'dataset_version':{}, + 'dataset_file':{}, + 'user':{} } def _select_reference_set(self): @@ -112,7 +114,6 @@ def _move_datasets(self): def _move_dataset_logos(self): logging.info("Moving Dataset Logos") for dataset_file in old_db.DatasetLogo.select(): - logging.info(" - Moving '{}'".format(dataset_file.mimetype)) dataset_ref_id = self.id_map['dataset'][dataset_file.dataset.dataset] try: db.DatasetLogo.get(dataset = dataset_ref_id, @@ -126,7 +127,6 @@ def _move_dataset_logos(self): def _move_dataset_versions(self): logging.info("Moving Dataset Versions") for dataset_version in old_db.DatasetVersion.select(): - logging.info(" - Moving '{}:{}'".format(dataset_version.dataset.short_name, dataset_version.version)) dataset_ref_id = self.id_map['dataset'][dataset_version.dataset.dataset] try: new_id = db.DatasetVersion.get(dataset = dataset_ref_id, @@ -159,20 +159,22 @@ def _move_dataset_files(self): logging.info(" - Moving '{}'".format(dataset_file.name)) dataset_version_ref_id = self.id_map['dataset_version'][dataset_file.dataset_version.dataset_version] try: - db.DatasetFile.get(dataset_version = dataset_version_ref_id, - name = dataset_file.name, - uri = dataset_file.uri, - file_size = dataset_file.bytes) + new_id = db.DatasetFile.get(dataset_version = dataset_version_ref_id, + name = dataset_file.name, + uri = dataset_file.uri, + file_size = dataset_file.bytes).id except db.DatasetFile.DoesNotExist: - db.DatasetFile.insert(dataset_version = 
dataset_version_ref_id, - name = dataset_file.name, - uri = dataset_file.uri, - file_size = dataset_file.bytes).execute() + new_id = (db.DatasetFile + .insert(dataset_version = dataset_version_ref_id, + name = dataset_file.name, + uri = dataset_file.uri, + file_size = dataset_file.bytes).execute()) + + self.id_map['dataset_file'][dataset_file.dataset_file] = new_id def _move_sample_sets(self): logging.info("Moving Sample Sets") for sample_set in old_db.SampleSet.select(): - logging.info(" - Moving '{}'".format(sample_set.phenotype)) dataset_ref_id = self.id_map['dataset'][sample_set.dataset.dataset] collection_ref_id = self.id_map['collection'][sample_set.collection.collection] try: @@ -195,6 +197,128 @@ def _move_database(self): self._move_sample_sets() self._move_dataset_files() + def _move_users(self): + logging.info("Moving Users") + for user in old_db.User.select(): + try: + new_id = (db.Users + .get(name = user.name, + email = user.email, + identity = user.identity, + identity_type = user.identity_type, + affiliation = user.affiliation, + country = user.country).id) + except db.Users.DoesNotExist: + new_id = (db.Users + .insert(name = user.name, + email = user.email, + identity = user.identity, + identity_type = user.identity_type, + affiliation = user.affiliation, + country = user.country).execute()) + + self.id_map['user'][user.user] = new_id + + def _move_sftp_users(self): + logging.info("Moving SFTP Users") + for user in old_db.SFTPUser.select(): + user_ref_id = self.id_map['user'][user.user.user] + try: + # user_uid is unique, so we rely on that + db.SFTPUsers.get(user = user_ref_id, + user_uid = user.user_uid) + except db.SFTPUsers.DoesNotExist: + db.SFTPUsers.insert(user = user_ref_id, + user_uid = user.user_uid, + user_name = user.user_name, + password_hash = user.password_hash, + account_expires = user.account_expires).execute() + + def _move_user_access_logs(self): + logging.info("Moving User Access Logs") + for log in old_db.UserAccessLog.select(): + user_ref_id = self.id_map['user'][log.user.user] + dataset_ref_id = self.id_map['dataset'][log.dataset.dataset] + try: + db.UserAccessLog.get(user = user_ref_id, + dataset = dataset_ref_id, + action = log.action, + ts = log.ts) + except db.UserAccessLog.DoesNotExist: + db.UserAccessLog.insert(user = user_ref_id, + dataset = dataset_ref_id, + action = log.action, + ts = log.ts).execute() + + def _move_user_consent_logs(self): + logging.info("Moving User Consent Logs") + for log in old_db.UserConsentLog.select(): + user_ref_id = self.id_map['user'][log.user.user] + version_ref_id = self.id_map['dataset_version'][log.dataset_version.dataset_version] + try: + db.UserConsentLog.get(user = user_ref_id, + dataset_version = version_ref_id, + ts = log.ts) + except db.UserConsentLog.DoesNotExist: + db.UserConsentLog.insert(user = user_ref_id, + dataset_version = version_ref_id, + ts = log.ts).execute() + + def _move_user_download_logs(self): + logging.info("Moving User Download Logs") + for log in old_db.UserDownloadLog.select(): + user_ref_id = self.id_map['user'][log.user.user] + file_ref_id = self.id_map['dataset_file'][log.dataset_file.dataset_file] + try: + db.UserDownloadLog.get(user = user_ref_id, + dataset_file = file_ref_id, + ts = log.ts) + except db.UserDownloadLog.DoesNotExist: + db.UserDownloadLog.insert(user = user_ref_id, + dataset_file = file_ref_id, + ts = log.ts).execute() + + def _move_dataset_access(self): + logging.info("Moving Dataset Access Records") + for access in old_db.DatasetAccess.select(): + user_ref_id = 
self.id_map['user'][access.user.user] + dataset_ref_id = self.id_map['dataset'][access.dataset.dataset] + try: + db.DatasetAccess.get(dataset = dataset_ref_id, + user = user_ref_id, + wants_newsletter = access.wants_newsletter, + is_admin = access.is_admin) + except db.DatasetAccess.DoesNotExist: + db.DatasetAccess.insert(dataset = dataset_ref_id, + user = user_ref_id, + wants_newsletter = access.wants_newsletter, + is_admin = access.is_admin).execute() + + def _move_linkhashes(self): + logging.info("Moving Linkhashes") + for linkhash in old_db.Linkhash.select(): + user_ref_id = self.id_map['user'][linkhash.user.user] + version_ref_id = self.id_map['dataset_version'][linkhash.dataset_version.dataset_version] + try: + db.Linkhash.get(dataset_version = version_ref_id, + user = user_ref_id, + hash = linkhash.hash, + expires_on = linkhash.expires_on) + except db.Linkhash.DoesNotExist: + db.Linkhash.insert(dataset_version = version_ref_id, + user = user_ref_id, + hash = linkhash.hash, + expires_on = linkhash.expires_on).execute() + + def _move_userbase(self): + self._move_users() + self._move_sftp_users() + self._move_user_access_logs() + self._move_user_consent_logs() + self._move_user_download_logs() + self._move_dataset_access() + self._move_linkhashes() + def prepare_data(self): """ Connects to the old and new databases. @@ -222,5 +346,6 @@ def prepare_data(self): def start_import(self): start = time.time() self._move_database() + self._move_userbase() logging.info("Moved data in {}".format(self._time_since(start))) diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index 99451a0cb..15b79d43d 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -1,6 +1,14 @@ #!/usr/bin/env python3 """ Swefreq data importer. + +Requires apt-packages: + - python3-pip + - sudo apt-get install libmysqlclient-dev + +as well as pip3 packages: + - mysqlclient + - peewee-2.10.2 """ from data_importer.dbsnp_importer import DbSNPImporter From e327cbb149fb9abb20c6edad982585a8f7bb2e74 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Thu, 20 Sep 2018 09:53:54 +0200 Subject: [PATCH 007/360] Adds basic raw data importer. 
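
The importer normalizes every alternative allele to its minimal
representation before building variant ids, so the same indel always gets
the same key regardless of how the VCF spelled it. For illustration
(coordinates and alleles are made up):

    >>> get_minimal_representation(1001, 'A', 'G')       # simple SNV, unchanged
    (1001, 'A', 'G')
    >>> get_minimal_representation(1001, 'CTCC', 'CCC')  # shared suffix stripped
    (1001, 'CT', 'C')
    >>> get_minimal_representation(1001, 'CTCC', 'CTC')  # suffix and prefix stripped, position shifted
    (1002, 'TC', 'T')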
--- backend/db.py | 1 - .../importer/data_importer/old_db_importer.py | 2 +- .../data_importer/raw_data_importer.py | 258 ++++++++++++++++++ scripts/importer/importer.py | 15 + 4 files changed, 274 insertions(+), 2 deletions(-) create mode 100644 scripts/importer/data_importer/raw_data_importer.py diff --git a/backend/db.py b/backend/db.py index 4e74b65c9..d7f135605 100644 --- a/backend/db.py +++ b/backend/db.py @@ -297,7 +297,6 @@ class Meta: pos = IntegerField() mean = FloatField() median = FloatField() - chrom = CharField(max_length=10) cov1 = FloatField() cov5 = FloatField() cov10 = FloatField() diff --git a/scripts/importer/data_importer/old_db_importer.py b/scripts/importer/data_importer/old_db_importer.py index 09a8da272..ca3e06705 100644 --- a/scripts/importer/data_importer/old_db_importer.py +++ b/scripts/importer/data_importer/old_db_importer.py @@ -37,7 +37,7 @@ def _select_reference_set(self): try: retval = int(input("Please select a reference: ")) except ValueError: - print("Please select a number between 0 and {}".format(reflen)) + print("Please select a number in {}".format([r.id for r in self.reference_sets])) return retval def _move_collections(self): diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py new file mode 100644 index 000000000..f68a65689 --- /dev/null +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -0,0 +1,258 @@ +#!/usr/bin/env python3 + +import re +import sys +import json +import time +import logging + +import db +from .data_importer import DataImporter + +METRICS = [ + 'BaseQRankSum', + 'ClippingRankSum', + 'DP', + 'FS', + 'InbreedingCoeff', + 'MQ', + 'MQRankSum', + 'QD', + 'ReadPosRankSum', + 'VQSLOD' +] + +def get_minimal_representation(pos, ref, alt): + """ + Get the minimal representation of a variant, based on the ref + alt alleles in a VCF + This is used to make sure that multiallelic variants in different datasets, + with different combinations of alternate alleles, can always be matched directly. 
+
+    Note that chromosome is ignored here - in xbrowse, we'll probably be dealing with 1D coordinates
+    Args:
+        pos (int): genomic position in a chromosome (1-based)
+        ref (str): ref allele string
+        alt (str): alt allele string
+    Returns:
+        tuple: (pos, ref, alt) of remapped coordinate
+    """
+    pos = int(pos)
+    # If it's a simple SNV, don't remap anything
+    if len(ref) == 1 and len(alt) == 1:
+        return pos, ref, alt
+    else:
+        # strip off identical suffixes
+        while(alt[-1] == ref[-1] and min(len(alt),len(ref)) > 1):
+            alt = alt[:-1]
+            ref = ref[:-1]
+        # strip off identical prefixes and increment position
+        while(alt[0] == ref[0] and min(len(alt),len(ref)) > 1):
+            alt = alt[1:]
+            ref = ref[1:]
+            pos += 1
+        return pos, ref, alt
+
+class RawDataImporter( DataImporter ):
+
+    def __init__(self, settings):
+        super().__init__(settings)
+        self.dataset_version = None
+        self.counter = {'coverage':None,
+                        'variants':None}
+
+    def _select_dataset_version(self):
+        datasets = []
+
+        print("Select a Dataset to use with this data")
+        for dataset in db.Dataset.select():
+            print(" {} : {}".format(dataset.id, dataset.short_name))
+            datasets += [dataset]
+
+        d_id = -1
+        while d_id not in [d.id for d in datasets]:
+            try:
+                d_id = int(input("Please select a dataset: "))
+            except ValueError:
+                print("Please select a number in {}".format([d.id for d in datasets]))
+
+        versions = []
+        print("Select a Version of this dataset to use")
+        for version in db.DatasetVersion.select():
+            print(" {} : {}".format(version.id, version.version))
+            versions += [version]
+
+        v_id = -1
+        while v_id not in [v.id for v in versions]:
+            try:
+                v_id = int(input("Please select a version: "))
+            except ValueError:
+                print("Please select a number in {}".format([v.id for v in versions]))
+
+        self.dataset_version = [v for v in versions if v.id == v_id][0]
+
+    def _insert_coverage(self):
+        """
+        Header columns are chromosome, position, mean coverage, median coverage,
+        and then coverage under 1, 5, 10, 15, 20, 25, 30, 50, 100.
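+
+        Example input line (tab-separated; all values are made up):
+
+            1   55505475   45.2   44.0   1.00   0.99   0.98   0.95   0.91   0.88   0.85   0.60   0.12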
+        """
+        start = time.time()
+        header = [('chrom', str), ('pos', int), ('mean', float),
+                  ('median', float), ('cov1', float), ('cov5', float),
+                  ('cov10', float), ('cov15', float), ('cov20', float), ('cov25', float),
+                  ('cov30', float), ('cov50', float), ('cov100', float)]
+        logging.info("Inserting Coverage")
+        batch = []
+        last_progress = 0.0
+        counter = 0
+        with db.database.atomic():
+            for line in self._open(self.settings.coverage_file):
+                line = line.decode('utf8').strip()
+                if line.startswith("#"):
+                    continue
+
+                data = {}
+                for i, item in enumerate(line.strip().split("\t")):
+                    if i == 0:
+                        data['dataset_version'] = self.dataset_version
+                    data[header[i][0]] = header[i][1](item)
+
+                if self.counter['coverage'] != None:
+                    counter += 1
+
+                batch += [data]
+                if len(batch) >= self.settings.batch_size:
+
+                    db.Coverage.insert_many(batch).execute()
+                    batch = []
+                    # Update progress
+                    if self.counter['coverage'] != None:
+                        progress = counter / self.counter['coverage']
+                        while progress > last_progress + 0.01:
+                            if not last_progress:
+                                logging.info("Estimated time to completion: {}".format(self._time_to(start, progress)))
+                                self._print_progress_bar()
+                            self._tick()
+                            last_progress += 0.01
+        if batch:
+            db.Coverage.insert_many(batch)
+        if self.counter['coverage'] != None:
+            self._tick(True)
+        logging.info("Inserted {} coverage records in {}".format(counter, self._time_since(start)))
+
+    def _insert_variants(self):
+        logging.info("Inserting variants")
+        header = [("chrom",str), ("pos", int), ("rsid", str), ("ref", str),
+                  ("alt", str), ("site_quality", float), ("filter_string", str)]
+        start = time.time()
+        batch = []
+        last_progress = 0.0
+        counter = 0
+        vep_field_names = None
+        dp_mids = None
+        gq_mids = None
+        with db.database.atomic():
+            for line in self._open(self.settings.variant_file):
+                line = line.decode('utf8').strip()
+                if line.startswith("#"):
+                    # Check for some information that we need
+                    if line.startswith('##INFO=<ID=CSQ'):
+                        vep_field_names = line.split('Format: ')[-1].strip('">').split('|')
+                    if line.startswith('##INFO=<ID=DP_HIST'):
+                        dp_mids = map(float, line.split('Mids: ')[-1].strip('">').split('|'))
+                    if line.startswith('##INFO=<ID=GQ_HIST'):
+                        gq_mids = map(float, line.split('Mids: ')[-1].strip('">').split('|'))
+                    continue
+
+                if vep_field_names is None:
+                    logging.error("VEP_field_names is empty. 
Make sure VCF header is present.") + sys.exit(1) + + base = {} + for i, item in enumerate(line.strip().split("\t")): + if i == 0: + base['dataset_version'] = self.dataset_version + if i < 7: + base[header[i][0]] = header[i][1](item) + else: + info = dict([(x.split('=', 1)) if '=' in x else (x, x) for x in re.split(';(?=\w)', item)]) + + if base["chrom"].startswith('GL') or base["chrom"].startswith('MT'): + continue + + consequence_array = info['CSQ'].split(',') if 'CSQ' in info else [] + annotations = [dict(zip(vep_field_names, x.split('|'))) for x in consequence_array if len(vep_field_names) == len(x.split('|'))] + + alt_alleles = base['alt'].split(",") + for i, alt in enumerate(alt_alleles): + vep_annotations = [ann for ann in annotations if int(ann['ALLELE_NUM']) == i + 1] + + data = dict(base) + data['alt'] = alt + data['rsid'] = int(data['rsid'].strip('rs')) if data['rsid'].startswith('rs') else None + data['allele_num'] = int(info['AN_Adj']) + data['allele_count'] = int(info['AC_Adj'].split(',')[i]) + data['allele_freq'] = None + if 'AF' in info and data['allele_num'] > 0: + data['allele_freq'] = data['allele_count']/float(info['AN_Adj']) + + data['vep_annotations'] = json.dumps(vep_annotations) + data['genes'] = list({annotation['Gene'] for annotation in vep_annotations}) + data['transcripts'] = list({annotation['Feature'] for annotation in vep_annotations}) + + data['orig_alt_alleles'] = [ + '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles + ] + # I don't think this is needed. + #data['hom_count'] = + data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt']) + data['quality_metrics'] = json.dumps(dict([(x, info[x]) for x in METRICS if x in info])) + batch += [data] + + counter += 1 + + if len(batch) >= self.settings.batch_size: + db.Variant.insert_many(batch).execute() + batch = [] + # Update progress + if self.counter['variants'] != None: + progress = counter / self.counter['variants'] + while progress > last_progress + 0.01: + if not last_progress: + logging.info("Estimated time to completion: {}".format(self._time_to(start, progress))) + self._print_progress_bar() + self._tick() + last_progress += 0.01 + if batch: + db.Variant.insert_many(batch) + if self.counter['variants'] != None: + self._tick(True) + logging.info("Inserted {} variant records in {}".format(counter, self._time_since(start))) + + def count_entries(self): + start = time.time() + self.counter['coverage'] = 0 + logging.info("Counting coverage lines") + for line in self._open(self.settings.coverage_file): + line = line.decode('utf8').strip() + if line.startswith("#"): + continue + self.counter['coverage'] += 1 + # print("'{}'".format(line.split("\t"))) + # break + + self.counter['variants'] = 0 + logging.info("Counting variant lines") + for line in self._open(self.settings.variant_file): + line = line.decode('utf8').strip() + if line.startswith("#"): + continue + self.counter['variants'] += 1 + + logging.info("Counted input data lines in {} ".format(self._time_since(start))) + + def prepare_data(self): + self._select_dataset_version() + + def start_import(self): + #self._insert_coverage() + self._insert_variants() diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index 15b79d43d..999eccc03 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -14,6 +14,7 @@ from data_importer.dbsnp_importer import DbSNPImporter from data_importer.reference_set_importer import 
ReferenceSetImporter from data_importer.old_db_importer import OldDbImporter +from data_importer.raw_data_importer import RawDataImporter if __name__ == '__main__': @@ -51,10 +52,17 @@ "omim_info.txt.gz"), help = "OMIM annotation file.") + # Raw data (coverage + variants) files + parser.add_argument("--coverage_file", default=os.path.join(os.path.dirname(__file__), + "data", "Panel.0001.txt.gz")) + parser.add_argument("--variant_file", default=os.path.join(os.path.dirname(__file__), + "data", "variations.vcf.gz")) # Actions parser.add_argument("--add_reference", action="store_true", help = "Insert new reference set.") + parser.add_argument("--add_raw_data", action="store_true", + help = "Adds a Coverage and Variants to the database.") parser.add_argument("--add_dbsnp", action="store_true", help = "Adds a new dbSNP version to the database.") parser.add_argument("--move_studies", action="store_true", @@ -102,3 +110,10 @@ importer = OldDbImporter(args) importer.prepare_data() importer.start_import() + + if args.add_raw_data: + importer = RawDataImporter(args) + importer.prepare_data() + if not args.disable_progress: + importer.count_entries() + importer.start_import() From e2faa76a4913983e9b7fb3acf03625401ad6cdf9 Mon Sep 17 00:00:00 2001 From: Johan Viklund Date: Thu, 20 Sep 2018 09:43:09 +0200 Subject: [PATCH 008/360] Quick and dirty postgres --- docker-compose.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 1a861d0f3..fa8643e8e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,6 +19,14 @@ services: - type: volume source: mysql-data-volume target: /var/lib/mysql/ + postgres: + container_name: postgres + restart: always + image: postgres:latest + volumes: + - ./postgres-data:/var/lib/postgresql/data + ports: + - 5432:5432 rebuilder: build: context: ./ From eff70cdc6d8fb1eb3bde265001dd2bd774e3e4ee Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Thu, 20 Sep 2018 09:58:59 +0200 Subject: [PATCH 009/360] Adds postgres schemas --- sql/beacon_schema.sql | 47 ++++++++++ sql/data_schema.sql | 207 ++++++++++++++++++++++++++++++++++++++++++ sql/user_schema.sql | 75 +++++++++++++++ 3 files changed, 329 insertions(+) create mode 100644 sql/beacon_schema.sql create mode 100644 sql/data_schema.sql create mode 100644 sql/user_schema.sql diff --git a/sql/beacon_schema.sql b/sql/beacon_schema.sql new file mode 100644 index 000000000..a43a41af6 --- /dev/null +++ b/sql/beacon_schema.sql @@ -0,0 +1,47 @@ +------------------------------------------------------------------------------- +-- +-- + +-------------------------------------------------------------------------------- +-- Beacon consent codes. +-- +-- These tables are only used by the beacon, and are thus copied directly from +-- the default beacon schema. 
+ +CREATE TABLE beacon.consent_code_category_table ( + id serial PRIMARY KEY, + name character varying(11) +); + +INSERT INTO beacon.consent_code_category_table(name) VALUES ('PRIMARY'); +INSERT INTO beacon.consent_code_category_table(name) VALUES ('SECONDARY'); +INSERT INTO beacon.consent_code_category_table(name) VALUES ('REQUIREMENT'); + +CREATE TABLE beacon.consent_code_table ( + id serial PRIMARY KEY, + name character varying(100) NOT NULL, + abbr character varying(20) NOT NULL, + description character varying(400) NOT NULL, + additional_constraint_required boolean NOT NULL, + category_id int NOT NULL REFERENCES beacon.consent_code_category_table(id) +); + +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('No restrictions', 'NRES', 'No restrictions on data use.', false, 1); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('General research use and clinical care', 'GRU(CC)', 'For health/medical/biomedical purposes, including the study of population origins or ancestry.', false, 1); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Health/medical/biomedical research and clinical care', 'HMB(CC)', 'Use of the data is limited to health/medical/biomedical purposes; does not include the study of population origins or ancestry.', false, 1); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Disease-specific research and clinical care', 'DS-[XX](CC)', 'Use of the data must be related to [disease].', true, 1); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Population origins/ancestry research', 'POA', 'Use of the data is limited to the study of population origins or ancestry.', false, 1); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Oher research-specific restrictions', 'RS-[XX]', 'Use of the data is limited to studies of [research type] (e.g., pediatric research).', true, 2); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Research use only', 'RUO', 'Use of data is limited to research purposes (e.g., does not include its use in clinical care).', false, 2); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('No “general methods” research', 'NMDS', 'Use of the data includes methods development research (e.g., development of software or algorithms) ONLY within the bounds of other data use limitations.', false, 2); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Genetic studies only', 'GSO', 'Use of the data is limited to genetic studies only (i.e., no “phenotype-only” research).', false, 2); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Not-for-profit use only', 'NPU', 'Use of the data is limited to not-for-profit organizations.', false, 3); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Publication required', 'PUB', 'Requestor agrees to make results of studies using the data available to the larger scientific community.', false, 3); +INSERT INTO 
beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Collaboration required', 'COL-[XX]', 'Requestor must agree to collaboration with the primary study investigator(s).', true, 3); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Ethics approval required', 'IRB', 'Requestor must provide documentation of local IRB/REC approval.', false, 3); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Geographical restrictions', 'GS-[XX]', 'Use of the data is limited to within [geographic region].', true, 3); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Publication moratorium/embargo', 'MOR-[XX]', 'Requestor agrees not to publish results of studies until [date].', true, 3); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Time limits on use', 'TS-[XX]', 'Use of data is approved for [x months].', true, 3); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('User-specific restrictions', 'US', 'Use of data is limited to use by approved users.', false, 3); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Project-specific restrictions', 'PS', 'Use of data is limited to use within an approved project.', false, 3); +INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Institution-specific restrictions', 'IS', 'Use of data is limited to use within an approved institution.', false, 3); diff --git a/sql/data_schema.sql b/sql/data_schema.sql new file mode 100644 index 000000000..94fcd2d42 --- /dev/null +++ b/sql/data_schema.sql @@ -0,0 +1,207 @@ +-------------------------------------------------------------------------------- +-- Swefreq data schema -- +-- -- +-- This schema contains the studies and datasets, as well as the actual data -- +-- (reference-data, variants, and coverage) the goes into the Swefreq system. -- +-- -- +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +-- dbSNP tables. +-- +-- dbSNP datasets are quite large (~200.000.000 entries) and completely separate +-- from the rest of the reference data. In order to minimize the number of dbSNP +-- sets that need to be stored, the dbsnp_version table (which links to the +-- dataset table) allows multiple datasets to use the same dbSNP data. 
+ +CREATE TABLE IF NOT EXISTS data.dbsnp_versions ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + version_id varchar(64) +); + +CREATE TABLE IF NOT EXISTS data.dbsnp ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + version_id integer REFERENCES data.dbsnp_versions, + rsid bigint UNIQUE, + chrom varchar(10), + pos integer +); + +CREATE INDEX IF NOT EXISTS rsid_index ON data.dbsnp USING hash (rsid); + +-------------------------------------------------------------------------------- +-- Reference Set tables +-- + +CREATE TABLE IF NOT EXISTS data.reference_sets ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + dbsnp_version integer REFERENCES data.dbsnp_versions, + reference_name varchar, + ensembl_version varchar, + gencode_version varchar, + dbnsfp_version varchar, + omim_version varchar +); + +CREATE TABLE IF NOT EXISTS data.genes ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + reference_set integer REFERENCES data.reference_sets, + gene_id varchar(15), + gene_name varchar, + full_name varchar, + other_names varchar[], + canonical_transcript varchar(15), + chrom varchar(10), + start_pos integer, + end_pos integer, + strand varchar +); + +CREATE TABLE IF NOT EXISTS data.transcripts ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + transcript_id varchar(15), + gene integer REFERENCES data.genes, + mim_annotation varchar, + mim_gene_accession integer, + chrom varchar(10), + start_pos integer, + stop_pos integer, + strand varchar +); + +CREATE TABLE IF NOT EXISTS data.features ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + gene integer REFERENCES data.genes, + transcript integer REFERENCES data.transcripts, + chrom varchar(10), + start_pos integer, + stop_pos integer, + strand varchar, + feature_type varchar +); + +-------------------------------------------------------------------------------- +-- Study and Dataset fields +-- + +CREATE TABLE IF NOT EXISTS data.collections ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + study_name varchar, + ethnicity varchar +); + +CREATE TABLE IF NOT EXISTS data.studies ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + pi_name varchar(100) NOT NULL, + pi_email varchar(100) NOT NULL, + contact_name varchar(100) NOT NULL, + contact_email varchar(100) NOT NULL, + title varchar(100) NOT NULL, + study_description text DEFAULT NULL, + publication_date timestamp NOT NULL, + ref_doi varchar(100), + UNIQUE (pi_email, title) +); + +CREATE TABLE IF NOT EXISTS data.datasets ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + study integer NOT NULL REFERENCES data.studies, + reference_set integer NOT NULL REFERENCES data.reference_sets, + short_name varchar(50) UNIQUE NOT NULL, + full_name varchar(100) NOT NULL, + browser_uri varchar(200) DEFAULT NULL, + beacon_uri varchar(200) DEFAULT NULL, + avg_seq_depth real DEFAULT NULL, + seq_type varchar(50) DEFAULT NULL, + seq_tech varchar(50) DEFAULT NULL, + seq_center varchar(100) DEFAULT NULL, + dataset_size integer NOT NULL CHECK (dataset_size >= 0) +); + +CREATE TABLE IF NOT EXISTS data.dataset_logos ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + dataset integer NOT NULL REFERENCES data.datasets, + mimetype varchar(50) NOT NULL, + bytes bytea NOT NULL +); + +CREATE TABLE IF NOT EXISTS data.sample_sets ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + dataset integer NOT NULL REFERENCES data.datasets, + "collection" integer NOT NULL REFERENCES data.collections, + sample_size 
integer NOT NULL,
+    phenotype varchar(50) NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS data.dataset_versions (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    dataset integer NOT NULL REFERENCES data.datasets,
+    dataset_version varchar(20) NOT NULL,
+    dataset_description text NOT NULL,
+    terms text NOT NULL,
+    var_call_ref varchar(50) DEFAULT NULL,
+    available_from timestamp DEFAULT current_timestamp,
+    ref_doi varchar(100) DEFAULT NULL,
+    data_contact_name varchar(100) DEFAULT NULL,
+    data_contact_link varchar(100) DEFAULT NULL
+);
+
+CREATE TABLE IF NOT EXISTS data.dataset_files (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    dataset_version integer NOT NULL REFERENCES data.dataset_versions,
+    basename varchar(100) NOT NULL,
+    uri varchar(200) UNIQUE NOT NULL,
+    file_size bigint NOT NULL
+);
+
+--------------------------------------------------------------------------------
+-- Variant and coverage data fields
+--
+
+CREATE TABLE IF NOT EXISTS data.variants (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    dataset_version integer REFERENCES data.dataset_versions,
+    rsid integer,
+    chrom varchar(10),
+    pos integer,
+    ref varchar,
+    alt varchar,
+    site_quality real,
+    genes varchar[],
+    transcripts varchar[],
+    orig_alt_alleles varchar[],
+    hom_count integer,
+    allele_freq real,
+    filter_string varchar,
+    variant_id varchar,
+    allele_count integer,
+    allele_num integer,
+    quality_metrics jsonb,
+    vep_annotations jsonb
+);
+
+CREATE TABLE IF NOT EXISTS data.coverage (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    dataset_version integer REFERENCES data.dataset_versions,
+    chrom varchar(10),
+    pos integer,
+    mean real,
+    median real,
+    cov1 real,
+    cov5 real,
+    cov10 real,
+    cov15 real,
+    cov20 real,
+    cov25 real,
+    cov30 real,
+    cov50 real,
+    cov100 real
+);
+
+CREATE TABLE IF NOT EXISTS data.metrics (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    dataset_version integer REFERENCES data.dataset_versions,
+    metric varchar,
+    mids integer[],
+    hist integer[]
+);
diff --git a/sql/user_schema.sql b/sql/user_schema.sql
new file mode 100644
index 000000000..6ca1f32f8
--- /dev/null
+++ b/sql/user_schema.sql
@@ -0,0 +1,75 @@
+--------------------------------------------------------------------------------
+-- Swefreq user schema                                                        --
+--                                                                            --
+-- This schema contains the user tables, including the access rights to the  --
+-- datasets (in the data schema) in the Swefreq system.                       --
+--                                                                            --
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+-- User fields
+--
+
+CREATE TYPE identity_enum AS ENUM('google', 'elixir');
+
+CREATE TABLE IF NOT EXISTS users.users (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    username varchar(100) DEFAULT NULL,
+    email varchar(100) UNIQUE NOT NULL,
+    affiliation varchar(100) DEFAULT NULL,
+    country varchar(100) DEFAULT NULL,
+    identity varchar(100) NOT NULL,
+    identity_type identity_enum NOT NULL,
+    UNIQUE (identity, identity_type)
+);
+
+CREATE TABLE IF NOT EXISTS users.sftp_users (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    user_id integer NOT NULL REFERENCES users.users,
+    user_uid integer UNIQUE NOT NULL CHECK (user_uid >= 10000),
+    user_name varchar(50) NOT NULL,
+    password_hash varchar(100) NOT NULL,
+    account_expires timestamp NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS users.linkhash (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    dataset_version integer NOT NULL REFERENCES data.dataset_versions,
+    user_id integer NOT NULL REFERENCES users.users,
+    "hash" varchar(64) UNIQUE NOT NULL,
+    expires_on timestamp NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS users.dataset_access (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    dataset integer NOT NULL REFERENCES data.datasets,
+    user_id integer NOT NULL REFERENCES users.users,
+    wants_newsletter boolean DEFAULT false,
+    is_admin boolean DEFAULT false,
+    UNIQUE (dataset, user_id)
+);
+
+CREATE TYPE access_action AS ENUM('access_granted', 'access_revoked',
+                                  'access_requested', 'private_link');
+
+CREATE TABLE IF NOT EXISTS users.user_access_log (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    user_id integer NOT NULL REFERENCES users.users,
+    dataset integer NOT NULL REFERENCES data.datasets,
+    ts timestamp NOT NULL DEFAULT current_timestamp,
+    "action" access_action DEFAULT NULL
+);
+
+CREATE TABLE IF NOT EXISTS users.user_consent_log (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    user_id integer NOT NULL REFERENCES users.users,
+    dataset_version integer NOT NULL REFERENCES data.dataset_versions,
+    ts timestamp NOT NULL DEFAULT current_timestamp
+);
+
+CREATE TABLE IF NOT EXISTS users.user_download_log (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    user_id integer NOT NULL REFERENCES users.users,
+    dataset_file integer NOT NULL REFERENCES data.dataset_files,
+    ts timestamp NOT NULL DEFAULT current_timestamp
+);

From 4d5bdcb2bec7d7189d7935e4486d3a4e31bf929b Mon Sep 17 00:00:00 2001
From: Martin Norling
Date: Mon, 22 Oct 2018 13:43:04 +0200
Subject: [PATCH 010/360] fixes bug where gzipped lines were not read in the
 same way as unzipped lines.
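
Wrapping each line in bytes() before decoding forces gzipped and plain input
onto the same code path. The project's _open() helper is not part of this
patch, so the following is only a sketch of one plausible reading of the
bug: as long as the opener yields any bytes-like object (bytes, bytearray,
memoryview), the bytes() call normalizes it so that .decode() is always
valid:

    import gzip

    def _open(filename):
        # gzip.open() yields bytes by default, while open() yields str
        # unless binary mode is requested explicitly.
        if filename.endswith('.gz'):
            return gzip.open(filename)
        return open(filename, 'rb')

    for line in _open('variations.vcf.gz'):
        line = bytes(line).decode('utf8').strip()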
---
 scripts/importer/data_importer/raw_data_importer.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py
index f68a65689..1b7deec12 100644
--- a/scripts/importer/data_importer/raw_data_importer.py
+++ b/scripts/importer/data_importer/raw_data_importer.py
@@ -106,7 +106,7 @@ def _insert_coverage(self):
         counter = 0
         with db.database.atomic():
             for line in self._open(self.settings.coverage_file):
-                line = line.decode('utf8').strip()
+                line = bytes(line).decode('utf8').strip()
                 if line.startswith("#"):
                     continue
 
@@ -152,7 +152,7 @@ def _insert_variants(self):
             gq_mids = None
         with db.database.atomic():
             for line in self._open(self.settings.variant_file):
-                line = line.decode('utf8').strip()
+                line = bytes(line).decode('utf8').strip()
                 if line.startswith("#"):
                     # Check for some information that we need
                     if line.startswith('##INFO=<ID=CSQ'):

From: Martin Norling
Date: Tue, 23 Oct 2018 16:14:47 +0200
Subject: [PATCH 011/360] adds dry-run support to dbsnp importer.

---
 .../importer/data_importer/dbsnp_importer.py | 27 +++++++++++++------
 scripts/importer/importer.py                 |  8 ++++++
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/scripts/importer/data_importer/dbsnp_importer.py b/scripts/importer/data_importer/dbsnp_importer.py
index 664f73adc..cecc4a827 100644
--- a/scripts/importer/data_importer/dbsnp_importer.py
+++ b/scripts/importer/data_importer/dbsnp_importer.py
@@ -4,6 +4,7 @@
 import time
 import logging
 import db
+from peewee import fn
 from .data_importer import DataImporter
 
 class DbSNPImporter( DataImporter ):
@@ -39,7 +40,7 @@ def count_entries(self):
             self.total += 1
         self.in_file.rewind()
 
-        logging.info("Found {} lines in {}".format(self.total, self._time_since(start)))
+        logging.info("Found {:,} lines in {}".format(self.total, self._time_since(start)))
 
     def prepare_data(self):
         url = DbSNPImporter.URL.format(a=self.settings)
@@ -52,11 +53,19 @@ def prepare_data(self):
 
     def prepare_version(self):
         version_id = "{a.dbsnp_version}_{a.dbsnp_reference}".format(a=self.settings)
-        dbsnp_version, created = db.DbSNP_version.get_or_create(version_id = version_id)
-        if created:
-            logging.info("Created dbsnp_version '{}'".format(version_id))
+        if self.settings.dry_run:
+            try:
+                dbsnp_version = db.DbSNP_version.get(version_id = version_id)
+                logging.info("dbsnp_version '{}' already in database".format(version_id))
+            except db.DbSNP_version.DoesNotExist:
+                dbsnp_version = db.DbSNP_version.select(fn.Max(db.DbSNP_version.version_id)).get()
+                logging.info("Created dbsnp_version '{}'".format(version_id))
         else:
-            logging.info("dbsnp_version '{}' already in database".format(version_id))
+            dbsnp_version, created = db.DbSNP_version.get_or_create(version_id = version_id)
+            if created:
+                logging.info("Created dbsnp_version '{}'".format(version_id))
+            else:
+                logging.info("dbsnp_version '{}' already in database".format(version_id))
         return dbsnp_version
 
     def start_import(self):
@@ -103,11 +112,13 @@ def start_import(self):
                 last_progress += 0.01
 
             if len(batch) >= self.batch_size:
-                db.DbSNP.insert_many(batch).execute()
+                if not self.settings.dry_run:
+                    db.DbSNP.insert_many(batch).execute()
                 batch = []
                 db.database.commit()
         if batch:
-            db.DbSNP.insert_many(batch)
+            if not self.settings.dry_run:
+                db.DbSNP.insert_many(batch)
         if self.total != None:
             self._tick(True)
-        logging.info("Inserted {} valid lines in {}".format(counter, self._time_since(start)))
+        logging.info("Inserted {:,} valid lines in {}".format(counter,
self._time_since(start))) diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index 999eccc03..a518b9a4c 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -46,6 +46,12 @@ parser.add_argument("--dbsnp_reference", default="GRCh37p13", help = "Which reference the dbSNP should be aligned to.") + # Dataset connections + parser.add_argument("--dataset", default="", + help="Which dataset to connect the data to.") + parser.add_argument("--version", default="last", + help="Which dataset version to connect the data to.") + # omim file, since we can't download or version them parser.add_argument("--omim_file", default=os.path.join(os.path.dirname(__file__), "downloaded_files", @@ -68,6 +74,8 @@ parser.add_argument("--move_studies", action="store_true", help = ("Moves studies and datasets from an old database" " to a new one.")) + parser.add_argument("--dry_run", action="store_true", + help = "Do not insert anything into the database") # Logging and verbosity From 1bc9d38b8a9c157eaeafb4e2dbbdbaeeeaa5cef1 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Tue, 23 Oct 2018 16:15:28 +0200 Subject: [PATCH 012/360] adds mysql-data and downloaded files to gitignore. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index e07e09d6e..b8db17e69 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,6 @@ tornado/static/js/app.min.js backend/static backend/templates static +# importer and config stuff +mysql-data* +scripts/importer/downloaded_files From 55b138cc3408da8caa7c5bfce39b1979cd16bc85 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Tue, 23 Oct 2018 16:15:51 +0200 Subject: [PATCH 013/360] adds importer requirements. --- scripts/importer/requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 scripts/importer/requirements.txt diff --git a/scripts/importer/requirements.txt b/scripts/importer/requirements.txt new file mode 100644 index 000000000..6bf362d48 --- /dev/null +++ b/scripts/importer/requirements.txt @@ -0,0 +1,3 @@ +mysqlclient==1.3.13 +peewee==2.10.2 +psycopg2-binary==2.7.5 From c228a885f65f6d659ca660e41d38027a56eb2477 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Tue, 23 Oct 2018 17:36:26 +0200 Subject: [PATCH 014/360] Adds dry run support to reference set import. 
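
Editor's note: the dry-run convention used in this and the following patches
is that every database write is gated on settings.dry_run, and where later
code needs the primary key of a row that was deliberately not created, a
placeholder id (0, or the current maximum id plus one) is substituted so the
rest of the import logic can still run. A minimal sketch of the pattern,
assuming a peewee model instance and a dry_run flag (the helper name is
hypothetical, not from the repository):

    def save_or_fake_id(model_instance, dry_run):
        """Persist the row unless this is a dry run; always return an id."""
        if dry_run:
            return 0  # placeholder primary key, nothing is written
        model_instance.save()
        return model_instance.id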
--- .../data_importer/reference_set_importer.py | 70 +++++++++++++------ 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/scripts/importer/data_importer/reference_set_importer.py b/scripts/importer/data_importer/reference_set_importer.py index 22b0791af..0c6935943 100644 --- a/scripts/importer/data_importer/reference_set_importer.py +++ b/scripts/importer/data_importer/reference_set_importer.py @@ -8,7 +8,7 @@ import logging import zipfile import db -from peewee import IntegrityError +from peewee import IntegrityError, fn from .data_importer import DataImporter class ReferenceSetImporter( DataImporter ): @@ -56,7 +56,8 @@ def _insert_features(self): 'feature_type':feature['feature_type']}] if len(batch) % self.batch_size == 0: - db.Feature.insert_many(batch).execute() + if not self.settings.dry_run: + db.Feature.insert_many(batch).execute() batch = [] progress = i / len(self.features) @@ -64,7 +65,8 @@ def _insert_features(self): last_progress += 0.01 self._tick() if len(batch): - db.Feature.insert_many(batch).execute() + if not self.settings.dry_run: + db.Feature.insert_many(batch).execute() batch = [] self._tick(True) @@ -90,8 +92,12 @@ def _insert_genes(self): end = gene['stop'], strand = gene['strand'] ) - db_gene.save() - self.gene_db_ids[gene['gene_id']] = db_gene.id + + if self.settings.dry_run: + self.gene_db_ids[gene['gene_id']] = 0 + else: + db_gene.save() + self.gene_db_ids[gene['gene_id']] = db_gene.id progress = i / len(self.genes) while progress - last_progress > 0.01: @@ -102,13 +108,21 @@ def _insert_genes(self): logging.info("Genes inserted in {}".format( self._time_since(start) )) def _insert_reference(self): - logging.info("Getting dbSNP version id") version_id = "{a.dbsnp_version}_{a.dbsnp_reference}".format(a=self.settings) - dbsnp_version, created = db.DbSNP_version.get_or_create(version_id = version_id) - if created: - logging.info("Created dbsnp_version '{}'".format(version_id)) + + if self.settings.dry_run: + try: + dbsnp_version = db.DbSNP_version.get(version_id = version_id) + logging.info("Using dbsnp_version '{}'".format(version_id)) + except db.DbSNP_version.DoesNotExist: + dbsnp_version = db.DbSNP_version.select(fn.Max(db.DbSNP_version.version_id)).get() + logging.info("Created dbsnp_version '{}'".format(version_id)) else: - logging.info("Using dbsnp_version '{}'".format(version_id)) + dbsnp_version, created = db.DbSNP_version.get_or_create(version_id = version_id) + if created: + logging.info("Created dbsnp_version '{}'".format(version_id)) + else: + logging.info("Using dbsnp_version '{}'".format(version_id)) omim_filename = self.settings.omim_file.split("/")[-1] logging.info("inserting reference header") @@ -118,7 +132,16 @@ def _insert_reference(self): dbnsfp_version = self.settings.dbnsfp_version, omim_version = omim_filename, dbsnp_version = dbsnp_version.id) - self.db_reference.save() + + + if self.settings.dry_run: + max_id = db.ReferenceSet.select(fn.Max(db.ReferenceSet.id)).get() + if max_id.id is None: + self.db_reference.id = 0 + else: + self.db_reference.id = max_id.id + 1 + else: + self.db_reference.save() logging.info("Reference {} created".format(self.db_reference.id)) def _insert_transcripts(self): @@ -138,8 +161,13 @@ def _insert_transcripts(self): stop = transcript['stop'], strand = transcript['strand'] ) - db_transcript.save() - self.transcript_db_ids[transcript['transcript_id']] = db_transcript.id + + + if self.settings.dry_run: + self.transcript_db_ids[transcript['transcript_id']] = 0 + else: + db_transcript.save() + 
self.transcript_db_ids[transcript['transcript_id']] = db_transcript.id progress = i / len(self.transcripts) while progress - last_progress > 0.01: @@ -223,7 +251,7 @@ def _read_dbnsfp(self): dbnsfp_cache = {} for line in self.dbnsfp: - raw = line.decode('utf8').strip().split("\t") + raw = bytes(line).decode('utf8').strip().split("\t") if not header: header = raw if header: @@ -289,7 +317,7 @@ def _read_omim(self): cache = {} header = None for line in self.omim: - raw = line.decode('utf8').strip().split("\t") + raw = bytes(line).decode('utf8').strip().split("\t") if not header: header = [r.strip() for r in raw] if header: @@ -328,7 +356,7 @@ def count_entries(self): self.numbers['transcripts'] = 0 self.numbers['features'] = 0 for row in self.gencode: - raw = row.decode('ascii').strip() + raw = bytes(row).decode('ascii').strip() if raw[0] == "#": continue values = raw.split("\t") @@ -346,10 +374,12 @@ def count_entries(self): self.numbers['features'] += 1 self.gencode.rewind() + + pad = max([len("{:,}".format(self.numbers[t])) for t in ["genes", "transcripts", "features"]]) logging.info("Parsed file in {:3.1f}s".format(time.time()-start)) - logging.info("Genes : {}".format(self.numbers['genes'])) - logging.info("Transcripts: {}".format(self.numbers['transcripts'])) - logging.info("Features : {}".format(self.numbers['features'])) + logging.info("Genes : {0:>{pad},}".format(self.numbers['genes'], pad=pad)) + logging.info("Transcripts: {0:>{pad},}".format(self.numbers['transcripts'], pad=pad)) + logging.info("Features : {0:>{pad},}".format(self.numbers['features'], pad=pad)) def prepare_data(self): self._open_gencode() @@ -364,7 +394,7 @@ def start_import(self): if self.numbers['genes'] != None: self._print_progress_bar() for line in self.gencode: - line = line.decode('ascii').strip() + line = bytes(line).decode('ascii').strip() if line.startswith("#"): continue try: From 7016c5f504e187a524483867056f93df21889cef Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Fri, 26 Oct 2018 09:00:59 +0200 Subject: [PATCH 015/360] updates to old db importer to support auto-picking refset and dry-runs. 
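
Editor's note: the auto-picking described here resolves a reference set in
three steps. A single available set is used unconditionally; otherwise a
case-insensitive match between the dataset's short name and the reference set
names is tried; only then does the importer fall back to an interactive
prompt. A rough sketch of that precedence, as a simplified, hypothetical
helper (the real method also logs its choice):

    def pick_reference_set(reference_sets, short_name):
        if len(reference_sets) == 1:
            return reference_sets[0]
        for refset in reference_sets:
            if refset.name.lower() == short_name.lower():
                return refset
        return None  # caller falls back to prompting the user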
--- .../importer/data_importer/old_db_importer.py | 136 +++++++++++++++--- .../data_importer/raw_data_importer.py | 35 +++-- .../data_importer/reference_set_importer.py | 2 +- scripts/importer/importer.py | 9 +- 4 files changed, 141 insertions(+), 41 deletions(-) diff --git a/scripts/importer/data_importer/old_db_importer.py b/scripts/importer/data_importer/old_db_importer.py index ca3e06705..100e7cfb7 100644 --- a/scripts/importer/data_importer/old_db_importer.py +++ b/scripts/importer/data_importer/old_db_importer.py @@ -22,15 +22,18 @@ def __init__(self, settings): 'user':{} } - def _select_reference_set(self): + def _select_reference_set(self, short_name): if len(self.reference_sets) == 1: - logging.info(("Only one reference set is available, " - "will default to this set for all datasets")) + logging.info(("Only one reference set is available, {}," + "will default to this set for all datasets".format(self.reference_sets[0]))) return self.reference_sets[0].id + elif short_name.lower() in [r.name.lower() for r in self.reference_sets]: + refset = [r for r in self.reference_sets if r.name.lower() == short_name.lower()][0] + logging.info("Auto-selecting reference set '{}' based on name.".format(refset.name)) + return refset else: print("Select a reference set to use with this dataset") retval = -1 - reflen = len(self.reference_sets)-1 while retval not in [r.id for r in self.reference_sets]: for reference_set in self.reference_sets: print(" {} : {}".format(reference_set.id, reference_set.name)) @@ -49,6 +52,8 @@ def _move_collections(self): new_id = db.Collection.get(name = collection.name, ethnicity = collection.ethnicity).id except db.Collection.DoesNotExist: + if self.settings.dry_run: + continue new_id = (db.Collection .insert(name = collection.name, ethnicity = collection.ethnicity) @@ -71,6 +76,8 @@ def _move_studies(self): publication_date = study.publication_date, ref_doi = study.ref_doi).id except db.Study.DoesNotExist: + if self.settings.dry_run: + continue new_id = (db.Study .insert(pi_name = study.pi_name, pi_email = study.pi_email, @@ -88,13 +95,20 @@ def _move_datasets(self): logging.info("Moving Datasets") for dataset in old_db.Dataset.select(): logging.info(" - Moving '{}'".format(dataset.short_name)) - study_ref_id = self.id_map['study'][dataset.study.study] + try: + study_ref_id = self.id_map['study'][dataset.study.study] + except KeyError: + if not self.settings.dry_run: + raise + study_ref_id = -1 try: # short_name is unique, so we only really need to check that. 
new_id = db.Dataset.get(study = study_ref_id, short_name = dataset.short_name).id except db.Dataset.DoesNotExist: - target_reference_id = self._select_reference_set() + target_reference_id = self._select_reference_set(dataset.short_name) + if self.settings.dry_run: + continue new_id = (db.Dataset .insert(study = study_ref_id, reference_set = target_reference_id, @@ -114,12 +128,19 @@ def _move_datasets(self): def _move_dataset_logos(self): logging.info("Moving Dataset Logos") for dataset_file in old_db.DatasetLogo.select(): - dataset_ref_id = self.id_map['dataset'][dataset_file.dataset.dataset] + try: + dataset_ref_id = self.id_map['dataset'][dataset_file.dataset.dataset] + except KeyError: + if not self.settings.dry_run: + raise + dataset_ref_id = -1 try: db.DatasetLogo.get(dataset = dataset_ref_id, mimetype = dataset_file.mimetype, data = dataset_file.data) except db.DatasetLogo.DoesNotExist: + if self.settings.dry_run: + continue db.DatasetLogo.insert(dataset = dataset_ref_id, mimetype = dataset_file.mimetype, data = dataset_file.data).execute() @@ -127,7 +148,12 @@ def _move_dataset_logos(self): def _move_dataset_versions(self): logging.info("Moving Dataset Versions") for dataset_version in old_db.DatasetVersion.select(): - dataset_ref_id = self.id_map['dataset'][dataset_version.dataset.dataset] + try: + dataset_ref_id = self.id_map['dataset'][dataset_version.dataset.dataset] + except KeyError: + if not self.settings.dry_run: + raise + dataset_ref_id = -1 try: new_id = db.DatasetVersion.get(dataset = dataset_ref_id, version = dataset_version.version, @@ -139,6 +165,8 @@ def _move_dataset_versions(self): data_contact_name = dataset_version.data_contact_name, data_contact_link = dataset_version.data_contact_link).id except db.DatasetVersion.DoesNotExist: + if self.settings.dry_run: + continue new_id = (db.DatasetVersion .insert(dataset = dataset_ref_id, version = dataset_version.version, @@ -157,13 +185,20 @@ def _move_dataset_files(self): logging.info("Moving Dataset Files") for dataset_file in old_db.DatasetFile.select(): logging.info(" - Moving '{}'".format(dataset_file.name)) - dataset_version_ref_id = self.id_map['dataset_version'][dataset_file.dataset_version.dataset_version] + try: + dataset_version_ref_id = self.id_map['dataset_version'][dataset_file.dataset_version.dataset_version] + except KeyError: + if not self.settings.dry_run: + raise + dataset_version_ref_id = -1 try: new_id = db.DatasetFile.get(dataset_version = dataset_version_ref_id, name = dataset_file.name, uri = dataset_file.uri, file_size = dataset_file.bytes).id except db.DatasetFile.DoesNotExist: + if self.settings.dry_run: + continue new_id = (db.DatasetFile .insert(dataset_version = dataset_version_ref_id, name = dataset_file.name, @@ -175,14 +210,22 @@ def _move_dataset_files(self): def _move_sample_sets(self): logging.info("Moving Sample Sets") for sample_set in old_db.SampleSet.select(): - dataset_ref_id = self.id_map['dataset'][sample_set.dataset.dataset] - collection_ref_id = self.id_map['collection'][sample_set.collection.collection] + try: + dataset_ref_id = self.id_map['dataset'][sample_set.dataset.dataset] + collection_ref_id = self.id_map['collection'][sample_set.collection.collection] + except KeyError: + if not self.settings.dry_run: + raise + dataset_ref_id = -1 + collection_ref_id = -1 try: db.SampleSet.get(dataset = dataset_ref_id, collection = collection_ref_id, sample_size = sample_set.sample_size, phenotype = sample_set.phenotype) except db.SampleSet.DoesNotExist: + if self.settings.dry_run: + 
continue db.SampleSet.insert(dataset = dataset_ref_id, collection = collection_ref_id, sample_size = sample_set.sample_size, @@ -209,6 +252,8 @@ def _move_users(self): affiliation = user.affiliation, country = user.country).id) except db.Users.DoesNotExist: + if self.settings.dry_run: + continue new_id = (db.Users .insert(name = user.name, email = user.email, @@ -222,12 +267,19 @@ def _move_users(self): def _move_sftp_users(self): logging.info("Moving SFTP Users") for user in old_db.SFTPUser.select(): - user_ref_id = self.id_map['user'][user.user.user] + try: + user_ref_id = self.id_map['user'][user.user.user] + except KeyError: + if not self.settings.dry_run: + raise + user_ref_id = -1 try: # user_uid is unique, so we rely on that db.SFTPUsers.get(user = user_ref_id, user_uid = user.user_uid) except db.SFTPUsers.DoesNotExist: + if self.settings.dry_run: + continue db.SFTPUsers.insert(user = user_ref_id, user_uid = user.user_uid, user_name = user.user_name, @@ -237,14 +289,22 @@ def _move_sftp_users(self): def _move_user_access_logs(self): logging.info("Moving User Access Logs") for log in old_db.UserAccessLog.select(): - user_ref_id = self.id_map['user'][log.user.user] - dataset_ref_id = self.id_map['dataset'][log.dataset.dataset] + try: + user_ref_id = self.id_map['user'][log.user.user] + dataset_ref_id = self.id_map['dataset'][log.dataset.dataset] + except KeyError: + if not self.settings.dry_run: + raise + user_ref_id = -1 + dataset_ref_id = -1 try: db.UserAccessLog.get(user = user_ref_id, dataset = dataset_ref_id, action = log.action, ts = log.ts) except db.UserAccessLog.DoesNotExist: + if self.settings.dry_run: + continue db.UserAccessLog.insert(user = user_ref_id, dataset = dataset_ref_id, action = log.action, @@ -253,13 +313,21 @@ def _move_user_access_logs(self): def _move_user_consent_logs(self): logging.info("Moving User Consent Logs") for log in old_db.UserConsentLog.select(): - user_ref_id = self.id_map['user'][log.user.user] - version_ref_id = self.id_map['dataset_version'][log.dataset_version.dataset_version] + try: + user_ref_id = self.id_map['user'][log.user.user] + version_ref_id = self.id_map['dataset_version'][log.dataset_version.dataset_version] + except KeyError: + if not self.settings.dry_run: + raise + user_ref_id = -1 + version_ref_id = -1 try: db.UserConsentLog.get(user = user_ref_id, dataset_version = version_ref_id, ts = log.ts) except db.UserConsentLog.DoesNotExist: + if self.settings.dry_run: + continue db.UserConsentLog.insert(user = user_ref_id, dataset_version = version_ref_id, ts = log.ts).execute() @@ -267,13 +335,21 @@ def _move_user_consent_logs(self): def _move_user_download_logs(self): logging.info("Moving User Download Logs") for log in old_db.UserDownloadLog.select(): - user_ref_id = self.id_map['user'][log.user.user] - file_ref_id = self.id_map['dataset_file'][log.dataset_file.dataset_file] + try: + user_ref_id = self.id_map['user'][log.user.user] + file_ref_id = self.id_map['dataset_file'][log.dataset_file.dataset_file] + except KeyError: + if not self.settings.dry_run: + raise + user_ref_id = -1 + file_ref_id = -1 try: db.UserDownloadLog.get(user = user_ref_id, dataset_file = file_ref_id, ts = log.ts) except db.UserDownloadLog.DoesNotExist: + if self.settings.dry_run: + continue db.UserDownloadLog.insert(user = user_ref_id, dataset_file = file_ref_id, ts = log.ts).execute() @@ -281,14 +357,22 @@ def _move_user_download_logs(self): def _move_dataset_access(self): logging.info("Moving Dataset Access Records") for access in 
old_db.DatasetAccess.select(): - user_ref_id = self.id_map['user'][access.user.user] - dataset_ref_id = self.id_map['dataset'][access.dataset.dataset] + try: + user_ref_id = self.id_map['user'][access.user.user] + dataset_ref_id = self.id_map['dataset'][access.dataset.dataset] + except KeyError: + if not self.settings.dry_run: + raise + user_ref_id = -1 + dataset_ref_id = -1 try: db.DatasetAccess.get(dataset = dataset_ref_id, user = user_ref_id, wants_newsletter = access.wants_newsletter, is_admin = access.is_admin) except db.DatasetAccess.DoesNotExist: + if self.settings.dry_run: + continue db.DatasetAccess.insert(dataset = dataset_ref_id, user = user_ref_id, wants_newsletter = access.wants_newsletter, @@ -297,14 +381,22 @@ def _move_dataset_access(self): def _move_linkhashes(self): logging.info("Moving Linkhashes") for linkhash in old_db.Linkhash.select(): - user_ref_id = self.id_map['user'][linkhash.user.user] - version_ref_id = self.id_map['dataset_version'][linkhash.dataset_version.dataset_version] + try: + user_ref_id = self.id_map['user'][linkhash.user.user] + version_ref_id = self.id_map['dataset_version'][linkhash.dataset_version.dataset_version] + except KeyError: + if not self.settings.dry_run: + raise + user_ref_id = -1 + version_ref_id = -1 try: db.Linkhash.get(dataset_version = version_ref_id, user = user_ref_id, hash = linkhash.hash, expires_on = linkhash.expires_on) except db.Linkhash.DoesNotExist: + if self.settings.dry_run: + continue db.Linkhash.insert(dataset_version = version_ref_id, user = user_ref_id, hash = linkhash.hash, diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 1b7deec12..acd1dcc66 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -63,17 +63,20 @@ def __init__(self, settings): def _select_dataset_version(self): datasets = [] - print("Select a Dataset to use with this data") - for dataset in db.Dataset.select(): - print(" {} : {}".format(dataset.id, dataset.short_name)) - datasets += [dataset] - - d_id = -1 - while d_id not in [d.id for d in datasets]: - try: - d_id = int(input("Please select a dataset: ")) - except ValueError: + d_id = db.Dataset.get(short_name = self.settings.dataset) + if not d_id: + print("Select a Dataset to use with this data") + for dataset in db.Dataset.select(): + print(" {} : {}".format(dataset.id, dataset.short_name)) + datasets += [dataset] + + d_id = -1 + while d_id not in [d.id for d in datasets]: print("Please select a number in {}".format([d.id for d in datasets])) + try: + d_id = int(input("Please select a dataset: ")) + except ValueError: + print("Please select a number in {}".format([d.id for d in datasets])) versions = [] print("Select a Version of this dataset to use") @@ -83,6 +86,7 @@ def _select_dataset_version(self): v_id = -1 while v_id not in [v.id for v in versions]: + print("Please select a number in {}".format([v.id for v in versions])) try: v_id = int(input("Please select a version: ")) except ValueError: @@ -121,8 +125,8 @@ def _insert_coverage(self): batch += [data] if len(batch) >= self.settings.batch_size: - - db.Coverage.insert_many(batch).execute() + if not self.settings.dry_run: + db.Coverage.insert_many(batch).execute() batch = [] # Update progress if self.counter['coverage'] != None: @@ -133,7 +137,7 @@ def _insert_coverage(self): self._print_progress_bar() self._tick() last_progress += 0.01 - if batch: + if batch and not self.settings.dry_run: 
db.Coverage.insert_many(batch) if self.counter['coverage'] != None: self._tick(True) @@ -211,7 +215,8 @@ def _insert_variants(self): counter += 1 if len(batch) >= self.settings.batch_size: - db.Variant.insert_many(batch).execute() + if not self.settings.dry_run: + db.Variant.insert_many(batch).execute() batch = [] # Update progress if self.counter['variants'] != None: @@ -222,7 +227,7 @@ def _insert_variants(self): self._print_progress_bar() self._tick() last_progress += 0.01 - if batch: + if batch and not self.settings.dry_run: db.Variant.insert_many(batch) if self.counter['variants'] != None: self._tick(True) diff --git a/scripts/importer/data_importer/reference_set_importer.py b/scripts/importer/data_importer/reference_set_importer.py index 0c6935943..e2daea536 100644 --- a/scripts/importer/data_importer/reference_set_importer.py +++ b/scripts/importer/data_importer/reference_set_importer.py @@ -126,7 +126,7 @@ def _insert_reference(self): omim_filename = self.settings.omim_file.split("/")[-1] logging.info("inserting reference header") - self.db_reference = db.ReferenceSet(name = None, + self.db_reference = db.ReferenceSet(name = self.settings.ref_name, ensembl_version = self.settings.ensembl_version, gencode_version = self.settings.gencode_version, dbnsfp_version = self.settings.dbnsfp_version, diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index a518b9a4c..4ee3658cc 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -46,11 +46,14 @@ parser.add_argument("--dbsnp_reference", default="GRCh37p13", help = "Which reference the dbSNP should be aligned to.") - # Dataset connections + # Dataset connections and naming parser.add_argument("--dataset", default="", - help="Which dataset to connect the data to.") + help="Which dataset to connect imported data to.") parser.add_argument("--version", default="last", - help="Which dataset version to connect the data to.") + help="Which dataset version to connect imported data to.") + parser.add_argument("--ref_name", default="", + help=("Reference name to use when creating a reference set " + "AND selection of reference set for imported data")) # omim file, since we can't download or version them parser.add_argument("--omim_file", default=os.path.join(os.path.dirname(__file__), From ecc2e8e14135b867cc6f0e360080f50eee0ecd88 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Fri, 26 Oct 2018 09:10:56 +0200 Subject: [PATCH 016/360] Corrects a help message. 
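
Editor's note: for context, the dataset selection these flags feed into
follows a select-or-prompt pattern: resolve the --dataset flag against the
database first, and fall back to an interactive menu only when that fails.
A condensed sketch of the idea (a hypothetical helper under assumed row
attributes, not the module's actual code):

    def resolve_dataset(short_name, datasets):
        """Return the dataset matching short_name, else ask interactively."""
        for dataset in datasets:
            if dataset.short_name == short_name:
                return dataset
        for dataset in datasets:
            print(" {} : {}".format(dataset.id, dataset.short_name))
        while True:
            try:
                picked = int(input("Please select a dataset: "))
            except ValueError:
                continue
            for dataset in datasets:
                if dataset.id == picked:
                    return dataset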
--- scripts/importer/importer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index 4ee3658cc..0b0cabb37 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -52,8 +52,7 @@ parser.add_argument("--version", default="last", help="Which dataset version to connect imported data to.") parser.add_argument("--ref_name", default="", - help=("Reference name to use when creating a reference set " - "AND selection of reference set for imported data")) + help="Reference name to use when creating a reference set.") # omim file, since we can't download or version them parser.add_argument("--omim_file", default=os.path.join(os.path.dirname(__file__), @@ -80,7 +79,6 @@ parser.add_argument("--dry_run", action="store_true", help = "Do not insert anything into the database") - # Logging and verbosity parser.add_argument("--disable_progress", action="store_true", help="Do not show progress bars.") From 69033608f017850d94ade74f24a711b66e49bb56 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Mon, 29 Oct 2018 08:01:08 +0100 Subject: [PATCH 017/360] adds --dry-run, --dataset, and --version flags to raw data importer. --- .../importer/data_importer/data_importer.py | 2 +- .../data_importer/raw_data_importer.py | 335 +++++++++++------- scripts/importer/importer.py | 16 +- 3 files changed, 218 insertions(+), 135 deletions(-) diff --git a/scripts/importer/data_importer/data_importer.py b/scripts/importer/data_importer/data_importer.py index bda4cc988..e49a6e519 100644 --- a/scripts/importer/data_importer/data_importer.py +++ b/scripts/importer/data_importer/data_importer.py @@ -84,7 +84,7 @@ def _download_and_open(self, base_url, version = None): def _open(self, filename): try: - logging.info("Opening file {}".format(filename)) + logging.debug("Opening file {}".format(filename)) return gzip.open(filename,'rb') if filename.endswith(".gz") else open(filename) except IOError as e: logging.error("IOERROR: {}".format(e)) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index acd1dcc66..9c03547e9 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -6,6 +6,8 @@ import time import logging +from datetime import datetime + import db from .data_importer import DataImporter @@ -63,36 +65,107 @@ def __init__(self, settings): def _select_dataset_version(self): datasets = [] - d_id = db.Dataset.get(short_name = self.settings.dataset) - if not d_id: + try: + ds = db.Dataset.get(short_name = self.settings.dataset) + except db.Dataset.DoesNotExist: print("Select a Dataset to use with this data") for dataset in db.Dataset.select(): print(" {} : {}".format(dataset.id, dataset.short_name)) datasets += [dataset] - d_id = -1 - while d_id not in [d.id for d in datasets]: - print("Please select a number in {}".format([d.id for d in datasets])) + selection = -1 + while selection not in [d.id for d in datasets]: + if selection != -1: + print("Please select a number in {}".format([d.id for d in datasets])) try: - d_id = int(input("Please select a dataset: ")) + selection = int(input("Please select a dataset: ")) except ValueError: print("Please select a number in {}".format([d.id for d in datasets])) + ds = [d for d in datasets if d.id == selection][0] + logging.info("Using dataset {}".format(ds.short_name)) versions = [] + for version in 
db.DatasetVersion.select().where(db.DatasetVersion.dataset == ds): + versions += [version] + + if not versions: + raise db.DatasetVersion.DoesNotExist("At least one dataset version required for dataset") + + if len(versions) == 1: + logging.info("Only one available dataset version, using: {}".format(versions[0].version)) + self.dataset_version = versions[0] + return + + if self.settings.version: + # name based version picking + if self.settings.version.lower() in [v.version.lower() for v in versions]: + selected = [v for v in versions if v.version.lower() == self.settings.version.lower()][0] + self.dataset_version = selected + logging.info("Using dataset version {}".format(self.dataset_version.version)) + return + + # date-based version picking + # note that this only works if the version string is formatted like: + # yyyymmdd or yyyy-mm-dd + + target = self.settings.version + version_dates = [] + for v in versions: + try: + version_dates += [(datetime.strptime(v.version, "%Y-%m-%d"), v)] + except ValueError: + try: + version_dates += [(datetime.strptime(v.version, "%Y%m%d"), v)] + except ValueError: + pass + if target not in ["latest", "next"]: + try: + target = datetime.strptime(target, "%Y-%m-%d") + except ValueError: + pass + try: + target = datetime.strptime(target, "%Y%m%d") + except ValueError: + pass + for date, version in version_dates: + if target == date: + self.dataset_version = version + logging.info("Using dataset version {}".format(self.dataset_version.version)) + return + else: + today = datetime.today() + if target == "latest": + try: + target, version = max([i for i in version_dates if i[0] < today]) + self.dataset_version = version + logging.info("Using dataset version {}".format(self.dataset_version.version)) + return + except ValueError: + pass + elif target == "next": + try: + target, version = min([i for i in version_dates if i[0] > today]) + self.dataset_version = version + logging.info("Using dataset version {}".format(self.dataset_version.version)) + return + except ValueError: + logging.warning("No future dataset versions found!") + print("Select a Version of this dataset to use") - for version in db.DatasetVersion.select(): + for version in versions: print(" {} : {}".format(version.id, version.version)) - versions += [version] - v_id = -1 - while v_id not in [v.id for v in versions]: - print("Please select a number in {}".format([v.id for v in versions])) + selection = -1 + while selection not in [v.id for v in versions]: + if selection != -1: + print("Please select a number in {}".format([v.id for v in versions])) try: - v_id = int(input("Please select a version: ")) + selection = int(input("Please select a version: ")) except ValueError: print("Please select a number in {}".format([v.id for v in versions])) - self.dataset_version = [v for v in versions if v.id == v_id][0] + logging.info("Using dataset version {}".format(self.dataset_version)) + self.dataset_version = [v for v in versions if v.id == selection][0] def _insert_coverage(self): """ @@ -109,34 +182,35 @@ def _insert_coverage(self): last_progress = 0.0 counter = 0 with db.database.atomic(): - for line in self._open(self.settings.coverage_file): - line = bytes(line).decode('utf8').strip() - if line.startswith("#"): - continue - - data = {} - for i, item in enumerate(line.strip().split("\t")): - if i == 0: - data['dataset_version'] = self.dataset_version - data[header[i][0]] = header[i][1](item) - - if self.counter['coverage'] != None: - counter += 1 + for filename in self.settings.coverage_file: + for 
line in self._open(filename):
+                    line = bytes(line).decode('utf8').strip()
+                    if line.startswith("#"):
+                        continue
+
+                    data = {}
+                    for i, item in enumerate(line.strip().split("\t")):
+                        if i == 0:
+                            data['dataset_version'] = self.dataset_version
+                        data[header[i][0]] = header[i][1](item)
 
-                if self.counter['coverage'] != None:
-                    counter += 1
+                    if self.counter['coverage'] != None:
+                        counter += 1
+
+                    batch += [data]
+                    if len(batch) >= self.settings.batch_size:
+                        if not self.settings.dry_run:
+                            db.Coverage.insert_many(batch).execute()
+                        batch = []
+                        # Update progress
+                        if self.counter['coverage'] != None:
+                            progress = counter / self.counter['coverage']
+                            while progress > last_progress + 0.01:
+                                if not last_progress:
+                                    logging.info("Estimated time to completion: {}".format(self._time_to(start, progress)))
+                                self._print_progress_bar()
+                                self._tick()
+                                last_progress += 0.01
         if batch and not self.settings.dry_run:
             db.Coverage.insert_many(batch)
         if self.counter['coverage'] != None:
@@ -155,78 +229,79 @@ def _insert_variants(self):
             dp_mids = None
             gq_mids = None
         with db.database.atomic():
-            for line in self._open(self.settings.variant_file):
-                line = bytes(line).decode('utf8').strip()
-                if line.startswith("#"):
-                    # Check for some information that we need
-                    if line.startswith('##INFO=<ID=CSQ'):
-                        vep_field_names = line.split('Format: ')[-1].strip('">').split('|')
-                    if line.startswith('##INFO=<ID=DP_HIST'):
-                        dp_mids = map(float, line.split('Mids: ')[-1].strip('">').split('|'))
-                    if line.startswith('##INFO=<ID=GQ_HIST'):
-                        gq_mids = map(float, line.split('Mids: ')[-1].strip('">').split('|'))
-                    continue
-
-                if vep_field_names is None:
-                    logging.error("VEP_field_names is empty. Make sure VCF header is present.")
-                    sys.exit(1)
-
-                base = {}
-                for i, item in enumerate(line.strip().split("\t")):
-                    if i == 0:
-                        base['dataset_version'] = self.dataset_version
-                    if i < 7:
-                        base[header[i][0]] = header[i][1](item)
-                    else:
-                        info = dict([(x.split('=', 1)) if '=' in x else (x, x) for x in re.split(';(?=\w)', item)])
-
-                if base["chrom"].startswith('GL') or base["chrom"].startswith('MT'):
-                    continue
-
-                consequence_array = info['CSQ'].split(',') if 'CSQ' in info else []
-                annotations = [dict(zip(vep_field_names, x.split('|'))) for x in consequence_array if len(vep_field_names) == len(x.split('|'))]
-
-                alt_alleles = base['alt'].split(",")
-                for i, alt in enumerate(alt_alleles):
-                    vep_annotations = [ann for ann in annotations if int(ann['ALLELE_NUM']) == i + 1]
-
-                    data = dict(base)
-                    data['alt'] = alt
-                    data['rsid'] = int(data['rsid'].strip('rs')) if data['rsid'].startswith('rs') else None
-                    data['allele_num'] = int(info['AN_Adj'])
-                    data['allele_count'] = int(info['AC_Adj'].split(',')[i])
-                    data['allele_freq'] = None
-                    if 'AF' in info and data['allele_num'] > 0:
-                        data['allele_freq'] = data['allele_count']/float(info['AN_Adj'])
-
-                    data['vep_annotations'] = json.dumps(vep_annotations)
-                    data['genes'] = list({annotation['Gene'] for annotation in vep_annotations})
-                    data['transcripts'] = list({annotation['Feature'] for annotation in vep_annotations})
-
-                    data['orig_alt_alleles'] = [
-                        '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles
-                    ]
-                    # I don't think this is needed.
-                    #data['hom_count'] =
-                    data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt'])
-                    data['quality_metrics'] = json.dumps(dict([(x, info[x]) for x in METRICS if x in info]))
-                    batch += [data]
+            for filename in self.settings.variant_file:
+                for line in self._open(filename):
+                    line = bytes(line).decode('utf8').strip()
+                    if line.startswith("#"):
+                        # Check for some information that we need
+                        if line.startswith('##INFO=<ID=CSQ'):
+                            vep_field_names = line.split('Format: ')[-1].strip('">').split('|')
+                        if line.startswith('##INFO=<ID=DP_HIST'):
+                            dp_mids = map(float, line.split('Mids: ')[-1].strip('">').split('|'))
+                        if line.startswith('##INFO=<ID=GQ_HIST'):
+                            gq_mids = map(float, line.split('Mids: ')[-1].strip('">').split('|'))
+                        continue
+
+                    if vep_field_names is None:
+                        logging.error("VEP_field_names is empty. Make sure VCF header is present.")
+                        sys.exit(1)
+
+                    base = {}
+                    for i, item in enumerate(line.strip().split("\t")):
+                        if i == 0:
+                            base['dataset_version'] = self.dataset_version
+                        if i < 7:
+                            base[header[i][0]] = header[i][1](item)
+                        else:
+                            info = dict([(x.split('=', 1)) if '=' in x else (x, x) for x in re.split(';(?=\w)', item)])
+
+                    if base["chrom"].startswith('GL') or base["chrom"].startswith('MT'):
+                        continue
+
+                    consequence_array = info['CSQ'].split(',') if 'CSQ' in info else []
+                    annotations = [dict(zip(vep_field_names, x.split('|'))) for x in consequence_array if len(vep_field_names) == len(x.split('|'))]
+
+                    alt_alleles = base['alt'].split(",")
+                    for i, alt in enumerate(alt_alleles):
+                        vep_annotations = [ann for ann in annotations if int(ann['ALLELE_NUM']) == i + 1]
+
+                        data = dict(base)
+                        data['alt'] = alt
+                        data['rsid'] = int(data['rsid'].strip('rs')) if data['rsid'].startswith('rs') else None
+                        data['allele_num'] = int(info['AN_Adj'])
+                        data['allele_count'] = int(info['AC_Adj'].split(',')[i])
+                        data['allele_freq'] = None
+                        if 'AF' in info and data['allele_num'] > 0:
+                            data['allele_freq'] = data['allele_count']/float(info['AN_Adj'])
+
+                        data['vep_annotations'] = json.dumps(vep_annotations)
+                        data['genes'] = list({annotation['Gene'] for annotation in vep_annotations})
+                        data['transcripts'] = list({annotation['Feature'] for annotation in vep_annotations})
+
+                        data['orig_alt_alleles'] = [
+                            '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles
+                        ]
+                        # I don't think this is needed.
+ #data['hom_count'] = + data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt']) + data['quality_metrics'] = json.dumps(dict([(x, info[x]) for x in METRICS if x in info])) + batch += [data] + + counter += 1 - counter += 1 - - if len(batch) >= self.settings.batch_size: - if not self.settings.dry_run: - db.Variant.insert_many(batch).execute() - batch = [] - # Update progress - if self.counter['variants'] != None: - progress = counter / self.counter['variants'] - while progress > last_progress + 0.01: - if not last_progress: - logging.info("Estimated time to completion: {}".format(self._time_to(start, progress))) - self._print_progress_bar() - self._tick() - last_progress += 0.01 + if len(batch) >= self.settings.batch_size: + if not self.settings.dry_run: + db.Variant.insert_many(batch).execute() + batch = [] + # Update progress + if self.counter['variants'] != None: + progress = counter / self.counter['variants'] + while progress > last_progress + 0.01: + if not last_progress: + logging.info("Estimated time to completion: {}".format(self._time_to(start, progress))) + self._print_progress_bar() + self._tick() + last_progress += 0.01 if batch and not self.settings.dry_run: db.Variant.insert_many(batch) if self.counter['variants'] != None: @@ -235,23 +310,27 @@ def _insert_variants(self): def count_entries(self): start = time.time() - self.counter['coverage'] = 0 - logging.info("Counting coverage lines") - for line in self._open(self.settings.coverage_file): - line = bytes(line).decode('utf8').strip() - if line.startswith("#"): - continue - self.counter['coverage'] += 1 - # print("'{}'".format(line.split("\t"))) - # break - - self.counter['variants'] = 0 - logging.info("Counting variant lines") - for line in self._open(self.settings.variant_file): - line = bytes(line).decode('utf8').strip() - if line.startswith("#"): - continue - self.counter['variants'] += 1 + if self.settings.coverage_file: + self.counter['coverage'] = 0 + logging.info("Counting coverage lines") + for filename in self.settings.coverage_file: + for line in self._open(filename): + line = bytes(line).decode('utf8').strip() + if line.startswith("#"): + continue + self.counter['coverage'] += 1 + logging.info("Found {:,} coverage lines".format(self.counter['coverage'])) + + if self.settings.variant_file: + self.counter['variants'] = 0 + logging.info("Counting variant lines") + for filename in self.settings.variant_file: + for line in self._open(filename): + line = bytes(line).decode('utf8').strip() + if line.startswith("#"): + continue + self.counter['variants'] += 1 + logging.info("Found {:,} variants".format(self.counter['variants'])) logging.info("Counted input data lines in {} ".format(self._time_since(start))) @@ -259,5 +338,5 @@ def prepare_data(self): self._select_dataset_version() def start_import(self): - self._insert_coverage() self._insert_variants() + self._insert_coverage() diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index 0b0cabb37..e63396730 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -49,8 +49,12 @@ # Dataset connections and naming parser.add_argument("--dataset", default="", help="Which dataset to connect imported data to.") - parser.add_argument("--version", default="last", - help="Which dataset version to connect imported data to.") + parser.add_argument("--version", default="latest", + help=("Which dataset version to connect imported data to. 
" + "This can be a text-string name, a date in on of the " + "formats yyyymmdd or yyyy-mm-dd, or 'latest' for the " + "last published dataset version, or 'next' for the " + "next coming dataset version.")) parser.add_argument("--ref_name", default="", help="Reference name to use when creating a reference set.") @@ -61,10 +65,10 @@ help = "OMIM annotation file.") # Raw data (coverage + variants) files - parser.add_argument("--coverage_file", default=os.path.join(os.path.dirname(__file__), - "data", "Panel.0001.txt.gz")) - parser.add_argument("--variant_file", default=os.path.join(os.path.dirname(__file__), - "data", "variations.vcf.gz")) + parser.add_argument("--coverage_file", nargs="*", + help = "Coverage file(s) to import.") + parser.add_argument("--variant_file", nargs="*", + help = "Variant file(s) to import.") # Actions parser.add_argument("--add_reference", action="store_true", From 40f3029a78389d2e9342ae591d914d0f7f7c83c9 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Thu, 8 Nov 2018 22:34:43 +0100 Subject: [PATCH 018/360] Adds number of variants to dataset_versions --- backend/db.py | 41 +++++++++++++++++++ .../data_importer/raw_data_importer.py | 2 + 2 files changed, 43 insertions(+) diff --git a/backend/db.py b/backend/db.py index d7f135605..92c497175 100644 --- a/backend/db.py +++ b/backend/db.py @@ -225,6 +225,7 @@ class Meta: ref_doi = CharField(null=True) data_contact_name = CharField(null=True) data_contact_link = CharField(null=True) + num_variants = IntegerField(null=True) class DatasetFile(BaseModel): @@ -318,6 +319,7 @@ class Meta: mids = ArrayField(IntegerField) hist = ArrayField(IntegerField) + class Users(BaseModel): class Meta: db_table = "users" @@ -330,6 +332,7 @@ class Meta: affiliation = CharField(null=True) country = CharField(null=True) + class SFTPUsers(BaseModel): class Meta: db_table = "sftp_users" @@ -341,6 +344,7 @@ class Meta: password_hash = CharField(null=False) account_expires = DateTimeField(null=False) + class UserAccessLog(BaseModel): class Meta: db_table = "user_access_log" @@ -351,6 +355,7 @@ class Meta: action = EnumField(null=True, choices=['access_granted','access_revoked','access_requested','private_link']) ts = DateTimeField() + class UserConsentLog(BaseModel): class Meta: db_table = "user_consent_log" @@ -360,6 +365,7 @@ class Meta: dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version', related_name='consent_logs') ts = DateTimeField() + class UserDownloadLog(BaseModel): class Meta: db_table = "user_download_log" @@ -369,6 +375,7 @@ class Meta: dataset_file = ForeignKeyField(DatasetFile, db_column='dataset_file', related_name='download_logs') ts = DateTimeField() + class DatasetAccess(BaseModel): class Meta: db_table = "dataset_access" @@ -379,6 +386,7 @@ class Meta: wants_newsletter = BooleanField(null=True) is_admin = BooleanField(null=True) + class Linkhash(BaseModel): class Meta: db_table = "linkhash" @@ -388,3 +396,36 @@ class Meta: user = ForeignKeyField(Users, related_name='link_hashes') hash = CharField() expires_on = DateTimeField() + +##### +# Views +## + +class DatasetVersionCurrent(DatasetVersion): + class Meta: + db_table = 'dataset_version_current' + schema = 'data' + + dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='current_version') + + +class DatasetAccessCurrent(DatasetAccess): + class Meta: + db_table = 'dataset_access_current' + schema = 'users' + + dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access_current') + user = ForeignKeyField(Users, 
related_name='access_current') + has_access = IntegerField() + access_requested = DateTimeField() + + +class DatasetAccessPending(DatasetAccess): + class Meta: + db_table = 'dataset_access_pending' + schema = 'users' + + dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access_pending') + user = ForeignKeyField(Users, related_name='access_pending') + has_access = IntegerField() + access_requested = DateTimeField() diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 9c03547e9..e63cd299a 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -304,6 +304,8 @@ def _insert_variants(self): last_progress += 0.01 if batch and not self.settings.dry_run: db.Variant.insert_many(batch) + self.dataset_version.num_variants = counter + self.dataset_version.save() if self.counter['variants'] != None: self._tick(True) logging.info("Inserted {} variant records in {}".format(counter, self._time_since(start))) From 81795b643f027db6123833c1744b3228b09848c5 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Wed, 14 Nov 2018 13:01:03 +0100 Subject: [PATCH 019/360] Updates importer to match updated postgres schema. --- backend/db.py | 11 ++--------- backend/requirements.txt | 1 + scripts/importer/data_importer/old_db_importer.py | 4 +++- .../importer/data_importer/raw_data_importer.py | 15 +++++++++++++++ 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/backend/db.py b/backend/db.py index 92c497175..58c55d6bb 100644 --- a/backend/db.py +++ b/backend/db.py @@ -226,6 +226,7 @@ class Meta: data_contact_name = CharField(null=True) data_contact_link = CharField(null=True) num_variants = IntegerField(null=True) + coverage_levels = ArrayField(IntegerField, null=True) class DatasetFile(BaseModel): @@ -298,15 +299,7 @@ class Meta: pos = IntegerField() mean = FloatField() median = FloatField() - cov1 = FloatField() - cov5 = FloatField() - cov10 = FloatField() - cov15 = FloatField() - cov20 = FloatField() - cov25 = FloatField() - cov30 = FloatField() - cov50 = FloatField() - cov100 = FloatField() + coverage = ArrayField(FloatField, null=True) class Metrics(BaseModel): diff --git a/backend/requirements.txt b/backend/requirements.txt index 18ccc26e5..9a1fa9036 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -13,3 +13,4 @@ requests==2.18.4 six==1.10.0 tornado==4.5.1 urllib3==1.22 +psycopg2-binary==2.7.5 diff --git a/scripts/importer/data_importer/old_db_importer.py b/scripts/importer/data_importer/old_db_importer.py index 100e7cfb7..6fcb0dd7e 100644 --- a/scripts/importer/data_importer/old_db_importer.py +++ b/scripts/importer/data_importer/old_db_importer.py @@ -176,7 +176,9 @@ def _move_dataset_versions(self): available_from = dataset_version.available_from, ref_doi = dataset_version.ref_doi, data_contact_name = dataset_version.data_contact_name, - data_contact_link = dataset_version.data_contact_link) + data_contact_link = dataset_version.data_contact_link, + coverage_levels = [1,5,10,15,20,25,30,50,100] + ) .execute()) self.id_map['dataset_version'][dataset_version.dataset_version] = new_id diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index e63cd299a..d8841073a 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -199,6 +199,21 @@ def _insert_coverage(self): batch += [data] if 
len(batch) >= self.settings.batch_size: + # re-format coverage for batch + for i, item in enumerate(batch): + batch[i]['coverage'] = [item['cov1'], item['cov5'], item['cov10'], + item['cov15'],item['cov20'],item['cov25'], + item['cov30'],item['cov50'],item['cov100']] + del batch[i]['cov1'] + del batch[i]['cov5'] + del batch[i]['cov10'] + del batch[i]['cov15'] + del batch[i]['cov20'] + del batch[i]['cov25'] + del batch[i]['cov30'] + del batch[i]['cov50'] + del batch[i]['cov100'] + if not self.settings.dry_run: db.Coverage.insert_many(batch).execute() batch = [] From fce1a30a6712fe86adde931bf4c5faaff2ec4e96 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Wed, 21 Nov 2018 11:22:40 +0100 Subject: [PATCH 020/360] Adds functions from the old db peewee wrapper to the new one. --- backend/db.py | 90 ++++++++++++++++--- .../importer/data_importer/old_db_importer.py | 12 +-- 2 files changed, 86 insertions(+), 16 deletions(-) diff --git a/backend/db.py b/backend/db.py index 58c55d6bb..ea0e403c6 100644 --- a/backend/db.py +++ b/backend/db.py @@ -15,6 +15,7 @@ PrimaryKeyField, SQL, TextField, + fn, ) from playhouse.postgres_ext import ArrayField, BinaryJSONField @@ -313,7 +314,7 @@ class Meta: hist = ArrayField(IntegerField) -class Users(BaseModel): +class User(BaseModel): class Meta: db_table = "users" schema = 'users' @@ -325,13 +326,32 @@ class Meta: affiliation = CharField(null=True) country = CharField(null=True) + def is_admin(self, dataset): + return DatasetAccess.select().where( + DatasetAccess.dataset == dataset, + DatasetAccess.user == self, + DatasetAccess.is_admin + ).count() -class SFTPUsers(BaseModel): + def has_access(self, dataset): + return DatasetAccessCurrent.select().where( + DatasetAccessCurrent.dataset == dataset, + DatasetAccessCurrent.user == self, + ).count() + + def has_requested_access(self, dataset): + return DatasetAccessPending.select().where( + DatasetAccessPending.dataset == dataset, + DatasetAccessPending.user == self + ).count() + + +class SFTPUser(BaseModel): class Meta: db_table = "sftp_users" schema = 'users' - user = ForeignKeyField(Users, related_name='sftp_user') + user = ForeignKeyField(User, related_name='sftp_user') user_uid = IntegerField(unique=True) user_name = CharField(null=False) password_hash = CharField(null=False) @@ -343,7 +363,7 @@ class Meta: db_table = "user_access_log" schema = 'users' - user = ForeignKeyField(Users, related_name='access_logs') + user = ForeignKeyField(User, related_name='access_logs') dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access_logs') action = EnumField(null=True, choices=['access_granted','access_revoked','access_requested','private_link']) ts = DateTimeField() @@ -354,7 +374,7 @@ class Meta: db_table = "user_consent_log" schema = 'users' - user = ForeignKeyField(Users, related_name='consent_logs') + user = ForeignKeyField(User, related_name='consent_logs') dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version', related_name='consent_logs') ts = DateTimeField() @@ -364,7 +384,7 @@ class Meta: db_table = "user_download_log" schema = 'users' - user = ForeignKeyField(Users, related_name='download_logs') + user = ForeignKeyField(User, related_name='download_logs') dataset_file = ForeignKeyField(DatasetFile, db_column='dataset_file', related_name='download_logs') ts = DateTimeField() @@ -375,7 +395,7 @@ class Meta: schema = 'users' dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access') - user = ForeignKeyField(Users, related_name='access') + user = 
ForeignKeyField(User, related_name='dataset_access') wants_newsletter = BooleanField(null=True) is_admin = BooleanField(null=True) @@ -386,7 +406,7 @@ class Meta: schema = 'users' dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version', related_name='link_hashes') - user = ForeignKeyField(Users, related_name='link_hashes') + user = ForeignKeyField(User, related_name='link_hashes') hash = CharField() expires_on = DateTimeField() @@ -408,7 +428,7 @@ class Meta: schema = 'users' dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access_current') - user = ForeignKeyField(Users, related_name='access_current') + user = ForeignKeyField(User, related_name='access_current') has_access = IntegerField() access_requested = DateTimeField() @@ -419,6 +439,56 @@ class Meta: schema = 'users' dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access_pending') - user = ForeignKeyField(Users, related_name='access_pending') + user = ForeignKeyField(User, related_name='access_pending') has_access = IntegerField() access_requested = DateTimeField() + +##### +# Help functions +## + +def get_next_free_uid(): + """ + Returns the next free uid >= 10000, and higher than the current uid's + from the sftp_user table in the database. + """ + default = 10000 + next_uid = default + try: + current_max_uid = SFTPUser.select(fn.MAX(SFTPUser.user_uid)).get().user_uid + if current_max_uid: + next_uid = current_max_uid+1 + except SFTPUser.DoesNotExist: + pass + + return next_uid + +def get_admin_datasets(user): + return DatasetAccess.select().where( DatasetAccess.user == user, DatasetAccess.is_admin) + +def get_dataset(dataset): + dataset = Dataset.select().where( Dataset.short_name == dataset).get() + return dataset + +def get_dataset_version(dataset, version=None): + if version: + dataset_version = (DatasetVersion + .select(DatasetVersion, Dataset) + .join(Dataset) + .where(DatasetVersion.version == version, + Dataset.short_name == dataset)).get() + else: + dataset_version = (DatasetVersionCurrent + .select(DatasetVersionCurrent, Dataset) + .join(Dataset) + .where(Dataset.short_name == dataset)).get() + return dataset_version + +def build_dict_from_row(row): + d = {} + + for field, value in row.__dict__['_data'].items(): + if field == "id": + continue + d[field] = value + return d diff --git a/scripts/importer/data_importer/old_db_importer.py b/scripts/importer/data_importer/old_db_importer.py index 6fcb0dd7e..96eb69797 100644 --- a/scripts/importer/data_importer/old_db_importer.py +++ b/scripts/importer/data_importer/old_db_importer.py @@ -246,17 +246,17 @@ def _move_users(self): logging.info("Moving Users") for user in old_db.User.select(): try: - new_id = (db.Users + new_id = (db.User .get(name = user.name, email = user.email, identity = user.identity, identity_type = user.identity_type, affiliation = user.affiliation, country = user.country).id) - except db.Users.DoesNotExist: + except db.User.DoesNotExist: if self.settings.dry_run: continue - new_id = (db.Users + new_id = (db.User .insert(name = user.name, email = user.email, identity = user.identity, @@ -277,12 +277,12 @@ def _move_sftp_users(self): user_ref_id = -1 try: # user_uid is unique, so we rely on that - db.SFTPUsers.get(user = user_ref_id, + db.SFTPUser.get(user = user_ref_id, user_uid = user.user_uid) - except db.SFTPUsers.DoesNotExist: + except db.SFTPUser.DoesNotExist: if self.settings.dry_run: continue - db.SFTPUsers.insert(user = user_ref_id, + db.SFTPUser.insert(user = user_ref_id, user_uid = 
user.user_uid, user_name = user.user_name, password_hash = user.password_hash, From f97f24782cee7a437c0b98a52ef20d605431105b Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Wed, 21 Nov 2018 18:20:55 +0100 Subject: [PATCH 021/360] Docker-compose for postgres database. --- .gitignore | 4 ++++ Dockerfile-backend | 11 ++++++----- Dockerfile-frontend-rebuilder | 17 ++++++++++------- backend/application.py | 2 +- backend/requirements.txt | 1 - backend/settings.py | 7 ------- docker-compose.yml | 31 ++++++++++++++----------------- settings_sample.json | 10 +++++----- 8 files changed, 40 insertions(+), 43 deletions(-) diff --git a/.gitignore b/.gitignore index b8db17e69..bb3e30b2a 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,7 @@ static # importer and config stuff mysql-data* scripts/importer/downloaded_files +# docker stuff +postgres-data +# local personal things +personal diff --git a/Dockerfile-backend b/Dockerfile-backend index 2ccc0b728..37bc117e7 100644 --- a/Dockerfile-backend +++ b/Dockerfile-backend @@ -1,13 +1,14 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && apt-get install -y \ python3 \ - python3-pip \ - libmysqlclient-dev + python3-pip -ADD . /code +COPY backend/requirements.txt /code/requirements.txt +COPY settings.json /code/settings.json +RUN sed -i 's/"postgresHost"\s*:.*,/"postgresHost" : "db",/' /code/settings.json WORKDIR /code -RUN pip3 install -r backend/requirements.txt +RUN pip3 install -r requirements.txt CMD ["python3", "backend/route.py", "--develop"] diff --git a/Dockerfile-frontend-rebuilder b/Dockerfile-frontend-rebuilder index 444619a1c..ab2b2a3bc 100644 --- a/Dockerfile-frontend-rebuilder +++ b/Dockerfile-frontend-rebuilder @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y \ @@ -7,17 +7,20 @@ RUN apt-get update && \ python3 \ python3-pip \ python3-pyinotify \ - inotify-tools \ - libmysqlclient-dev && \ + inotify-tools && \ update-alternatives --install /usr/bin/python python /usr/bin/python3 5 -RUN curl -sL https://deb.nodesource.com/setup_6.x | bash - && \ - apt-get install -y nodejs +RUN apt-get install -y \ + nodejs \ + npm -ADD . 
/code +COPY Makefile /code/Makefile +COPY backend/requirements.txt /code/requirements.txt +COPY scripts/compile_template.py /code/scripts/compile_template.py +COPY scripts/watch_frontend.py /code/scripts/watch_frontend.py WORKDIR /code -RUN pip3 install -r backend/requirements.txt && \ +RUN pip3 install -r requirements.txt && \ pip3 install inotify CMD ["python", "scripts/watch_frontend.py"] diff --git a/backend/application.py b/backend/application.py index 74bc47a3a..dc1e1e283 100644 --- a/backend/application.py +++ b/backend/application.py @@ -576,7 +576,7 @@ def get(self, dataset): return self.set_header("Content-Type", logo_entry.mimetype) - self.write(logo_entry.data) + self.write(logo_entry.data.tobytes()) self.finish() diff --git a/backend/requirements.txt b/backend/requirements.txt index 9a1fa9036..2b1534135 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -4,7 +4,6 @@ appdirs==1.4.3 certifi==2017.11.5 chardet==3.0.4 idna==2.6 -mysqlclient==1.3.10 packaging==16.8 peewee==2.9.2 pymongo==3.4.0 diff --git a/backend/settings.py b/backend/settings.py index 7d579275b..225b25df9 100644 --- a/backend/settings.py +++ b/backend/settings.py @@ -21,13 +21,6 @@ ## Generated with base64.b64encode(uuid.uuid4().bytes + uuid.uuid4().bytes) cookie_secret = json_settings["cookieSecret"] -# MySql settings -mysql_host = json_settings["mysqlHost"] -mysql_schema = json_settings["mysqlSchema"] -mysql_user = json_settings["mysqlUser"] -mysql_passwd = json_settings["mysqlPasswd"] -mysql_port = json_settings["mysqlPort"] - # Mongodb settings mongo_host = json_settings["mongoHost"] mongo_port = json_settings["mongoPort"] diff --git a/docker-compose.yml b/docker-compose.yml index fa8643e8e..e2f6ce32f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,18 +11,17 @@ services: restart: on-failure volumes: - type: bind - source: . - target: /code + source: ./backend + target: /code/backend + - type: bind + source: ./frontend + target: /code/frontend + - type: bind + source: ./static + target: /code/static db: - image: "mysql:5.7" - volumes: - - type: volume - source: mysql-data-volume - target: /var/lib/mysql/ - postgres: - container_name: postgres restart: always - image: postgres:latest + image: postgres:10 volumes: - ./postgres-data:/var/lib/postgresql/data ports: @@ -33,10 +32,8 @@ services: dockerfile: Dockerfile-frontend-rebuilder volumes: - type: bind - source: . - target: /code - - -volumes: - mysql-data-volume: - external: true + source: ./frontend + target: /code/frontend + - type: bind + source: ./static + target: /code/static diff --git a/settings_sample.json b/settings_sample.json index 5b7fc79bb..a42391034 100644 --- a/settings_sample.json +++ b/settings_sample.json @@ -5,11 +5,11 @@ "googleSecret" : "a secret from google", "redirectUri" : "https://google oauth redirect uri", - "mysqlHost" : "127.0.0.1", - "mysqlPasswd" : "password", - "mysqlSchema" : "swefreq", - "mysqlUser" : "swefreq", - "mysqlPort" : 3306, + "postgresHost": "postgres host", + "postgresPort": 5432, + "postgresUser": "postgres", + "postgresPass": "", + "postgresName": "", "mongoHost" : "mongodb host", "mongoPassword" : "password", From ca939bc0936edd44cdd7ad821f0f9cd86c1779eb Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Tue, 4 Dec 2018 15:19:16 +0100 Subject: [PATCH 022/360] Main site working from postgres database (not browser). 
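Note: the one-line change below follows from the schema move. build_dict_from_row() in backend/db.py keys the returned dict by peewee field name, and the file-size column is now called file_size rather than bytes, so the dict key changes with it. A minimal sketch of that coupling — FakeRow and this format_bytes are illustrative stand-ins, not the project's helpers:

    def build_dict_from_row(row):
        # same logic as the helper in backend/db.py: peewee 2.x keeps
        # column values in the instance's _data dict
        return {f: v for f, v in row.__dict__['_data'].items() if f != 'id'}

    class FakeRow:
        # hypothetical stand-in for a peewee DatasetFile row
        def __init__(self, **data):
            self.__dict__['_data'] = data

    def format_bytes(num):
        # illustrative only; the backend has its own formatter
        for unit in ('B', 'KiB', 'MiB', 'GiB'):
            if num < 1024 or unit == 'GiB':
                return '{:.1f} {}'.format(num, unit)
            num /= 1024

    row = FakeRow(id=1, name='chr1.vcf.gz', file_size=123456789)
    d = build_dict_from_row(row)
    assert 'file_size' in d and 'bytes' not in d
    print(format_bytes(d['file_size']))  # -> 117.7 MiB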
--- backend/application.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/application.py b/backend/application.py index dc1e1e283..f67e57c61 100644 --- a/backend/application.py +++ b/backend/application.py @@ -259,7 +259,7 @@ def get(self, dataset, version=None): for f in dataset_version.files: d = db.build_dict_from_row(f) d['dirname'] = path.dirname(d['uri']) - d['human_size'] = format_bytes(d['bytes']) + d['human_size'] = format_bytes(d['file_size']) ret.append(d) self.finish({'files': ret}) From f860f065958c64347b161c534df7df523fe89182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 4 Jan 2019 14:40:19 +0100 Subject: [PATCH 023/360] Fix the name of the end_pos column --- backend/db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/db.py b/backend/db.py index ea0e403c6..87fb9f213 100644 --- a/backend/db.py +++ b/backend/db.py @@ -105,7 +105,7 @@ class Meta: canonical_transcript = CharField(null=True, max_length=15) chrom = CharField(max_length=10) start = IntegerField(db_column="start_pos") - stop = IntegerField(db_column="stop_pos") + stop = IntegerField(db_column="end_pos") strand = EnumField(choices=['+','-']) From e4102b95175f91199824008dc917c186ef1e3f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 7 Jan 2019 09:45:17 +0100 Subject: [PATCH 024/360] changed database connection type; playhouse.postgres_ext requires PostgresqlExtDatabase --- backend/db.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/backend/db.py b/backend/db.py index 87fb9f213..87e09f5da 100644 --- a/backend/db.py +++ b/backend/db.py @@ -17,13 +17,14 @@ TextField, fn, ) -from playhouse.postgres_ext import ArrayField, BinaryJSONField - -database = PostgresqlDatabase( settings.psql_name, - user = settings.psql_user, - password = settings.psql_pass, - host = settings.psql_host, - port = settings.psql_port) +from playhouse.postgres_ext import ArrayField, BinaryJSONField, PostgresqlExtDatabase + +database = PostgresqlExtDatabase(settings.psql_name, + user = settings.psql_user, + password = settings.psql_pass, + host = settings.psql_host, + port = settings.psql_port, + register_hstore = False) class BaseModel(Model): class Meta: From 4b3cedafa46b508f1dab6282191d34d04c1c6d75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 21 Jan 2019 13:10:04 +0100 Subject: [PATCH 025/360] Should hopefully give us actual json in the db --- scripts/importer/data_importer/raw_data_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index d8841073a..5cfcbbd6c 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -289,7 +289,7 @@ def _insert_variants(self): if 'AF' in info and data['allele_num'] > 0: data['allele_freq'] = data['allele_count']/float(info['AN_Adj']) - data['vep_annotations'] = json.dumps(vep_annotations) + data['vep_annotations'] = vep_annotations data['genes'] = list({annotation['Gene'] for annotation in vep_annotations}) data['transcripts'] = list({annotation['Feature'] for annotation in vep_annotations}) From 03b7396349645f740a3a093c706f4b12cf85d474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 21 Jan 2019 14:18:35 +0100 Subject: [PATCH 026/360] gencode files are gtf, ie 1-indexed --- 
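Note: GTF/GFF coordinates are 1-based and end-inclusive, while BED coordinates are 0-based and half-open, so the old int(values[3]) + 1 (commented "bed files are 0-indexed") was shifting start positions that the Gencode GTF already stores 1-based. A small self-contained illustration (not project code):

    # the same 10 bp feature occupying bases 101..110 of a chromosome
    gtf_start, gtf_stop = 101, 110  # 1-based, inclusive: store as-is
    bed_start, bed_stop = 100, 110  # 0-based, half-open: start needs + 1

    def bed_to_gtf(start, stop):
        # convert a BED interval to GTF conventions
        return start + 1, stop

    assert bed_to_gtf(bed_start, bed_stop) == (gtf_start, gtf_stop)
    assert gtf_stop - gtf_start + 1 == 10  # inclusive length
    assert bed_stop - bed_start == 10      # half-open length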
scripts/importer/data_importer/reference_set_importer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/importer/data_importer/reference_set_importer.py b/scripts/importer/data_importer/reference_set_importer.py index e2daea536..8f81d8f99 100644 --- a/scripts/importer/data_importer/reference_set_importer.py +++ b/scripts/importer/data_importer/reference_set_importer.py @@ -407,8 +407,8 @@ def start_import(self): info = {k: v.strip('"') for k, v in info.items()} data = {'chrom':values[0][3:], - 'start':int(values[3]) + 1, # bed files are 0-indexed - 'stop':int(values[4]) + 1, + 'start':int(values[3]), + 'stop':int(values[4]), 'strand':values[6], 'gene_id':info['gene_id'].split('.')[0]} From ea0123c81ba907e2b0dc92dac7cb972e2b9d0f5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 21 Jan 2019 14:50:46 +0100 Subject: [PATCH 027/360] Should fix gene stop positions --- scripts/importer/data_importer/reference_set_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/importer/data_importer/reference_set_importer.py b/scripts/importer/data_importer/reference_set_importer.py index 8f81d8f99..2c637b205 100644 --- a/scripts/importer/data_importer/reference_set_importer.py +++ b/scripts/importer/data_importer/reference_set_importer.py @@ -89,7 +89,7 @@ def _insert_genes(self): canonical_transcript = gene.get('canonical_transcript', None), chrom = gene['chrom'], start = gene['start'], - end = gene['stop'], + stop = gene['stop'], strand = gene['strand'] ) From a296220aa96bc274c865337bdbcab594bbaf725c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 21 Jan 2019 15:07:29 +0100 Subject: [PATCH 028/360] Should fix the division of multiple names --- scripts/importer/data_importer/reference_set_importer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/importer/data_importer/reference_set_importer.py b/scripts/importer/data_importer/reference_set_importer.py index 2c637b205..0d32b18ac 100644 --- a/scripts/importer/data_importer/reference_set_importer.py +++ b/scripts/importer/data_importer/reference_set_importer.py @@ -262,9 +262,10 @@ def _read_dbnsfp(self): values[header[i]] = value dbnsfp_cache[ values['Ensembl_gene'] ] = { - 'other_names': values['Gene_other_names'], + 'other_names': values['Gene_other_names'].split(';'), 'full_name': values['Gene_full_name'] } + for i, gene in enumerate(self.genes): if gene['gene_id'] in dbnsfp_cache: for key, item in dbnsfp_cache[gene['gene_id']].items(): From 73de6e2fef7a678c68b0f994ed3c77692bda70c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 21 Jan 2019 16:30:04 +0100 Subject: [PATCH 029/360] should fix dbsnp problem --- scripts/importer/data_importer/dbsnp_importer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/importer/data_importer/dbsnp_importer.py b/scripts/importer/data_importer/dbsnp_importer.py index cecc4a827..06a924e89 100644 --- a/scripts/importer/data_importer/dbsnp_importer.py +++ b/scripts/importer/data_importer/dbsnp_importer.py @@ -88,6 +88,7 @@ def start_import(self): try: rsid, chrom, position = line.split("\t")[:3] + position += 1 # 0-indexed except ValueError: # we don't care for incomplete entries continue From 40c642010105b1e597f4d418330a065b7698bf18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 24 Jan 2019 13:24:30 +0100 Subject: [PATCH 030/360] quality metrics should also remain as dict --- 
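Note: same reasoning as the vep_annotations fix in PATCH 025 — quality_metrics is stored in a jsonb column (BinaryJSONField), and the peewee/psycopg2 layer serialises a Python dict by itself, so pre-encoding with json.dumps() stores a JSON string rather than a JSON object. A quick plain-Python illustration of the double encoding (the metric names are just typical VCF INFO keys):

    import json

    metrics = {'BaseQRankSum': 1.23, 'FS': 0.0}

    as_object = json.dumps(metrics)              # what the jsonb column should hold
    as_string = json.dumps(json.dumps(metrics))  # what double encoding produces

    assert isinstance(json.loads(as_object), dict)  # queryable JSON object
    assert isinstance(json.loads(as_string), str)   # opaque JSON scalar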
scripts/importer/data_importer/raw_data_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 5cfcbbd6c..309b62f7a 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -299,7 +299,7 @@ def _insert_variants(self): # I don't think this is needed. #data['hom_count'] = data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt']) - data['quality_metrics'] = json.dumps(dict([(x, info[x]) for x in METRICS if x in info])) + data['quality_metrics'] = dict([(x, info[x]) for x in METRICS if x in info]) batch += [data] counter += 1 From 6b0b69228a3e55b56a0ba91254d2c13b2c2419d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 8 Feb 2019 11:20:47 +0100 Subject: [PATCH 031/360] other_names for a gene is now in a separate table Import changes done according to planned DB schema changes. --- .../data_importer/reference_set_importer.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/importer/data_importer/reference_set_importer.py b/scripts/importer/data_importer/reference_set_importer.py index 0d32b18ac..1cb1ea314 100644 --- a/scripts/importer/data_importer/reference_set_importer.py +++ b/scripts/importer/data_importer/reference_set_importer.py @@ -80,12 +80,10 @@ def _insert_genes(self): last_progress = 0 for i, gene in enumerate(self.genes): # As far as I know I can't batch insert these and still get the id's back - db_gene = db.Gene( reference_set = self.db_reference, gene_id = gene['gene_id'], name = gene['name'], full_name = gene.get('full_name', None), - other_names = gene.get('other_names', None), canonical_transcript = gene.get('canonical_transcript', None), chrom = gene['chrom'], start = gene['start'], @@ -99,6 +97,12 @@ def _insert_genes(self): db_gene.save() self.gene_db_ids[gene['gene_id']] = db_gene.id + try: + other_names = gene['other_names'] + if other_names: + self.add_other_names(db_gene.id, other_names) + except KeyError: + pass progress = i / len(self.genes) while progress - last_progress > 0.01: last_progress += 0.01 @@ -450,3 +454,10 @@ def start_import(self): self._insert_genes() self._insert_transcripts() self._insert_features() + + def add_other_names(self, gene_dbid:int, other_names:list): + if not gene_dbid or not other_names: + return + batch = [{'gene':gene_dbid, 'name':other_name} for other_name in other_names if other_name != '.' and other_name] + if not self.settings.dry_run and batch: + db.GeneOtherNames.insert_many(batch).execute() From f025a7282517043b35ebf2d75231b7d70a134549 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Fri, 1 Feb 2019 08:59:17 +0100 Subject: [PATCH 032/360] Updates db handler to match array-to-table changes in schema. 
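Note: with the genes and transcripts varchar[] arrays replaced by the link tables below, variant-to-gene lookups become ordinary indexed joins instead of array scans. A hedged sketch of a query against the new models (peewee 2.x style, as used in this file; the Ensembl id is only an example value):

    def variants_for_gene(ensembl_gene_id):
        # follow data.variant_genes from variants to genes
        return (Variant
                .select(Variant)
                .join(VariantGenes)
                .join(Gene)
                .where(Gene.gene_id == ensembl_gene_id))

    # usage sketch:
    # for variant in variants_for_gene('ENSG00000198727'):
    #     print(variant.variant_id)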
--- backend/db.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/backend/db.py b/backend/db.py index 87e09f5da..8f1c2420c 100644 --- a/backend/db.py +++ b/backend/db.py @@ -102,13 +102,19 @@ class Meta: gene_id = CharField(unique=True, max_length=15) name = CharField(db_column="gene_name", null=True) full_name = CharField(null=True) - other_names = ArrayField(CharField, null=True) canonical_transcript = CharField(null=True, max_length=15) chrom = CharField(max_length=10) start = IntegerField(db_column="start_pos") stop = IntegerField(db_column="end_pos") strand = EnumField(choices=['+','-']) +class GeneOtherNames(BaseModel): + class Meta: + db_table = 'gene_other_names' + schema = 'data' + + gene = ForeignKeyField(Gene, db_column="gene", related_name="other_names") + name = CharField(null=True) class Transcript(BaseModel): class Meta: @@ -268,8 +274,6 @@ class Meta: ref = CharField() alt = CharField() site_quality = FloatField() - genes = ArrayField(CharField) - transcripts = ArrayField(CharField) orig_alt_alleles = ArrayField(CharField) hom_count = IntegerField() allele_freq = FloatField() @@ -281,6 +285,24 @@ class Meta: vep_annotations = BinaryJSONField() +class VariantGenes(BaseModel): + class Meta: + db_table = 'variant_genes' + schema = 'data' + + variant = ForeignKeyField(Variant, db_column="variant", related_name="genes") + gene = ForeignKeyField(Gene, db_column="gene", related_name="variants") + + +class VariantTranscripts(BaseModel): + class Meta: + db_table = 'variant_transcripts' + schema = 'data' + + gene = ForeignKeyField(Variant, db_column="variant", related_name="transcripts") + transcript = ForeignKeyField(Transcript, db_column="transcript", related_name="variants") + + class Coverage(BaseModel): """ Coverage statistics are pre-calculated for each variant for a given From 280c7c37fb164d952e121ff4225ae1835b89b9bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 8 Feb 2019 11:46:13 +0100 Subject: [PATCH 033/360] fixed a confusingly named field (gene -> variant) --- backend/db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/db.py b/backend/db.py index 8f1c2420c..6d13bb4dd 100644 --- a/backend/db.py +++ b/backend/db.py @@ -299,7 +299,7 @@ class Meta: db_table = 'variant_transcripts' schema = 'data' - gene = ForeignKeyField(Variant, db_column="variant", related_name="transcripts") + variant = ForeignKeyField(Variant, db_column="variant", related_name="transcripts") transcript = ForeignKeyField(Transcript, db_column="transcript", related_name="variants") From f33eb977fb05d4eabefe9c8d64d59cfa3eb512ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 8 Feb 2019 13:36:08 +0100 Subject: [PATCH 034/360] conversion str->int and a forgotten .execute() for the last batch --- scripts/importer/data_importer/dbsnp_importer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/importer/data_importer/dbsnp_importer.py b/scripts/importer/data_importer/dbsnp_importer.py index 06a924e89..87a302b7e 100644 --- a/scripts/importer/data_importer/dbsnp_importer.py +++ b/scripts/importer/data_importer/dbsnp_importer.py @@ -88,6 +88,7 @@ def start_import(self): try: rsid, chrom, position = line.split("\t")[:3] + position = int(position) position += 1 # 0-indexed except ValueError: # we don't care for incomplete entries @@ -119,7 +120,7 @@ def start_import(self): db.database.commit() if batch: if not self.settings.dry_run: - db.DbSNP.insert_many(batch) 
+ db.DbSNP.insert_many(batch).execute() if self.total != None: self._tick(True) logging.info("Inserted {:,} valid lines in {}".format(counter, self._time_since(start))) From 14e3d34a697021e363594c14eba74f2a51831d23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 4 Feb 2019 12:30:27 +0100 Subject: [PATCH 035/360] hom_count parsed --- scripts/importer/data_importer/raw_data_importer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 309b62f7a..ba557270d 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -296,8 +296,7 @@ def _insert_variants(self): data['orig_alt_alleles'] = [ '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles ] - # I don't think this is needed. - #data['hom_count'] = + data['hom_count'] = int(info['AC_Hom']) data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt']) data['quality_metrics'] = dict([(x, info[x]) for x in METRICS if x in info]) batch += [data] From 6a62d327613b5cddf3e62d244818a7409bc47c34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 6 Feb 2019 08:04:27 +0100 Subject: [PATCH 036/360] attempt to add beacon-only import. untested --- .../data_importer/raw_data_importer.py | 29 +++++++++++++------ scripts/importer/importer.py | 4 +++ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index ba557270d..df09597fd 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -8,6 +8,7 @@ from datetime import datetime +import modules.browser.lookups import db from .data_importer import DataImporter @@ -54,6 +55,7 @@ def get_minimal_representation(pos, ref, alt): pos += 1 return pos, ref, alt + class RawDataImporter( DataImporter ): def __init__(self, settings): @@ -247,6 +249,7 @@ def _insert_variants(self): for filename in self.settings.variant_file: for line in self._open(filename): line = bytes(line).decode('utf8').strip() + if line.startswith("#"): # Check for some information that we need if line.startswith('##INFO=').split('|')) continue - if vep_field_names is None: - logging.error("VEP_field_names is empty. Make sure VCF header is present.") - sys.exit(1) + if not settings.beacon_only: + if vep_field_names is None: + logging.error("VEP_field_names is empty. 
Make sure VCF header is present.") + sys.exit(1) base = {} for i, item in enumerate(line.strip().split("\t")): @@ -274,11 +278,13 @@ def _insert_variants(self): continue consequence_array = info['CSQ'].split(',') if 'CSQ' in info else [] - annotations = [dict(zip(vep_field_names, x.split('|'))) for x in consequence_array if len(vep_field_names) == len(x.split('|'))] + if not settings.beacon_only: + annotations = [dict(zip(vep_field_names, x.split('|'))) for x in consequence_array if len(vep_field_names) == len(x.split('|'))] alt_alleles = base['alt'].split(",") for i, alt in enumerate(alt_alleles): - vep_annotations = [ann for ann in annotations if int(ann['ALLELE_NUM']) == i + 1] + if not settings.beacon_only: + vep_annotations = [ann for ann in annotations if int(ann['ALLELE_NUM']) == i + 1] data = dict(base) data['alt'] = alt @@ -289,14 +295,19 @@ def _insert_variants(self): if 'AF' in info and data['allele_num'] > 0: data['allele_freq'] = data['allele_count']/float(info['AN_Adj']) - data['vep_annotations'] = vep_annotations - data['genes'] = list({annotation['Gene'] for annotation in vep_annotations}) - data['transcripts'] = list({annotation['Feature'] for annotation in vep_annotations}) + if not settings.beacon_only: + data['vep_annotations'] = vep_annotations + + data['genes'] = list({annotation['Gene'] for annotation in vep_annotations}) + data['transcripts'] = list({annotation['Feature'] for annotation in vep_annotations}) data['orig_alt_alleles'] = [ '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles ] - data['hom_count'] = int(info['AC_Hom']) + try: + data['hom_count'] = int(info['AC_Hom']) + except KeyError: + pass # null is better than 0, as 0 has a meaning data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt']) data['quality_metrics'] = dict([(x, info[x]) for x in METRICS if x in info]) batch += [data] diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index e63396730..a17db2648 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -91,6 +91,10 @@ parser.add_argument("-q", "--quiet", action = "count", default = 0, help="Decrease output Verbosity.") + # Beacon-only variants + parser.add_argument("--beacon-only", action="store_true", + help="Variants are intended only for Beacon, loosening the requirements" + args = parser.parse_args() logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s", level = (5-args.verbose+args.quiet)*10, datefmt="%H:%M:%S") From b83e1e95b6bfdd16655252814258a963c8d2e677 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Wed, 6 Feb 2019 09:50:58 +0100 Subject: [PATCH 037/360] Fix typos etc --- scripts/importer/data_importer/raw_data_importer.py | 10 +++++----- scripts/importer/importer.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index df09597fd..0d5a955f1 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -260,7 +260,7 @@ def _insert_variants(self): gq_mids = map(float, line.split('Mids: ')[-1].strip('">').split('|')) continue - if not settings.beacon_only: + if not self.settings.beacon_only: if vep_field_names is None: logging.error("VEP_field_names is empty. 
Make sure VCF header is present.") sys.exit(1) @@ -278,12 +278,12 @@ def _insert_variants(self): continue consequence_array = info['CSQ'].split(',') if 'CSQ' in info else [] - if not settings.beacon_only: + if not self.settings.beacon_only: annotations = [dict(zip(vep_field_names, x.split('|'))) for x in consequence_array if len(vep_field_names) == len(x.split('|'))] alt_alleles = base['alt'].split(",") for i, alt in enumerate(alt_alleles): - if not settings.beacon_only: + if not self.settings.beacon_only: vep_annotations = [ann for ann in annotations if int(ann['ALLELE_NUM']) == i + 1] data = dict(base) @@ -295,9 +295,9 @@ def _insert_variants(self): if 'AF' in info and data['allele_num'] > 0: data['allele_freq'] = data['allele_count']/float(info['AN_Adj']) - if not settings.beacon_only: + if not self.settings.beacon_only: data['vep_annotations'] = vep_annotations - + data['genes'] = list({annotation['Gene'] for annotation in vep_annotations}) data['transcripts'] = list({annotation['Feature'] for annotation in vep_annotations}) diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index a17db2648..6d0204c88 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -93,8 +93,8 @@ # Beacon-only variants parser.add_argument("--beacon-only", action="store_true", - help="Variants are intended only for Beacon, loosening the requirements" - + help="Variants are intended only for Beacon, loosening the requirements") + args = parser.parse_args() logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s", level = (5-args.verbose+args.quiet)*10, datefmt="%H:%M:%S") From 830105947147683c14fed527f5ab59a7115926f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 6 Feb 2019 10:04:01 +0100 Subject: [PATCH 038/360] make some more stuff optional --- scripts/importer/data_importer/raw_data_importer.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 0d5a955f1..2ab0620f3 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -289,11 +289,12 @@ def _insert_variants(self): data = dict(base) data['alt'] = alt data['rsid'] = int(data['rsid'].strip('rs')) if data['rsid'].startswith('rs') else None - data['allele_num'] = int(info['AN_Adj']) - data['allele_count'] = int(info['AC_Adj'].split(',')[i]) - data['allele_freq'] = None - if 'AF' in info and data['allele_num'] > 0: - data['allele_freq'] = data['allele_count']/float(info['AN_Adj']) + if not self.settings.beacon_only: + data['allele_num'] = int(info['AN_Adj']) + data['allele_count'] = int(info['AC_Adj'].split(',')[i]) + data['allele_freq'] = None + if 'AF' in info and data['allele_num'] > 0: + data['allele_freq'] = data['allele_count']/float(info['AN_Adj']) if not self.settings.beacon_only: data['vep_annotations'] = vep_annotations From fc49cd6ad2bc38464c9fa49b92d6debfbd92fc3e Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Wed, 6 Feb 2019 16:24:28 +0100 Subject: [PATCH 039/360] more generic vcf parsing for the beacon --- .../data_importer/raw_data_importer.py | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 2ab0620f3..00f19b25c 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ 
b/scripts/importer/data_importer/raw_data_importer.py @@ -271,7 +271,8 @@ def _insert_variants(self): base['dataset_version'] = self.dataset_version if i < 7: base[header[i][0]] = header[i][1](item) - else: + elif i == 7 or not self.settings.beacon_only: + # only parse column 7 (maybe also for non-beacon-import?) info = dict([(x.split('=', 1)) if '=' in x else (x, x) for x in re.split(';(?=\w)', item)]) if base["chrom"].startswith('GL') or base["chrom"].startswith('MT'): @@ -289,12 +290,18 @@ def _insert_variants(self): data = dict(base) data['alt'] = alt data['rsid'] = int(data['rsid'].strip('rs')) if data['rsid'].startswith('rs') else None - if not self.settings.beacon_only: - data['allele_num'] = int(info['AN_Adj']) - data['allele_count'] = int(info['AC_Adj'].split(',')[i]) - data['allele_freq'] = None - if 'AF' in info and data['allele_num'] > 0: - data['allele_freq'] = data['allele_count']/float(info['AN_Adj']) + an, ac = 'AN_Adj', 'AC_Adj' + if self.settings.beacon_only and 'AN_Adj' not in info: + an = 'AN' + if self.settings.beacon_only and 'AC_Adj' not in info: + ac = 'AC' + + data['allele_num'] = int(info[an]) + data['allele_freq'] = None + data['allele_count'] = int(info[ac].split(',')[i]) + if 'AF' in info and data['allele_num'] > 0: + data['allele_freq'] = data['allele_count']/float(info[an]) + if not self.settings.beacon_only: data['vep_annotations'] = vep_annotations @@ -367,4 +374,5 @@ def prepare_data(self): def start_import(self): self._insert_variants() - self._insert_coverage() + if not self.settings.beacon_only: + self._insert_coverage() From 9a37e0a682e0945887b7bbe16fc8708572749e94 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Thu, 7 Feb 2019 17:10:54 +0100 Subject: [PATCH 040/360] Skip lines with non-standard rsid This should probably be handled more carefully --- scripts/importer/data_importer/raw_data_importer.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 00f19b25c..8eade6884 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -275,6 +275,7 @@ def _insert_variants(self): # only parse column 7 (maybe also for non-beacon-import?) info = dict([(x.split('=', 1)) if '=' in x else (x, x) for x in re.split(';(?=\w)', item)]) + if base["chrom"].startswith('GL') or base["chrom"].startswith('MT'): continue @@ -289,7 +290,14 @@ def _insert_variants(self): data = dict(base) data['alt'] = alt - data['rsid'] = int(data['rsid'].strip('rs')) if data['rsid'].startswith('rs') else None + try: + data['rsid'] = int(data['rsid'].strip('rs')) if data['rsid'].startswith('rs') else None + except: + if self.settings.beacon_only: + # ignore lines having double ids: "rs539868657;rs561027534" + continue + else: + raise an, ac = 'AN_Adj', 'AC_Adj' if self.settings.beacon_only and 'AN_Adj' not in info: an = 'AN' @@ -336,7 +344,7 @@ def _insert_variants(self): self._tick() last_progress += 0.01 if batch and not self.settings.dry_run: - db.Variant.insert_many(batch) + db.Variant.insert_many(batch).execute() self.dataset_version.num_variants = counter self.dataset_version.save() if self.counter['variants'] != None: From 05ac2ea95070aec03f744b4992ebe477ef82b235 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Fri, 8 Feb 2019 14:38:44 +0100 Subject: [PATCH 041/360] fix: Restore mysql-settings. 
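Note: the MySQL block comes back presumably because scripts/importer/data_importer/old_db_importer.py still migrates data out of the old MySQL instance, and settings.py raises KeyError at import time for any key missing from settings.json. The patch simply restores the required keys; if the legacy keys were ever meant to be optional, a guarded lookup would be one alternative (sketch only, defaults are illustrative):

    json_settings = {}  # parsed settings.json, as in backend/settings.py

    # hypothetical alternative: tolerate a settings.json without legacy keys
    mysql_host = json_settings.get("mysqlHost", "127.0.0.1")
    mysql_schema = json_settings.get("mysqlSchema", "swefreq")
    mysql_user = json_settings.get("mysqlUser", "swefreq")
    mysql_passwd = json_settings.get("mysqlPasswd", "")
    mysql_port = json_settings.get("mysqlPort", 3306)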
--- backend/settings.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backend/settings.py b/backend/settings.py index 225b25df9..dc6d9f94f 100644 --- a/backend/settings.py +++ b/backend/settings.py @@ -35,6 +35,13 @@ psql_user = json_settings["postgresUser"] psql_pass = json_settings["postgresPass"] +# MySql settings +mysql_host = json_settings["mysqlHost"] +mysql_schema = json_settings["mysqlSchema"] +mysql_user = json_settings["mysqlUser"] +mysql_passwd = json_settings["mysqlPasswd"] +mysql_port = json_settings["mysqlPort"] + # e-mail config mail_server = json_settings["mailServer"] from_address = json_settings["fromAddress"] From 51399227d3603324a2e7e683f16499403b475d72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 11 Feb 2019 11:14:21 +0100 Subject: [PATCH 042/360] variant parser updated for new db schema --- .../data_importer/raw_data_importer.py | 80 +++++++++++++++++-- 1 file changed, 72 insertions(+), 8 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 8eade6884..89b717a58 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -204,8 +204,8 @@ def _insert_coverage(self): # re-format coverage for batch for i, item in enumerate(batch): batch[i]['coverage'] = [item['cov1'], item['cov5'], item['cov10'], - item['cov15'],item['cov20'],item['cov25'], - item['cov30'],item['cov50'],item['cov100']] + item['cov15'], item['cov20'], item['cov25'], + item['cov30'], item['cov50'], item['cov100']] del batch[i]['cov1'] del batch[i]['cov5'] del batch[i]['cov10'] @@ -235,11 +235,17 @@ def _insert_coverage(self): logging.info("Inserted {} coverage records in {}".format(counter, self._time_since(start))) def _insert_variants(self): + """ + Insert variants from a VCF file + """ logging.info("Inserting variants") header = [("chrom",str), ("pos", int), ("rsid", str), ("ref", str), ("alt", str), ("site_quality", float), ("filter_string", str)] start = time.time() batch = [] + genes = [] + transcripts = [] + last_progress = 0.0 counter = 0 vep_field_names = None @@ -247,6 +253,9 @@ def _insert_variants(self): gq_mids = None with db.database.atomic(): for filename in self.settings.variant_file: + # gene/transctipt dbids; need to add support for version + self.refgenes = {gene.gene_id:gene.id for gene in db.Gene.select(db.Gene.id, db.Gene.gene_id)} + self.reftranscripts = {tran.transcript_id:tran.id for tran in db.Transcript.select(db.Transcript.id, db.Transcript.transcript_id)} for line in self._open(filename): line = bytes(line).decode('utf8').strip() @@ -309,13 +318,12 @@ def _insert_variants(self): data['allele_count'] = int(info[ac].split(',')[i]) if 'AF' in info and data['allele_num'] > 0: data['allele_freq'] = data['allele_count']/float(info[an]) - - + if not self.settings.beacon_only: data['vep_annotations'] = vep_annotations - data['genes'] = list({annotation['Gene'] for annotation in vep_annotations}) - data['transcripts'] = list({annotation['Feature'] for annotation in vep_annotations}) + genes.append(list({annotation['Gene'] for annotation in vep_annotations})) + transcripts.append(list({annotation['Feature'] for annotation in vep_annotations})) data['orig_alt_alleles'] = [ '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles @@ -324,6 +332,8 @@ def _insert_variants(self): data['hom_count'] = int(info['AC_Hom']) except KeyError: pass # null 
is better than 0, as 0 has a meaning + except ValueError: + data['hom_count'] = int(info['AC_Hom'].split(',')[0]) # parsing Swegen sometimes give e.g. 14,0 data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt']) data['quality_metrics'] = dict([(x, info[x]) for x in METRICS if x in info]) batch += [data] @@ -332,19 +342,56 @@ def _insert_variants(self): if len(batch) >= self.settings.batch_size: if not self.settings.dry_run: + if not self.settings.beacon_only: + try: + curr_id = db.Variant.select(db.Variant.id).order_by(db.Variant.id.desc()).limit(1).get().id + except db.Variant.DoesNotExist: + # assumes next id will be 1 if table is empty + curr_id = 0 + db.Variant.insert_many(batch).execute() + + if not self.settings.beacon_only: + last_id = db.Variant.select(db.Variant.id).order_by(db.Variant.id.desc()).limit(1).get().id + if last_id-curr_id == len(batch): + indexes = list(range(curr_id+1, last_id+1)) + else: + pass # slow version of finding indexes + self.add_variant_genes(indexes, genes) + self.add_variant_transcripts(indexes, transcripts) + + genes = [] + transcripts = [] batch = [] # Update progress if self.counter['variants'] != None: progress = counter / self.counter['variants'] while progress > last_progress + 0.01: if not last_progress: - logging.info("Estimated time to completion: {}".format(self._time_to(start, progress))) self._print_progress_bar() self._tick() last_progress += 0.01 + if batch and not self.settings.dry_run: - db.Variant.insert_many(batch).execute() + if not self.settings.dry_run: + if not self.settings.beacon_only: + try: + curr_id = db.Variant.select(db.Variant.id).order_by(db.Variant.id.desc()).limit(1).get().id + except db.Variant.DoesNotExist: + # assumes next id will be 1 if table is empty + curr_id = 0 + + db.Variant.insert_many(batch).execute() + + if not self.settings.beacon_only: + last_id = db.Variant.select(db.Variant.id).order_by(db.Variant.id.desc()).limit(1).id + if last_id-curr_id == len(batch): + indexes = list(range(curr_id+1, last_id+1)) + else: + pass # slow version of finding indexes + self.add_variant_genes(indexes, genes) + self.add_variant_transcripts(indexes, transcripts) + self.dataset_version.num_variants = counter self.dataset_version.save() if self.counter['variants'] != None: @@ -384,3 +431,20 @@ def start_import(self): self._insert_variants() if not self.settings.beacon_only: self._insert_coverage() + + def add_variant_genes(self, variant_indexes:list, genes_to_add:list): + batch = [] + for i in range(len(variant_indexes)): + connected_genes = [{'variant':variant_indexes[i], 'gene':self.refgenes[gene]} for gene in genes_to_add[i] if gene] + batch += connected_genes + if not self.settings.dry_run: + db.VariantGenes.insert_many(batch).execute() + + def add_variant_transcripts(self, variant_indexes:list, transcripts_to_add:list): + batch = [] + for i in range(len(variant_indexes)): + connected_transcripts = [{'variant':variant_indexes[i], 'transcript':self.reftranscripts[transcript]} + for transcript in transcripts_to_add[i] if transcript and transcript[:5] == 'ENST'] + batch += connected_transcripts + if not self.settings.dry_run: + db.VariantGenes.insert_many(batch).execute() From 5a35aa791ac46369868e695d939d1e0a70b1569e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 11 Feb 2019 11:17:14 +0100 Subject: [PATCH 043/360] and a forgotten get() --- scripts/importer/data_importer/raw_data_importer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 89b717a58..1b25a144c 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -318,7 +318,7 @@ def _insert_variants(self): data['allele_count'] = int(info[ac].split(',')[i]) if 'AF' in info and data['allele_num'] > 0: data['allele_freq'] = data['allele_count']/float(info[an]) - + if not self.settings.beacon_only: data['vep_annotations'] = vep_annotations @@ -384,7 +384,7 @@ def _insert_variants(self): db.Variant.insert_many(batch).execute() if not self.settings.beacon_only: - last_id = db.Variant.select(db.Variant.id).order_by(db.Variant.id.desc()).limit(1).id + last_id = db.Variant.select(db.Variant.id).order_by(db.Variant.id.desc()).limit(1).get().id if last_id-curr_id == len(batch): indexes = list(range(curr_id+1, last_id+1)) else: From 30b56ae9dbc84f52edf89ee5c17def1031a6b34d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 11 Feb 2019 12:25:58 +0100 Subject: [PATCH 044/360] a five-character string probably never matches a four-character one. oops. --- scripts/importer/data_importer/raw_data_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 1b25a144c..3f19ed019 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -444,7 +444,7 @@ def add_variant_transcripts(self, variant_indexes:list, transcripts_to_add:list) batch = [] for i in range(len(variant_indexes)): connected_transcripts = [{'variant':variant_indexes[i], 'transcript':self.reftranscripts[transcript]} - for transcript in transcripts_to_add[i] if transcript and transcript[:5] == 'ENST'] + for transcript in transcripts_to_add[i] if transcript and transcript[:4] == 'ENST'] batch += connected_transcripts if not self.settings.dry_run: db.VariantGenes.insert_many(batch).execute() From ce06d4955baa56bdff529668ea8fb7997bf395a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 11 Feb 2019 12:27:50 +0100 Subject: [PATCH 045/360] Wrong db --- scripts/importer/data_importer/raw_data_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 3f19ed019..824caa2ec 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -447,4 +447,4 @@ def add_variant_transcripts(self, variant_indexes:list, transcripts_to_add:list) for transcript in transcripts_to_add[i] if transcript and transcript[:4] == 'ENST'] batch += connected_transcripts if not self.settings.dry_run: - db.VariantGenes.insert_many(batch).execute() + db.VariantTranscripts.insert_many(batch).execute() From b376e3d9af79d798751806cc9df9e5333cb0d91f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 11 Feb 2019 12:48:04 +0100 Subject: [PATCH 046/360] a couple of pylint fixes --- .../data_importer/raw_data_importer.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 824caa2ec..5e4ff22e2 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ 
b/scripts/importer/data_importer/raw_data_importer.py @@ -45,19 +45,18 @@ def get_minimal_representation(pos, ref, alt): return pos, ref, alt else: # strip off identical suffixes - while(alt[-1] == ref[-1] and min(len(alt),len(ref)) > 1): + while(alt[-1] == ref[-1] and min(len(alt), len(ref)) > 1): alt = alt[:-1] ref = ref[:-1] # strip off identical prefixes and increment position - while(alt[0] == ref[0] and min(len(alt),len(ref)) > 1): + while(alt[0] == ref[0] and min(len(alt), len(ref)) > 1): alt = alt[1:] ref = ref[1:] pos += 1 return pos, ref, alt -class RawDataImporter( DataImporter ): - +class RawDataImporter(DataImporter): def __init__(self, settings): super().__init__(settings) self.dataset_version = None @@ -68,7 +67,7 @@ def _select_dataset_version(self): datasets = [] try: - ds = db.Dataset.get(short_name = self.settings.dataset) + ds = db.Dataset.get(short_name=self.settings.dataset) except db.Dataset.DoesNotExist: print("Select a Dataset to use with this data") for dataset in db.Dataset.select(): @@ -239,7 +238,7 @@ def _insert_variants(self): Insert variants from a VCF file """ logging.info("Inserting variants") - header = [("chrom",str), ("pos", int), ("rsid", str), ("ref", str), + header = [("chrom", str), ("pos", int), ("rsid", str), ("ref", str), ("alt", str), ("site_quality", float), ("filter_string", str)] start = time.time() batch = [] @@ -313,8 +312,8 @@ def _insert_variants(self): if self.settings.beacon_only and 'AC_Adj' not in info: ac = 'AC' - data['allele_num'] = int(info[an]) - data['allele_freq'] = None + data['allele_num'] = int(info[an]) + data['allele_freq'] = None data['allele_count'] = int(info[ac].split(',')[i]) if 'AF' in info and data['allele_num'] > 0: data['allele_freq'] = data['allele_count']/float(info[an]) @@ -334,8 +333,8 @@ def _insert_variants(self): pass # null is better than 0, as 0 has a meaning except ValueError: data['hom_count'] = int(info['AC_Hom'].split(',')[0]) # parsing Swegen sometimes give e.g. 
14,0 - data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt']) - data['quality_metrics'] = dict([(x, info[x]) for x in METRICS if x in info]) + data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt']) + data['quality_metrics'] = dict([(x, info[x]) for x in METRICS if x in info]) batch += [data] counter += 1 From 7376aa971a465f5efce6f5e6039201ffa699a12d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 11 Feb 2019 12:53:27 +0100 Subject: [PATCH 047/360] slow id lookup --- scripts/importer/data_importer/raw_data_importer.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 5e4ff22e2..d02973058 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -355,7 +355,9 @@ def _insert_variants(self): if last_id-curr_id == len(batch): indexes = list(range(curr_id+1, last_id+1)) else: - pass # slow version of finding indexes + indexes = [] + for entry in batch: + indexes.append(db.Variant.select(db.Variant.id).where(db.Variant.variant_id == entry['variant_id']).get().id) self.add_variant_genes(indexes, genes) self.add_variant_transcripts(indexes, transcripts) @@ -387,7 +389,9 @@ def _insert_variants(self): if last_id-curr_id == len(batch): indexes = list(range(curr_id+1, last_id+1)) else: - pass # slow version of finding indexes + indexes = [] + for entry in batch: + indexes.append(db.Variant.select(db.Variant.id).where(db.Variant.variant_id == entry['variant_id']).get().id) self.add_variant_genes(indexes, genes) self.add_variant_transcripts(indexes, transcripts) From 25b26ec1e8dfefd8ed7fc2c28ca6ec23e295363e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 11 Feb 2019 13:57:32 +0100 Subject: [PATCH 048/360] refdata changed to local objects instead of class members --- .../importer/data_importer/raw_data_importer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index d02973058..44b99be5b 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -253,8 +253,8 @@ def _insert_variants(self): with db.database.atomic(): for filename in self.settings.variant_file: # gene/transctipt dbids; need to add support for version - self.refgenes = {gene.gene_id:gene.id for gene in db.Gene.select(db.Gene.id, db.Gene.gene_id)} - self.reftranscripts = {tran.transcript_id:tran.id for tran in db.Transcript.select(db.Transcript.id, db.Transcript.transcript_id)} + refgenes = {gene.gene_id:gene.id for gene in db.Gene.select(db.Gene.id, db.Gene.gene_id)} + reftranscripts = {tran.transcript_id:tran.id for tran in db.Transcript.select(db.Transcript.id, db.Transcript.transcript_id)} for line in self._open(filename): line = bytes(line).decode('utf8').strip() @@ -392,8 +392,8 @@ def _insert_variants(self): indexes = [] for entry in batch: indexes.append(db.Variant.select(db.Variant.id).where(db.Variant.variant_id == entry['variant_id']).get().id) - self.add_variant_genes(indexes, genes) - self.add_variant_transcripts(indexes, transcripts) + self.add_variant_genes(indexes, genes, refgenes) + self.add_variant_transcripts(indexes, transcripts, reftranscripts) self.dataset_version.num_variants = 
counter self.dataset_version.save() @@ -435,18 +435,18 @@ def start_import(self): if not self.settings.beacon_only: self._insert_coverage() - def add_variant_genes(self, variant_indexes:list, genes_to_add:list): + def add_variant_genes(self, variant_indexes:list, genes_to_add:list, refgenes:dict): batch = [] for i in range(len(variant_indexes)): connected_genes = [{'variant':variant_indexes[i], 'gene':self.refgenes[gene]} for gene in genes_to_add[i] if gene] batch += connected_genes if not self.settings.dry_run: db.VariantGenes.insert_many(batch).execute() - - def add_variant_transcripts(self, variant_indexes:list, transcripts_to_add:list): + + def add_variant_transcripts(self, variant_indexes:list, transcripts_to_add:list, reftranscripts:dict): batch = [] for i in range(len(variant_indexes)): - connected_transcripts = [{'variant':variant_indexes[i], 'transcript':self.reftranscripts[transcript]} + connected_transcripts = [{'variant':variant_indexes[i], 'transcript':reftranscripts[transcript]} for transcript in transcripts_to_add[i] if transcript and transcript[:4] == 'ENST'] batch += connected_transcripts if not self.settings.dry_run: From f3381c5d298a212c7212b22986ed04ecc531ecf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 11 Feb 2019 13:58:42 +0100 Subject: [PATCH 049/360] forgot a self --- scripts/importer/data_importer/raw_data_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 44b99be5b..6b359fe13 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -438,7 +438,7 @@ def start_import(self): def add_variant_genes(self, variant_indexes:list, genes_to_add:list, refgenes:dict): batch = [] for i in range(len(variant_indexes)): - connected_genes = [{'variant':variant_indexes[i], 'gene':self.refgenes[gene]} for gene in genes_to_add[i] if gene] + connected_genes = [{'variant':variant_indexes[i], 'gene':refgenes[gene]} for gene in genes_to_add[i] if gene] batch += connected_genes if not self.settings.dry_run: db.VariantGenes.insert_many(batch).execute() From 556e26c4cfbb6af87bd200b383f3ec99089471f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 11 Feb 2019 14:03:06 +0100 Subject: [PATCH 050/360] update parameters for function call --- scripts/importer/data_importer/raw_data_importer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 6b359fe13..54f5b222e 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -358,8 +358,8 @@ def _insert_variants(self): indexes = [] for entry in batch: indexes.append(db.Variant.select(db.Variant.id).where(db.Variant.variant_id == entry['variant_id']).get().id) - self.add_variant_genes(indexes, genes) - self.add_variant_transcripts(indexes, transcripts) + self.add_variant_genes(indexes, genes, refgenes) + self.add_variant_transcripts(indexes, transcripts, reftranscripts) genes = [] transcripts = [] From 19775bef280d1f42f3ee3b36ad5ad41fb0e617ee Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Wed, 13 Feb 2019 14:35:03 +0100 Subject: [PATCH 051/360] feat: Change mysql database to postgres database This removes the MySQL schema and replaces it with postgres schemas, as well as updating the test 
dummy data. --- sql/beacon_schema.sql | 47 ------ sql/data_schema.sql | 68 ++++++--- sql/schema.dot | 108 -------------- sql/schema.dot.png | Bin 227009 -> 0 bytes sql/swefreq.sql | 262 --------------------------------- sql/user_schema.sql | 68 +++++++++ test/data/clean_dummy_data.sql | 38 ++--- test/data/load_dummy_data.sql | 73 +++++---- 8 files changed, 186 insertions(+), 478 deletions(-) delete mode 100644 sql/beacon_schema.sql delete mode 100644 sql/schema.dot delete mode 100644 sql/schema.dot.png delete mode 100644 sql/swefreq.sql diff --git a/sql/beacon_schema.sql b/sql/beacon_schema.sql deleted file mode 100644 index a43a41af6..000000000 --- a/sql/beacon_schema.sql +++ /dev/null @@ -1,47 +0,0 @@ -------------------------------------------------------------------------------- --- --- - --------------------------------------------------------------------------------- --- Beacon consent codes. --- --- These tables are only used by the beacon, and are thus copied directly from --- the default beacon schema. - -CREATE TABLE beacon.consent_code_category_table ( - id serial PRIMARY KEY, - name character varying(11) -); - -INSERT INTO beacon.consent_code_category_table(name) VALUES ('PRIMARY'); -INSERT INTO beacon.consent_code_category_table(name) VALUES ('SECONDARY'); -INSERT INTO beacon.consent_code_category_table(name) VALUES ('REQUIREMENT'); - -CREATE TABLE beacon.consent_code_table ( - id serial PRIMARY KEY, - name character varying(100) NOT NULL, - abbr character varying(20) NOT NULL, - description character varying(400) NOT NULL, - additional_constraint_required boolean NOT NULL, - category_id int NOT NULL REFERENCES beacon.consent_code_category_table(id) -); - -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('No restrictions', 'NRES', 'No restrictions on data use.', false, 1); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('General research use and clinical care', 'GRU(CC)', 'For health/medical/biomedical purposes, including the study of population origins or ancestry.', false, 1); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Health/medical/biomedical research and clinical care', 'HMB(CC)', 'Use of the data is limited to health/medical/biomedical purposes; does not include the study of population origins or ancestry.', false, 1); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Disease-specific research and clinical care', 'DS-[XX](CC)', 'Use of the data must be related to [disease].', true, 1); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Population origins/ancestry research', 'POA', 'Use of the data is limited to the study of population origins or ancestry.', false, 1); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Oher research-specific restrictions', 'RS-[XX]', 'Use of the data is limited to studies of [research type] (e.g., pediatric research).', true, 2); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Research use only', 'RUO', 'Use of data is limited to research purposes (e.g., does not include its use in clinical care).', false, 2); -INSERT INTO beacon.consent_code_table(name, abbr, 
description, additional_constraint_required, category_id) VALUES ('No “general methods” research', 'NMDS', 'Use of the data includes methods development research (e.g., development of software or algorithms) ONLY within the bounds of other data use limitations.', false, 2); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Genetic studies only', 'GSO', 'Use of the data is limited to genetic studies only (i.e., no “phenotype-only” research).', false, 2); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Not-for-profit use only', 'NPU', 'Use of the data is limited to not-for-profit organizations.', false, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Publication required', 'PUB', 'Requestor agrees to make results of studies using the data available to the larger scientific community.', false, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Collaboration required', 'COL-[XX]', 'Requestor must agree to collaboration with the primary study investigator(s).', true, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Ethics approval required', 'IRB', 'Requestor must provide documentation of local IRB/REC approval.', false, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Geographical restrictions', 'GS-[XX]', 'Use of the data is limited to within [geographic region].', true, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Publication moratorium/embargo', 'MOR-[XX]', 'Requestor agrees not to publish results of studies until [date].', true, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Time limits on use', 'TS-[XX]', 'Use of data is approved for [x months].', true, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('User-specific restrictions', 'US', 'Use of data is limited to use by approved users.', false, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Project-specific restrictions', 'PS', 'Use of data is limited to use within an approved project.', false, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Institution-specific restrictions', 'IS', 'Use of data is limited to use within an approved institution.', false, 3); diff --git a/sql/data_schema.sql b/sql/data_schema.sql index 94fcd2d42..3b192034c 100644 --- a/sql/data_schema.sql +++ b/sql/data_schema.sql @@ -5,6 +5,7 @@ -- (reference-data, variants, and coverage) the goes into the Swefreq system. -- -- -- -------------------------------------------------------------------------------- +CREATE SCHEMA IF NOT EXISTS data; -------------------------------------------------------------------------------- -- dbSNP tables. 
diff --git a/sql/data_schema.sql b/sql/data_schema.sql
index 94fcd2d42..3b192034c 100644
--- a/sql/data_schema.sql
+++ b/sql/data_schema.sql
@@ -5,6 +5,7 @@
 -- (reference-data, variants, and coverage) that goes into the Swefreq system.
 --
 --------------------------------------------------------------------------------
+CREATE SCHEMA IF NOT EXISTS data;
 
 --------------------------------------------------------------------------------
 -- dbSNP tables.
@@ -22,13 +23,12 @@ CREATE TABLE IF NOT EXISTS data.dbsnp_versions (
 CREATE TABLE IF NOT EXISTS data.dbsnp (
     id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
     version_id integer REFERENCES data.dbsnp_versions,
-    rsid bigint UNIQUE,
+    rsid bigint,
     chrom varchar(10),
-    pos integer
+    pos integer,
+    UNIQUE(version_id, rsid)
 );
 
-CREATE INDEX IF NOT EXISTS rsid_index ON data.dbsnp USING hash (rsid);
-
 --------------------------------------------------------------------------------
 -- Reference Set tables
 --
@@ -36,6 +36,7 @@ CREATE INDEX IF NOT EXISTS rsid_index ON data.dbsnp USING hash (rsid);
 CREATE TABLE IF NOT EXISTS data.reference_sets (
     id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
     dbsnp_version integer REFERENCES data.dbsnp_versions,
+    reference_build varchar UNIQUE, -- should be ^(GRCh[0-9]+([.]p[0-9]+)?)$
     reference_name varchar,
     ensembl_version varchar,
     gencode_version varchar,
@@ -49,7 +50,6 @@ CREATE TABLE IF NOT EXISTS data.genes (
     gene_id varchar(15),
     gene_name varchar,
     full_name varchar,
-    other_names varchar[],
     canonical_transcript varchar(15),
     chrom varchar(10),
     start_pos integer,
@@ -57,6 +57,12 @@ CREATE TABLE IF NOT EXISTS data.genes (
     strand varchar
 );
 
+CREATE TABLE IF NOT EXISTS data.gene_other_names (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    gene integer REFERENCES data.genes,
+    name varchar
+);
+
 CREATE TABLE IF NOT EXISTS data.transcripts (
     id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
     transcript_id varchar(15),
@@ -111,6 +117,7 @@ CREATE TABLE IF NOT EXISTS data.datasets (
     full_name varchar(100) NOT NULL,
     browser_uri varchar(200) DEFAULT NULL,
     beacon_uri varchar(200) DEFAULT NULL,
+    beacon_description text DEFAULT NULL,
     avg_seq_depth real DEFAULT NULL,
     seq_type varchar(50) DEFAULT NULL,
     seq_tech varchar(50) DEFAULT NULL,
@@ -143,7 +150,9 @@ CREATE TABLE IF NOT EXISTS data.dataset_versions (
     available_from timestamp DEFAULT current_timestamp,
     ref_doi varchar(100) DEFAULT NULL,
     data_contact_name varchar(100) DEFAULT NULL,
-    data_contact_link varchar(100) DEFAULT NULL
+    data_contact_link varchar(100) DEFAULT NULL,
+    num_variants integer DEFAULT NULL,
+    coverage_levels integer[] DEFAULT NULL
 );
 
 CREATE TABLE IF NOT EXISTS data.dataset_files (
@@ -161,14 +170,13 @@ CREATE TABLE IF NOT EXISTS data.dataset_files (
 CREATE TABLE IF NOT EXISTS data.variants (
     id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
     dataset_version integer REFERENCES data.dataset_versions,
+    variant_type varchar, -- variants go here `"enum": ["DEL", "INS", "DUP", "INV", "CNV", "SNP", "DUP:TANDEM", "DEL:ME", "INS:ME"]`
     rsid integer,
     chrom varchar(10),
     pos integer,
     ref varchar,
     alt varchar,
     site_quality real,
-    genes varchar[],
-    transcripts varchar[],
     orig_alt_alleles varchar[],
     hom_count integer,
     allele_freq real,
@@ -180,6 +188,18 @@ CREATE TABLE IF NOT EXISTS data.variants (
     vep_annotations jsonb
 );
 
+CREATE TABLE IF NOT EXISTS data.variant_genes (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    variant integer REFERENCES data.variants,
+    gene integer REFERENCES data.genes
+);
+
+CREATE TABLE IF NOT EXISTS data.variant_transcripts (
+    id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
+    variant integer REFERENCES data.variants,
+    transcript integer REFERENCES data.transcripts
+);
+
 CREATE TABLE IF NOT EXISTS data.coverage (
     id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
     dataset_version integer REFERENCES data.dataset_versions,
@@ -187,15 +207,7 @@ CREATE TABLE IF NOT EXISTS data.coverage (
     chrom varchar(10),
     pos integer,
     mean real,
     median real,
-    cov1 real,
-    cov5 real,
-    cov10 real,
-    cov15 real,
-    cov20 real,
-    cov25 real,
-    cov30 real,
-    cov50 real,
-    cov100 real
+    coverage real[]
 );
 
 CREATE TABLE IF NOT EXISTS data.metrics (
@@ -205,3 +217,25 @@ CREATE TABLE IF NOT EXISTS data.metrics (
     mids integer[],
     hist integer
 );
+
+--------------------------------------------------------------------------------
+-- Data views
+--
+
+CREATE OR REPLACE VIEW data.dataset_version_current AS
+    SELECT * FROM data.dataset_versions
+     WHERE (dataset, id)
+        IN (SELECT dataset, MAX(id) FROM data.dataset_versions
+             WHERE available_from < now()
+             GROUP BY dataset);
+
+--------------------------------------------------------------------------------
+-- Indexes
+--
+
+CREATE INDEX variant_pos ON data.variants (pos);
+CREATE INDEX dbsnp_chrom_pos ON data.dbsnp (chrom, pos);
+CREATE INDEX coverage_pos_chrom ON data.coverage (chrom, pos);
+CREATE INDEX variants_rsid ON data.variants (rsid);
+CREATE INDEX variants_chrom_pos ON data.variants (chrom, pos);
+CREATE INDEX transcripts_transcript_id ON data.transcripts (transcript_id);
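Three of the changes above are easy to misread in diff form: the genes/transcripts varchar[] columns on data.variants give way to the variant_genes/variant_transcripts join tables, the per-threshold cov* columns on data.coverage are folded into a single coverage real[] whose slots are described by the new dataset_versions.coverage_levels, and dataset_version_current picks the newest already-available version per dataset. A few illustrative queries against the new schema (assumed usage, not part of the patch):

    -- Illustrative only: genes for a variant now come from a join table
    -- rather than a varchar[] column on the variant row.
    SELECT g.gene_name
      FROM data.variants v
      JOIN data.variant_genes vg ON vg.variant = v.id
      JOIN data.genes g ON g.id = vg.gene
     WHERE v.id = 1;

    -- Illustrative only: read the 20x coverage fraction by array slot,
    -- using coverage_levels to find which slot holds the 20x value.
    SELECT c.chrom, c.pos,
           c.coverage[array_position(dv.coverage_levels, 20)] AS cov20
      FROM data.coverage c
      JOIN data.dataset_versions dv ON dv.id = c.dataset_version
     WHERE c.chrom = '1' AND c.pos BETWEEN 1000000 AND 1000100;

    -- Illustrative only: the view hides versions whose available_from
    -- lies in the future and keeps the newest remaining one per dataset.
    SELECT * FROM data.dataset_version_current WHERE dataset = 1;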
diff --git a/sql/schema.dot b/sql/schema.dot
deleted file mode 100644
index c085353e6..000000000
--- a/sql/schema.dot
+++ /dev/null
@@ -1,108 +0,0 @@
-digraph {
-    ranksep=1 nodesep=1 rankdir=BT
-    node [ shape = none ]
-
-    user [ label=<
-        <table>
-            <tr><td colspan="2">user</td></tr>
-            <tr><td port="pk">user_pk</td><td>int</td></tr>
-            <tr><td>name</td><td>text|null</td></tr>
-            <tr><td>email</td><td>text</td></tr>
-            <tr><td>affiliation</td><td>text|null</td></tr>
-            <tr><td>country</td><td>text|null</td></tr>
-        </table>
-    > ];
-
-    dataset [ label=<
-        <table>
-            <tr><td colspan="2">dataset</td></tr>
-            <tr><td port="pk">dataset_pk</td><td>int</td></tr>
-            <tr><td port="sample_set_pk">sample_set_pk</td><td>int</td></tr>
-            <tr><td>name</td><td>text</td></tr>
-            <tr><td>browser_uri</td><td>text|null</td></tr>
-            <tr><td>beacon_uri</td><td>text|null</td></tr>
-            <tr><td>avg_seq_depth</td><td>float|null</td></tr>
-            <tr><td>seq_type</td><td>text|null</td></tr>
-            <tr><td>seq_tech</td><td>text|null</td></tr>
-            <tr><td>seq_center</td><td>text|null</td></tr>
-            <tr><td>dataset_size</td><td>uint|null</td></tr>
-        </table>
-    > ];
-
-    user_log [ label=<
-        <table>
-            <tr><td colspan="2">user_log</td></tr>
-            <tr><td>user_log_pk</td><td>int</td></tr>
-            <tr><td port="user_pk">user_pk</td><td>int</td></tr>
-            <tr><td port="dataset">dataset_pk</td><td>int</td></tr>
-            <tr><td>action</td><td>enum|null</td></tr>
-            <tr><td>ts</td><td>timestamp</td></tr>
-        </table>
-    > ];
-
-    dataset_access [ label=<
-        <table>
-            <tr><td colspan="2">dataset_access</td></tr>
-            <tr><td>dataset_access_pk</td><td>int</td></tr>
-            <tr><td port="dataset">dataset_pk</td><td>int</td></tr>
-            <tr><td port="user">user_pk</td><td>int</td></tr>
-            <tr><td>wants_newsletter</td><td>bool|false</td></tr>
-            <tr><td>is_admin</td><td>bool|false</td></tr>
-            <tr><td>has_consented</td><td>bool|false</td></tr>
-            <tr><td>has_access</td><td>bool|false</td></tr>
-        </table>
-    > ];
-
-    dataset_logo [ label=<
-        <table>
-            <tr><td colspan="2">dataset_logo</td></tr>
-            <tr><td>dataset_logo_pk</td><td>int</td></tr>
-            <tr><td port="dataset">dataset_pk</td><td>int</td></tr>
-            <tr><td>mimetype</td><td>text</td></tr>
-            <tr><td>data</td><td>blob</td></tr>
-        </table>
-    > ];
-
-    dataset_version [ label=<
-        <table>
-            <tr><td colspan="2">dataset_version</td></tr>
-            <tr><td port="pk">dataset_version_pk</td><td>int</td></tr>
-            <tr><td port="dataset">dataset_pk</td><td>int</td></tr>
-            <tr><td>version</td><td>text</td></tr>
-            <tr><td>ts</td><td>timestamp</td></tr>
-            <tr><td>is_current</td><td>bool|true</td></tr>
-            <tr><td>description</td><td>text</td></tr>
-            <tr><td>terms</td><td>text</td></tr>
-            <tr><td>var_call_ref</td><td>text|null</td></tr>
-        </table>
-    > ];
-
-    dataset_file [ label=<
-        <table>
-            <tr><td colspan="2">dataset_file</td></tr>
-            <tr><td>dataset_file_pk</td><td>int</td></tr>
-            <tr><td port="dv">dataset_version_pk</td><td>int</td></tr>
-            <tr><td>name</td><td>text</td></tr>
-            <tr><td>uri</td><td>text</td></tr>
-        </table>
-    > ];
-
-    study [ label=<
-        <table>
-            <tr><td colspan="2">study</td></tr>
-            <tr><td port="pk">study_pk</td><td>int</td></tr>
-            <tr><td>pi_name</td><td>text</td></tr>
-            <tr><td>pi_email</td><td>text</td></tr>
-            <tr><td>contact_name</td><td>text</td></tr>
-            <tr><td>contact_email</td><td>text</td></tr>
-            <tr><td>title</td><td>text</td></tr>
-            <tr><td>description</td><td>text|null</td></tr>
-            <tr><td>ts</td><td>timestamp</td></tr>
-            <tr><td>ref_doi</td><td>text|null</td></tr>
-        </table>
-    > ];
-
-    sample_set [ label=<
-        <table>
-            <tr><td colspan="2">sample_set</td></tr>
-            <tr><td port="pk">sample_set_pk</td><td>int</td></tr>
-            <tr><td port="study_pk">study_pk</td><td>int</td></tr>
-            <tr><td>ethnicity</td><td>text|null</td></tr>
-            <tr><td>collection</td><td>text|null</td></tr>
-            <tr><td>sample_size</td><td>int</td></tr>
-        </table>
-    > ];
-
-    sample_set:study_pk -> study:pk;
-    dataset:sample_set_pk -> sample_set:pk;
-    user_log:user_pk -> user:pk;
-    dataset_access:user -> user:pk
-    dataset_access:dataset -> dataset:pk
-    dataset_version:dataset -> dataset:pk
-    dataset_file:dv -> dataset_version:pk
-    user_log:dataset -> dataset:pk
-    dataset_logo:dataset -> dataset:pk
-}
diff --git a/sql/schema.dot.png b/sql/schema.dot.png
deleted file mode 100644
index aa918f5ea5434732c3be99dd20f719bb1cd8e822..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
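One thing the deleted diagram makes visible: the old dataset_version table stored currency as an is_current flag, whereas the new data schema derives it from available_from through the dataset_version_current view added earlier in this patch. A before/after sketch, for orientation only:

    -- Old schema (per the deleted diagram above): a stored flag.
    --   SELECT * FROM dataset_version WHERE is_current;
    -- New schema: currency is computed from available_from by the view.
    SELECT * FROM data.dataset_version_current;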
zd-Tuq+xm51L!CjZ&Mtaig<^2Pj2Q-;Z#Dzc?EBlw0ImvVC(({CL%`zLu;JfGqhjkHfjQA^Ua0DQSdOygmCT0o(l{@UP9s4n2y`r}d};ctcBG#aug zV=zihTn#H|lguj4!E1{0GoO2KXqRa`K!^bWB2*QoIMoV^ZvMnEZ^F=BTetcls-&t4 z8}lZMLYrx~4Jd==r&VrJp{WKPq1jpindi(w1OhrEarxTc?+9_sSy%v=a%Y%d{X6~h zdLN2Eow9zvWdve$z5o@(l~g20bLwnunzHsjfUiC9>ajuohk6HC+L3&+p?eNjz@tiAgimeDG$w&og zRD2Tu_)YQQ7@s_O=l2akwEhmqrUp3-;TpU8V#E6NR-j9SgJz_XDp(t!YHhh71>hF7 zR#-HjV-8t{%$`rK-?OJKXU&V)e^7}dW}bM?M}Hr;{98<#<3Gs4*PRe=myoC_*KVHSg&SmL5V8GwONeg(o}?~){KE;PpPWks$T3+`8jCx&%Ju6Q4r72Dm!O z$;pSZVoo+pQ2mAteYsa~y9_pJ!L|I95?_EjPC(w02mLnTYNiGuu)LSBQfKV#la389 zSz8sj-y}A67XX+Cpq|%wZv|X$;_W0AWo_(2Qsme zzjd9;ox%m-i`z>WQ$&lCD9jzsEHGA*A3xS(1^aTBVyo!Lp2fLQmn(03SNEn}Ga%+A z(-6I&ntUr7HfdaYd4Fk$0AvN^Gk#oOlKJ{hoY;(WT5Y)9b0oBq@InuB>*hkDPEewU zb4dPuBUWws+?coS+^gvZnc~9H(C+2KlO4uU8ZAB7^&gPbE=_&2%(D9(DHCqoycx6< z8tXFUM~68xxuwv{NW4qH{8xAUWXSc+J<~?Hy)28zFVcndkv0w1aG zeOg=L@#r)H!V&rsxXC0I}!*e(EWFU

{8nB67N zvDCDzt#>hZ-sCD`-bLwlF{A)s9y@WO&^(ltCG_D`w7+m?H-HUu!J)NX0*xJx`b5RV zbVeqS$V|H~aCnflm-iH|PB?nh37bp7g!HtuPD%;F-N5+hw?&luJFwlyL`LSP$@5|g z>RcCbXmkO!9(s&}d4FmK+xpbDnZdd3ZCp#cOqmCDh%fR-*7hxK+~!87w;YXtz1 zDGpE%UBxa~g6p$j0=nUB?Mn_>Zy(Eit*#&2gE+r<^JYSkwp^mYR;T;=SzYAo8H>aT z)2_Vk&rg1QPZv7mg0-2N<}&NS`-+ZC63r8?%uDAuv!`AlN8w08^*TDO>~~35Siw6E zHA{bS`*x9t=NSX)5qAi$D`0O@HfPWfE34Jt$}jvVbY_uGW1AOFh#PZkHRFp`E%ip@ zr{**3$@z05x(D=cai6O}t7+3)snNCEV0%SU#fF}f&0qi{`>L92``@^CZ_76#SDnk& zV}sB1OI!Z!^~9N+=*~&6ii_82>gaCTOfACqL1(k{(RM15PKu+3JLXM|5#3O{ z!!Cg)c&}NAY0_T4rZslq&UQ|{%8v+#O%HhHQwx7Ge`if>1thH23l1~)0U-y z(^7*5?h`3xphg=vp%&Kf-CK`S;pg3hQ%Vf|v2%~_+&T8hxI5l`k6s1i9ty==kn z#rNC%D;c?NchaGlm;kwZ>B%2`K0zTU$+BJBbno66WX=*aQ%Qzi;d{bV%(87y2Tv6z z{#0T$YbnwyDEZO>7#pY3_}63o!yq;9-1#?hF)vcGDxxOs&-urP)9>Q$qMO9))$h_p z3kkn-5_@pntZ}oghw%p>UpjW`^enF&3;4>6q5w04O~>_4a}wXq&Mum;9Bka|R>#8` zfDfREEA{)W_K2syCW*f<04WrdM12c8PD?y{2+%N9ffJQ$NQa5yRq$A_;Kll8O0>B< zF9U*L?M>r`H6^4m6{(S%*8;@j#BloY8p^vP*iE6y<})eLw;Z0AeWnMiDpBqF9NwHa z&P7xGSFc`~Ua;2t!e1EXK69nZqQF45;6VrjBHf7(+G(_7I7>6+e;y1NsMG~ip@13y zg@Hd%<=#YBu+tVGIHx*4rJBi$&TzeV`*yN42F|H9do6mW@E})Ow`!m}YPn7Ri zlL7f2&4*z{e&Mlq4mCVH@#S0gGb3qFe}l>s@Cx277%oha;Od_YYWp`AK{dTV(ZF=|FD|U`eKjK7xL(_~}uDMTiS>uR^ zh$JgxD#8H}rKFg(`*@z0EbSN)Pah~l$e>eY-Bj{vi^%|s8F8i7Po6c~x<>FL^)|SG zd92ck`a~~(SBE>6#gil|O1bYDDY*eY1>Oia*#7XE8n(MrU8%|zY}@~L!a3ZsVA|n* z`>FtvbtAZ#-pZf8aB$Doe`?T0uHqA%+_$#=pRc&vS3>V{3(GTZ!f6FmEp3(ZlI;w% zo7*DE<`S*lQ>KDIrMcj91`bOu3Rb5T>NX4ohiVtk_^swb;h58dR0s$ zy1V}nD63hs4X6cm_zr>{QQ*LS3u1BqL5;>r_V`}r=I1lZy$Q2XQLG(vzc1)}RKNS6 zkyKGgVP9h5-*vK`JY^sR*X&q5C7-591Wc4#iqc`iSw;EVo5!I`|3KEkLE%T4LEc?e zHvRCCBQ4paimhffDja!m^cs#O<=4>pfumV8ukZ0>15CeMle#S%*3=;EmExwKY-$hk z<3W9#@Yy~pE?^(UnuKyE!!ryYJGLe{0`WgdTb*~*Tz?^LzYf_N5+cuvLLE14BeCp) zhL|@1`Mu2YN}3(x8~n#Z9*zcye+m+*0mP*paar7}DYwE;)Q`h8#@N*SpA>D)5pQFs zR*g2_On6~UI++~oFI+sv3wCD|2~NeCCn`2veCzr4U3%;Vg=o@ao;sygk$rXU_-B;) zz_eCuDqz~wyk9J^l3x=pbCBBYmv%ld^wp4soNQm`>L8TUU0T~`{9Oy6ExiEh3l4`T zX)khy^Dv7No2uFkZ~(+YAT1g7+j}kU1Oo<|aVcmF$=zt4{X`zBghfQGLQ}=F3}aQ& zU}D|!QsD<(r%z8Ie8$2qM6xr;kd*8wN%{qTrJ^@X-1nYZK-gmr(?+%G)$^x6Z0@!m0Nj-3SY&} za3=(V97(^^DT{QNknBYC*aWtem%um~cQn;Ub3#!~lwCTaPE2OSC4MIJ!VbYhrsaY4%*C;9zSzy1SAMcf%Ph^gdk`f5a@%U zZ!Uwn>DO0he^;U4c;hw$5SPM1BHLJ1;XI-SE|qMTvCRx`*EP04&c}Fmo%u2dR$7+M zyE3p?NMA7Mpkni(<9)`RluWg-u&}8+Dme0l>cxH@IAz+jY@MaLz1{1K9y|6LeD-!I zjd85*&K)~OF7%?n$wfQ_yrHd{EGMRAZkM3d=FMazmzViopj;DjD_;~9rSsu-oH%j9 z!T)J-JV9@VP0~4&chI}2HvhO6g~|0BH)eX=qPib4e*A8TzkuCjUVKCHHj~5j(Fc@K z2jk*0Zn~q=L8qAd0^8YCUSMtIHM2n;0>u%&>l|@rSRaInlnk8|sSj@w+swrHjbj`f zMAm0NcI<<&UD)c=II(hR7<1WPxj=t8Jk2pY+pcYT_1g9Rv)93W`!3Q`X8$*}SSV@Gavgg=(n+@8h|9;H!Z(rsIZ6DrtGN*Cm{Py9)ub_#(@oi7r z25Vb=ceH_v?%H7xbf4Re5ES;_x3ao3`(V)7qKA4%+yVw!jz1)9)ScS1-)Tpz$}w-m z4&k)y&tv6wnuNosbPF$xbterM3=OzZ;el4;bZE4RA<8f-PT z|FQcy4|jf9>{iNY9gaki*8MAX?+*($C7wJv5DkVzPIys^om%*Ik3&)ZhVd?YHlX~7 zw_AsB!+E@S%+@ViSjpj5y{MH~B4COVV_{=sGu+X!HuiSO;{j4|SyiT0)35&8{_N6~ z1pi$&k*kiz`~!MUOBR-wVm8*{ufjSlO|90r7AZ)bvtK^Fu?4cWDXkI z<&~V>6D}+Zdi2PVYOfg@#ud@g2Fw!mfuefGE4`hWsWRNIRd2>kVB2bGuZ{849~~=% z+WVB$l!Loo_d7SP)%5j?87L>(QSzWnKYJ@-bg7Fz|JgeGQ&^(*;^hS6tia$%BR1;o z{ppHGHvtMRnGIZw3G#P4*O`F}mw&T1=u{=xVKR#3Hev_{M&Z$7lG|{G5NSU?zoJyq z=4Al-meegOVfS0KP$&Q*ZY>5omUNl5**c*E5zHhlnl&J8_4BH>W^JN}a_Sa)FYhPd zBkvQ1OA42;2YTUwbw8(RaIiZw=1GQ>^+mg0;uW`$_?i|IRSjZcB##{!l zQ&SH~xMpNn^1 z`(m>L2n_fgQO`xtfL^p{#+8HQ{u+sHSFSH05qvrAWL>F8>1pbWOQJd>NvH*SS`2FB zBA>CkF9ZCp&85uJ z_H=2LdZj{q14v2U6O`XEZMcJjx}9Ct*mOX~NrWy)<$}`I+AnRak;74iib(49@AwE- zWfVnla|-6iPLcmcvHRh1_YMb<2N};un=M*3kTlmz{QG+0y+^`+U;$cbvKnw)MQ{Xr 
zn=XwoTf(Y#dEL7bNw$6*`Ffrqz{y$>`%_{f6P@|qS>yBZ4{sjpvMAiPy}A3ZLWG?G z6VH1!hN_d)5^1==`AEH}D-ar6v}UbZbHI8CK$Nlu1A;#V^2Li6i_abUhkik4%V5YU zA(Ap^Vau&iMeH$>i;39C)0ZO(m{MDp(vIBlhkkmxK!` zjA{#RP698_X#E${!qxF-{wLGIy7|^rb^qD1kiA$_0TZl>@*&>xh;^%#c*tbr@wO1Y zf6m9jgG~OM#PPBUQJCU-`{OHCO-l}N7N1vK**Ej*^!<`I4B$0_FUOY^o|-bz*#34* z5U7|0KywIE2#^s~gEwis;0MSs(`{<-em{|2mx&)p>rB_mEwQnwfobAMFcQN~-+!L7 zZ&TqOA>$q;o;i3Fo2t>c#M-FqQ@QfqKVJAG1>u85R`-c8gDNhy!2~3YoKV<$XzoyN zC<=Uz#D9tjm&ik$hf(wGCZ3?MV3@|G2M@;1oH;&88OjJ78d#;fXJuc)b2ES&E>_7UDgBy{5CF= zQhg+1a~FbKkg=vQV_Dh&+8mi&s@s($7(V>EBQ|GQ1CU`R@JI1EXAB#)PQBt!;GT@q z@QHq*`(cOa^aivk{Lc~BnLgW$o|#oe5W#Jkv;1xBpRt#Cxj^@@`S^p>#6$YwN3{#^sbu(%kMNYB1o0f8VsJ)s@n~D0L38O> zoH)y*js|{P%}Fcs{I>H&u&u1)4T`#w-LIxdb=a-hp=4i8Tp5m&lMtP>x8J8JJ4+t>B9lZJ{eh1j97H;Wcczo(j9YVPC zAy*Dwh}>LnUrbCb)DmqTo9jU+quf^PVpKWN#MD>5bFkIHr+qHAGV3(my8R}5h|Ozt{ru?f*ugyoa>otH6TPmg{9LuH|PL+x&|5CGsVlu(Gp+q%h$TNkpAP zqenBqui6?JU4{rL-qk4bL6=!Ds2{t-J@lna13Yq^D~cvKmAQUKVKWSS?ae5<^)N6X zfH@oLVqeebXllB}aGCw)hAo@L3c*Ed=d=2VYt`t}e<1MI@7ncb_feHPWxP)fY+Ywg z!2`60y;Mg8f@J+1zYC89Wi|&qjJnPAsuy|}w8}k%TZHon16>L-k2o4-+A3uAOfl6| z_0bt`7gK8{7L&*i=LFXaG&6Whkw$aw`8bhfDB}}^k)g~)E1XIfaQXC{3&wYbeGU&yuSVx_auC}8_sVW?&Lyhu?1q7PR%6(k!4bzvLmZnCpKvzd* zKzk*b;d!89^JD+^H^TLKzrr>F_bU0f^1T*L2%FkcJAC*WTso2)(6eh9dw^di{>2d! zCfo`N!XpB6swCr}gS`>`9Gw}S6_qu6Q;E5?a|<*L;>cDMR6aVU=czznk}^MQcV#>+ zDspwon6)(5F4UaZd@o7t@AO0c#%kbJ0Ae~dDDe;avwz6$H3qwxx%Bfyu|-FFcwua@ z=5h!f=l7hyeVf;p6#k)G%z{bf`WJM>?=Nzg!PeIKI+?Vka*GI^ND~LWN(8>>3>iqH zT4^|RpRZ9?su?7hB>wlFxsmMtzSO>j~%y;Afz6_2Rh7KQvuly%; z9v18ZoC`$s*K4}rcN43f>j8D3ghoZUEE)g(L)>y{hS=mcw>Pf5!Swf=g|V%Lum=Tm z=8=5D*JoHd&)RS@Ap{@>b7LOmjh-L=jOYKK<<-%zFAr_NHY3gF^}P< z;3!I_dIq5t_1V7z7yGBTGc+uK+UFFSd3zC}l#XdU#@jb`RN5u$TbeHp^twZxkH2X znBoAJjlRG|$w3x!wTi0h>zl2+b`9~vvyZ!9Eb{*T)m;L!dW^>OGMCUtyyztqDL2y7 z{l$?<*m=h^?SM6FoPd{WE90OEKt(3Qz)s=Nt*y)@WTDE6u)a?=;09d#o^#{l^Q*tD z?NbX&v)g4R6X}}%UD&=LXc7Ifp|Nq{1WtSM2JH*>ynSKV?qlp~bNBsiQ0ld5+42(B zmE5rR&>FQBxXW2DZtSevyP5C3KcHDrzYoUvOWl0Deb+^SW(l=aKD$0TJ--&a+yBTB zW9n_AtYdGiUr(Dpy#~9xiMf}dy9}pfzZf`;k3KabW*CuIMBa$mAdpjEu`? 
zVVhU!fqUv%7Q;!^LjNgKEri(OgC0pA`k=?RwG8bhGou~R^pmlCsV($ar{o{BDh!exkGHP<&7AeSMiqtNkauAuqN>`jT|KqI>i3whTz2gB?qkCJG*m@(qZ!^!tBRfr&Fg87yEpKNhQIYXyKQ( zpnr>v0+S$517bZCv19+uq4g=Qb3e`GE_gHj7A^K=6%ECg&)ynWTGa;87fHHIbbH|X&+n-Bxi}_H zT;j+}o-t%;VF-BgdWd{p;s8>GO*3)o_m2 zrg<9(6t6ouH=e}O#z14X^L6xA)AotZ>FZZ!sjN|}pjx2X8Qq@r>_Awbg2M6Z_*q7; z>FysneO1|a;07w;$cttiQ_VM8D+;v;H}XcexFbhW0YwU(v$C>0GX|e0`Icwp2nz7e zudw?Q#CZ_%=V#cJAna`U3adB$xZ_a-nGzAeUb)%+ za!pbSmMjsE8VF21iV1&easJgD3YmzA$RkIp4Ih1U9jS7Ro0(idunGKH)!70tb+A3eBeC87ug3Z1pMrxhPWO(Mc33J+OQLYz#oAdJzp zn>)uG?$mQw7O^72`EX@gXMn*NHMNA>7adJGr z72-^ndN=5&*z5sfvxh6txzXOJ{bFjui|1KaFQP4@+01-D-SR;@uDLa-&WemsCjbgK zF~KTbPS^%D%+^z{9xxX1c zQCG7z>bG1dcEcLT*TfwU}nc(NYje|8);=50}oq@zB1 zT2MffrYe-Of-HG8C?>K+@eSsjaBD(j&$izJ-RM;KAS zRt+Rvw9`QBDhjU=WVF>uLlf$3fmX}L@cmmSn)jd z6vpqo{A8+Ms*R;lGYsxY!Sc1M*W!dslMc0nxuLvR)?v@@v@slG00-_$vIN4Rl~9d* z7}X7+4P#q1rmC+*1H+{+%qB2a$F#}mF1vT_LbC<#SZC?dim63RZmUIKC2Sn>O@LWwdidw`vX*X!F61qdGQb-a!3d`7q+wgEi#A z`eR2B?T8c`5x2P58EchBd~GllG!1=HcjCbE4EBzu zh`5ApA1lhX-u2!=k$R@Pt~k9xAD|H`$S4u@0=6V(0CkWjhdhvj)j?X*RqU*ik`g(J zMZkj61j02$Gh!k}yZ#{c=1>f6Dvx-;nAhH3ROjmLLu!%Ahq6S8G&G<_y)t|C1x`(H z4(ep(p5CkV!0KFEGeg{*o%bF7^m)_1pN7kEPosLM?1RYdE+m~?ir8E7>P7lSZer=s zC$0vs&+X9PH~8bztcbf7k7K)m!O0%MfjYd~i^BC*ir8Ygk$iDyE%Dp-aVlpP4}B$Q zAP16$re-y&8ZjM^w7^@k?KT@LpyC6Pd(xqO+>f3aPd$!e8>gC7MW4}GOh+6|Mq`gh zUQOZ(loO%XvahGEX5Z0ZugEh6-`bi=fg*wwB1pj!`b z59mm|I242ABeQ@HSu{@uRiEq)RDeIIKGbaA78{$Jm}oe*9~~RKBRE)r*(V*q^p^|U z@OZ>~y;cp@%5YE-RC71`vqhpuwdjBAdcj?Od9-M6hp;6R^n#Rk?1*PXFS`CMDWF4# zRg16U&gClbEIU4spJK?h_?|Xy*;19MN-$EZt`|PzDbzWCK4d^P$^xzH12UUQx?(2& zKDlU+yZF(PE;@Vghm|AC0ay`LX_wDqd+Y;;#8lb0RfDVM+i+&6C^tg8)?-uE(P|7l zTZ6Wa#1$cil;(zy40n#s9MF-w7wB($_qlD5jjAZf9<3n+ht)OAJc;m*9tXK6% z*3;YDX8*Tm?-zOY)ZzZ7=ecaP_spl_^;ThTKF!0Eaqi)ghnqPg5wuH0Axq*iDQ{wr zrp%F^9t@V#^yH)Mw6875n?&GKMUjo-S=wOj^B~Ws9^GoD&v12h!g0!lC}J6jZ@lNl z(GwXJwc^Z~rJLgpL|o0t7|4+Vcq#U5k0eLMhk#PH1gwGKYX(@XM;{*!AVmQtOc2Z$ znSQj!iWflPsI`4L(fu&f7*}o z^LnXg3*WO96P{$M-z(j99cLj$k>>iv=We?a39$TYK{JB)XJ$E$`^}{`&FZ$^KcELP z(b%lO?Zv6ji(Jpa^o*_pVJJ{T!rVK%@Pu!8)HQmCCMP*{f zU?={DW(-ty(}`+Tt0to^fSzWWy4O|*qcncoWcR(j7B?Uu$LF7qySmbZlslqY+o7mZ zx%W+l;T=8&@@tajK>4MbOhIb9tMT+Y z_%B=#lg!+jge2mYs9>v9sp7!4Ull((v*=;yLka_Q)|&zb9T+)v`t+-)F%Ns(K*y4c z$VI=AOmn?^`?eEL?#Q;+p(9_E^=sR(6YVf5c{eChQzZTsY`3Lb1~wxKvq_4!BIa_m z84|uDJNF^tv8e2(DE$f9kMfwwF<0Q3G?P+Y zjO{Z)%jJiVgIT`HzLN=79+{HJrb@tZ&}OSm;WMa*4__z%4SKteYSm!SxwrF^l9GnB zZFntDDV%3 zP;SZKaSC+LQra9?i{Jp{3mQ_xL2^%NH1_!K{Fhg5-t@CI7(jc~hV<3!*-*Fi(FJG| z@;$l{OK`^V3}Tf(%gQ!}hN5Jw<$W)@ki8wY9f5FdB0F`B`8xpsF+w z=CNLS3j4M1ph0U(?yOqvynjmD9zE(*svoG75nhsY^n6KaT%XrN5vo&V%7zc|vr-ZQ z>~svz@@P$SXoc_R4}Gk30Ge7SeNGou<#}CeUuXJ_*M3piW*^5^5`pT{ zuQL}JpYF5EYei?zO4MI!Uthm8g;l#kBJNmvxVdAa?=E%`8$RkWyWLjDAiHYQ#xf$X zZ_KUGO`BG7URmOyACSE`Dmq$%Ha{)@@%D$APD|re=>_f1E_%*kdaouaDcjJL z9qf1V-It0A7q-2exFlYWH7@$N@Az@sH}yJjdktN0VrF)vQ3Wiq*n`DujG`MO_$&{v z`SZ;#JDXto&5ONIuhJkHT&}9AV>gpgisF5s%ca`hPnkD;8+A+sL3vo6YBMLtZuc$S zB!Db*EV;2YgE%R~%0VF@Q@;#L(2Y~%L@()if`(nJ)Z#>@MCHE3x$p^)MSr7p!&?=o z>$A?7X-4+El?x8deZmzkYSDy;izY_XvGOGg+nx79{7Z$eMfJisi#k+eB-+)nIzGM_U8m5&u& z>i&`KL@d$05&=WJ`Ht0p91_xO<$oESE+#V z4L;fL@l=z+`V0+gciCvA27c$?XOh8EkWaht)@9Br=ncUQ=maUp=3{ zMBU`V5h6I0lFuW9M5v}f(vBcyU1g{`oXsH@z~>C1EC^ zHMSTyusYI5fq4!aBW2+h6aAh(i>&RMdv9RZi z|J)>xT~KDAP>WooXN+BAxYZ_;gs!B0w){dIx5f((j~WqpJaF}b->W3^WmAgE(6Q<_ zniklu$)!S1@bC>}9r>MU)w3pzAPG-cjcZeZrNBSr75iSKM2B5ik7~dwU@N01h^iHE zU7nF}{O)Jl{DjMmEmKcs&U2vBpT^m4yfkIA14&6qWFI6S@P~kHNh**?qLFZAa=#mQlwsf)9&XK*fph znn|pIX?!fwLpb8_X&|)S+`U`3jZ|9_c8wvk=q~04F}^%GzZ=4~qf)~4eS;G2 
zsoU3=5gR0r?#UPL+$LE3Z)9Zr(O!%S^0(0tBJ0OTpNN|&$$Vtg7I5f9th9+lS0W&5 zzcA+2>oSf7QMZBJ6ux*xu8f4^$;B#j3kDAxHdP^hYMyD3Xze&-I@%Gd2I8&&C9Y? z!3Q&s+?CBSy;@h-fe|&&o_&0W*4OR+;SuwD_@3R$R*7Jpqa>S=5ZAxO3Mg!uvcup; z`ZY;10N0|`Ne0^xnVHx3&qWN5>EJ|@D~SLoW~4x&JyGsG4*Oz2sx>gnxc1!j-SCV4 zrgUV!ozzL_h$MA~9=4i7i|fb0aGC(5X0=JLks-ZAF?Q$3lb{|x&r=RHI<1h$UzT9p z%I$$ofS|YKq2_yI|Jik?N-@stbtj@Qh+#>Ev<{}wrA$hS|1Vv4ZkI-1^&^efXaFdPujnnWTcd^?ZTb26O4ysWAzrze3xEfV_Za>nhP>f>bulyvg$;-(HLKO~ z+?M-$bAf9!6=tMg8sEGz3-=-$Zi)%b5?3eryL#f3prWU~C`|MI^Xl)5-ao4*N}|J$ zCYYzMfU8{8y+;NJd>jsoDF$JhchM5 zP+(w!^)De-VW_}PJvhbwuM zt?QQm@^6Dh?1+dT&E%tfNxxXr2aQmG#DlaHfS&+$6U7!T1=av6ihOjp6E}b3C; z81;6W9i^@kTz+n>xUYc;Wh>3O5bvlATXnWRq=ghL*##p7dD z`&5)1J9#UnJBm_yIuKx0B9~r(yb9;~hr-}bbz8=`fiV@Y`BNk4KM4iw+M`3*rxx4s zH@uki}4yZ(3QMI&u{++ZME^CDB^KPck6|@hlMiPMdv}Ly0W>=(3LnV5NqD~ z*C!AT8F{;~!}D{`$t&pH`&^p}Ds7x+Fr;l~qGnT598qbtpFL?3X+$6F&+IrCU*+0s z?)-mmp?gh;d7c&gkFHtJ%D=YFL0TK(gZ|n!EAbA8Jtl;G|4ZL|uw+B)zjV!KT2+rU zH03ilP)K?e_LOT!n)0>{xxx(2_i8cT*7lxL;PEC?Y6gk>14$9eJoT_wdO>73v-~%e zY-&>}G_kZ!@9&q^WfMwlVN03#w7uJ$eUaT?5#U8?dL&m|_F6kXE zd~K2ao!$;Hn4pr10x@god(OD7|J}n6Ja0THK8A761g71T+O-A;ld8;B}&FA9ukD#>_ z={`g{7;#lNkzPhZ8*a=d3gNJ-)C2;}A*>LIAR)uJiEEqh-9Uy(FRxGa={&^wD~DfQ z|A>xOW9>&}xK>s0CdtVsN{Id~Ml5OcTymeNy0;DaDW$A`XFU{69Wf*T?cpjNFnoBh zJSV%eUa`-v@18*zPoJg$pOUA8n_>-^yg;GMlS(FziKNX{U}{gHC^s%$|BR0F{mBd8 z>Ht%!D1l9T>riyj&kuz35Fr0;Ih zcDYBdF>@msDS+|3As<&!s9>?ZwNXhg@v#FLUKzmJ4!#3T~NiAo1)^+BHewoU{3 zh0XS#iR_dR4b`>%?>yg5d{HL2O1ARq@|~KvofJgt6{^-e6!!w zs|KbQm{56XHE3{=e1&wm_?4shA>z^Z`o*;*WuWv?Qd#MfIqotYsAd67{uF|whZt%`p`q7(EyGJ@aXFO7FH3sy`m@2hbM4K{@Yn)~xNn&| zFmZ9&)cZhm2fmzU`a78K3+T}E090)%NaoqtSX=uj{lv+kWk+roRo5T-8kI<`x^?a6 z>qybD_`;+7jvpT!QQtqn`^UHPPjAk=l_Hs8EYi7{q0vgFb~Ax`J+Q#EH^PRIQMMl) z)3P}TYl8<4potbl)X!UD$Im#>{i)IAxo@f9^&{ChcyDf-i8@G*$nhQ)(j)51^^B9; z-oS>@J$5gcg3(sJMvWAB?9h^>P)uIjqGBB)?3tX6FIE*HTegHzY{)QI=!RImOEv%KY0wcR1o*Y_lH#~Xua-QL7P-d9VchvnOH*TD#9oJ`Kygo$(IHZbzM*JoB zqgT`|-h3?XYTW6(V@&q}X+X4m9{%y;gMn=5KsS%{9*9Pao&$)&Ix23Xh}-}!WckH? z&c5*7j}yy&M^B(|PKLftPR-FV>HLb1uEs^V3CZ%19i6J715d$qWw*Po{w&E#T>ky@ zq3FA=b1>%i1dER6K;(D{THq7BAcpCx zPTGzi9|ZUHlp7LH_%IssP%edIV|xzSTws zb>JxbTDJE`e`q1w-3|2J_jjkO7u~wg>D_0qePRpXa$qvR(XQ7v91rBW6^$_Zpy*+R zyAko`7ME#hYK9Zb&RI8O>j^|)xM5ZT;dc22UBfNStcHC%jYE!3F5bT4wVD3gXg2nV z2k$%D>EI@mytxSz>LQH>r@5l*Hanb4UN_K$I|E5cn~IpIs7atx-5AOaK9<)etFFu% zHtz9+9(ldMkH$TUDqgUc8J*?&VezPuBRf(gyUv)A_-U-g(k2(~Gt-PmO+%DkK2!Er89Y`er{J@3{4gQR!kDhHG{+b}4f>uMUt!*N~{+J`)RHurozjs*1e`9($k%%?NWiK7<-(DICDc~B?l zb$C0nVL<99tjx`~+UV{o7YQ}rNHcch=FKe)^Fv6cqA8C(mzb! zm4LqOe#4l1ZJ^lRs5VTNew@iOTw~L%XU_<#aU`Lm<{S9dueS3{#QQ>x;Y*h;C8BzK zl!?G&=wbcY&1XlfpXFyF>Mp=NO>TpMUS4M%mm<+(ijhp0&^IxO&-+B{+|t0Hro2hy zpQ0tA@S0lqUm9!2WB2=bed=hBHV^=+fE&HedGf~_N|qYLc|waZ12vm82}Beqg&Wdk z!rz14{vciz;!C3Au%1Yeyb>H7>_f{fu`Gz|V7%xx#Sy`lTUB;&|9%mqc)Bs*Yehzp zD7YQli>S{JZ#gaBMzPcaQu(4CBL3Pym*p<~G@oLa7J4-cqJT*C$oz{I!B4hJMa89@ z9Bp73lnB0&fv~+|AcSj+L5Z-Yiu+hsvfr%gIjEL0&!xRP`|NbbvPs619F|cdgk@HcqFWVIJtFR z{eQtM&(sH8NoV(KG;W-Gcw)Z#;$|czJcAO7CP#T68Go99=v&`Rl$2trO{58cUpFwD zjXUAx<1^(wm$s|zQk!;S0B=o2p%gQLB&-)U`%pd#fumEdYt_~=mny=q5m&aC@=?GIx`C1 zFQrdblsZr@h?wQYi#ClUFCA&qDw?`WsIF*AEfG$kJDcL&tw~*Oa*yX%G-MI;UWrl%rorq)>fu$=fV22`^b}Ur`JtK$VBqTE zZ7ENUOY*FSA3~ywi`UOfN8f;! 
zJt1HU@%d9yGeR2v%$$Ef=KPa8V2{i~sl~#Olo_cg7p~+ep=1?y!Qecpv(w*XzJmD_ zg#zPl@IZ*Q8io2%D)~0)!${x(x=8_5EZox>$$7j>brhbUjx|WD}@(U+o^hy{!cEY(&e`KDmGrsC?jT%+g)6*k5Q}HR8S`G|!as0rh6U4HLs#z$PDKgH{NtTrcXc96X&5=I3P$qhaZ02 z;`PE-gg$FC@`)d6)u0AAm3WJlWKlwReR`*sz6UZ88#=303XVzBrd5x}SHIzqM;><& zd*td(eXRh=gF_5;k=LkrRD7I8KB_ao0a7VHx_?0rSbB;DM+w~t!6R&RH6+_y_aMJ2 zfCN>Bm!J3|T+FZ9PjiV?!8op){xFK%nu_^ek$H@0$Ji)b5hLuAc_j77?mzhK@Rf>d z1Z;{WMTm`2(?@H&Fn(ar=4uy5Yc^_hUDE(&j|Uq2x1c*TPhti*99$O5M;m~Ihja=V zSuSZ~sB-%wbtOIm@4g?K_|1`$d*)+f41cZdsge-Wc{O@gZG;O&MNsL|0o4o z(fj^6VCet%#B`|h^B`JUfs8*ArWeH>oHvMzw`FIuM?F^P{ap)SHih0Ki+tqof1n>x zO$JEX+O#ucHM%VOx+p1!Mdtvy%@AVxDi_D)aWh*R1A)kO%W=r=&#Cy-tWryO69Z!&Ml6c~#rR2)73f zZz7R~W1i6MbXI19(+_E}vF)yMNcAbOs~A2tz%TC z2T3-^IFdt0O``ZWx`N4cbi(-ZTaME89>ncQp)m%vpG#o7K(qbG4vdiFetz4(QcSw~ zahPR+E(P?Byu3Xh-aff->3i5P>roTF-2YKKEhN;4~!%#%4>?g5u(9k{;2> z(7?c1{t~s2&CPhO^Mf%l*Y}uvKRE35WJ`jd)&C1o?QZMZ@UPAJzI$H~BhI9<2)y*P zV_pACXywK%TXz0%?;bsNHrCbUv5_neQ@s*)G`^nsCm$xpg|wf7RO;rf58m%6w$&c; z*{*>kz5ce*s6&Ui2~T)72a0A9JU@wb;2Do%ENO4&ROk3d>7NRJ64!V#D!t`3XM1S7 z94;*%k?8(|1|%NK8vUheGw;7`1U-7b8;y@-;Xnn`LN?|as$WICwo=uSW8h1D+}7-c z`|=81v0$NXyqa1>?Ljw#(C_N{y}(L7`|Vs-rNg^YOQU zR2Z_hC}`ekl@fol^3v0`Gec|+2%}BX6;%snI*-x^W}ee*wO?uesb&C`+-8+Q>HQ_tYbgDqg z5qe{i1Up=)?u+<;^+g2i{xBhTogpSI}Rqq!oI;j zMiHdK2}8eVkadAhmORgz0pn3YNVX|d)$PfIM4Y+Vm60nnB18?&T=1r}FOnsQoswCN z8ilk;$KRSb)#m44J>Jg1w96&xBS}AkFkCpbtD64XgbeeF+TzTFkGUN;u@5DgwSPG# zvZV?gf4k^bb2C9_c55ky2tMs1)rVj@I!guHF! z{271$yJmL3-<|-*FIpwe9$yMefh5OEVh>&tv3tuV>|0hVi2sV)R|Xzj{D{^2;?0|N zu#(u#I4^S-CG!=}D^cJKl5=_ZXjfAclM$zva|&{!>KgC#CU9HKrgJdlv?(Grq6Jlh zYRTg>qhB?7ph{9KD2+^b@>bnmEPz6+j2Eh^$|| z1Lh1p5n)0$!qtEb5p<#)qdz!!^)q2<9nqrFD?X)Ev?=QQOMuWU@^+4|Sd?Ii>ENum zB?Pi5y~T&G!ZIMJUHZNrXVJk-MR-NA!ep(7lP|m|nNeF^-GWzwu4>DzRRTnj9AOS7 ztRdO5=wHZOTFc-JOQdLb+5^Q2hXZ&mA6<}+xRfGegH2nv9@ycm>T4^A+0Ombrd=`#3RrB8fId()|tCo2l3 zur0+;c8w2W<@@49qvAby@6oRv_K|sjH3bEy72OW!2F_CX1ouE9O78=8&yutD=RGY~ z@wwxdMs1fuo8)Ca@s-)mV=;61IYEJOzHZsG_veTFb9RJfL>0nIRV5DT%teRhnlI#heMK85H*! zwF5FxKlEM*(WCg@dAjf+Oi1J+161>!XHDo_e-_x*H_NH|rA1u2n=#r47MO7YIui_m zU|;~*#mBOL!qL8Gm!dx8!`H-X>rAi*<}3^VzGp`xZ4~ow5A&gG)}d0hc;#{zeQYv< zHL(A?CG%zQ7h(&U-4YmYRY5h}h`&qt>pXt<{@(uwVB>jbq*ZtbauTKlU^s!ABEo^I zD*W)_A#A!kPsMp-(@2M&8HRYm*%9Fyis(0}gK=AQd9Wd)$OY~~tkRnY46=N13^(b4 zX5(%<@)@n53?;~#kJa^;VhtScYadBs26$XhlGC?Ou*Mx5E(&-?561LVzU8~S^n&Qt z{3*cibRBl#78**ra)%_()|Z{(bX8G5_UI3H^3s)RY|z z#9}$i8T+d%(TcpSj^#v$A;iMMJ}oUhA}6>zjgQPTpXaqRxQjgnFr8-CnA@(z%9)+$ z`olyY1(z7f0Qz-GY3izz?M+O5X;5~HYdz<^42~YPXc|*r4pRbe+b*pn_=@krgHr?I zs@?PU#@7I%bRzSYJTlC(r4Iwq!x%qEJ2QfXa<&{29XI~W zYM{+?Svx`2E(ta zz!gCW-H!|WS{Dd_pDs9(J(GN2I;W$^)MN@CZJK8(1{Lw!g8<-3xxA!cZ=dDs#nyqq zhfEf1qA9u)XO?~aJXZuqf-F*czDE`^wGh#_KNkJI23wzP=<_`p-MknnLFg18i{?Ju z%jmDx^N91g|7~u5qao$!Wb_0^5IFO$i<^8O-{^lr8FP!K{10NCm4p{wIf7nE&3tb? zx_Go!f{_BQ98FBL55_@JXdh$>vP=hJ*3)<*Cdmafv~!CU-%#7Iz}+(xpD8u7rcCV1;JEtD&s`R zqcRi05V4yi-flA*)548fG%%y+7z}g*zV&#l@ogH`)ZI1OU@L<*y|?YyF~eaKEY$*9ia4~^-2YpJUThmIRjGM^PQ88$fRW99KZ z{h+Si>D%HuN5v{VN#(_Ijcc&6vfyXOV(l;E8Tn+fwokZFk8z>CHBMXr{s(*N!b4^o zP#JA!adE4JcE!Yi*VV)-TXkN49&3Y|LR*t$~__l5Qd9NM(5NeF=&}bzE1oGTej!H$>Wi~rr z;(4VzMyU+8VmnVZ@xs#5P(xF()n`cT-&$Q(BuP2g)0&Ff$Fu7$3dU^ETPGM%9>w>0G>z$cW z=|{NgHD~-9xZpWU)G#*d+?22rrE=_0GRBft0NE4!5HFgw^l=BX4?RjtOQ&qu`u^R! 
z$gazm6260ur#3a`m_QRXd=D}`2m*)JLq=21p@pAU`oM1XSz0-OBtMR|Q&%gws!AdtjE$er=XA@*f`6Gf`<6KG;O+6vbDPxp zg=5b08AmGBN?+?pVTKXPuLIZ(T*@YWv=tpA$?EoH26pY-tO4B^9LM7C`o8y>Er41IA&yM2h^nd8q7jK1*1pmX{4(DWxS)c#yJ zlv;xt^LfoREJM_|7YXwYL;NKY(TQ`HEID1WNvmhiGgd{jUjD1zCwvsD1e)~(bI?j+ zTGT$()@v>+$3#Y&lm~*W{Fx@Bwpg-@YxzK}od&go;R6w!Gru6Y<(4E($-U z|KB)kfJADhXWhG}C=}y)J*xBkaWg@0`jrkXyyFL!wDM-d7g@b zcy3idz~EH_(l}!-V&Y+OoMK9$a;1Ix2-CQI0Zz#BXctWwr9i|+6-7psLQ;x^1i(Tm zUN%7waf{NNh7hd;#iBUk9{1QC9NhfOnA50cQht>NuRQ$H&{0ywKErFAerisEaH4wK zK;plYzumy{k@h3cjc&YZ$aTBZw?7qU-rQ!&+4vQX+TC+nAz>PDfgZ?)wKAUC@z;fY zh*y7cr)Va}$xCmR9ZAYS#wLL++dp8MXS$RdY}zTnyyvIWFu%@<=$x1o%Y(U(+n zh=(M~Sn3h)FUKz~cAGH++<5nro|Od~!=rCa*@Mr(0Elfib-?oa8QDVHcM zp`0E%U0|KcbP}`@)3^4T;2by!j4UA{m7xZ!@z$`k#XpJSOXQ#ejEb%f+SP^Pol%tj zkTx{_Z02Q-r{~o`aDD-+v2N*YG#i!qVq8JYzo-xGO&3v%th*Il|E4>x4<;E@X0fo8@1HMG8ijOWcM>fq#f~azWnDC0iFBb{*B9 z%sLiehoN*nrQ956i4xq+cgjJ_D959( z$wxdy}LL=SP208~uJr{59sPwifXdxkH?PPdH5SgGF44yMKsUU8tb7rV{=aq z>zwAm^V5aWfHV_Kl5S<&-o0`2+VcT!+_|$!#fjd>nLlX+Liy_T>&}W%+qP>MWln*C zG1rAF(Wq6c{a2I9|4nwZW$J~*g)E>AIy0#j+g-nPD|X@Fc{F6_&c`iZ)}@PBZE6^j zRT4mE!Zq*RxT%ve0z%x*?}^#By~Vtr!^p;_)T!$d*Sq!C-XE|2zE_&FsHmv?_dQB_ zck?|vimY%I4uAfJIQ8=eNILG70|Sl9no;;S~`_H;{72 zVm5(aIz0YNVy>IupsYvfY6y?fE%{w5K%X~{bTNNc4t;r5|4Sos+M`GnQpP=(Bt`7g4eoXyW-Mj34MN!?n%#PK^5l z=C>pA3u=dF*dJDq_7U^^&p(S+q^)xUd#{tobCWp^brLgn_bmT$Y+=zHvx7f=Bu|Xm zZx^_4=(a7xb5AC`P55o9?G|h8scD_IHNLLh%%!8Yn@hVp)(NZEXkBL*+O{=Uj(4ye zdwh3Axqn>Q-5lrQM~9a5ZnmN6?ztVZKiGtqnP>l*S)0;q>$2tMAKml)6SmlK?HeAR zMI+o|-;8)G7jzxx84oaIdq)*^RHL4CHO@_iyEV9c*BL@O5;~S6Yo)%$dC9jYaLV51gL6G(*tT0vwWm z8BEM_c74K9SO@MP-bv?y`umngjX%>xtzOgG#L~WMQPJPloNCKy_U!MO8$D5ZV1HWf zw(!xqeMG5AcC!rc2Jg0?F)A<{%~MAPF9>l>8|Ha#>1+;q@edMbV9{b@WiTb{OBJ?0 zJ_3h?hQ?Lft<5s=(yoCIbOAG^>Z>o``V#JT^s?)?;o~Ff+qFXu^777s>jUF@(z1%z z9Ri^7{v}gbmx8DhT&a83J?W*uYMjtmdIE2Fde?=AVRnglNmw-PL~8Q*=5};QYa)9@ z{ppuR#^FAcAHuuvX#2kvhM#OqCu$@fM06*dKFA_IXIi=ds{kVSK--P9y88VB0B=4g z#@laa+*!Lw;wF(A&}rDm>4f%#9XT?-xZj?EP7UPy*=;cx?<#q7=zkdZsK({DM$dmu zUIejLGOr7{zPwfW9;(2;&5TVk3hZ^wJKG(RdCQ2&KPGZ#%al=!JWVNs-kzOGxh-S1 z7A-mzcvt>}M&_aD1iv(rmc%$f3_>c22bb6P>)(vn?hSwe1%UnXeHw7Rbbz9ltnaty z4ic0I6eOzA+MBl~dKTJOOtwqBje%+bP=O5M zkcHBTK6^Dq=0LLsTQ>vS$#fU98E1c7_`;FXqI2ip$S&$M#C|8O0`&QhNrXSY^^Ooa zfK2;-j*h$j@%bY+`K6KYU^FJ7DwV?beCxU`Hgym&H0luWyURSGs6nk7NDTs|B+^|-Gv{#pXBUz^`xf`}Tr=PMe%$lV?>Fc+*r1R1@ z-y%^^!+KuettayFs}{XE%EX+U0it8w#A;gmBIJ{Jqz#!ap@m#o*-9mb zt(P3JI^R61(}RPlazb7~eM5V4SzOLpF?YdX~@l{B+x&Ctm# z*dZeyAol^!6XaXijdshls)8LN0k)#|Wc|{}`a^;=0$6a;7rB> zz1{3edNXL%+XusFI#iKBV&)$lgR;bsw(g#@@N11pkZsPW6Z0dB9s2V^_2z_m@Y?6Ka=botVv>DD1H8 z2_chq^Y5(qwt;E07I7*OPm1XfX7l=YrTfZu#x_a}b9Q9zI+RRTH6Yv*^>OPk%02^yB6Csd-Cv z1Gl3?g{Q(#IBihUsZOjk$@p&`H%W!F0h#FO<_M9&)8&T`nhygvTz=+aaF={e~b1AiLF5ab^J zWX_I+MK5pGZoILqU6o@@#IuDmA6i_z^v$I)iyz^{0{&Zd{Zx~donau;KP+P|1*F2{ zzRTzuhl^!pvRUdfW6iC)$RUpnb(reC0wXLTFU+mfAAarnSVCzvkN#R0d{k+O<&3Y(E+k*`ZNKE0|Ku}N3!&i8%J$%V_8yjZyz zm1K`e#}3|E7Tlb6Yg5Xx_tA_o2(UU~IWtuM(14d><)jJJ;d_S798FGkuO zcJyY01(Clc(ohm}3)SND3n-?tERlXL&d|BS72E?r5oX@;kU zsyOm6c}&*tHJEZ}P$cs!GStF*9&&O6p&BfvOGQj8e4UOe@Wg1bBg0AiCmj!FJahcS zi8JsqPzgduD!aNF;h`v@@nE^OrS`Y-??LVbdtuxpFR}98nnHSH-Nen?$p`6U^zhlh zb*>_~yy)AHEv-IwrvRfhN}0bKUR~wwy_hEew$x2~mQ9_q?9r1ajbMIh@IK;asZ0qh zqg-Il>TH(Vp8s~obfv9__N~!dpTGhXS2DH&NkjN(ARg>8WP%^2i0*>i&uk=2w}+DZ z!rWya_%0DH5)4w)rf*I?mE#r2jKD%MPQbmW$p(q}B_U4X2HmwsruEw2|>EwHb{`UoBk`UIBG^MT*Be2vW10)7KbQHp8 z7LT#E4pA9OrS*NDIXhl<})WnW|A^a(L;7XEB#E zob_a3Y_N{1m8)yqGA9zK2xK_Q*SMpJdw%!Gng1DwuQQl>2FpogTstTCodJv@b0AnS{!zEnL&Y zw7hpCVAq7CO1X4tySbiF4uh2uAv!V*)x?e@Dy>bFiLB&4cE85$bO(twmpw(MXMtIn$n_4JaD 
zR{p#k$b-*tKd@81b!)}oUr;p7_|!D@)VG?=ae4lI0P~;qg4El!`*hB7$kX98P2#dqNxTd z1WU=fcpk^Bd_{R7F0agArGDOl(1m-6960u89*$}lU!S;o|0n>LeU>tgT`_~8u%K>K zyaa_tLA0P5dW-vJzVb#NflnTphv%9RTsGi)TbN?MQ8Hp%Z)g_r03@nk#t${SO1&hN zZ8XCDq0LyZW8$fRelTJwHU~E==D1(>FRxo?n_1>py4Aglz&rc`aTB!Wxp9@TS0A7C8-B^E)K)TN z7Vr{53dY>)*sKub{$kSFkPzAIo?3}AsfUb@F^4nw)BdPwr4^E}4qW#v>n0m&N+tCe-dwemwMNzuUAabWhccTpf zcoKjEt!(Y!ZV5yc+LD(lL(LUcP_RwA!) zCNmR#shPcR%LBGxqq( ziOa_u_8_uE8YlIZ=3XGscIQ#JXQ{mJL)=xF;^+~$3u5FU?PPH1sYMCx!mVANYc*;V zdendB&zpg;wqoN2vnM-#V8Rvj>(Q;Pg+XOd{JzO{Sy6?sByhp5nYD>4?{~wNfyP?d z#Ib3D%2j5Tc5j6yGzMEOz?1ZvnGVc{Ymy>KJ}bR#?&dHAwmj1Hs1I~-h-0W{1bQXz zK)3l)?l1{sLvlz>4x_o}`}y@`@v;7I^(Wyo9B8{qt9<+>d5|g)tdS54;%mt46{kAi z;@@=clG@#j(%=EgFLv^|WoBAfyDpZJ0LTCe1;K|s9?eiCIHx|ocrHc5+0#K)uy^X2 zpF8HhaT_<}$J+)3{b2Y;hxPNB7#-&C|Eb%)>VeFgm@T*`E~I!<)K}2~VS@K;0qe-d z=&JWsIdEN*ndelOM`9 z;trM!EGGkX;dwNnI`|vG3Uc>hNRVajXz_DbXb_dN7TW8mc5fBjYp7gn1_QW? zq=s@#=+&b~2-UVqobiD{@Z4)?O^J$;f~vXpKuu00?jGk+oYc|^kc~@Jw^Z?Kz;uXd ze8KU&s(7tq0z=HG@Z)XEs__)uAo#_1D&@dhKTEz@Pnw6jigchc62G&7zqNrvh zWruA(p|uf16N#Me;BDI`CcH0Q&#rT&jlRr5<9R8sGk9|6-@w0n*(kqkuL`qDiK!mAKbcAwh zBkLFVIb$l{uFf!N)36@I)_HEA?8&XEeRKMIOL?2X{*m)XImgQ?P1!F&wJt-)f~Mh? zARNF09f7Msj3k=Psd$_`f2Nn$f1B4hz02mReIv6u)SVnl>EILhj1NJD9d-2Q{BM^7 zQR!0sf!S^FM7p2w0@(?FYH9V?&mX_`nOXCBVW3g(l3irSX_%E_Gqjpp$rpPbw%0YZ zkY4dHHoRfjOZ~1SZV$k=?kI+-C zPjwQ(LY!$yv52LCjai2)*MIR+V~?X~s%b3M!h7iB*{eu;zPCbC%N>V*nf(DVLjmZwriCW@ zU8!TFSL7RJ9giVAX?r@Ynu-}rNm(PM!MVVrr~fu)X+vo|!5Ku>YtUGl`C^<&;ZqQ8 zxR3_Br>%~Fn5@nxN3Vk{hgBCfl2kSdEn0styq0rjdn9F*GCo4tBYO)}#NnaS@AJ;$ zl?K_>!*AfNjx4&wU_%U2(S8ZB3wL^i`E=m&!*g$perd;xu(rMy zd#Ak?8+Ib4?DohJVoARVD)Jbt3;9nJWZ_RSHK0t=AiOmWw_3K)Szt~tS5TxkWbWG9EeX8yPUGrxnl=co(?c}dHe zw-T{k07F+?gOcv4lEQ9gKaQYjDEytQt|YO6C}d6#cCsLPguXkT69v&jdAn|Z-HG<5 z0n$%aoN|1H#(j+Md7J%HPDLo$t5CaH^7*+jFcgXKXQa&$#IS;BgC(*$kti=a12Btc zxasuGn6StHz;j$iW|=($uF1J=vJRD&f7q(^QBh(iEK|00U4^rM{FjcZ_w~IU@rW+T zjB)KL!%!pXos-m#@@PSX4@igdgVsmRlsQT>pR1~^!y9tbq(#C%&hg!r z=xK@L0~No-VtPw$`JYO%xpc{VHl=xPi%vZIB(;qR2~E*$Q$!M!8c?%oTDYW&10U~N zH8=>C0#}Ij|9#M`*fSbY(EsZ;ZDMIbXXRo1huK@$_YnHlL|l_NoEs?74$?Si*KtS; z*FbbXP{M4xpZs!w@;16{ofPE_su7z@S+_u*h~P<@tfosLej)sW_4#T-R_R?r5I%NZ zQoVqtJ&Q6A%S$Ge%ECUpxexKD^24qoN2o`b5lROJSkj$Dx!A{Pm&fz?Y4_4Sh=0*d!#M$$x_pRk@?1M8 z;NzScO&Mra5mpFij$VFArTe67#Qtd6H75rdtKt9Rk6ZMeQ9JIj9s(6!G}#X+bGUrt z7)WNRMPc3h|I6hj{bI|z3@nHa1G5?pZS58SiR>9}0W4RrCHF~tljOHB&|N-j(mR8$ zW78WAJIkTyqZ}*>f3}gG=Z(;g^w4sOe|w^M0G?5dg>>Md&5X(3es%xkPGo1GkAkx$ zbal3#92*TcW4}I0wSWuRN4$UPnU*k8#1>}**{>rD&@kx(1GcefSw7o5@S7PYK3r}l zT*5E{q(C75y(Uk9s@`{w^_h_C6AB!YHLchTw6;Lp#?f@s>8hs%!y?MA>|#6XU1dn@ z=|-R@&)^kVt|tyK1`v(3K-^sUD^zxCZf0k7M!_wtf+T3BM&Q12I4p?9>y1Qf5N4TF z3*MCmnRuZ>uCMPH8fJ2w;`RZb2gO=z3$^lYwUi{BPiYJhWMmCB7cQSbSr9$WKzOtK z{QBsphSFN5X%b9G91f58(p!0I;}gF8`_WNzdZO2qUYElk*kmH~7)IMlPS`vGyjWAo za4~rEQTncxvRFm*Z2pH$MgovYQRg{^Qd>dTiBlGM-qTjuVad8`-P3H=?Ag*Dq2aX_ zaJmKRO09#Lw>qenu~cLFIHn#%I$3`MKU;+vjX2E8Ta-711xo16+AK3nz~o5t{0qpH zGL`RJRfyxm`2T&EO&Otu{5>o6hRAHfJFW@KQstD1^E*+1|Bssx(0iErZxn{|)Ny&+ z!WIwGFT(;d>y2}An0EFAHcx58(yt-yFos_RETH|;=D&BHYA_+unTdC}1bHStH}>}& z*Ibe1DUToNCnFKkTzLce7(!5*&ZCoydN@OFv7|hHY3C=s^*G4VRs(^mGB+URX8-~* zK_m$%-@bA9k&b^;w2OA19>Q5lm`GJ?SDTN-3`z<@rDM|mrz5CL!XD2 zev)5*LX-?Yp&`LB8mOWHnSDWb_*z+=1S!=pQ}swe0zfW=T-!)3C13{PKs?TP zaFmVp8NhHl=|5k&Y&z`UbGO!LZ2AmOPwrPRJ^2ZdufsDUZ~&89>^~YrshgDDU($LD z4UIQ34Pf#D>AmplaCTwFdk%n1^649j3&w~Mjp=VRrG$vowrGbPf@jgc=g-XA*zCl~ zlgS(wj9qqhxZB3LZ8{Bo{{11^M|H3>Y?4^W2)E9M!LdL4!z8YYoJXc}#nN1nmO?jB z7_Pl9brMltALqsPaN&TBv5Wa_QLyjzc-@%vZSEveL-g;0bZ;$WC_Z=gY{TEaxKS;!3f6c1$1kTT&RWfv3TcxQCU)H+8}RQae4= 
z@iGesn|B%;)?^agGgNo7ZQIX5`BlH_U$Lqp+Y4->=|6eX-PX?iKC(ec+Opd%9ZeYo zX(1lz+qQ|hxc9ZJYZeF4!HE|Jfa>k_MLw~uNV$bj^be?v=*+4F!5~pBH(|nFAOMEw zigbPXZLvd4yPM(XFRBc=zMxRAYdtmG7E(ruqdqT1&tEeu3Ci|2l$N6Rpg}8>{y^p( zOQxJeLC&{2k5d9YzBpf(Zt}xb>@;;% zT|d$Dum{np(7r|~1SAm!6P+M}RaEFTN#9<84oR&Fmzw{5>8_@?n}eEv_r80gA7!Ce zxwb~kRf@uguU|()U5$22^;?LHjy}U3}M11?~zhb&Zm;FevEiWqH%ST1uo zd-}P^msR+>Eltcu2Lq|n4U4s6$7x}9W>TKhRmVbvGA8~ihq8SCWD7jbAHl9+J^91- z%~wv2ya`Uria)RFm7F0e3jU#uK(BwTwW6MiOl6YYDZWF%_HwEJa8mwG4Sr{>l%Ve( zH)YnO`-$#~q|aI?4C&HjFL^VCQ)RG`Inw{^J>>{Vy+;=y@Pj2=ex$!5+j000ZneWQ z^B2FosX-HWHF!!=3Y&CQh>T}N;fda8@A&NcAdF4z6v_jgwNdSU^H~^eFiu>+JM5m* z3AqQV$aXknMDbL8L;A&+Fpn&z3JgZDezs7#dn8iQq5Bi@Lpa>d;jR_Yb zUak$x7=_n)q&Rc~EPz|)>{E><(Tly8Fbr^{7bkh;;bJRL6jUYVhi$jVT@Wj&?euH7 z>9RM4FRnAVKN*BWjj5aZ@U_j^$M}Cg#U86>QTOiMqowvFu8$(N8TmkN3nFOXRxCMc zC84SiU5+jNom0!*FQh0Vbk5ks13~Cx*Pv@(xyW`jT_XJ9&ylwVl!o1V_oa45^G?4v zhq5VvYoii~0yW2f{RyC$!LQChkcWe0{kCo6)kckcCwN)7CYC+2qDQ=;YL5TczF@jw zIEytf9PaJ;?-9S1anNc8G!}E_=jL3o)^|V6oSz)d+qWmaG}7-z*E!p_YQ$lmYihVK ziWfFerJkc>nd#A5X1`>EE9Ho6qQ^;p^6GH=osvbQj3E$no0`bZ#Q^H>09FNGU|^{a zW$peO_VTseri8ZDI+d3v9WOKi?yl{vt#^%L6OMdQm^aEwKbg@2pDL3v7*zJB(J%R) z^81wb9jmIU=(MObXs|}jVdT3~jN<5Fvu)NE_DoDDYs`W2VxBIX3Whpz{>P6u@|++S zEh%EGZs-UI9{SPZ8gu7XJjFhO;Xfe&DPq@ahgyT_+mJ7%#g#GfXfN4H4oD1qx2U@R z`bc7kH$xbW3=Eg0z z)3$t&M)@l22b=TXumj?odp$PLEm8#BXXS#p3s)9Vj&A25~DewN+8kTVf zT0>IR9nCZ~L^K-Ai1-8Rx1J^QQ$nI z9kgn#LH2Ladf8tRrz1ECVW<&An8$AwTEgTM-@Xeq3uaLGJS$?CBjw@0Lm|fjI6E8A zvV>okas$xvfZZL!2-H+O8Is!x-59*}L$o}6^KRLP4l!wLCT%2<4dOIWF|!jhIO2US zQd25YDIsviEDl$mHp?-`+i5jJV*?-UxHTjipP-75+K)HXh2sOnxA}N25@v&Ls=&-@3c@Y1C}j&$orGd z4`vjg2_rvGhaI_HJG@4U zM3$J6@?qEQGek|yaxU-n=u5|)TKs5dm$8dyqF;rNm%Zw&5D~{q-#7OLtDGgui0=~} zmDY^oN$8Mk|KBIc>3+j1nW;NMV`C!6yLp?!GV<~HQIZb~7vFS#)~2AaQ0Bpfy%Bd9)S}2ybh@;0QyD+Sv9(5tHQ*lKlD{3NKXyVg zE{oko)&XK$$CMEleH~VqQdjo#iF=`@Bi(UC;6mGp;{-YY`{PH(e6U>n{OUQRU+P`E zwgQK&VCYsNtkmAnOb>Wrq7?@?g(Bo`O<&H_)=*Awz{ulyRvYc~2lzf}$k2pymogEi z(}otCbr2`l1OixT|Bu9&DI^)1m&q&2wnscTx|Cx=KQQha!LtzD0kB1dmhsohFv}Ez zii}VFDLrW#&Y1QFQF0-vmp+3yrX9l^Rp@lKTjj0z>hiK!i5Og-xOo4dO;78$)Jjjo zZ32Bs5wXiCr+=K!3tqV-18K6s4X}@^xqw3Pz2oEnH^+^?&%L&y10)vB5L^^JUD{r6 z@9dG+Wu`W?FYPFXAjS2N@F0#{)cwi@(@5x;)I4z0SNXbB;>IS*N;i3s$T==1n{$Ht znweb)s@LF{abzTum9heffIo;vTOTAhQuT3S#EK3v0UUmJGqH}IK4S)9r2(u3UTb@; zzUzI{ZJUxq~Oi1$CKZ&!$WdrM#CL1dh z<-*1A9Ysxk(>W^-L4^opctuhv(;m&ng%^?TH1w)6qdLkD1$P^U0fqS#ISVbNjVr?~ z`wv^=t`LY1i&#K%EmJolQ=|-FlHBAqf(^I}lzre#AvOr^^p!g+bu4Tqhj!*KcF^x?wwBT1mv5Hm|zAL#EY2RX}9@oQmX zEo5XqfDkvDNv}_?;(Apw{|BiUPMy<)t^|De2Ozbq^FZyVAQ>>GBsg12LR^f(T<##4 z73vkcGqE&dPVC~K2F}Z1)(WZn4+0p)hYr{zo2Eq@Pfa>%t65Wc$WVp~ZaT}^$$0+# zO=uxS!EVHU5LCSnI9XdKEn|ZK1;<*>qbIn9&$38D7=QC2m_#}jz-Q+JQw9sKiLf1A z&C!en4y?Hlc3~-h8z`(eWL#<8b|3v6O5gD({d)>@1Gwi`RJ10g%SDljLi*KQ%Npw`T03G4ARGYSCtN$#W$BMEeHb}gPa*h7V&}z+ z!y$9~unJDvtek|+>owj$PzVH~jrS)8aRe(@Isk1SQKA!+b3aty;gjIR%Q&l4`_la2 zjjY$+$a4wH+WBjo_Xj9y#w1~b5;HSBcAg6hvj*=r0F0qAKhG|PmjnE|>FG59jlgPK zktDicV}X!Sq6me}YUUr1UKB)oIMC=r|8tohpzxivrrTPyb`D+7okrO#Yju(Vhe9U7 z;fRH$2#&+vVj40TO+FcDyo}GMpBW*t@2`dn*m(K==nukit} zxro0bVkjz2S&oB*Q6?sNWzFF#Ws1quR(at|Utxzt&kJ>h(9%S3_j~fBK!=0AeEFh- z1R37PvnVD39C{~jSsVFe4C`=+_3w5l;vLYB9nR~$%%B0@km1^&*GzfS;}k;>V&P0H z?Ll$Rtm{lB93)x&U=RoyyZGo}^`ee3Jn|B;ikC5QQdU8ay<< zozxTU$S^xsO1l3LJ(EP;{kN;hjx^T9!NWQXk+KrOB)w)iq_{_kO9dgCd7+;Ek4?Md zn@_9C7oaTD7|2#UZ56Sdpc~=Jhe&S}8Lp@gr9_2{f~jpmG-%8Xk=Uzc=B?vDNf`qM zS)RH4<;s{LvK96VIc9)WNLW^iW#G4=lP7BvJlk88u5#gnNx4mCmB##s_W%m)QksK! 
z5C3sji~AbCk~n^`pmDEV;vULX89)VD4>A#fnmb3`@5RT$K6n)kg^87yEK!Y&ZpDBg z@p>Kkb{e1?VPJcUUd{{{+#Pj)?Hh8A*op z7V3|)MJp?k#zCUZ`gZK)um%QA_w;m`Ipwg#=d(fegv}vXUe0psPLs_|4JK&(=@sTE zAsax1N=~92Jd0{Wc)5&>jA^XtmDL7~TXrr^(kpRAwkNA92~H@w`I)i>6+h9`h_GM1 zdi-QHQS9L@?uTYMa)yUN1#_LO5J1XaLLLB2o6M83$GD~Fng7|fq-W24roH#QJF%sy z3f22M_9D0Yjav|bQi>;PzkW9ly5c}Oq@n9X(YhiEp$ka@FW)_^0W&el6dJ#onT61| zg_;VXW4Wquml>mGt3E9YEu_nDWoz4n(A%N!tWh=%389mxPuJD@eO?$-A~HjkG)wF* zS-Os_Pp6K6n*G5S2}Wq<0E;~zMi_}X7eG(~1M%;_m>NHt z`z-Tg_O=6hVQ-F9UY6}hG9~3A9#f9INtuYoPB_4qH(l*h)6?~6$RLMdLS!hkGau!? zi4m4rj3L^+*@NPYQ5)jS)ZJX-ikOuPEPsA}Pk~-shpg+}iDzG61`vSCYk*S)@sA)+ zL*l1&<*ihFM|q^zbKvhZs{VPTK&Rkzh@h0`b?)XU4uHLKsr zu#Cewqw6wJIpqe%l3GZ*0mTeQd6ul4x%S zY@ifal(I8Ay5;j52R4vFY0S8P8_zLlp=CCdqSyMz^T+PoW^qC!%CI(l$~SZ3vzT^4 z9tXLvx=ta$x*^fV$o5doiMAvp#NU21kjC!(z$uC4*RHvp=x(eLkGJz6bg_z_JzK~# zfU1PT6j>o>_aV^;{hKYniTPHO9kK79&Iq>-hAG?Z^O>p!{c^9*dGv13OvhNRS~xH! zIM^z)zy6d1_8%%=SWj6kamOs&Z! z223M4N{$q69?HPQMm`jUfZdH{rFB0XUvG55nZ zU=&hECKI5RUUv(e?=~5n#)Auy1`)Xb$9r89+Xd}9e`oy>EN@$aE>kKgZg9_>$xyzs z0y2V}t91LvPcXwvLps^OUGdJPCaw91zy)PRy~xN& z2CW)kKVbz8_HETxZaEh?MUrz1GnBe@gZuXZG{MgK<$UD(94@F_bn_dujfcw~h z!~jX31d~G4>^$u4G6pS$IuiGGUg(d<_4{hXmkD3XcqM6k8f!$QyIjt_b-4ldY(m*K z0M+_GyM}BaP)cEsfsyRi6~Px7#Qy$J#n+8HclPSp!BAgcG`w`v01Vdh@LRWbsEbbG zv!TD`dFR7cg7TSXHgdB@AG|W&qsg%>wuoMU?G(EScCg8^Xz|ac;#EOQuE{O3B54bE z%(im-`}$O;-!_lwZHA%#P|gyn#$m+A?j{|aQ{O)-XeOkE&?q20nLPx9|3Gs_wDrxL z`ZtayPM~PM#ncJ&0tP!iE34*|66v2Lec$zLMriq}2(fT{%O>=&&S}M1lg83IUq;;|^(dj0Jf?~_bj^eS zac}56&XfbnEXP^|C6FBSaP#-nY)4ffUOKFUKd+H!ZR(0v2V$*CbReWS#3PvP0=V!5n?cUzqe~$s3RMRIFh1yq;LQ|Lm$~`zt7$rHC9Ej3l2FV% zl+V-EV8X-Gym3_w46Hjk!q6{dK;1SS)niNE>^%Ie18W;8epP}fWCK3E3wvy5Ob|;)Sm<`<`()XmiVD5}00I#-QlA;t(R-P3`nRlL z=3g74Zj__d*}Pj0dPw1lm@f{3WjoW=T4B%1YN5h~l!DQ{nYXO(*8U5oJpDe>!9j>5 z^tATPHZVtV9`h5(U~*mvB#cMozz!s6wv}x0Vc%UPPoato*>9uKsd&09b6n($4m)!R?fAgzrQP+5{ zi(Pd6A|wBNwtai_4E-kI^NNsy#`r!S;MKWkP)op-?Wf{Ro%=4|@X|Rv(kms>DRAYg zFUL99=J>1Qr}#?^@|o^nNHZI)@?wM;8Rp`gbo2et|@ihOm4`#i#h5Y&(cQ{3WFc^ zlt(`ss6F*#J>i;XB}YIl&fTQfv}MbuT->d>%NawkM}ceXk&`m3CZ^Tg`p*G)2M&w@ zg;cGmtDQA-&cpKo`S57$tsbSDdtag{htcfFkNy;>L}=_LDIQ-7 zxlQ;I=`ZN~&Y4xaxSFV-s_u?|wWig!)WDmLpRbf%c@U^O)>+f>;hUNisJ{>l2u>5lbktNLvl^26iany|lLQqe-D~Qk%^UC3n%rNe5w%_B>T3}M~w@Ag<6D*~rv+xMHzzpIE5iOEmzzpa= z@NssWEf7=Gq2z<=U2mh#hkU4}2zK2&-ucylTMj@45tlGSISUdj$A61@M_0%I8aJYz zqLh}kW7pJXQZ`f@k(%Z-+RN{TZC3BNJ;?~NEDrq@RB|!n=lpGFi%sf^ zHMO5V&xSXHDT`s$z~!*F^Ms>+I6_WmA3hw{Z%e4UVxfaaVm9}WRLg|S)x*sVF0>ab z9_e>e-!ZcoPN~XNiZvE_6zgf+bkj!mE8@k35mnzUO~W08wsOJ%;HiikX~Yeyj0sUM z(Zt=7vYuQntunU#(L3Ge-$U2B@ghdu#H5i!Y>u<;w{PE=X^-JB$kgwZJ$$V9b}JK+ zQU_aws)4&xNEN}6$pZS+90w7tvspZuQ|z*e=kEdm5b8tTjf z_Cz54kd=k57P0DzHUng5t^0m2kT27SE@xH-^3fn#*Uu_*#VE34X@b|Kspc^EG=VoN zlAX$siU?Z@%u4eri4{Yu!Gj0mRD6l#a_h+aPO!f&UYc`|ZmopfTL&MOrEPGrlH*Hy znoT(3e^oRSI8RYg^!MM9oXx)I#9R&rY*1HhGws)JCUgwm;cTQ|13Y0QDX!OKEdW1> zHQnEu54FqgQ&=H<-$nxc_5G8#T~7!Jt0EYS8iQ2Is@XAge{1N~Ku;wTDSe9C+;U=C z3&e;@I37xDk9N0<$LS1!59GgaZ}({1s>VcU+Ds>%i{2StL1ZN-i<{}X=yQ$94`^nk zvy(|Ln;5{~bMs%?-b(eO!K&wr@Z#&($Fm`6%<+Yj6$rG9h~+_Bkl55U)gfN?SyQiq z>eKxP>B>?-RBul>**WK&OjsssKs46uHE9cA^Ws_yD=%l97Iwp!#&vg(232`Zo3`6u zU)S1j5fnEP&SG9NV6))*^-Yj`7biqpsV44x?6{+)ni>x1aj5ky_caKg_nNON%{inN zWg|Kv&H$NhqhK8wNxE_V__6P_p4=UNu>gF^(I+^VL!)<@#@T2-O>x`H?UtY+@bM%0_zWX|{$jEjkYlh~rHiLa(A^@_1y4;qi++HSE7 zJW=Uux{vg+l5Mc}P6zJkIIMR_@yfK(J4y*bzKa*nWc0T0Z}7toH0;I2T9dv*m?O9NRwIa2qW1>L*pD@+$>ouwb;nC z6FCJr#6iYty>9jvAKFpb#sDTQXTsE zKC*^K{mM>4nL7sRqg!gNuD+YoA5!cW+rzAJqefEJgooP)Jl62I%83z7BO{&%rknu_bVCZ)q?U+|a38M)qVh9D&1`Om@sp^`B5u^?=OhQ;%&pdDBJWZ<< 
z**-155qA=d-5)_i3nt}il&IS3hS)VKF)`EX*pUx<`a`i^P8fw3%Kg#A42jJsVV}?+ z|myX z?=}fbTVzF>(GolDo6rxU#M;|CL-_=*IK1nBlAls~cuveb8ZW5Qc!6!Xd8OQNf#cvT z7mKaxbJ8H#2sbK= z2VhqH;DOkWEdBZ8I1eC{i{zZ;YP|CMtNyvkJpY4u9V8H{vpoycw(r<+Em|+6I3F(3 z_s>72K@C>OW_U|}JyNBmAD_qn`|IrvU;gs#<6J0aFY9DGs?81nBkF0VOcw*Dk@KZ7 zc;kjU&!DCdiIR;o?6ds-Jo2^R1~jC_@!dDeLF~94EXRmQ0SA#TLZ+mZf+r;##UMZ( z;6l?Dyr0Gs<~f%Z`)~r*S~nAWOnpN`2^y*HU1GwGB1GPS3{QNmeo#(|_cqiAmm%i0 zTDu;_wAk|@Rh%_#UJd9tj)O?%Nk!X0-J2!9(tgFwMROTHM8B0G|K8&9#m!>lEelZ~ zo~W;PbHuq1uCKfakBJ5ols;;ha-;%{YT|9ZTB8qoD7{V=+7Rs*QqOXVuSFY#WWm^p z6)O!17Ym2Ax%5=!A@M}7Mk_*!y`=Faz7oE^(pyyqr8Wi*ac6lV0v1#3G+?BOl!it| zcFNzbE5F^hs&TxDTkPGTp(Ef8#K4~S_=Flq1RiMOW5LFJ{lpU-qxW~0+?QrJ$j45Y ze?mC(YEGvD7AtRIB3IngghFMc4Cebo9g*23ZAkfV0i&YjUTCQ&pQuU?grr>{lFK19Iuo`b^( zpYp3;pif}KNFmyBJKan?*$=(R(urEu7a2q(DV2dSN7S zXL-ir)yEsQUK@Vt zs&1Oy$4TB^2u6V&Bs4eX{a5h{eQeuUsb13`@12Lxm_Qe2HRJaql>216^F)f;_wQ$r zz8K}p)E*hud;uMwe*s`J-~1uJfyd0oqzHQ7@-98Bfo?a!)VxmyTZc*DnfDS%E3!Kz z3!xqR&T>LNqr}ED*ga&@x`!FQer<-TMcw)<$!OG$fo&Pf6Y034P{OS)%@vHtP3XW9yWu-7;j7EN$e^NWE3=`^T=({a|dUQ3Tc+lCoxdu4UE06R3 z);-PI894>$tlr_GoZNQyJ*pc8#gu+m4j zPEni&5l4IxVoEQ6Zf4Wkwp3?Pc$koXTr4887`k3o_pKkzO&?nj!*sryYlhQm+PZHk zDdZP7B+Fr&8_vK61_4o2TU;|IN3vlcz>Ve z0Lua?5+HEhmA^KVUkjW`Ws|u4S|N9FizT)H4cPq$Lc?hM!QJv6TQiwKdN2p=`q(-b z@V~&}b)lv2Y!RWZ=#&9(0HZmRVo4eO25>9N5!yln8M|i4fITKXBVXAO6lb-*ySVb~ zS)IX$!WMm^V?|ObrIcz{>au?rLn!zx#Aij!JPR9DkSA=)Z+uu3Js(B7nQKQM(fVpl>Cbq*AMyi zV(eiNyw)Ntat2`vak6wf5cKNUjL@yH=Rz_9F_T@~KDPGlr%ahb>ljIWD?uSS#{6X< zfj}n`-Z%?h`-MFa9~7v;;Gq@*)JsMq;i6D)|^teawB>Q^yXBE%w74;^G+mtw@GRuN z94bRdyOF>JImJgllDyGnm6EBhM#rX6Wv`{qr_bpmKlm@wJTDjpvi^Vp;|cAmF}sQ9 zkR$edoSB+O)D>gbAHDnqQx~=Ny|&-;ur^U^)heBDf~Hp7V!#&H_Gcou@&Y9xldLkrCbv?x1&3GQDLlWm zlha?4zZAnL3O!+5&=ZAmWrTXrJ#81PrJNxI+$K%F?&BOx`%{pJXnzYio(v(Om|Y@>;-mJ~G)2H)$86ke3;@^a&>f+?)+TiFPto zPQ9Hg2SEyO&^8nr=v#jcDjB$@sUNjqTxG39W}MOc!*#jv3br6JUw{8`77;o-t*V42 zj*5!6nqgS`{91RRP$@k~l#KS8N?OoQq=b+pPdLcbHgFSER9#Ju7%Xs8J>2ilKOtx$ z2m*VE!Md?##C+#OngIZ1;6S^$8G-->S+_}%r$nbS_J z(=dMj>D5HEu4i#7MGu|4;_Ex*Z{LW1lf*flK@Ot(q*nusWhx2B%c?=MS!m*-rE6^k zZ}K-u=~{Hmbj_X<;)n&0@H(aTRi1QbD2`beHptK+Mkv2#&?{t|Q3l^B998JAoMYftltY)#tXjqOt|8Fo>~GbwWg&`jdaHcA z;R&WQ>8)z8K}dx@0nU{+i2Ru?Vl3M^!zoe!_UKY(67dR5d3uSU4K49=8p=CFZ5vV3 z7Tp>)II%Nbg=)Tvitup!3SlMD1OGz<8{L^la3BtQ*Z|bGz0Ntc2GxCS#`G+CuT`_G-Y?c|;t6wqTG7cHBy!w^UqN_ox~bFK$2_ zi6NHr_G$kxKZQQJxOkx4km^5-+q)(r~y5XL+f+Aq<^ zlAZ^0X7lgfoxE@~0{sE_6TqT_@aHtv(_b9Os1szqc)8Pb@4~YO*xD1x>d9Y}ghSV# zSt}uR4`F~QVN!tid<$c8?+mEbNoQ7S2O(f-ARejGP2!99t2W$}Ovx-gP{z%+@}m6c zk3+^6^GYBIIu5D(Fg7?4er+c2npqyg+^qqjvgKId1`w|hV$}m*Rw#0;)?|d*6THDt zu~-Hn5@BL(Y?5_T0_p|Wp<726$c$rM@GpxT^CxYPw5si9QvS)6;fgh6bvG#RcoaiM zjI2qlTaiej>kctGmaYj=!IMMkqEf(!SG_oD`7CXF)!J~iwkAgW%Q%Y>YoSO67qF`6 zRR@w+4~i7E-P;HFTD>x=EsY+s07rx*%^u^O>c!FN@ZQ zTYsOQMY+|j(w4su%WM+lLTq&01_>+mSA8SCPehVYDFPa^`7duD9m7w{&dG66F~Gl~ z6nMbz)x?3mkrS_K5}w^H-ZRA1S(ulx(Og$i8EmEC)dUC!g84vAvUtr2$z7Y^>D zW-U0O{4?t0Gt;vGhm1+b-_5>~LDxp_MR{@)sd<>2RR&`HT#SrqVKb(%+h4Qx7x-e2 zhyQ;1O(hy;r z!gdnAAi~+G_C;F9>OABl|8=?&sWT{#`NoT}s;kqUP0^7bV)6$YZ8 zMVi1{O#`_Ym&W|XC#WE|t58Gc6SYLTLvgfqp-b#=TibP97lV@m$y~pev zv`f2v=8HpCGYGsop@os&(8>}DDBrK}!6jn3tOk*j-kvhw&>ibW z?a_R!{=98J<{hzg+2;Q9oekx*@OM8&?o+M##4eICVd|r^-!-BcCiIP7H9z{ow=I8p z7DUWB{nM#B_*aYl-@hEQn{1lYXQPa1N`H$&N(p437hFAO&a@!{Bn(+~y3+Y{7$KK; z>qgOFl`@_a!fRMJ>p7c7y%tHzhqyyMH1w6xtttG!{ifpe<#T$RrolhUz*MscgER|e z{^H1|*#=u08b`$LeR22h-Q&b6^mTVm2TnXRtco}CXr2~L;ZD^jcqImv{ti;4i<#@v zGYvkaCu^QDD{2z{v5#)o4H-xp8tVN1`?#?kHnn(I5e>Lm4T!peUKZ)HY>1co+Rlb2 zKp`WuFtntvMsJ+gHRlNKUQhI`RBLCV#T!Fi#P4!bBK(o`n~nIDX>~yZyU!6;dFc^0 
zIzj?TKadpANC(=Ac1zj7khiCe{BbE15y7(8=e2@WsNx%n6h!oK#JVwc+CQCL59ik~yK`*`@je(dma)}GLr6djVx6S9r=eI_ZJoHxm{NWpI(5+}) zZnyiVmKPCgA5GddzU#wPp%>&c^rL{dy!YW!e!zpeMKELDq=X_kzItX?4$#ovV|;Ms zkAYoR^^CPui|ROu4Sm$?X|x{Vk*iTnVvf{PbNzA`HL^l)Qwt6h`SbggHN z&2Z8sGHO+~?@9Zjm`0Wj6`)K;MXVpHSj?|u8IdCg3)tcPU58k7|Cvff%Q*G$qVg$*IEc5eJfrNz!eT!W}Y3h5=xRAHX{oUAnY^RLFQs z38ZS!=SD^1nFO30@TLU>``ATY8+se>iVXi)Tg$Lh1dA%U+v($ao%_DHp~M(cv~U!3 zr<|qV9L4dIX;s?8`#tXK>(Q~%;+h$VR}<`ZRfd8Z3(Pyma#&B76jgHCSo3)GUo$YT z#35ooZ;=3|MOe z2Sf@VZoSbY@*uH~qxE>Imzp&TfQnMbU{EDCH4+>v@Oa!z`@~T2KimkV{#6c4 zzXcIF3llZ&ovGb3K3jo;76pMHiAM8GA+ZF7c%+GqL%-YY=JdAs^=R`X`2!t0KCG_Z z{HWk7+K`^a{j|O%t^TJ4c+8zV-=j`v$by`-$3G@*r!`5?b@=|9;%>@_#M(p4j!|cx z{XW$)Bmi%l5-tZ;t?|@j6WLW@<=O$>!ozj=INx~3K{OPm=629qulrKl>>@NfkgZyaRn|3|yp7^@kL zMHvZ$*MX4;gb>c)0>utNt_;+Mf>Zf{yZyH>wXwB51bMiZ1yfRX`g9#a z+Li4TdUNO{(qQ%igeU=n7pOWBO~SmJ2ZtT1w@y8CKINK^t$IQnv2qKnWCbo1Xs<8* z$3@!NqBBfpU#d;U z({O-Ery;X7G*{Nt)6-RVbbEi+Z z;D}KHV>{4$nkKGBB}X>unSZsEz6%x6Ls;Lu<)5R$4YJC%Z0kUK?J;}1+TUH165Xuc zn`TiY@~7}Ukc|_R97+l~fE-o;wn({b6jJ=BdBw_WxVhis z-RPOr1Bs{zq2)sYnGutK#g< zn_Hw&N#RnSGjk-cEy3${NzC@2=e{S+qaqR21C1p5WtZ7RRp$CCCA!=74JogOxfF$d zVXU#SkT^8Mgp|~c-3MjSdC=0oKpD#a@aW6l%U`B1(KR+pJA*7vIu&q4#uE-_NV#ev zN`KbuDXXW%CN9`E zx;-ifIi6+9H~=8JTRSni1K?jXNG*4WzRG1Ok zO3W2$xzwK!gstaWn8)5e!x+y)`k%0!2Xk`jU)Jo;E7-hP%TL~BvLq7{-qJomHVPY&T3qgmFm(Q;4 zp`ZCkW^qYOxKixom}F{qyZh?y$<0W28hHEPUHjQ4VncinkJfDd6gjvOXqVaLQ{M>) zFS}IvMZfx6E1P`e?r&cmy>E)Kp3Jbx3X!Mh(XtI?P#3?XD<7ug!!dPnSH2^4h)-pm zQ=~TfOQt(hC~=r)|Gm75RN-*AYuB`hf+ekjtlz|_?=tP*f3rrERoEySOnNwSl)O?_ zC|e0yyV`0E`H`>Lv17vP={b9dsgG!*z!80AT3G&s(=Gle>c1yIgpNM#ugaO@V2WNK zH#y@`)s9r)@y=(*sc5?C=mgQd{gZ(hV3W($aV|F|PMA=EJrlyb&u5GyRde~{w{-9nL zir_ZbsETJtEID1t3dWKXdNGL~3z{2ydc`tH6b;qKB(lTk$K%rY^*rf4G2L#5M@Oge z2c@+DSE_6d)?-*_0||;z_np(#qWDOUIE26~kWJRtr5iV5z45 zO5VvRO=B19zJu*k_6Zw~&yR3;wB+_dxmh>v!`@iV>s+GmyZidI}`u6L0ZN;nV>LLVFE{pEmyg8tJ zB~l>0bIUIyT;X79Y89l*ONM(#oV-Pu_twIol8HO!TxK5Kq3nAjUF*W%vCyQb8T;zv zqqhB~O~%7hGPj+pO^^YGk(krnc`fUOkAkD56rD_>VOEv^il( z7nt@z{WrFa>G2*=k*NX(QpzTrk=={bRg;eWxte;CS?{OTx1-VPytC9lx{8-o@zU!Q zojN)b!&wJJ-(VJp$-c{&tghjA8|ai&S*>`EW9}2q^1l@xE&r!9Eq(7z^=e211D;@6 z@$fDQ4`R03jIi5%8IKv~j^$elL1i^KqG=CYRQzWrOI6*goAqp4@@a)&pVgpsE|aNZ z{E8UCtt|0(G)hG9%t^;h(fNuzR?Zrc^Brq%;LbY?Tx7%>y|#Cw*^I*GSg{dcXA~#& z?WThJ!41t1IABHbZXl#n_(F#l(3A9_HRU_o< zQm2ZZqqz9lA&vC^2aDNU{M}>fqq!F5E)_$9I1hfzf|$5v@e2)HRt!45Qh#x)*yS5? zOo949JTpBDn@30fxEsMhFu$;YyPH6qP662R)Eyp^B<84Neflk zZ`JQ+W22O+>X6tIO-fGLTCEV@98DWB*8$x98@S6VE4P9hQGzo3 zIz~6dX?WiK(1>f;~+)uk@?#QYsH2H70TsQ+?jz(qw=<$qxN7Hq8WZT4wH-|ZUaKv zy_w))rK+(DCY3!iKNsCkc>p{Wi62mM(R_EVxyy0CnaEZ${hQNN$p%`ZIh>4m|Y;Yw!K6C!957B5#;jDN>xCL`gfu?OvG3c!T8u;bzEEI9gyFJA0< zUbs2I4WbW~M_tlfj_+7=$4N9hDFD_p^9Nx#r*f&kFIEKrf00(uXR$(J(&b&Tf2*%6 z-z}CwWX2aIQJ%;>4m9MhhV9%rFJdzCw@DPt) zytoLmWmjO7d^(_g4oltu7!JZv&Zx*VOtye7$)**ZJH2 z{iaR3O8aW5w5QObBvB+q+S7t*BU;3?DWwRlT8J8vXrm^U)SSz-`5}4d}k6q@AvCr0_HM@rV1IdqMvZhjAe*? 
z6UHYduj$mfRa=;T+d-idWqB~qpJ6zP-(B@CtG_(fuvK4g=t#a%8}OOE(XK02t;#`V z@*@QW@Fac=Q;^#m7tmdg=RDra&UOS>1ECpFHZWY~#KnezpF%y@*xZ)AO9Ae;DY7M4 zPPT{fvE6wIi!N*AN$2V>IZZe=F zOp6EpQKl4i*6v1AaZM+unK0E>v+J_>OVq1Blt{ zs#5=ffByhMv(@OFm4BX-Kq)38;oYu01SPAeFnO)H?cFbgEY59KPrg1=&`L^xe~|u3 z%mmR@GsQm1sGfK9l$x>$j;X(~9FQ6_c_D%zwpiu^2qMdGV<`Krx)3q1c*F_CG_QVfv1 z4BS6adol8KT{^kTaopg{@L}N;7gx`ZdwH4o0OZwjp1vbkA9RZ*6I38`8wrE%s4tfm zW!@kfHonxSZ%3jI?RNBv^qPK0*^DffLpZ;fQ2t~Ov6v)cPO-;=F8|~07Im!$qPGOZ z;Jc3^re{YrM7>P^VgYDnw>lP;4Z1iBp9!YyFGxI)G?Syry7#S;Enfyo)6Cw7rmb6B zBJ0b!{F`|~Q44d?hs%ouPgcMdLTx8~TvPL5%*JD^>HbK&b@0$3bD9TP6iqddfsdIt zAH?qX6g@PT&5FGiJLYx^SRWJ9l`^8v0j`%caLbcEdmy+&*WVQ(D^;r>Wv||!@9kH9 zd9!Zs-l^+dyJP@a3vr}1fPZo9gi*?%G(Ymk&vc3`3$<=wTUPJ2j zJ5q}Jj#jrG@hP3;0J|&VG6kQyEZ(V9Xhn%~Dk*oluzR8hwwe`o10yO~<~fzMY@9xj z<96s>sO0Np4Pv3dg=GsAh@~1Plz1vl-?h9ntKKU+T38H3dyP#}gbAdix<-kL2-SD) z=+Azh&%Me&S*i1#Klaf%Cvhm)~YUsi7k3=V%VFQ2dG=`$$57_YQc|abOki- zrq5u)ZXi)Ue;1_534v3b#kdNwp4i$Psk}fDG?nN-X(V6SFBZY`M|6e^?CmewI1<$X zRSY=xpR%D2a4NLqB-!QR=EJ{EeoM=++w|h|%l)*>GI=G{H0Kkck>1lMOYBze=f;Xg zfayMf2oP>ddXsZBM%2QNG**^hE3E3z0&-<+F`nTMl$WC70|d%#H*TzN<5I0P7mOWE ze~(5BH(9yPh2B>@Y++T%qbp50l}FAjv*V=c+C?`&WVPh4j;KWW&#*e55;nhFD|%$$ zU8*L5Z}HVhK!!n@G6*yI@$)B}fSiQ+1=YuA)`@D9e(w{UIiWCnnfpDn$)DK#-Wv?i zIw3vbC`hNn{P~;yqfoXX=j-X~{{f$Tq$;0Q*mUybU~tH&N|*ZZS2mDYBuixRxzX^4v#jj79`^_u=kYe)l%=g1*12Fz)?) z3(^v>qJBtZ1fbZub?l|q#=R;#d(5Ep90XM?^bx(jm;^$aXwU^g`TX^K+B*Nlz4ik0 zGRp-=_`J0AXI}55ZZ|$2*-*J#dgU4)VxF$ayn9z2-5i>VT`wEVdWl?U2+OauwGR%w zb7K10t?&Ruo3P%4`W*xZUJUUFH+Pe;F4M;(Ld>n!b3R;d7hvv028u@ZBG3+H0uh6c;bUr{mPkQh9c1v??;aFJ>u5TP-xj5WB1l8R|T61K0|!nY_;i z6lBmJ9|1Ndc$IMuvoVePp+W8F>J<|Cm5j1`r<&%~;03vH?LlG~b?r8B<_F+z0SM6n zir7q&4C3>K>WkJbB&t(kIow&#JL&vrBuzK6)hICL&?6Vg$l?2AWIB3bm*NY82SC$( zDwgf9?$X=Juos^PPmw?lBZEM{2D+j_$S6sRag7$YCPdyc^T6!`FV7vAu`}$2oqt6S z&5nk(27#jOEABJn!d33Be(AjEM9z7$=FQFN178lAMNbP3EE~${RV{*h?iHX!#)BDe z6{HcuLTtUr5!XcQJE#w=8%!)i+~X*fnG9E%O;jgp56Nc9bB=&F$oWKT&_i^M#y_JbBCOp>udcG% z@DOtMeu4Z2VKgYnu2d${O6wxbA}@&`19}}L`^Duk_ujsF(-+6;?vvx%75^Yq+p&0% z=n>a#>64_j9pOd6ttDsT^h-lA3tAY4^(C0FjEZx()LXZ%E5*Tb!V|3$8dLQ)Z9?C8 zFuFzoGHd5q`XAA~Nm+&{it@XT2-6&WE6!^c~Fl9&<81d32(YLX^LMXbz7@;Shg)lMIt z7CMfpu3ncEeZ=O-l$dr}nuDU2_MTMoT)l2*zxnOEZ+wugVs&xpro>4>I+MD^HeNbq z+WtL_dQRPNa=wvE-EPj=3qni3jY>M7G}3SHkt3FI#f5s?o)ld<@pI+2XqUN9-DiBL z&jvd&W&&>&OQ`rtms0f2_Qz%x$KbVfmz5>!etz#^I88NB&C-$qc_%&*TR$dSUHdY% zJHF?Teac&mn) z1$ZsWJ5>uH5LYgZm2OUJPqb@PhelA0*s)rEvTN~?Nyjf;YDZuZn;OugL+!^Nn@0}( z^l7Po@UarOJ4OadipFlaD+}47A$b=&J#{tAZMUQrmWu@?K#-u}))yurpP<2dYk%`s zJUOxc@6<|?jO;wYplM*cv9U{svaUnb(zt45i;kjmA^l^LTqLj>Qh7;P3~p zj5u|wsTjwxuK~N7u-IWoOLRU~mAhTOIQH}2kdWu}f?LX-2Z|xeCHAP@UbOhiAKZXB zZ~qkGIt4;HZN*1|fMPvM4YTN+DdFCs?^pw|2<#t1g zP_p+ft2+80Ex=f)w=}%5Y8~j3mPB7do@k2+(3GS#Lt&!AeoeuVO)RbP2*&7aHg8(h zeCxW<#<_?Lz7s_D?%zkRa2Mj7y@D@*Q5Gz1r*Tbm_f$^~F#Sy}_0z9j)nAq82V{<@ z&?lqto|#kI7Q|E*%|LN(=+edN3W^{{Qf(>>@&$_)tnQJz!d0MzZqx(;Pw=#*%Jw0ycX5|2{aFWr~ywKC5 zM%*|@i|}yOyBy;g_V#Utv^?C+J?hx8TfchtyrDa~L3q>GOCF43!w&Kg$FnuseEYsG zH0$BvJ|ogT_p;qNe~hJNlPLw`>aOJd6XJaEJB@qz<$j+hWFGbA%{Rgz6w*>D-aJ|{ zP8-Oqp^4Xdjw6$7Y%No$ep4v0#N0i)g)I3Ng9t8wtVMy4lO^!B)20OwZp7x9zuuRT zX_4=6T(mGq_`B}5iUCtdag2Yn<*D({Fx_Ka4@VQI zTF*>clhxDCH2`eM5qq7AD}FzhIc+W?pK@H-D|w{_RS=xITB z$E}23m>oaG!ouRP!K%QcqD%g`e8{?}W)nxuoY?_~f+o6#J+NtIYZ(rDYWrAXGF78_ zbGFUbVd9Y;Q7Pkw{oX)c(#C}SgW`BSm)KB+pHdy>y&loJ>^@=e_t0P0Vk5`D*ZVI|BfdhuxcP zQ3z$jt=iaz$Fiae3l`gaht+zVGikBJRXzk=+0Hl*Ks%|LnQkz{0?jUF96o_eI#ovQ zN&7Uc*_tC}UcZR8bhx!NYsz9SEVT`8?Qjm;u%;fCa@PQf7t^?g&OeTCp9)O(-~Q`w z?_1`-SpATVdMZj1BOy#1(3WlC2&*bMK8Ng2z;ah6jEZU`lcH8Cy 
zI|~EIeV)Tb(A&&~=w&l$k}qXv8pmXIo!(XTdsQdV&dX|WR%smTk}<+Id?{lLXxTBl z`aLpf`Ljza67rl+vuP1}M>Q`mPqZg$nw%olVzKrdG^jNmcGzpmHWd|x3?gEQz-y`K zJ%rd@*QfDYmpb=#7Gvlqj@0~qGRoO4O}YQhvlW^foCw8XPTU!d&F|URn^?!*1ebmO zuTd+r8RK1AZ$Ih&HgMJ>rtlKOaIAni@*q%t zNLjP3_vsubNXDM|z8V?2&iLLwkx?&bcJ%K(N|lM$&8#A$^mP4lbNAb~&d7*T*o{Yh z?+X}CLTcP1uof$d22bB|G>2!3GkD4```muZkGeKpFxnplFrG$tinhK^c!m86_sR(n zAiL4)gn4u4rr-Blg%IF#ihtS*wVp<6=(X^O4B!eBlJ#M4cO-i}T4+c=*eCuRe{%Cz zbw=*%+`Q)<2Qc>BbO)xWiuac<%aml47xRO5I2*!IW)Q<`zTF>C3JS3$E+>0C{jCGM zoan#cb#2MWJbN{rHn4jyP|Sh@$*&{}^WbA^9w@4j$dCwestzr~y8PY^6Knjvf??>; z_EeO3_g*q-z47~ZT>A>Q(N_B~=mjh!;+zPt0MjY<%%5yzx)mV$V_M^3&uKt$xz(65 zvdg=DzAsyTebjbvmjI$J#!U>FH>6%BXO6(Elw{~bO{aI!bj?we7i?#t+1g|Fw&+YP z{@IFRk(;qMy9Y)w+k=B;5`m#>HJBr6xr6x1LK7TiBF-2)Cr#$Y*5ixCbBB^+@3*BCXxMmfs(J8GepAe#F=gy`w(`p6===^lnr&gI z_WI46&hM8S_nxtJCuL~cxI?|^tcS^Dwtk=4J zwIa1Ph-D*JIe7U(M!+yqm5tq+J$lp!qGDgX)$@H~Z;0@bt12OxHezp5YubFAbea<$ z{E*NkZ_e_~Z(^#>C19XmQP2X{f=5#f`qz@Z%ybdMNpwVN+79!7)=0H|XQ$Ed+Qb<% zw$P&_V{uQ1z8(M0u&}mx4u{Nmzj42I&nn*zt43s)F*TqCpm~5Y66z_P1|tB%vpHC4l*?w4Rl6@$ z6H=nxlis*?u+|^1@n8>vkQ(HfG}wf;51z8(1g=p$^A#~^lBKiM*CDxG7^E=X(R2&t zhxkcxi*p~Bubw1KB(Es>-aXs=h||u$rY-a+v-v6vO(B`3BC z&z>#n(lfxtD>&8)W7gE2g z-jZQ8Oh{8TY+HJ6J^8)u%6ARhHxrLktW)(E5GuODu`^-IIPXKX7yzp&CDpp5q+wXq zUVDG6*G14+iiu=5F^r#`kVi6^Tjj~RqVrh*eCo{HDnAy<`#*c;Grw z*&O~cvW!k>`N!ufpuQW3ZGCf9o#$@)_~%i#22~#4{UOb|rgDlE6mc~agIjIR+xM1> zU%8!jh>?h^@~*IO-n_X1&~8Iaja4h!b~`$V9V8Br3WNqVJG%)JV2FJA65^Upn`YG? z=1`HTep?%wT#EQsM$!bB6IX3kXS;am*-p+LH^m5GRaRH=!+BYmNw>Jm5z*1U+}eI; z<(d#&P9`L{oE&GXuaV1tWgFcTm#(SXnk`Pf-K29>WrvF=ovf*Qy{RyBN8gr^yJ_3D zdFhkhRs*=QV{u}q8y){y^2k`8%Hh2m+jQ*M=j7udy?ZyJ2~M0_$1W&`UQ#SPi+h>d zy86<|%Z5f;`%MpSbBnMM^0;ju74Fk5;Ar6Wf;IfDmjtxLxqDY1!IV!S9$wHsqJ|Jy zZ+5c63gg+#P)X=yjioC0g1THJs$;a?b1_9Gz&*yKsR0@_=uV%kxrMwcBA06Lt&I$f zFvaG_Kw~PApV5WPt#>x4_k`S~#A;?ViKi7^U=!M*RKvddu-);Azhpygcx;iH%0*7Nje{qLF-@B}h|>HWw$@&*@$&g(cJ`v3kY~ zY|On!{B$i0vMCTPaNTBQNP>$rZv2dTrfO=M%-XLbQ$LC)TC^F`VcYqwPy>nyPVoqu z1nna|i4QIXJ;c#BbWU`a2Bc-cX|; zePMD8ZW)*EBnk0*rh(-Bn0~^=(8t7|_{9s`YoKjs(<#%uQ z`u4s(@`|@*b{eTKUsi{S;6SyElXL67b<_WbNSCAhRy^?jR>?ousH4JB{PkgNwKxff zJsvkb<;7yzhlBj|B@i|3G}@ta>oC7vm!C#CEbJ^E{$fT%;VS4nNu8OeG5h^j94-0o z{}k(9t3gf4O*f{(*wf}2}A{~#wNg6-?lb!#+o%b;Nr`&-+qMct|?|?^;HdS z#dI`Yy@~fbVDgaInI0Yn`0$>@LQ6LLWjFF`>H`gNw;;yg-c`(!Yd7+L*yS;a zCKJJQL06Fh_|U6*r@auQiLzF{7g$hcs-{XI^| zQY@mK+Ugrc_!jXi)226BXxNtDC{1`&T80QNes;a2tb_g|Z!G89gVu|&qD+HM?e?j& z3e9*&Fa0%p_&kU=A6YS>ar z^3=HrmMd3Yxb8}MW36nbUYNUibC6;0U@I7p>u4x%(Ee>A$t`hr*G2;PiqbN>(KzQf zpOan>Wy}dcV!L!+L(iT&7ov2cHg~MheE*>W8f!z^3w2X9lFpI-Tes~F4zB;}7kPAA zv6{=|i4)BUa4d8fdwI#6dGm5tUAkgyuKBK9-PD4P9_4_mPi_;>v0mi^zC1Gj@n@6t z8#nx^T=2=bk(Jf?!?UDCUlI=&9R%ON|0k+_`aG+#W4B5W%x*Non*em$S-*$0erz zWj2Q=`DV>cgI=M-$!@f# zkSpW<)O)okv(s3kHRn}5JStWezJ;2;oMBv+bM4xpr|v&Mt|PF9`%2&~fSeKEN5`|ETn!7X`FCpsIPS;TRr z*(wbTzigd2lVfJiJ`;hRK_$`LH0;;UpniuHLpsoF)IpKcZIq?$1TTFJ-Te|cC^<4|MZ6r;5eM_^=Hm}ZTVn; zd16gk6RBPimP$39;9!qWCuN^X+qPkIOT*G$$Q~lZ78x9}u-Nd2asMR&B3INN-4g{3 zrToGP1#EGd)=XQ+@|~FzqW#&3**F~z_-%N&J-j#R_orKZzN2s7J8d*~CiVfhS#Qm+ z<#T-^D?bkC6LCZ9Md;@&PZ!ObP8d0hbG@1-9yClH)}RoW-f3`v^HkZ6xVZW`=0l&KPyl}<`}@%C*!MS2I`LS5PthCipwpMK@P zcztsKOSW(g;D*ko;!?!N9>@onl;T}FD1;|RQm%w`XUQX#C7ITTLy9b}7W7LI0M8(B zH`#UJ2{7X2S|U-?niUwiVd{lt-I$H2$={FxT&ht#q}nlh`t-Qv*Z*Sk zUibXdk*4u2#dD2@$^Tkj-g(DqqufLIOu29R|9StT_d1 zL+tPp-UG>`=l`6vTjlT%FR{W&__8svmUB~2CKh$s z?}Rc`jKk2Cwu2?DrAV&kn+W7X$dk}NCfv9^xvAmTCmK+BSAi!eFVrdW7`IGGPxt+? z8LkAk>IqC7wcxvo^kcM@m2783IzU?3w7OS7)7`ig$*M0Tud@-$Af!BJajacvYJPqj z8G##pVUxDL{^{0QL1V+|`Wb6mnP4C0d{Ir)V%)eB6~>)`v&kNcH~YWV? 
[GIT binary patch payload omitted: base85-encoded data, not recoverable as text]

diff --git a/sql/swefreq.sql b/sql/swefreq.sql
deleted file mode 100644
index b253e21a0..000000000
--- a/sql/swefreq.sql
+++ /dev/null
@@ -1,262 +0,0 @@
--- Script for creating the swefreq tables. To run this file use:
---     mysql databasename <swefreq.sql
[... the earlier part of the deleted schema is missing here ...]
-              (revoked.user_pk IS NULL OR granted.ts > revoked.ts)
-        GROUP BY granted.user_pk, granted.dataset_pk, granted.action
-    );
-
-CREATE OR REPLACE VIEW dataset_access_pending AS
-    SELECT DISTINCT
-        access.*,
-        FALSE AS has_access,
-        request.ts AS access_requested
-    FROM dataset_access AS access
-    JOIN ( SELECT user_pk, dataset_pk, MAX(ts) AS ts
-           FROM user_access_log WHERE action = "access_requested"
-           GROUP BY user_pk, dataset_pk ) AS request
-        ON access.user_pk = request.user_pk AND
-           access.dataset_pk = request.dataset_pk
-    WHERE (access.user_pk, access.dataset_pk) IN (
-        -- get user_pk for all users that have pending access requests
-        SELECT requested.user_pk, requested.dataset_pk
-        FROM _user_access_log_summary AS requested
-        LEFT JOIN _user_access_log_summary AS granted
-            ON requested.user_pk = granted.user_pk AND
-               requested.dataset_pk = granted.dataset_pk AND
-               granted.action = 'access_granted'
-        LEFT JOIN _user_access_log_summary AS revoked
-            ON requested.user_pk = revoked.user_pk AND
-               requested.dataset_pk = revoked.dataset_pk AND
-               revoked.action = 'access_revoked'
-        WHERE requested.action = 'access_requested' AND
-              (granted.user_pk IS NULL OR requested.ts > granted.ts) AND
-              (revoked.user_pk IS NULL OR requested.ts > revoked.ts)
-        GROUP BY requested.user_pk, requested.dataset_pk, requested.action
-    );
-
-CREATE TABLE IF NOT EXISTS dataset_logo (
-    dataset_logo_pk INTEGER      NOT NULL PRIMARY KEY AUTO_INCREMENT,
-    dataset_pk      INTEGER      NOT NULL,
-    mimetype        VARCHAR(50)  NOT NULL,
-    data            MEDIUMBLOB   NOT NULL,
-    CONSTRAINT UNIQUE (dataset_pk),
-    CONSTRAINT FOREIGN KEY (dataset_pk) REFERENCES dataset(dataset_pk)
-) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-
-CREATE TABLE IF NOT EXISTS linkhash (
-    linkhash_pk        INTEGER     NOT NULL PRIMARY KEY AUTO_INCREMENT,
-    dataset_version_pk INTEGER     NOT NULL,
-    user_pk            INTEGER     NOT NULL,
-    hash               VARCHAR(64) NOT NULL,
-    expires_on         TIMESTAMP   NOT NULL,
-    CONSTRAINT UNIQUE (hash),
-    CONSTRAINT FOREIGN KEY (dataset_version_pk)
-        REFERENCES dataset_version(dataset_version_pk),
-    CONSTRAINT FOREIGN KEY (user_pk) REFERENCES user(user_pk)
-) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-
--- dataset_version_current, a view that only contains the (most) current
--- version of each entry dataset_version
-
-CREATE OR REPLACE VIEW dataset_version_current AS
-    SELECT * FROM dataset_version
-    WHERE (dataset_pk, dataset_version_pk) IN (
-        SELECT dataset_pk, MAX(dataset_version_pk) FROM dataset_version
-        WHERE available_from < now()
-        GROUP BY dataset_pk );
diff --git a/sql/user_schema.sql b/sql/user_schema.sql
index 6ca1f32f8..ccefa8197 100644
--- a/sql/user_schema.sql
+++ b/sql/user_schema.sql
@@ -6,6 +6,8 @@
 --
 --
 --------------------------------------------------------------------------------
 
+CREATE SCHEMA IF NOT EXISTS users;
+
 --------------------------------------------------------------------------------
 -- User fields
 --
@@ -73,3 +75,69 @@ CREATE TABLE IF NOT EXISTS users.user_download_log (
     dataset_file integer   NOT NULL REFERENCES data.dataset_files,
     ts           timestamp NOT NULL DEFAULT current_timestamp
 );
+
+--------------------------------------------------------------------------------
+-- User views
+--
+
+CREATE OR REPLACE VIEW users.user_access_log_summary AS
+    SELECT MAX(id) AS id,
+           user_id,
+           dataset,
+           "action",
+           MAX(ts) AS ts
+      FROM users.user_access_log
+     GROUP BY user_id, dataset, "action"
+;
+
+CREATE OR REPLACE VIEW users.dataset_access_current AS
+    SELECT DISTINCT
+        access.*,
+        TRUE AS has_access,
+        request.ts AS access_requested
+    FROM users.dataset_access AS access
+ JOIN ( SELECT user_id, dataset, MAX(ts) AS ts + FROM users.user_access_log WHERE action = 'access_requested' + GROUP BY user_id, dataset ) AS request + ON access.user_id = request.user_id AND + access.dataset = request.dataset + WHERE (access.user_id, access.dataset) IN ( + SELECT granted.user_id, granted.dataset + FROM users.user_access_log_summary AS granted + LEFT JOIN users.user_access_log_summary AS revoked + ON granted.user_id = revoked.user_id AND + granted.dataset = revoked.dataset AND + revoked.action = 'access_revoked' + WHERE granted.action = 'access_granted' AND + (revoked.user_id IS NULL OR granted.ts > revoked.ts) + GROUP BY granted.user_id, granted.dataset, granted.action + ); + +CREATE OR REPLACE VIEW users.dataset_access_pending AS + SELECT DISTINCT + access.*, + FALSE AS has_access, + request.ts AS access_requested + FROM users.dataset_access AS access + JOIN ( SELECT user_id, dataset, MAX(ts) AS ts + FROM users.user_access_log WHERE action = 'access_requested' + GROUP BY user_id, dataset ) AS request + ON access.user_id = request.user_id AND + access.dataset = request.dataset + WHERE (access.user_id, access.dataset) IN ( + -- get user_id for all users that have pending access requests + SELECT requested.user_id, requested.dataset + FROM users.user_access_log_summary AS requested + LEFT JOIN users.user_access_log_summary AS granted + ON requested.user_id = granted.user_id AND + requested.dataset = granted.dataset AND + granted.action = 'access_granted' + LEFT JOIN users.user_access_log_summary AS revoked + ON requested.user_id = revoked.user_id AND + requested.dataset = revoked.dataset AND + revoked.action = 'access_revoked' + WHERE requested.action = 'access_requested' AND + (granted.user_id IS NULL OR requested.ts > granted.ts) AND + (revoked.user_id IS NULL OR requested.ts > revoked.ts) + GROUP BY requested.user_id, requested.dataset, requested.action + ); diff --git a/test/data/clean_dummy_data.sql b/test/data/clean_dummy_data.sql index 1e9e3dba5..6355b8491 100644 --- a/test/data/clean_dummy_data.sql +++ b/test/data/clean_dummy_data.sql @@ -1,21 +1,23 @@ -- Delete test data -DELETE FROM user_access_log WHERE user_pk > 1000000 OR dataset_pk > 1000000; -DELETE FROM dataset_access WHERE user_pk > 1000000 OR dataset_pk > 1000000; -DELETE FROM user WHERE user_pk > 1000000; -DELETE FROM dataset_file WHERE dataset_file_pk > 1000000; -DELETE FROM dataset_version WHERE dataset_pk > 1000000; -DELETE FROM sample_set WHERE sample_set_pk > 1000000; -DELETE FROM collection WHERE collection_pk > 1000000; -DELETE FROM dataset WHERE dataset_pk > 1000000; -DELETE FROM study WHERE study_pk > 1000000; +DELETE FROM users.user_access_log WHERE id > 1000000 OR dataset > 1000000; +DELETE FROM users.dataset_access WHERE id > 1000000 OR dataset > 1000000; +DELETE FROM users.users WHERE id > 1000000; +DELETE FROM data.dataset_files WHERE id > 1000000; +DELETE FROM data.dataset_versions WHERE id > 1000000; +DELETE FROM data.sample_sets WHERE id > 1000000; +DELETE FROM data.datasets WHERE id > 1000000; +DELETE FROM data.reference_sets WHERE id > 1000000; +DELETE FROM data.dbsnp_versions WHERE id > 1000000; +DELETE FROM data.collections WHERE id > 1000000; +DELETE FROM data.studies WHERE id > 1000000; -- Reset auto increment counters -ALTER TABLE user_access_log AUTO_INCREMENT = 1; -ALTER TABLE dataset_access AUTO_INCREMENT = 1; -ALTER TABLE user AUTO_INCREMENT = 1; -ALTER TABLE dataset_file AUTO_INCREMENT = 1; -ALTER TABLE dataset_version AUTO_INCREMENT = 1; -ALTER TABLE collection 
AUTO_INCREMENT = 1; -ALTER TABLE sample_set AUTO_INCREMENT = 1; -ALTER TABLE dataset AUTO_INCREMENT = 1; -ALTER TABLE study AUTO_INCREMENT = 1; +ALTER SEQUENCE data.dataset_files_id_seq RESTART WITH 1; +ALTER SEQUENCE data.dataset_versions_id_seq RESTART WITH 1; +ALTER SEQUENCE data.collections_id_seq RESTART WITH 1; +ALTER SEQUENCE data.sample_sets_id_seq RESTART WITH 1; +ALTER SEQUENCE data.datasets_id_seq RESTART WITH 1; +ALTER SEQUENCE data.studies_id_seq RESTART WITH 1; +ALTER SEQUENCE users.users_id_seq RESTART WITH 1; +ALTER SEQUENCE users.user_access_log_id_seq RESTART WITH 1; +ALTER SEQUENCE users.dataset_access_id_seq RESTART WITH 1; diff --git a/test/data/load_dummy_data.sql b/test/data/load_dummy_data.sql index d8238201b..1277d8796 100644 --- a/test/data/load_dummy_data.sql +++ b/test/data/load_dummy_data.sql @@ -1,35 +1,49 @@ -INSERT INTO study (study_pk, pi_name, pi_email, contact_name, contact_email, title, description, publication_date, ref_doi) +-- dbSNP tables. + +INSERT INTO data.dbsnp_versions (id, version_id) + VALUES (1000001, 'dummy 1'), + (1000002, 'dummy 2'); + +-- Reference Set tables + +INSERT INTO data.reference_sets (id, dbsnp_version, reference_build, reference_name, ensembl_version, gencode_version, dbnsfp_version, omim_version) + VALUES (1000001, 1000002, 'GRCh1p2', 'Dummyman', 'homo_sapiens_core_0_3', '11', 'b142', 'ominfo'), + (1000002, 1000001, 'GRCh2p1', 'Mummydam', 'homo_sapiens_core_1_2', '23', 'b131', 'omimeme'); + +-- Study and Dataset fields + +INSERT INTO data.studies (id, pi_name, pi_email, contact_name, contact_email, title, study_description, publication_date, ref_doi) VALUES (1000001, 'PI_STUDY1', 'pi1@example.com', 'Contact Study 1', 'contact1@example.com', 'Study 1', 'Study 1 description', '2017-01-01', 'study1DOI'), (1000002, 'PI_STUDY2', 'pi2@example.com', 'Contact Study 2', 'contact2@example.com', 'Study 2', 'Study 2 description', '2017-02-01', 'study2DOI'); -INSERT INTO collection (collection_pk, name, ethnicity) VALUES +INSERT INTO data.collections (id, study_name, ethnicity) VALUES (1000001, 'Collection1', 'CollEth1'), (1000002, 'Collection2', 'CollEth2'), (1000003, 'Collection3', 'CollEth3'); -INSERT INTO dataset (dataset_pk, study_pk, short_name, full_name, browser_uri, beacon_uri, avg_seq_depth, seq_type, seq_tech, seq_center, dataset_size, mongodb_collection) - VALUES (1000001, 1000001, 'Dataset 1', 'Dataset 1 Long name', 'http://example.com/browser1', 'http://example.com/beacon1', 1.0, 'SeqType1', 'SeqTech1', 'SeqCenter1', 1001, 'na'), - (1000002, 1000002, 'Dataset 2', 'Dataset 2 Long name', 'http://example.com/browser2', 'http://example.com/beacon2', 2.0, 'SeqType2', 'SeqTech2', 'SeqCenter2', 1002, 'na'); +INSERT INTO data.datasets (id, study, reference_set, short_name, full_name, browser_uri, beacon_uri, beacon_description, avg_seq_depth, seq_type, seq_tech, seq_center, dataset_size) + VALUES (1000001, 1000001, 1000001, 'Dataset 1', 'Dataset 1 Long name', 'http://example.com/browser1', 'http://example.com/beacon1', 'Dummy Dataset 1', 1.0, 'SeqType1', 'SeqTech1', 'SeqCenter1', 1001), + (1000002, 1000002, 1000002, 'Dataset 2', 'Dataset 2 Long name', 'http://example.com/browser2', 'http://example.com/beacon2', 'Dummy Dataset 2', 2.0, 'SeqType2', 'SeqTech2', 'SeqCenter2', 1002); -INSERT INTO sample_set (sample_set_pk, dataset_pk, collection_pk, sample_size, phenotype) +INSERT INTO data.sample_sets (id, dataset, "collection", sample_size, phenotype) VALUES (1000001, 1000001, 1000001, 10, 'SamplePheno1'), (1000002, 1000001, 1000002, 
15, 'SamplePheno2 Coll1'), (1000003, 1000002, 1000003, 20, 'SamplePheno2 Coll2'); -INSERT INTO dataset_version (dataset_version_pk, dataset_pk, version, description, terms, var_call_ref, available_from, ref_doi, data_contact_name, data_contact_link) - VALUES (1000001, 1000001, 'Version 1-1', 'Dataset 1-1, description', 'Dataset 1-1, terms', 'CallRef11', '2017-01-01', 'datset11DOI', "Gunnar Green", "gunnar.green@example.com"), - (1000002, 1000002, 'Version 2-1', 'Dataset 2-1, description', 'Dataset 2-1, terms', 'CallRef21', '2017-02-01', 'datset21DOI', NULL, NULL), - (1000003, 1000002, 'Version 2-2', 'Dataset 2-2, description', 'Dataset 2-2, terms', 'CallRef22', '2017-02-02', 'datset22DOI', "Strummer project", "https://example.com/strummer"), - (1000004, 1000002, 'InvVer 2-3', 'Dataset 2-3, description', 'Dataset 2-3, terms', 'CallRef23', '2030-02-03', 'datset23DOI', "Drummer project", "https://example.com/drummer"); +INSERT INTO data.dataset_versions (id, dataset, dataset_version, dataset_description, terms, var_call_ref, available_from, ref_doi, data_contact_name, data_contact_link, num_variants, coverage_levels) + VALUES (1000001, 1000001, 'Version 1-1', 'Dataset 1-1, description', 'Dataset 1-1, terms', 'CallRef11', '2017-01-01', 'datset11DOI', 'Gunnar Green', 'gunnar.green@example.com', 10, ARRAY[1,5,10]), + (1000002, 1000002, 'Version 2-1', 'Dataset 2-1, description', 'Dataset 2-1, terms', 'CallRef21', '2017-02-01', 'datset21DOI', NULL, NULL, 100, ARRAY[1,5,10]), + (1000003, 1000002, 'Version 2-2', 'Dataset 2-2, description', 'Dataset 2-2, terms', 'CallRef22', '2017-02-02', 'datset22DOI', 'Strummer project', 'https://example.com/strummer', 1000, ARRAY[1,5,10]), + (1000004, 1000002, 'InvVer 2-3', 'Dataset 2-3, description', 'Dataset 2-3, terms', 'CallRef23', '2030-02-03', 'datset23DOI', 'Drummer project', 'https://example.com/drummer', 10000, ARRAY[1,5,10]); -INSERT INTO dataset_file(dataset_file_pk, dataset_version_pk, name, uri, bytes) +INSERT INTO data.dataset_files(id, dataset_version, basename, uri, file_size) VALUES (1000001, 1000001, 'File11-1', '/release/file111.txt', 100), (1000002, 1000001, 'File11-2', '/release/file112.txt', 100000), (1000003, 1000002, 'File21-1', '/release/file211.txt', 1000000000), (1000004, 1000003, 'File22-1', '/release/file221.txt', 973826482736), (1000005, 1000004, 'File23-1', '/release/file231.txt', 239847293874293874); -INSERT INTO user(user_pk, name, email, affiliation, country, identity, identity_type) VALUES +INSERT INTO users.users(id, username, email, affiliation, country, identity, identity_type) VALUES (1000100, 'Not req yet', 'email0', 'i', '', 'email0', 'elixir'), (1000101, 'Requested access', 'email1', 'w1', '', 'email1', 'google'), (1000102, 'Approved access', 'email2', 'c1', '', 'email2', 'elixir'), @@ -47,15 +61,15 @@ INSERT INTO user(user_pk, name, email, affiliation, country, identity, identity_ (1000114, 'Admin2', 'admin2', 'Rootspace', '', 'admin2', 'elixir'), (1000115, 'Admin12', 'admin12', 'Rootspace', '', 'admin12', 'google'); -INSERT INTO dataset_access(user_pk, dataset_pk) VALUES +INSERT INTO users.dataset_access(user_id, dataset) VALUES (1000100, 1000001), (1000101, 1000001), (1000102, 1000001), (1000103, 1000001), (1000104, 1000001), (1000105, 1000001), (1000106, 1000001), (1000107, 1000001), (1000108, 1000001), (1000108, 1000002), (1000109, 1000001), (1000109, 1000002), (1000110, 1000001), (1000110, 1000002), (1000111, 1000001), (1000111, 1000002), (1000112, 1000001), (1000112, 1000002); -INSERT INTO dataset_access(user_pk, 
dataset_pk, is_admin) VALUES
-    (1000113, 1000001, 1), (1000114, 1000002, 1), (1000115, 1000001, 1), (1000115, 1000002, 1);
+INSERT INTO users.dataset_access(user_id, dataset, is_admin) VALUES
+    (1000113, 1000001, TRUE), (1000114, 1000002, TRUE), (1000115, 1000001, TRUE), (1000115, 1000002, TRUE);
 
-INSERT INTO user_access_log(user_pk, dataset_pk, action, ts) VALUES
+INSERT INTO users.user_access_log(user_id, dataset, "action", ts) VALUES
     (1000101, 1000001, 'access_requested', '2017-01-01'),
     (1000102, 1000001, 'access_requested', '2017-01-02'),
     (1000103, 1000001, 'access_requested', '2017-01-03'),
@@ -104,13 +118,20 @@ INSERT INTO user_access_log(user_pk, dataset_pk, action, ts) VALUES
     (1000115, 1000002, 'access_requested', '2017-02-15'),
     (1000115, 1000002, 'access_granted', '2017-02-16');
 
-SELECT "Waiting", user.name, user.affiliation as visibility, user.user_pk,
-       dataset_access_pending.dataset_pk,
-       dataset_access_pending.dataset_access_pk
-FROM dataset_access_pending JOIN user ON (dataset_access_pending.user_pk = user.user_pk)
-WHERE dataset_pk > 1000000;
+SELECT 'Waiting', users.users.username, users.users.affiliation AS visibility,
+       users.users.id, users.dataset_access_pending.dataset,
+       users.dataset_access_pending.id
+  FROM users.dataset_access_pending
+  JOIN users.users
+    ON (users.dataset_access_pending.user_id = users.users.id)
+  WHERE dataset > 1000000;
+
+SELECT 'Current', users.users.username, users.users.affiliation AS visibility,
+       users.users.id, users.dataset_access_current.dataset,
+       users.dataset_access_current.id
+  FROM users.dataset_access_current
+  JOIN users.users
+    ON (users.dataset_access_current.user_id = users.users.id)
+  WHERE dataset > 1000000;
 
-SELECT "Current", user.name, user.affiliation as visibility, user.user_pk, dataset_access_current.dataset_pk,
-       dataset_access_current.dataset_access_pk
-FROM dataset_access_current JOIN user ON (dataset_access_current.user_pk = user.user_pk)
-WHERE dataset_pk > 1000000;
+-- Variant and coverage data fields

From 035406b388dbb2e689c1efa42c5cce410bc869fe Mon Sep 17 00:00:00 2001
From: Martin Norling
Date: Wed, 13 Feb 2019 19:00:26 +0100
Subject: [PATCH 052/360] feat: Rewrite travis tests to use postgres

Replaces mysql tests with postgres tests. No new tests are added; the
old tests are just rewritten for postgres.
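For orientation, the rewritten scripts below assume a throwaway Postgres
container listening on 127.0.0.1:5433. A minimal reachability check, offered
only as a sketch (the psycopg2 driver is an assumption here; the patch itself
never names it):

    # sketch: confirm the dockerized test database is up before loading schemas
    import psycopg2

    conn = psycopg2.connect(host='127.0.0.1', port=5433,
                            user='postgres', dbname='postgres')
    with conn.cursor() as cur:
        cur.execute('SELECT version()')
        print(cur.fetchone()[0])
    conn.close()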
---
 .gitignore                    |  3 +++
 .travis.yml                   |  5 +++++
 backend/test.py               |  2 +-
 settings_sample.json          | 16 +++++++++++-----
 test/travis_before_install.sh | 12 ++++--------
 test/travis_script.sh         | 27 ++++++++++++++-------------
 6 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/.gitignore b/.gitignore
index bb3e30b2a..1e3537bf3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,6 @@ scripts/importer/downloaded_files
 postgres-data
 # local personal things
 personal
+# travis test remnants
+master-schema.sql
+settings.json.tmp
diff --git a/.travis.yml b/.travis.yml
index e9d8926e4..9323e2edd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -10,3 +10,8 @@ install:
   - pip install coverage coveralls
 script:
   - test/travis_script.sh
+addons:
+  postgresql: "10"
+  apt:
+    packages:
+      - postgresql-client-10
diff --git a/backend/test.py b/backend/test.py
index 4834e81e8..6eb43006a 100644
--- a/backend/test.py
+++ b/backend/test.py
@@ -101,7 +101,7 @@ def tearDown(self):
         try:
             u = db.User.select(db.User).where(db.User.email==self.USER).get()
             try:
-                u.access.get().delete_instance()
+                u.dataset_access.get().delete_instance()
             except peewee.PeeweeException:
                 pass
             try:
diff --git a/settings_sample.json b/settings_sample.json
index a42391034..7eef537e3 100644
--- a/settings_sample.json
+++ b/settings_sample.json
@@ -5,11 +5,17 @@
     "googleSecret" : "a secret from google",
     "redirectUri"  : "https://google oauth redirect uri",
 
-    "postgresHost": "postgres host",
-    "postgresPort": 5432,
-    "postgresUser": "postgres",
-    "postgresPass": "",
-    "postgresName": "",
+    "mysqlHost"   : "127.0.0.1",
+    "mysqlPasswd" : "password",
+    "mysqlSchema" : "swefreq",
+    "mysqlUser"   : "swefreq",
+    "mysqlPort"   : 3306,
+
+    "postgresHost" : "postgres host",
+    "postgresPort" : 5432,
+    "postgresUser" : "postgres",
+    "postgresPass" : "",
+    "postgresName" : "",
 
     "mongoHost"     : "mongodb host",
     "mongoPassword" : "password",
diff --git a/test/travis_before_install.sh b/test/travis_before_install.sh
index 1d3c7cadf..844721ff1 100755
--- a/test/travis_before_install.sh
+++ b/test/travis_before_install.sh
@@ -1,12 +1,8 @@
 #!/bin/sh -x
 
-docker pull mysql:5.7
-docker pull ubuntu:16.04
+PSQL_VERSION="10"
+PSQL_PORT="5433"
 
-VOLUME='mysql-data-volume'
-MYSQL_PORT=3366
+docker pull postgres:"${PSQL_VERSION}"
 
-scripts/download_and_create_docker_db_volume.sh
-
-docker run -v "$VOLUME:/var/lib/mysql" \
-    --rm --name mysql -d -p "$MYSQL_PORT:3306" mysql:5.7
+docker run --rm -d -p $PSQL_PORT:5432 postgres:"${PSQL_VERSION}"
diff --git a/test/travis_script.sh b/test/travis_script.sh
index b94dd8cc7..891818d00 100755
--- a/test/travis_script.sh
+++ b/test/travis_script.sh
@@ -2,9 +2,8 @@
 
 ## SETUP SETTINGS
 cp settings_sample.json settings.json
-sed -i 's/password//' settings.json
-sed -i 's/"mysqlSchema" : "swefreq"/"mysqlSchema" : "swefreq_test"/' settings.json
-sed -i 's/"mysqlPort" : 3306/"mysqlPort" : 3366/' settings.json
+sed -i.tmp 's/"postgresHost" : "postgres host"/"postgresHost" : "127.0.0.1"/' settings.json
+sed -i.tmp 's/"postgresPort" : 5432/"postgresPort" : 5433/' settings.json
 
 echo 'SETTINGS'
 cat settings.json
 echo '/SETTINGS'
 
 echo '>>> Test 1. The SQL Patch'
 
 LATEST_RELEASE=$(git tag | grep '^v' | sort -V | tail -n 1)
-git show "$LATEST_RELEASE:sql/swefreq.sql" >master-schema.sql
+git show ${LATEST_RELEASE}:sql/*_schema.sql > master-schema.sql
+
+psql -U postgres -h 127.0.0.1 -p 5433 -f master-schema.sql
+psql -U postgres -h 127.0.0.1 -p 5433 -f sql/patch-master-db.sql
 
-mysql -u swefreq -h 127.0.0.1 -P 3366 swefreq_test <master-schema.sql
-mysql -u swefreq -h 127.0.0.1 -P 3366 swefreq_test <sql/patch-master-db.sql
-
-echo '>>> Test 2. Load the swefreq schema'
-mysql -u swefreq -h 127.0.0.1 -P 3366 swefreq_test <sql/swefreq.sql
+echo ">>> Test 2. Load the swefreq schema"
+psql -U postgres -h 127.0.0.1 -p 5433 -f sql/data_schema.sql
+psql -U postgres -h 127.0.0.1 -p 5433 -f sql/user_schema.sql
+psql -U postgres -h 127.0.0.1 -p 5433 -f test/data/load_dummy_data.sql
 
 echo ">>> Test 3. Check that the backend starts"
 (cd backend && ../test/01_daemon_starts.sh)
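The backend/test.py change above follows from the model rewrite: in peewee, the
related_name given to a ForeignKeyField decides the attribute used for the
reverse query, so renaming the backref renames what the test has to call. A
self-contained sketch of that mechanism (peewee 2.x-style related_name, which
peewee 3 renamed to backref; the model shapes here are illustrative, not the
project's):

    # sketch: related_name='dataset_access' is what makes u.dataset_access work
    from peewee import (SqliteDatabase, Model, CharField,
                        ForeignKeyField, BooleanField)

    db = SqliteDatabase(':memory:')  # stand-in backend, just for the demo

    class User(Model):
        email = CharField()
        class Meta:
            database = db

    class DatasetAccess(Model):
        user = ForeignKeyField(User, related_name='dataset_access')
        is_admin = BooleanField(default=False)
        class Meta:
            database = db

    db.create_tables([User, DatasetAccess])
    u = User.create(email='email0')
    DatasetAccess.create(user=u)
    u.dataset_access.get().delete_instance()  # the call exercised in tearDown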
From d0d69614a96ba46b2358e539709c89d7d1dc7e21 Mon Sep 17 00:00:00 2001
From: MalinAhlberg
Date: Wed, 13 Feb 2019 11:52:35 +0100
Subject: [PATCH 053/360] Add sampleCount in import script

---
 scripts/importer/data_importer/raw_data_importer.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py
index 54f5b222e..9c5f8b24a 100644
--- a/scripts/importer/data_importer/raw_data_importer.py
+++ b/scripts/importer/data_importer/raw_data_importer.py
@@ -274,6 +274,7 @@ def _insert_variants(self):
                     sys.exit(1)
 
                 base = {}
+                samples = 0
                 for i, item in enumerate(line.strip().split("\t")):
                     if i == 0:
                         base['dataset_version'] = self.dataset_version
@@ -282,6 +283,9 @@ def _insert_variants(self):
                     elif i == 7 or not self.settings.beacon_only:
                         # only parse column 7 (maybe also for non-beacon-import?)
                         info = dict([(x.split('=', 1)) if '=' in x else (x, x) for x in re.split(';(?=\w)', item)])
+                    elif i > 8:
+                        # TODO is it always column 8, or does it vary?
+                        samples += 1
 
                 if base["chrom"].startswith('GL') or base["chrom"].startswith('MT'):
                     continue
@@ -314,6 +318,11 @@ def _insert_variants(self):
                         data['allele_num'] = int(info[an])
                         data['allele_freq'] = None
 
+                        if 'NS' in info:
+                            data['sample_count'] = int(info['NS'])
+                        else:
+                            data['sample_count'] = samples
+
                         data['allele_count'] = int(info[ac].split(',')[i])
                         if 'AF' in info and data['allele_num'] > 0:
                             data['allele_freq'] = data['allele_count']/float(info[an])

From 9332827833160c48f6fa2e475e1277fb683d7545 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20=C3=96stberg?=
Date: Wed, 13 Feb 2019 13:05:05 +0100
Subject: [PATCH 054/360] added sample_count column to model

---
 backend/db.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/backend/db.py b/backend/db.py
index 6d13bb4dd..94cf4244c 100644
--- a/backend/db.py
+++ b/backend/db.py
@@ -276,6 +276,7 @@ class Meta:
     site_quality = FloatField()
     orig_alt_alleles = ArrayField(CharField)
     hom_count = IntegerField()
+    sample_count = IntegerField()
     allele_freq = FloatField()
     filter_string = CharField()
     variant_id = CharField()

From 7c04ddbc3c010d98e32223ccc39bb8d874f53ffc Mon Sep 17 00:00:00 2001
From: MalinAhlberg
Date: Thu, 14 Feb 2019 11:29:30 +0100
Subject: [PATCH 055/360] Remove sample_count model again

Undoes 6ee3c09e542edda2fa173de0d179f2d6e35704df
---
 backend/db.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/backend/db.py b/backend/db.py
index 94cf4244c..6d13bb4dd 100644
--- a/backend/db.py
+++ b/backend/db.py
@@ -276,7 +276,6 @@ class Meta:
     site_quality = FloatField()
     orig_alt_alleles = ArrayField(CharField)
     hom_count = IntegerField()
-    sample_count = IntegerField()
     allele_freq = FloatField()
     filter_string = CharField()
     variant_id = CharField()
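The next patch stops counting sample columns on every data row and
instead reads the count once from the #CHROM header line, where fields
10 and onwards each name one sample. The same number can be derived
outside the importer; a rough shell equivalent, with dataset.vcf.gz as
a placeholder path:

    # fields 1-9 are CHROM..FORMAT; everything after that is one column per sample
    zcat dataset.vcf.gz | awk -F'\t' '/^#CHROM/ { print NF - 9; exit }'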
From e0ef58cae2918a654d14b9d205b7bac95b56c87b Mon Sep 17 00:00:00 2001
From: MalinAhlberg
Date: Thu, 14 Feb 2019 11:08:10 +0100
Subject: [PATCH 056/360] Only count samples in header, not on each data row

---
 scripts/importer/data_importer/raw_data_importer.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py
index 9c5f8b24a..f8f312f55 100644
--- a/scripts/importer/data_importer/raw_data_importer.py
+++ b/scripts/importer/data_importer/raw_data_importer.py
@@ -266,6 +266,8 @@ def _insert_variants(self):
                         dp_mids = map(float, line.split('Mids: ')[-1].strip('">').split('|'))
                     if line.startswith('##INFO=<ID=GQ_HIST'):
                         gq_mids = map(float, line.split('Mids: ')[-1].strip('">').split('|'))
+                    if line.startswith('#CHROM'):
+                        samples = len(line.split('\t')[9:])
                     continue
 
                 if not self.settings.beacon_only:
@@ -283,10 +285,6 @@ def _insert_variants(self):
                     elif i == 7 or not self.settings.beacon_only:
                         # only parse column 7 (maybe also for non-beacon-import?)
                         info = dict([(x.split('=', 1)) if '=' in x else (x, x) for x in re.split(';(?=\w)', item)])
-                    elif i > 8:
-                        # TODO is it always column 8, or does it vary?
-                        samples += 1
-
                 if base["chrom"].startswith('GL') or base["chrom"].startswith('MT'):
                     continue

From 64ede9823708b6634d318adb7e4930665e6c2991 Mon Sep 17 00:00:00 2001
From: MalinAhlberg
Date: Thu, 14 Feb 2019 11:09:48 +0100
Subject: [PATCH 057/360] Save sampleCount in sample_sets, add parameter to
 import script

Information from `NS` or header in vcf gets saved to
data.sample_sets.sample_size
(No changes to db schemas needed anymore!)
---
 .../data_importer/raw_data_importer.py | 25 +++++++++++++++----
 scripts/importer/importer.py           |  2 ++
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py
index f8f312f55..127608c87 100644
--- a/scripts/importer/data_importer/raw_data_importer.py
+++ b/scripts/importer/data_importer/raw_data_importer.py
@@ -60,6 +60,7 @@ class RawDataImporter(DataImporter):
     def __init__(self, settings):
         super().__init__(settings)
         self.dataset_version = None
+        self.sampleset = None
         self.counter = {'coverage':None,
                         'variants':None}
 
@@ -84,6 +85,16 @@ def _select_dataset_version(self):
                 print("Please select a number in {}".format([d.id for d in datasets]))
         ds = [d for d in datasets if d.id == selection][0]
         logging.info("Using dataset {}".format(ds.short_name))
+        self.dataset = ds
+
+        if self.settings.set_sampleset_size:
+            try:
+                samplesets = db.SampleSet.select()
+                self.sampleset = [s for s in samplesets if s.dataset.id == self.dataset.id][0]
+            except IndexError:
+                logging.warning("No sample set found for data set {}".format(self.dataset.id))
+                logging.warning("Sample size will not be set")
+                self.settings.set_sampleset_size = False
 
         versions = []
         for version in db.DatasetVersion.select().where(db.DatasetVersion.dataset == ds):
@@ -168,6 +179,7 @@ def _select_dataset_version(self):
             logging.info("Using dataset version {}".format(self.dataset_version))
         self.dataset_version = [v for v in versions if v.id == selection][0]
 
+
     def _insert_coverage(self):
         """
         Header columns are chromosome, position, mean coverage, median coverage,
@@ -247,6 +259,7 @@ def _insert_variants(self):
         last_progress = 0.0
         counter = 0
+        samples = 0
         vep_field_names = None
         dp_mids = None
         gq_mids = None
@@ -276,7 +289,6 @@ def _insert_variants(self):
                     sys.exit(1)
 
                 base = {}
-                samples = 0
                 for i, item in enumerate(line.strip().split("\t")):
                     if i == 0:
                         base['dataset_version'] = self.dataset_version
@@ -316,10 +328,9 @@ def _insert_variants(self):
                         data['allele_num'] = int(info[an])
                         data['allele_freq'] = None
 
-                        if 'NS' in info:
-                            data['sample_count'] = int(info['NS'])
-                        else:
-                            data['sample_count'] = samples
+                        if 'NS' in info and not samples:
+                            # save this unless we already now the
sample size + samples = int(info['NS']) data['allele_count'] = int(info[ac].split(',')[i]) if 'AF' in info and data['allele_num'] > 0: @@ -380,6 +391,10 @@ def _insert_variants(self): self._tick() last_progress += 0.01 + if self.settings.set_sampleset_size and samples: + self.sampleset.sample_size = samples + self.sampleset.save() + if batch and not self.settings.dry_run: if not self.settings.dry_run: if not self.settings.beacon_only: diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index 6d0204c88..ce0fd3fe5 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -57,6 +57,8 @@ "next coming dataset version.")) parser.add_argument("--ref_name", default="", help="Reference name to use when creating a reference set.") + parser.add_argument("--set_sampleset_size", action="store_true", + help = "Set/update dataset size to the value given in the VCF") # omim file, since we can't download or version them parser.add_argument("--omim_file", default=os.path.join(os.path.dirname(__file__), From ecb8eee1a3f213423f9c44ee5f2d1e2f845d3031 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Tue, 19 Feb 2019 10:59:29 +0100 Subject: [PATCH 058/360] Fix typo --- scripts/importer/data_importer/raw_data_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 127608c87..2bcb61179 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -329,7 +329,7 @@ def _insert_variants(self): data['allele_num'] = int(info[an]) data['allele_freq'] = None if 'NS' in info and not samples: - # save this unless we already now the sample size + # save this unless we already know the sample size samples = int(info['NS']) data['allele_count'] = int(info[ac].split(',')[i]) From 70afe4650689f3988afe0908d458d5871575a74e Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Tue, 19 Feb 2019 18:00:38 +0100 Subject: [PATCH 059/360] Add parameters for assembly_id and beacon_description assembly_id sets data.dataset_versions.var_call_ref (remember to update beacon schema) beacon_description sets data.datasets.beacon_description sampleset_size sets a cusom sampleset size set_vcf_sampleset_size sets the sampleset size according to the vcf --- backend/db.py | 1 + .../data_importer/raw_data_importer.py | 22 ++++++++++++++++--- scripts/importer/importer.py | 12 ++++++++-- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/backend/db.py b/backend/db.py index 6d13bb4dd..1bb505b00 100644 --- a/backend/db.py +++ b/backend/db.py @@ -194,6 +194,7 @@ class Meta: full_name = CharField() browser_uri = CharField(null=True) beacon_uri = CharField(null=True) + description = TextField(db_column="beacon_description", null=True) avg_seq_depth = FloatField(null=True) seq_type = CharField(null=True) seq_tech = CharField(null=True) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 2bcb61179..6f936e5bf 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -64,6 +64,20 @@ def __init__(self, settings): self.counter = {'coverage':None, 'variants':None} + + def _set_dataset_info(self): + """ Save dataset information given as parameters """ + if self.settings.beacon_description: + self.dataset.description = self.settings.beacon_description + self.dataset.save() + if 
self.settings.assembly_id: + self.dataset_version.var_call_ref = self.settings.assembly_id + self.dataset_version.save() + if self.settings.sampleset_size: + self.sampleset.sample_size = self.settings.sampleset_size + self.sampleset.save() + + def _select_dataset_version(self): datasets = [] @@ -87,14 +101,15 @@ def _select_dataset_version(self): logging.info("Using dataset {}".format(ds.short_name)) self.dataset = ds - if self.settings.set_sampleset_size: + if self.settings.set_vcf_sampleset_size or self.settings.sampleset_size: try: samplesets = db.SampleSet.select() self.sampleset = [s for s in samplesets if s.dataset.id == self.dataset.id][0] except IndexError: logging.warning("No sample set found for data set {}".format(self.dataset.id)) logging.warning("Sample size will not be set") - self.settings.set_sampleset_size = False + self.settings.set_vcf_sampleset_size = False + self.settings.sampleset_size = 0 versions = [] for version in db.DatasetVersion.select().where(db.DatasetVersion.dataset == ds): @@ -391,7 +406,7 @@ def _insert_variants(self): self._tick() last_progress += 0.01 - if self.settings.set_sampleset_size and samples: + if self.settings.set_vcf_sampleset_size and samples: self.sampleset.sample_size = samples self.sampleset.save() @@ -453,6 +468,7 @@ def prepare_data(self): self._select_dataset_version() def start_import(self): + self._set_dataset_info() self._insert_variants() if not self.settings.beacon_only: self._insert_coverage() diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index ce0fd3fe5..ad07ca8c5 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -57,8 +57,16 @@ "next coming dataset version.")) parser.add_argument("--ref_name", default="", help="Reference name to use when creating a reference set.") - parser.add_argument("--set_sampleset_size", action="store_true", - help = "Set/update dataset size to the value given in the VCF") + + parser.add_argument("--set_vcf_sampleset_size", action="store_true", + help = "Set/update sampleset size to the value given in the VCF") + + parser.add_argument("--sampleset_size", type=int, default=0, + help = "Sampleset size") + parser.add_argument("--beacon_description", default="", + help="Beacon description of the dataset.") + parser.add_argument("--assembly_id", default="", + help="Reference assembly id (GRC notation, e.g. 
GRCh37)") # omim file, since we can't download or version them parser.add_argument("--omim_file", default=os.path.join(os.path.dirname(__file__), From 9c4dd27758b983eccff590c2827ecedfb3ea4fb0 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Wed, 20 Feb 2019 08:48:21 +0100 Subject: [PATCH 060/360] Fix small mistakes --- scripts/importer/data_importer/raw_data_importer.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 6f936e5bf..9ddbced69 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -60,11 +60,11 @@ class RawDataImporter(DataImporter): def __init__(self, settings): super().__init__(settings) self.dataset_version = None + self.dataset = None self.sampleset = None self.counter = {'coverage':None, 'variants':None} - def _set_dataset_info(self): """ Save dataset information given as parameters """ if self.settings.beacon_description: @@ -77,7 +77,6 @@ def _set_dataset_info(self): self.sampleset.sample_size = self.settings.sampleset_size self.sampleset.save() - def _select_dataset_version(self): datasets = [] @@ -194,7 +193,6 @@ def _select_dataset_version(self): logging.info("Using dataset version {}".format(self.dataset_version)) self.dataset_version = [v for v in versions if v.id == selection][0] - def _insert_coverage(self): """ Header columns are chromosome, position, mean coverage, median coverage, @@ -406,9 +404,6 @@ def _insert_variants(self): self._tick() last_progress += 0.01 - if self.settings.set_vcf_sampleset_size and samples: - self.sampleset.sample_size = samples - self.sampleset.save() if batch and not self.settings.dry_run: if not self.settings.dry_run: @@ -432,6 +427,10 @@ def _insert_variants(self): self.add_variant_genes(indexes, genes, refgenes) self.add_variant_transcripts(indexes, transcripts, reftranscripts) + if self.settings.set_vcf_sampleset_size and samples: + self.sampleset.sample_size = samples + self.sampleset.save() + self.dataset_version.num_variants = counter self.dataset_version.save() if self.counter['variants'] != None: From 10447229f38b499fa2c38ab8075e03e0f4e30723 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Wed, 20 Feb 2019 08:48:31 +0100 Subject: [PATCH 061/360] Improve help messages --- scripts/importer/importer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index ad07ca8c5..265b454c5 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -59,14 +59,14 @@ help="Reference name to use when creating a reference set.") parser.add_argument("--set_vcf_sampleset_size", action="store_true", - help = "Set/update sampleset size to the value given in the VCF") - + help = "Set/update sampleset size to the value given in the VCF." + "This is either the NS value, or the number of stated samples") parser.add_argument("--sampleset_size", type=int, default=0, - help = "Sampleset size") + help = "Set sampleset size for this dataset") parser.add_argument("--beacon_description", default="", - help="Beacon description of the dataset.") + help="Set beacon description of the dataset.") parser.add_argument("--assembly_id", default="", - help="Reference assembly id (GRC notation, e.g. GRCh37)") + help="Set reference assembly id (GRC notation, e.g. 
GRCh37)") # omim file, since we can't download or version them parser.add_argument("--omim_file", default=os.path.join(os.path.dirname(__file__), From ef81b39c787c0f159241ddd943884d974b16b2fc Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Wed, 20 Feb 2019 09:06:09 +0100 Subject: [PATCH 062/360] Add parameter for datasetsize --- scripts/importer/data_importer/raw_data_importer.py | 3 +++ scripts/importer/importer.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 9ddbced69..b3e393a98 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -76,6 +76,9 @@ def _set_dataset_info(self): if self.settings.sampleset_size: self.sampleset.sample_size = self.settings.sampleset_size self.sampleset.save() + if self.settings.dataset_size: + self.dataset.dataset_size = self.settings.dataset_size + self.dataset.save() def _select_dataset_version(self): datasets = [] diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index 265b454c5..bee0dafc4 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -58,6 +58,8 @@ parser.add_argument("--ref_name", default="", help="Reference name to use when creating a reference set.") + parser.add_argument("--dataset_size", type=int, default=0, + help = "Set dataset size for this dataset") parser.add_argument("--set_vcf_sampleset_size", action="store_true", help = "Set/update sampleset size to the value given in the VCF." "This is either the NS value, or the number of stated samples") From 9742f12add86e4f8752cdfc22e312ef59ce50005 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Thu, 21 Feb 2019 13:34:08 +0100 Subject: [PATCH 063/360] Fix parsing of multiple rsids --- .../data_importer/raw_data_importer.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index b3e393a98..8183899d6 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 - import re import sys import json @@ -322,20 +321,23 @@ def _insert_variants(self): annotations = [dict(zip(vep_field_names, x.split('|'))) for x in consequence_array if len(vep_field_names) == len(x.split('|'))] alt_alleles = base['alt'].split(",") + if base['rsid'].startswith('rs'): + rsids = [int(rsid.strip('rs')) for rsid in base['rsid'].split(';')] + else: + rsids = [None] + for i, alt in enumerate(alt_alleles): if not self.settings.beacon_only: vep_annotations = [ann for ann in annotations if int(ann['ALLELE_NUM']) == i + 1] data = dict(base) data['alt'] = alt - try: - data['rsid'] = int(data['rsid'].strip('rs')) if data['rsid'].startswith('rs') else None - except: - if self.settings.beacon_only: - # ignore lines having double ids: "rs539868657;rs561027534" - continue - else: - raise + + if len(rsids) <= i: + data['rsid'] = rsids[-1] # same id as the last alternate + else: + data['rsid'] = rsids[i] + an, ac = 'AN_Adj', 'AC_Adj' if self.settings.beacon_only and 'AN_Adj' not in info: an = 'AN' From 02b8536ae1b4bd0c4dfed62d6cca00f4922f073d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 25 Feb 2019 22:52:36 +0100 Subject: [PATCH 064/360] use beacon fix for A[NC] for all datasets without A[NC]_adj. 
Also check that gene identifiers contain ENSG --- scripts/importer/data_importer/raw_data_importer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 8183899d6..72b43d18e 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -339,9 +339,9 @@ def _insert_variants(self): data['rsid'] = rsids[i] an, ac = 'AN_Adj', 'AC_Adj' - if self.settings.beacon_only and 'AN_Adj' not in info: + if 'AN_Adj' not in info: an = 'AN' - if self.settings.beacon_only and 'AC_Adj' not in info: + if 'AC_Adj' not in info: ac = 'AC' data['allele_num'] = int(info[an]) @@ -357,7 +357,7 @@ def _insert_variants(self): if not self.settings.beacon_only: data['vep_annotations'] = vep_annotations - genes.append(list({annotation['Gene'] for annotation in vep_annotations})) + genes.append(list({annotation['Gene'] for annotation in vep_annotations if annotation['Gene'][:4] == 'ENSG'})) transcripts.append(list({annotation['Feature'] for annotation in vep_annotations})) data['orig_alt_alleles'] = [ From 34f51d93dbfc67ffe4a4668f624cdcb90723bc8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 26 Feb 2019 08:57:49 +0100 Subject: [PATCH 065/360] moved ENS[GT] checks to parser function, also added set to remove duplicates --- scripts/importer/data_importer/raw_data_importer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 72b43d18e..9325771f0 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -357,8 +357,8 @@ def _insert_variants(self): if not self.settings.beacon_only: data['vep_annotations'] = vep_annotations - genes.append(list({annotation['Gene'] for annotation in vep_annotations if annotation['Gene'][:4] == 'ENSG'})) - transcripts.append(list({annotation['Feature'] for annotation in vep_annotations})) + genes.append(list(set({annotation['Gene'] for annotation in vep_annotations if annotation['Gene'][:4] == 'ENSG'}))) + transcripts.append(list(set({annotation['Feature'] for annotation in vep_annotations if annotation['Feature'][:4] == 'ENST'}))) data['orig_alt_alleles'] = [ '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles @@ -489,7 +489,7 @@ def add_variant_transcripts(self, variant_indexes:list, transcripts_to_add:list, batch = [] for i in range(len(variant_indexes)): connected_transcripts = [{'variant':variant_indexes[i], 'transcript':reftranscripts[transcript]} - for transcript in transcripts_to_add[i] if transcript and transcript[:4] == 'ENST'] + for transcript in transcripts_to_add[i]] batch += connected_transcripts if not self.settings.dry_run: db.VariantTranscripts.insert_many(batch).execute() From eb0424245175a5e3337ae7a357a3dbcc4bde9739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 27 Feb 2019 07:35:59 +0100 Subject: [PATCH 066/360] add tracking of return value from the actual tests --- test/travis_script.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/travis_script.sh b/test/travis_script.sh index 891818d00..a76157e54 100755 --- a/test/travis_script.sh +++ b/test/travis_script.sh @@ -32,7 +32,7 @@ echo ">>> Test 3. 
Check that the backend starts" (cd backend && ../test/01_daemon_starts.sh) -echo '>>> Test 4. the backend API' +echo ">>> Test 4. the backend API" coverage run backend/route.py --port=4000 --develop 1>http_log.txt 2>&1 & BACKEND_PID=$! @@ -44,17 +44,19 @@ exit_handler () { set +e # We want to make sure the background process has stopped, otherwise the # travis test will stall for a long time. - kill -9 "$BACKEND_PID" + kill -9 ${BACKEND_PID} echo 'THE HTTP LOG WAS:' cat http_log.txt - exit $rv + exit ${rv} } trap exit_handler EXIT +RETURN_VALUE=0 python backend/test.py -v +RETURN_VALUE=$((${RETURN_VALUE} + ${?})) # Quit the app curl localhost:4000/developer/quit @@ -64,3 +66,5 @@ if [ -f .coverage ]; then coveralls coverage report fi + +exit ${RETURN_VALUE} From 38dd02adc5e87dd142960f7d8f5ec8b7fc62d7d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 27 Feb 2019 07:46:24 +0100 Subject: [PATCH 067/360] apparently {} are not needed on arithmetic variables --- test/travis_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/travis_script.sh b/test/travis_script.sh index a76157e54..2992afa26 100755 --- a/test/travis_script.sh +++ b/test/travis_script.sh @@ -56,7 +56,7 @@ trap exit_handler EXIT RETURN_VALUE=0 python backend/test.py -v -RETURN_VALUE=$((${RETURN_VALUE} + ${?})) +RETURN_VALUE=$(($RETURN_VALUE + ${?})) # Quit the app curl localhost:4000/developer/quit From d082b7292ea2cf3e38f8221808c08d81df6d5335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 27 Feb 2019 09:17:04 +0100 Subject: [PATCH 068/360] forgot to remove the other {} --- test/travis_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/travis_script.sh b/test/travis_script.sh index 2992afa26..1e808a64a 100755 --- a/test/travis_script.sh +++ b/test/travis_script.sh @@ -56,7 +56,7 @@ trap exit_handler EXIT RETURN_VALUE=0 python backend/test.py -v -RETURN_VALUE=$(($RETURN_VALUE + ${?})) +RETURN_VALUE=$(($RETURN_VALUE + $?)) # Quit the app curl localhost:4000/developer/quit From a351e94c7b3e790cf2322c98121da69eccba8142 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 6 Mar 2019 12:25:21 +0100 Subject: [PATCH 069/360] added space to make the code easier to read --- scripts/importer/data_importer/raw_data_importer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 9325771f0..8ba61d272 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -281,8 +281,8 @@ def _insert_variants(self): with db.database.atomic(): for filename in self.settings.variant_file: # gene/transctipt dbids; need to add support for version - refgenes = {gene.gene_id:gene.id for gene in db.Gene.select(db.Gene.id, db.Gene.gene_id)} - reftranscripts = {tran.transcript_id:tran.id for tran in db.Transcript.select(db.Transcript.id, db.Transcript.transcript_id)} + refgenes = {gene.gene_id: gene.id for gene in db.Gene.select(db.Gene.id, db.Gene.gene_id)} + reftranscripts = {tran.transcript_id: tran.id for tran in db.Transcript.select(db.Transcript.id, db.Transcript.transcript_id)} for line in self._open(filename): line = bytes(line).decode('utf8').strip() From 28fd9f3d1eab58eb2971748b58d1924949c622be Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Tue, 5 Mar 2019 13:26:47 +0100 Subject: [PATCH 070/360] Log info about dryrun --- 
scripts/importer/data_importer/raw_data_importer.py | 8 +++++--- scripts/importer/importer.py | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 8ba61d272..113743422 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -258,13 +258,14 @@ def _insert_coverage(self): db.Coverage.insert_many(batch) if self.counter['coverage'] != None: self._tick(True) - logging.info("Inserted {} coverage records in {}".format(counter, self._time_since(start))) + if not self.settings.dry_run: + logging.info("Inserted {} coverage records in {}".format(counter, self._time_since(start))) def _insert_variants(self): """ Insert variants from a VCF file """ - logging.info("Inserting variants") + logging.info("Inserting variants%s", " (dry run)" if self.settings.dry_run else "") header = [("chrom", str), ("pos", int), ("rsid", str), ("ref", str), ("alt", str), ("site_quality", float), ("filter_string", str)] start = time.time() @@ -440,7 +441,8 @@ def _insert_variants(self): self.dataset_version.save() if self.counter['variants'] != None: self._tick(True) - logging.info("Inserted {} variant records in {}".format(counter, self._time_since(start))) + if not self.settings.dry_run: + logging.info("Inserted {} variant records in {}".format(counter, self._time_since(start))) def count_entries(self): start = time.time() diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index bee0dafc4..048b0ad08 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -141,6 +141,7 @@ importer.start_import() if args.add_raw_data: + logging.info("Adding raw data %s", "(dry run)" if args.dry_run else '') importer = RawDataImporter(args) importer.prepare_data() if not args.disable_progress: From 58084098cc6ffaf9117632612d92da453e28e6e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 27 Feb 2019 11:08:27 +0100 Subject: [PATCH 071/360] new indexes added; sorted alphabetically --- sql/data_schema.sql | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sql/data_schema.sql b/sql/data_schema.sql index 3b192034c..9f6c03848 100644 --- a/sql/data_schema.sql +++ b/sql/data_schema.sql @@ -233,9 +233,12 @@ CREATE OR REPLACE VIEW data.dataset_version_current AS -- Indexes -- -CREATE INDEX variant_pos ON data.variants (pos); -CREATE INDEX dbsnp_chrom_pos ON data.dbsnp (chrom, pos); CREATE INDEX coverage_pos_chrom ON data.coverage (chrom, pos); -CREATE INDEX variants_rsid ON data.variants (rsid); -CREATE INDEX variants_chrom_pos ON data.variants (chrom, pos); +CREATE INDEX dbsnp_chrom_pos ON data.dbsnp (chrom, pos); +CREATE INDEX dbsnp_rsid ON data.dbsnp (rsid); +CREATE INDEX features_transcript ON data.features (transcript) +CREATE INDEX features_gene ON data.features (gene) +CREATE INDEX genes_gene_id ON data.genes (gene_id) CREATE INDEX transcripts_transcript_id ON data.transcripts (transcript_id); +CREATE INDEX variants_chrom_pos ON data.variants (chrom, pos); +CREATE INDEX variants_rsid ON data.variants (rsid); From 4cf890c1f3fee0f03e79a0901b0a3018a340e6ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 28 Feb 2019 07:27:47 +0100 Subject: [PATCH 072/360] forgotten ; --- sql/data_schema.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/data_schema.sql b/sql/data_schema.sql index 9f6c03848..3ccfe867c 
100644 --- a/sql/data_schema.sql +++ b/sql/data_schema.sql @@ -236,9 +236,9 @@ CREATE OR REPLACE VIEW data.dataset_version_current AS CREATE INDEX coverage_pos_chrom ON data.coverage (chrom, pos); CREATE INDEX dbsnp_chrom_pos ON data.dbsnp (chrom, pos); CREATE INDEX dbsnp_rsid ON data.dbsnp (rsid); -CREATE INDEX features_transcript ON data.features (transcript) -CREATE INDEX features_gene ON data.features (gene) -CREATE INDEX genes_gene_id ON data.genes (gene_id) +CREATE INDEX features_transcript ON data.features (transcript); +CREATE INDEX features_gene ON data.features (gene); +CREATE INDEX genes_gene_id ON data.genes (gene_id); CREATE INDEX transcripts_transcript_id ON data.transcripts (transcript_id); CREATE INDEX variants_chrom_pos ON data.variants (chrom, pos); CREATE INDEX variants_rsid ON data.variants (rsid); From 668cb90249f5ace14218c1bec560067d4ad6add7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 28 Feb 2019 08:24:36 +0100 Subject: [PATCH 073/360] g is before t --- sql/data_schema.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/data_schema.sql b/sql/data_schema.sql index 3ccfe867c..1d88322ea 100644 --- a/sql/data_schema.sql +++ b/sql/data_schema.sql @@ -236,8 +236,8 @@ CREATE OR REPLACE VIEW data.dataset_version_current AS CREATE INDEX coverage_pos_chrom ON data.coverage (chrom, pos); CREATE INDEX dbsnp_chrom_pos ON data.dbsnp (chrom, pos); CREATE INDEX dbsnp_rsid ON data.dbsnp (rsid); -CREATE INDEX features_transcript ON data.features (transcript); CREATE INDEX features_gene ON data.features (gene); +CREATE INDEX features_transcript ON data.features (transcript); CREATE INDEX genes_gene_id ON data.genes (gene_id); CREATE INDEX transcripts_transcript_id ON data.transcripts (transcript_id); CREATE INDEX variants_chrom_pos ON data.variants (chrom, pos); From 12d21eb4a71cc06a1546d0820ef8d7eb3eb22528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 28 Feb 2019 14:03:32 +0100 Subject: [PATCH 074/360] indexes for genes and transcripts associated with variants --- sql/data_schema.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/data_schema.sql b/sql/data_schema.sql index 1d88322ea..54866e0c6 100644 --- a/sql/data_schema.sql +++ b/sql/data_schema.sql @@ -241,4 +241,7 @@ CREATE INDEX features_transcript ON data.features (transcript); CREATE INDEX genes_gene_id ON data.genes (gene_id); CREATE INDEX transcripts_transcript_id ON data.transcripts (transcript_id); CREATE INDEX variants_chrom_pos ON data.variants (chrom, pos); +CREATE INDEX variants_chrom_pos_ref_alt ON data.variants (chrom, pos, ref, alt); CREATE INDEX variants_rsid ON data.variants (rsid); +CREATE INDEX variant_genes_gene ON data.variant_genes (gene); +CREATE INDEX variant_transcripts_transcript ON data.variant_transcripts (transcript); From f7ef8e6f37b317329e46a217aeb3c735570a85b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 28 Feb 2019 14:19:25 +0100 Subject: [PATCH 075/360] and indexes for variant->genes/transcripts --- sql/data_schema.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/data_schema.sql b/sql/data_schema.sql index 54866e0c6..2f5971f19 100644 --- a/sql/data_schema.sql +++ b/sql/data_schema.sql @@ -241,7 +241,8 @@ CREATE INDEX features_transcript ON data.features (transcript); CREATE INDEX genes_gene_id ON data.genes (gene_id); CREATE INDEX transcripts_transcript_id ON data.transcripts (transcript_id); CREATE INDEX variants_chrom_pos ON data.variants (chrom, 
pos); -CREATE INDEX variants_chrom_pos_ref_alt ON data.variants (chrom, pos, ref, alt); CREATE INDEX variants_rsid ON data.variants (rsid); CREATE INDEX variant_genes_gene ON data.variant_genes (gene); +CREATE INDEX variant_genes_variant ON data.variant_genes (variant); CREATE INDEX variant_transcripts_transcript ON data.variant_transcripts (transcript); +CREATE INDEX variant_transcripts_variant ON data.variant_transcripts (variant); From 0671e15b858f6e60b876ef6d92699a46bf22dd60 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Tue, 5 Mar 2019 13:29:57 +0100 Subject: [PATCH 076/360] Add indices for beacon --- sql/data_schema.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/data_schema.sql b/sql/data_schema.sql index 2f5971f19..0493fae64 100644 --- a/sql/data_schema.sql +++ b/sql/data_schema.sql @@ -246,3 +246,5 @@ CREATE INDEX variant_genes_gene ON data.variant_genes (gene); CREATE INDEX variant_genes_variant ON data.variant_genes (variant); CREATE INDEX variant_transcripts_transcript ON data.variant_transcripts (transcript); CREATE INDEX variant_transcripts_variant ON data.variant_transcripts (variant); +CREATE INDEX beacon_data_chrpos ON beacon.beacon_data_table (chromosome,start); +CREATE INDEX beacon_data_chrref ON beacon.beacon_data_table (chromosome,reference); From 9a22dd1d528d0d0fa56da80ec360ff16508de77c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 1 Mar 2019 11:56:59 +0100 Subject: [PATCH 077/360] make sure the correct reference set is used for genes/transcripts --- scripts/importer/data_importer/raw_data_importer.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 113743422..6a8e457c2 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -281,9 +281,14 @@ def _insert_variants(self): gq_mids = None with db.database.atomic(): for filename in self.settings.variant_file: - # gene/transctipt dbids; need to add support for version - refgenes = {gene.gene_id: gene.id for gene in db.Gene.select(db.Gene.id, db.Gene.gene_id)} - reftranscripts = {tran.transcript_id: tran.id for tran in db.Transcript.select(db.Transcript.id, db.Transcript.transcript_id)} + ref_dbid = db.get_reference_dbid_dataset(self.settings.dataset) + refgenes = {gene.gene_id:gene.id for gene in (db.Gene.select(db.Gene.id, db.Gene.gene_id) + .where(db.Gene.reference_set == ref_dbid))} + reftranscripts = {tran.transcript_id:tran.id for tran in (db.Transcript + .select(db.Transcript.id, + db.Transcript.transcript_id) + .join(db.Gene) + .where(db.Gene.reference_set == ref_dbid))} for line in self._open(filename): line = bytes(line).decode('utf8').strip() From 7d1dbc7325a8841d64753288d13806552902f528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 8 Mar 2019 08:17:10 +0100 Subject: [PATCH 078/360] refgenes/transcripts -> ref_* --- .../data_importer/raw_data_importer.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 6a8e457c2..6719bd6bc 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -282,13 +282,13 @@ def _insert_variants(self): with db.database.atomic(): for filename in self.settings.variant_file: ref_dbid = 
db.get_reference_dbid_dataset(self.settings.dataset) - refgenes = {gene.gene_id:gene.id for gene in (db.Gene.select(db.Gene.id, db.Gene.gene_id) - .where(db.Gene.reference_set == ref_dbid))} - reftranscripts = {tran.transcript_id:tran.id for tran in (db.Transcript - .select(db.Transcript.id, - db.Transcript.transcript_id) - .join(db.Gene) - .where(db.Gene.reference_set == ref_dbid))} + ref_genes = {gene.gene_id: gene.id for gene in (db.Gene.select(db.Gene.id, db.Gene.gene_id) + .where(db.Gene.reference_set == ref_dbid))} + ref_transcripts = {tran.transcript_id: tran.id for tran in (db.Transcript + .select(db.Transcript.id, + db.Transcript.transcript_id) + .join(db.Gene) + .where(db.Gene.reference_set == ref_dbid))} for line in self._open(filename): line = bytes(line).decode('utf8').strip() @@ -400,8 +400,8 @@ def _insert_variants(self): indexes = [] for entry in batch: indexes.append(db.Variant.select(db.Variant.id).where(db.Variant.variant_id == entry['variant_id']).get().id) - self.add_variant_genes(indexes, genes, refgenes) - self.add_variant_transcripts(indexes, transcripts, reftranscripts) + self.add_variant_genes(indexes, genes, ref_genes) + self.add_variant_transcripts(indexes, transcripts, ref_transcripts) genes = [] transcripts = [] @@ -435,8 +435,8 @@ def _insert_variants(self): indexes = [] for entry in batch: indexes.append(db.Variant.select(db.Variant.id).where(db.Variant.variant_id == entry['variant_id']).get().id) - self.add_variant_genes(indexes, genes, refgenes) - self.add_variant_transcripts(indexes, transcripts, reftranscripts) + self.add_variant_genes(indexes, genes, ref_genes) + self.add_variant_transcripts(indexes, transcripts, ref_transcripts) if self.settings.set_vcf_sampleset_size and samples: self.sampleset.sample_size = samples @@ -484,18 +484,18 @@ def start_import(self): if not self.settings.beacon_only: self._insert_coverage() - def add_variant_genes(self, variant_indexes:list, genes_to_add:list, refgenes:dict): + def add_variant_genes(self, variant_indexes:list, genes_to_add:list, ref_genes:dict): batch = [] for i in range(len(variant_indexes)): - connected_genes = [{'variant':variant_indexes[i], 'gene':refgenes[gene]} for gene in genes_to_add[i] if gene] + connected_genes = [{'variant':variant_indexes[i], 'gene':ref_genes[gene]} for gene in genes_to_add[i] if gene] batch += connected_genes if not self.settings.dry_run: db.VariantGenes.insert_many(batch).execute() - def add_variant_transcripts(self, variant_indexes:list, transcripts_to_add:list, reftranscripts:dict): + def add_variant_transcripts(self, variant_indexes:list, transcripts_to_add:list, ref_transcripts:dict): batch = [] for i in range(len(variant_indexes)): - connected_transcripts = [{'variant':variant_indexes[i], 'transcript':reftranscripts[transcript]} + connected_transcripts = [{'variant':variant_indexes[i], 'transcript':ref_transcripts[transcript]} for transcript in transcripts_to_add[i]] batch += connected_transcripts if not self.settings.dry_run: From e85c2dd336c43d17cd989050649282df584bf6e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 8 Mar 2019 09:23:53 +0100 Subject: [PATCH 079/360] added missing function --- backend/db.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/backend/db.py b/backend/db.py index 1bb505b00..f71646cd8 100644 --- a/backend/db.py +++ b/backend/db.py @@ -516,3 +516,19 @@ def build_dict_from_row(row): continue d[field] = value return d + + +def get_reference_set_for_dataset(dataset): + """ + Get the reference set 
associated with a dataset + Args: + dataset (str): short name of the dataset + Returns: + ReferenceSet: the associated reference set; returns None if not available + """ + try: + return (Dataset.select() + .where(Dataset.short_name==dataset) + .get()).reference_set + except Dataset.DoesNotExist: + return None From f91f49ea00f4d95ff75924c4092fe75fd21c8202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 8 Mar 2019 09:25:02 +0100 Subject: [PATCH 080/360] dbid renamed to refset, function name updated --- scripts/importer/data_importer/raw_data_importer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 6719bd6bc..c275a42bf 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -281,14 +281,14 @@ def _insert_variants(self): gq_mids = None with db.database.atomic(): for filename in self.settings.variant_file: - ref_dbid = db.get_reference_dbid_dataset(self.settings.dataset) + ref_set = get_reference_set_for_dataset(self.settings.dataset) ref_genes = {gene.gene_id: gene.id for gene in (db.Gene.select(db.Gene.id, db.Gene.gene_id) - .where(db.Gene.reference_set == ref_dbid))} + .where(db.Gene.reference_set == ref_set))} ref_transcripts = {tran.transcript_id: tran.id for tran in (db.Transcript .select(db.Transcript.id, db.Transcript.transcript_id) .join(db.Gene) - .where(db.Gene.reference_set == ref_dbid))} + .where(db.Gene.reference_set == ref_set))} for line in self._open(filename): line = bytes(line).decode('utf8').strip() From 2fa3408b9083724082bc4d2ec71019d41d0f7b7d Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Fri, 8 Mar 2019 14:49:53 +0100 Subject: [PATCH 081/360] feat: Add option to specify settings file --- Dockerfile-backend | 10 +++++----- Dockerfile-database | 5 +++++ Dockerfile-frontend-rebuilder | 7 ++----- backend/route.py | 8 ++++++++ backend/settings.py | 15 +++++++++++++-- docker-compose.yml | 33 +++++++++++++-------------------- 6 files changed, 46 insertions(+), 32 deletions(-) create mode 100644 Dockerfile-database diff --git a/Dockerfile-backend b/Dockerfile-backend index 37bc117e7..3aeb82466 100644 --- a/Dockerfile-backend +++ b/Dockerfile-backend @@ -4,11 +4,11 @@ RUN apt-get update && apt-get install -y \ python3 \ python3-pip -COPY backend/requirements.txt /code/requirements.txt -COPY settings.json /code/settings.json -RUN sed -i 's/"postgresHost"\s*:.*,/"postgresHost" : "db",/' /code/settings.json +ADD . 
/code +COPY settings_sample.json /settings.json +RUN sed -i 's/"postgresHost"\s*:.*,/"postgresHost" : "db",/' /settings.json WORKDIR /code -RUN pip3 install -r requirements.txt +RUN pip3 install -r backend/requirements.txt -CMD ["python3", "backend/route.py", "--develop"] +CMD ["python3", "backend/route.py", "--develop", "--settings_file", "/settings.json"] diff --git a/Dockerfile-database b/Dockerfile-database new file mode 100644 index 000000000..e189d277b --- /dev/null +++ b/Dockerfile-database @@ -0,0 +1,5 @@ +FROM postgres:10 + +ENV POSTGRES_DB swefreq +COPY sql/data_schema.sql /docker-entrypoint-initdb.d/01_data_schema.sql +COPY sql/user_schema.sql /docker-entrypoint-initdb.d/02_user_schema.sql diff --git a/Dockerfile-frontend-rebuilder b/Dockerfile-frontend-rebuilder index ab2b2a3bc..32cb88a52 100644 --- a/Dockerfile-frontend-rebuilder +++ b/Dockerfile-frontend-rebuilder @@ -14,13 +14,10 @@ RUN apt-get install -y \ nodejs \ npm -COPY Makefile /code/Makefile -COPY backend/requirements.txt /code/requirements.txt -COPY scripts/compile_template.py /code/scripts/compile_template.py -COPY scripts/watch_frontend.py /code/scripts/watch_frontend.py +ADD . /code WORKDIR /code -RUN pip3 install -r requirements.txt && \ +RUN pip3 install -r backend/requirements.txt && \ pip3 install inotify CMD ["python", "scripts/watch_frontend.py"] diff --git a/backend/route.py b/backend/route.py index 1786ae722..ce6e2cf31 100644 --- a/backend/route.py +++ b/backend/route.py @@ -4,6 +4,7 @@ import tornado.web from tornado.options import define, options +import sys import application import handlers import auth @@ -109,6 +110,13 @@ def __init__(self, settings): tornado.web.Application.__init__(self, self.declared_handlers, **settings) if __name__ == '__main__': + # Make sure that the extra option to `settings` isn't upsetting tornado + if '--settings_file' in sys.argv: + flag_index = sys.argv.index('--settings_file') + # first remove flag, then argument + del sys.argv[flag_index] + del sys.argv[flag_index] + tornado.log.enable_pretty_logging() tornado.options.parse_command_line() diff --git a/backend/settings.py b/backend/settings.py index dc6d9f94f..249cd91a9 100644 --- a/backend/settings.py +++ b/backend/settings.py @@ -1,13 +1,24 @@ import os +import sys import json +import logging + +ARG = "--settings_file" +SETTINGS_FILE = "settings.json" +if ARG in sys.argv: + try: + SETTINGS_FILE = sys.argv[sys.argv.index(ARG)+1] + except IndexError: + logging.error("No argument for --settings_file") + sys.exit(1) try: current_dir = os.path.dirname(os.path.realpath(__file__)) - json_settings_fh = open(os.path.join(current_dir, "settings.json")) + json_settings_fh = open(os.path.join(current_dir, SETTINGS_FILE)) except FileNotFoundError: parent_dir = os.path.join(current_dir, os.pardir) - json_settings_fh = open(os.path.join(parent_dir, "settings.json")) + json_settings_fh = open(os.path.join(parent_dir, SETTINGS_FILE)) json_settings = json.load(json_settings_fh) json_settings_fh.close() diff --git a/docker-compose.yml b/docker-compose.yml index e2f6ce32f..498b3c2b1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,14 @@ version: "3.2" services: + db: + build: + context: ./ + dockerfile: Dockerfile-database + restart: on-failure + volumes: + - ./postgres-data:/var/lib/postgresql/data + ports: + - 5432:5432 web: build: context: ./ @@ -11,29 +20,13 @@ services: restart: on-failure volumes: - type: bind - source: ./backend - target: /code/backend - - type: bind - source: ./frontend - target: /code/frontend 
- - type: bind - source: ./static - target: /code/static - db: - restart: always - image: postgres:10 - volumes: - - ./postgres-data:/var/lib/postgresql/data - ports: - - 5432:5432 + source: . + target: /code rebuilder: build: context: ./ dockerfile: Dockerfile-frontend-rebuilder volumes: - type: bind - source: ./frontend - target: /code/frontend - - type: bind - source: ./static - target: /code/static + source: . + target: /code From b08d70cf416d17eeadb17d03dbbf40aec2eddafd Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Mon, 11 Mar 2019 16:26:42 +0100 Subject: [PATCH 082/360] Start on beacon documentation --- docs/beacon_doc.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 docs/beacon_doc.md diff --git a/docs/beacon_doc.md b/docs/beacon_doc.md new file mode 100644 index 000000000..a4002b364 --- /dev/null +++ b/docs/beacon_doc.md @@ -0,0 +1,40 @@ +## Variant types + +- Supported and existing in our current Beacon: `INS, DEL, SNP` + +- Available from the datasets (although not used by the beacon): + - in the SweGen dataset: `insertion, SNV, deletion, indel, sequence_alteration` + - in the ACpop dataset: `insertion, SNV, deletion` + +- What we don't have: `MNP, DUP, BND, INV, CNV, DUP:TANDEM, DEL:ME, INS:ME` + +- What we don't know: + - How do `indel` and `sequence_alteration` relate to the Beacon variant types? Do they require multiple tags (eg `indel => del + ins`)? + `rs200774489` is annotated as `sequence_alteration`, but is an insertion according to https://www.ncbi.nlm.nih.gov/snp/rs200774489 + - How to communicate which types the datasets support? + + +## Alternate bases + +- Allowed: `(ATCGN)+`. Extra value in our data: `*`. + + Should probably be expressed with `N` in the response (`*` is not allowed). + - Example, as given in the Beacon response now: + ```"referenceName": "22", + "start": "16060517", + "referenceBases": "T", + "alternateBases": "*", + "variantType": "SNP" + ``` + In the VCF, this variation is not annotated with `rsid` or `variantType` in the CSQ field. + + From the VCF spec: + > The ‘*’ allele is reserved to indicate that the allele is missing due to an overlapping deletion. + + Should rather be shown as (?): + ```"referenceName": "22", + "start": "16060517", + "referenceBases": "T", + "alternateBases": "N", + "variantType": "DEL" + ``` From 933e9233ba6f86f9cf45d207c116aed57143b043 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Mon, 11 Mar 2019 16:30:39 +0100 Subject: [PATCH 083/360] Undo add beacon documentation Is moved to another branch --- docs/beacon_doc.md | 40 ---------------------------------------- 1 file changed, 40 deletions(-) delete mode 100644 docs/beacon_doc.md diff --git a/docs/beacon_doc.md b/docs/beacon_doc.md deleted file mode 100644 index a4002b364..000000000 --- a/docs/beacon_doc.md +++ /dev/null @@ -1,40 +0,0 @@ -## Variant types - -- Supported and existing in our current Beacon: `INS, DEL, SNP` - -- Available from the datasets (although not used by the beacon): - - in the SweGen dataset: `insertion, SNV, deletion, indel, sequence_alteration` - - in the ACpop dataset: `insertion, SNV, deletion` - -- What we don't have: `MNP, DUP, BND, INV, CNV, DUP:TANDEM, DEL:ME, INS:ME` - -- What we don't know: - - How do `indel` and `sequence_alteration` relate to the Beacon variant types? Do they require multiple tags (eg `indel => del + ins`)? 
- `rs200774489` is annotated as `sequence_alteration`, but is an insertion according to https://www.ncbi.nlm.nih.gov/snp/rs200774489 - - How to communicate which types the datasets support? - - -## Alternate bases - -- Allowed: `(ATCGN)+`. Extra value in our data: `*`. - - Should probably be expressed with `N` in the response (`*` is not allowed). - - Example, as given in the Beacon response now: - ```"referenceName": "22", - "start": "16060517", - "referenceBases": "T", - "alternateBases": "*", - "variantType": "SNP" - ``` - In the VCF, this variation is not annotated with `rsid` or `variantType` in the CSQ field. - - From the VCF spec: - > The ‘*’ allele is reserved to indicate that the allele is missing due to an overlapping deletion. - - Should rather be shown as (?): - ```"referenceName": "22", - "start": "16060517", - "referenceBases": "T", - "alternateBases": "N", - "variantType": "DEL" - ``` From d1393e66c6a3dad6769e4aeb83123511b1b45fee Mon Sep 17 00:00:00 2001 From: Johan Viklund Date: Thu, 14 Mar 2019 15:24:13 +0100 Subject: [PATCH 084/360] Adding beacon database schema --- sql/beacon_schema.sql | 113 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 sql/beacon_schema.sql diff --git a/sql/beacon_schema.sql b/sql/beacon_schema.sql new file mode 100644 index 000000000..e625905f4 --- /dev/null +++ b/sql/beacon_schema.sql @@ -0,0 +1,113 @@ +------------------------------------------------------------------------------- +-- Modified beacon schema. +-- +-- This schema is heavily based on the finnish beacon schema at: +-- https://github.com/CSCfi/beacon-python/blob/master/data/init.sql +-- +-- but has been modified to use views instead of tables for the +-- beacon_dataset_table and beacon_data_table. +-- This was done so that the main swefreq-schema could be kept, without having +-- to store duplicate information. + +CREATE SCHEMA IF NOT EXISTS beacon; + +-------------------------------------------------------------------------------- +-- Beacon dataset and data tables +-- +-- These tables need to be represented as semi-complex views, as to avoid +-- storing redundant data. + +CREATE OR REPLACE VIEW beacon.beacon_dataset_table AS -- original type + SELECT v.id AS index, -- serial + d.short_name AS name, -- varchar(128) + concat_ws(':', r.reference_build, + d.short_name, + v.dataset_version) AS datasetId, -- varchar(128) + d.beacon_description AS "description", -- varchar(512) + substr(r.reference_build, 0, 7) AS assemblyId, -- varchar(16) + v.available_from AS createDateTime, -- timestamp + v.available_from AS updateDateTime, -- timstamp + v.dataset_version AS "version", -- varchar(8) + s.sample_size AS sampleCount, -- integer + d.browser_uri AS externalUrl, -- varchar(256) + CASE WHEN v.available_from < now() THEN 'PUBLIC' + WHEN v.available_from > now() THEN 'CONTROLLED' + END AS accessType -- varchar(10) + FROM data.datasets AS d + JOIN data.dataset_version_current AS v + ON v.dataset = d.id + JOIN data.reference_sets AS r + ON d.reference_set = r.id + JOIN data.sample_sets AS s + ON s.dataset = d.id +; + + +-- This seems to return correct values except that it seems to +-- _always_ return 1 for callCount, even when there's no data. +-- TODO: make sure that callCount can handle zero values. 
+CREATE OR REPLACE VIEW beacon.beacon_dataset_counts_table AS + SELECT concat_ws(':', r.reference_build, + d.short_name, + v.dataset_version) AS datasetId, -- varchar(128) + COUNT(DISTINCT(dv.ref, dv.pos)) AS callCount, -- integer + COUNT(dv) AS variantCount -- integer + FROM data.datasets as d + JOIN data.reference_sets AS r + ON d.reference_set = r.id + JOIN data.dataset_version_current AS v + ON v.dataset = d.id + LEFT JOIN data.variants AS dv + ON dv.dataset_version = v.id + GROUP BY r.reference_build, d.short_name, v.dataset_version +; + + +CREATE MATERIALIZED VIEW beacon.beacon_data_table AS + SELECT dv.id AS index, -- serial + concat_ws(':', r.reference_build, + d.short_name, + v.dataset_version) AS datasetId, -- varchar(128) + dv.pos - 1 AS "start", -- integer + substr(dv.chrom, 1, 2) AS chromosome, -- varchar(2) + dv.ref AS reference, -- varchar(8192) + dv.alt AS alternate, -- varchar(8192) + dv.pos - 1 + char_length(dv.ref) AS "end", -- integer + dv.allele_num AS callCount, -- integer + dv.allele_freq AS frequency, -- integer + dv.allele_count AS alleleCount, -- integer + CASE WHEN length(dv.ref) = length(dv.alt) THEN 'SNP' + WHEN length(dv.ref) > length(dv.alt) THEN 'DEL' + WHEN length(dv.ref) < length(dv.alt) THEN 'INS' + END AS variantType -- varchar(16) + FROM data.variants AS dv + JOIN data.dataset_version_current as v + ON dv.dataset_version = v.id + JOIN data.datasets as d + ON v.dataset = d.id + JOIN data.reference_sets AS r + ON d.reference_set = r.id +; + + +-------------------------------------------------------------------------------- +-- Beacon views. +-- +-- These are kept as-is from the reference. + +-- This index is part of the finnish schema, but I deactivated it so that I don't have to materialize the views +-- CREATE UNIQUE INDEX data_conflict ON beacon_data_table (datasetId, chromosome, start, reference, alternate); +-- CREATE UNIQUE INDEX metadata_conflict ON beacon_dataset_table (name, datasetId); +-- This gets really, really slow if not materialized. (TODO why?) 
+ +CREATE MATERIALIZED VIEW beacon.dataset_metadata(name, datasetId, description, assemblyId, + createDateTime, updateDateTime, version, + callCount, variantCount, sampleCount, externalUrl, accessType) +AS SELECT a.name, a.datasetId, a.description, a.assemblyId, a.createDateTime, + a.updateDateTime, a.version, b.callCount, + b.variantCount, + a.sampleCount, a.externalUrl, a.accessType +FROM beacon.beacon_dataset_table a, beacon.beacon_dataset_counts_table b +WHERE a.datasetId=b.datasetId +GROUP BY a.name, a.datasetId, a.description, a.assemblyId, a.createDateTime, +a.updateDateTime, a.version, a.sampleCount, a.externalUrl, a.accessType, b.callCount, b.variantCount; From 05a60e22e065153cc2c0e741634187939e501f7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=28Kusalananda=29=20K=C3=A4h=C3=A4ri?= Date: Thu, 14 Mar 2019 16:53:40 +0100 Subject: [PATCH 085/360] Add missing quotes and make existing quotes consistent --- test/travis_before_install.sh | 4 ++-- test/travis_script.sh | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/test/travis_before_install.sh b/test/travis_before_install.sh index 844721ff1..e59af8f6b 100755 --- a/test/travis_before_install.sh +++ b/test/travis_before_install.sh @@ -3,6 +3,6 @@ PSQL_VERSION="10" PSQL_PORT="5433" -docker pull postgres:"${PSQL_VERSION}" +docker pull "postgres:$PSQL_VERSION" -docker run --rm -d -p $PSQL_PORT:5432 postgres:"${PSQL_VERSION}" +docker run --rm -d -p "$PSQL_PORT:5432" "postgres:$PSQL_VERSION" diff --git a/test/travis_script.sh b/test/travis_script.sh index 1e808a64a..adb53b7da 100755 --- a/test/travis_script.sh +++ b/test/travis_script.sh @@ -12,7 +12,7 @@ echo '/SETTINGS' echo '>>> Test 1. The SQL Patch' LATEST_RELEASE=$(git tag | grep '^v' | sort -V | tail -n 1) -git show ${LATEST_RELEASE}:sql/*_schema.sql > master-schema.sql +git show "$LATEST_RELEASE:sql/*_schema.sql" > master-schema.sql psql -U postgres -h 127.0.0.1 -p 5433 -f master-schema.sql psql -U postgres -h 127.0.0.1 -p 5433 -f sql/patch-master-db.sql @@ -23,16 +23,16 @@ DROP SCHEMA data; DROP SCHEMA users; __END__ -echo ">>> Test 2. Load the swefreq schema" +echo '>>> Test 2. Load the swefreq schema' psql -U postgres -h 127.0.0.1 -p 5433 -f sql/data_schema.sql psql -U postgres -h 127.0.0.1 -p 5433 -f sql/user_schema.sql psql -U postgres -h 127.0.0.1 -p 5433 -f test/data/load_dummy_data.sql -echo ">>> Test 3. Check that the backend starts" +echo '>>> Test 3. Check that the backend starts' (cd backend && ../test/01_daemon_starts.sh) -echo ">>> Test 4. the backend API" +echo '>>> Test 4. the backend API' coverage run backend/route.py --port=4000 --develop 1>http_log.txt 2>&1 & BACKEND_PID=$! @@ -44,12 +44,12 @@ exit_handler () { set +e # We want to make sure the background process has stopped, otherwise the # travis test will stall for a long time. 
- kill -9 ${BACKEND_PID} + kill -9 "$BACKEND_PID" echo 'THE HTTP LOG WAS:' cat http_log.txt - exit ${rv} + exit "$rv" } trap exit_handler EXIT @@ -67,4 +67,4 @@ if [ -f .coverage ]; then coverage report fi -exit ${RETURN_VALUE} +exit "$RETURN_VALUE" From 64439a6b4b04c26dd89a8c57b0b46f9e86f2afec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=28Kusalananda=29=20K=C3=A4h=C3=A4ri?= Date: Fri, 15 Mar 2019 08:30:47 +0100 Subject: [PATCH 086/360] Remove $ on variable in arithmetic context --- test/travis_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/travis_script.sh b/test/travis_script.sh index adb53b7da..5e7df3ccd 100755 --- a/test/travis_script.sh +++ b/test/travis_script.sh @@ -56,7 +56,7 @@ trap exit_handler EXIT RETURN_VALUE=0 python backend/test.py -v -RETURN_VALUE=$(($RETURN_VALUE + $?)) +RETURN_VALUE=$((RETURN_VALUE + $?)) # Quit the app curl localhost:4000/developer/quit From f8e7be8bb8a5c9a2f82d934b697e0c4f694c2a37 Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Mon, 18 Mar 2019 06:57:33 +0100 Subject: [PATCH 087/360] feat: Remove dbSNP and OMIM data Removes dbSNP and OMIM data, as we felt that they did not contribute enough to the system. --- backend/db.py | 27 +-- .../importer/data_importer/dbsnp_importer.py | 126 ------------ .../importer/data_importer/old_db_importer.py | 12 +- .../data_importer/raw_data_importer.py | 123 +++--------- .../data_importer/reference_set_importer.py | 74 +------ scripts/importer/importer.py | 187 ++++++++---------- settings_sample.json | 2 +- sql/data_schema.sql | 36 +--- 8 files changed, 125 insertions(+), 462 deletions(-) delete mode 100644 scripts/importer/data_importer/dbsnp_importer.py diff --git a/backend/db.py b/backend/db.py index f71646cd8..22068a8b6 100644 --- a/backend/db.py +++ b/backend/db.py @@ -52,28 +52,6 @@ def python_value(self, value): # Reference Tables ## -class DbSNP_version(BaseModel): - """ - dbSNP datasets are very large, and some reference sets can use the same set, - which is why they are given their own header-table. 
- """ - class Meta: - db_table = 'dbsnp_versions' - schema = 'data' - - version_id = CharField() - - -class DbSNP(BaseModel): - class Meta: - db_table = 'dbsnp' - schema = 'data' - - version = ForeignKeyField(DbSNP_version, related_name="variants") - rsid = BigIntegerField() - chrom = CharField(max_length=10) - pos = IntegerField() - class ReferenceSet(BaseModel): """ @@ -85,12 +63,10 @@ class Meta: db_table = 'reference_sets' schema = 'data' - dbsnp_version = ForeignKeyField(DbSNP_version, db_column="dbsnp_version", related_name="references") name = CharField(db_column="reference_name", null=True) ensembl_version = CharField() gencode_version = CharField() dbnsfp_version = CharField() - omim_version = CharField() class Gene(BaseModel): @@ -189,7 +165,6 @@ class Meta: schema = 'data' study = ForeignKeyField(Study, db_column="study", related_name='datasets') - reference_set = ForeignKeyField(ReferenceSet, db_column="reference_set", related_name='datasets') short_name = CharField() full_name = CharField() browser_uri = CharField(null=True) @@ -226,6 +201,7 @@ class Meta: schema = 'data' dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='versions') + reference_set = ForeignKeyField(ReferenceSet, db_column="reference_set", related_name='dataset_versions') version = CharField(db_column="dataset_version") description = TextField(db_column="dataset_description") terms = TextField() @@ -444,6 +420,7 @@ class Meta: schema = 'data' dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='current_version') + reference_set = ForeignKeyField(ReferenceSet, db_column="reference_set", related_name='current_version') class DatasetAccessCurrent(DatasetAccess): diff --git a/scripts/importer/data_importer/dbsnp_importer.py b/scripts/importer/data_importer/dbsnp_importer.py deleted file mode 100644 index 87a302b7e..000000000 --- a/scripts/importer/data_importer/dbsnp_importer.py +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env python3 - -import os -import time -import logging -import db -from peewee import fn -from .data_importer import DataImporter - -class DbSNPImporter( DataImporter ): - """ - Downloads and imports a dbSNP-dataset into the swefreq database. 
- """ - - URL=("ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_{a.dbsnp_version}" - "_{a.dbsnp_reference}/database/data/organism_data/{a.dbsnp_version}_" - "SNPChrPosOnRef_{a.dbsnp_number}.bcp.gz") - - def __init__(self, settings): - super().__init__(settings) - self.settings.dbsnp_number = 105 - if settings.dbsnp_reference.startswith("GRCh38"): - self.settings.dbsnp_number = 108 - self.total = None - - def count_entries(self): - start = time.time() - self.total = 0 - logging.info("Counting valid lines in file (for progress bar)") - for line in self.in_file: - line = line.decode('ascii').strip() - if line.startswith("#"): - continue - - if line.count("\t") < 2: - continue - - if self.chrom and not line.split("\t")[1] == str(self.chrom): - continue - - self.total += 1 - self.in_file.rewind() - logging.info("Found {:,} lines in {}".format(self.total, self._time_since(start))) - - def prepare_data(self): - url = DbSNPImporter.URL.format(a=self.settings) - filename = url.split("/")[-1] - try: - os.stat( os.path.join(self.download_dir, filename) ) - self.in_file = self._open( os.path.join(self.download_dir, filename) ) - except FileNotFoundError: - self.in_file = self._download_and_open(url) - - def prepare_version(self): - version_id = "{a.dbsnp_version}_{a.dbsnp_reference}".format(a=self.settings) - if self.settings.dry_run: - try: - dbsnp_version = db.DbSNP_version.get(version_id = version_id) - logging.info("dbsnp_version '{}' already in database".format(version_id)) - except db.DbSNP_version.DoesNotExist: - dbsnp_version = db.DbSNP_version.select(fn.Max(db.DbSNP_version.version_id)).get() - logging.info("Created dbsnp_version '{}'".format(version_id)) - else: - dbsnp_version, created = db.DbSNP_version.get_or_create(version_id = version_id) - if created: - logging.info("Created dbsnp_version '{}'".format(version_id)) - else: - logging.info("dbsnp_version '{}' already in database".format(version_id)) - return dbsnp_version - - def start_import(self): - """ - dbsnp-file header is 'rsid', 'chrom', 'position' - """ - dbsnp_version = self.prepare_version() - - start = time.time() - last_progress = 0.0 - logging.info("Inserting dbSNP data into database.") - - counter = 0 - batch = [] - with db.database.atomic(): - for line in self.in_file: - line = line.decode('ascii').strip() - if line.startswith("#"): - continue - - try: - rsid, chrom, position = line.split("\t")[:3] - position = int(position) - position += 1 # 0-indexed - except ValueError: - # we don't care for incomplete entries - continue - - if self.chrom and not chrom == self.chrom: - continue - - batch += [{ 'version':dbsnp_version, - 'rsid':rsid, - 'chrom':chrom, - 'pos':position}] - counter += 1 - - if self.total != None: - progress = counter / self.total - while progress > last_progress + 0.01: - if not last_progress: - logging.info("Estimated time to completion: {}".format(self._time_to(start, progress))) - if self.total != None: - self._print_progress_bar() - self._tick() - last_progress += 0.01 - - if len(batch) >= self.batch_size: - if not self.settings.dry_run: - db.DbSNP.insert_many(batch).execute() - batch = [] - db.database.commit() - if batch: - if not self.settings.dry_run: - db.DbSNP.insert_many(batch).execute() - if self.total != None: - self._tick(True) - logging.info("Inserted {:,} valid lines in {}".format(counter, self._time_since(start))) diff --git a/scripts/importer/data_importer/old_db_importer.py b/scripts/importer/data_importer/old_db_importer.py index 96eb69797..971604619 100644 --- 
a/scripts/importer/data_importer/old_db_importer.py +++ b/scripts/importer/data_importer/old_db_importer.py @@ -24,12 +24,13 @@ def __init__(self, settings): def _select_reference_set(self, short_name): if len(self.reference_sets) == 1: - logging.info(("Only one reference set is available, {}," - "will default to this set for all datasets".format(self.reference_sets[0]))) + logging.info(("Only one reference set is available, %s," + "will default to this set for all datasets"), + self.reference_sets[0].name) return self.reference_sets[0].id elif short_name.lower() in [r.name.lower() for r in self.reference_sets]: refset = [r for r in self.reference_sets if r.name.lower() == short_name.lower()][0] - logging.info("Auto-selecting reference set '{}' based on name.".format(refset.name)) + logging.info("Auto-selecting reference set '%s' based on name.", refset.name) return refset else: print("Select a reference set to use with this dataset") @@ -106,12 +107,10 @@ def _move_datasets(self): new_id = db.Dataset.get(study = study_ref_id, short_name = dataset.short_name).id except db.Dataset.DoesNotExist: - target_reference_id = self._select_reference_set(dataset.short_name) if self.settings.dry_run: continue new_id = (db.Dataset .insert(study = study_ref_id, - reference_set = target_reference_id, short_name = dataset.short_name, full_name = dataset.full_name, browser_uri = dataset.browser_uri, @@ -150,6 +149,7 @@ def _move_dataset_versions(self): for dataset_version in old_db.DatasetVersion.select(): try: dataset_ref_id = self.id_map['dataset'][dataset_version.dataset.dataset] + dataset = db.Dataset.get(id = dataset_ref_id) except KeyError: if not self.settings.dry_run: raise @@ -165,10 +165,12 @@ def _move_dataset_versions(self): data_contact_name = dataset_version.data_contact_name, data_contact_link = dataset_version.data_contact_link).id except db.DatasetVersion.DoesNotExist: + target_reference_id = self._select_reference_set(dataset.short_name) if self.settings.dry_run: continue new_id = (db.DatasetVersion .insert(dataset = dataset_ref_id, + reference_set = target_reference_id, version = dataset_version.version, description = dataset_version.description, terms = dataset_version.terms, diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index c275a42bf..71b1bb302 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -82,26 +82,33 @@ def _set_dataset_info(self): def _select_dataset_version(self): datasets = [] + # Make sure that the dataset exists try: ds = db.Dataset.get(short_name=self.settings.dataset) except db.Dataset.DoesNotExist: - print("Select a Dataset to use with this data") + logging.error("Unknown dataset '%s'", self.settings.dataset) + logging.info("Available datasets are:") for dataset in db.Dataset.select(): - print(" {} : {}".format(dataset.id, dataset.short_name)) - datasets += [dataset] - - selection = -1 - while selection not in [d.id for d in datasets]: - if selection != -1: - print("Please select a number in {}".format([d.id for d in datasets])) - try: - selection = int(input("Please select a dataset: ")) - except ValueError: - print("Please select a number in {}".format([d.id for d in datasets])) - ds = [d for d in datasets if d.id == selection][0] + logging.info(" * %s", dataset.short_name) + sys.exit(1) logging.info("Using dataset {}".format(ds.short_name)) self.dataset = ds + versions = [v for v in 
db.DatasetVersion.select().where(db.DatasetVersion.dataset == ds)] + + # Make sure that the dataset version exists + if not versions: + raise db.DatasetVersion.DoesNotExist("No versions exist for this dataset") + + if self.settings.version not in [v.version for v in versions]: + logging.error("Unknown version '%s' for dataset '%s'.", self.settings.version, self.dataset.short_name) + logging.info("Available versions are:") + for version in versions: + logging.info(" * %s", version.version) + sys.exit(1) + self.dataset_version = [v for v in versions if v.version == self.settings.version][0] + + # Set the sample set's sample size if self.settings.set_vcf_sampleset_size or self.settings.sampleset_size: try: samplesets = db.SampleSet.select() @@ -112,89 +119,6 @@ def _select_dataset_version(self): self.settings.set_vcf_sampleset_size = False self.settings.sampleset_size = 0 - versions = [] - for version in db.DatasetVersion.select().where(db.DatasetVersion.dataset == ds): - versions += [version] - - if not versions: - raise db.DatasetVersion.DoesNotExist("At least one dataset version required for dataset") - - if len(versions) == 1: - logging.info("Only one available dataset version, using: {}".format(versions[0].version)) - self.dataset_version = versions[0] - return - - if self.settings.version: - # name based version picking - if self.settings.version.lower() in [v.version.lower() for v in versions]: - selected = [v for v in versions if v.version.lower() == self.settings.version.lower()][0] - self.dataset_version = selected - logging.info("Using dataset version {}".format(self.dataset_version.version)) - return - - # date-based version picking - # note that this only works if the version string is formatted like: - # yyyymmdd or yyyy-mm-dd - - target = self.settings.version - version_dates = [] - for v in versions: - try: - version_dates += [(datetime.strptime(v.version, "%Y-%m-%d"), v)] - except ValueError: - try: - version_dates += [(datetime.strptime(v.version, "%Y%m%d"), v)] - except ValueError: - pass - if target not in ["latest", "next"]: - try: - target = datetime.strptime(target, "%Y-%m-%d") - except ValueError: - pass - try: - target = datetime.strptime(target, "%Y%m%d") - except ValueError: - pass - for date, version in version_dates: - if target == date: - self.dataset_version = version - logging.info("Using dataset version {}".format(self.dataset_version.version)) - return - else: - today = datetime.today() - if target == "latest": - try: - target, version = max([i for i in version_dates if i[0] < today]) - self.dataset_version = version - logging.info("Using dataset version {}".format(self.dataset_version.version)) - return - except ValueError: - pass - elif target == "next": - try: - target, version = min([i for i in version_dates if i[0] > today]) - self.dataset_version = version - logging.info("Using dataset version {}".format(self.dataset_version.version)) - return - except ValueError: - logging.warning("No future dataset versions found!") - - print("Select a Version of this dataset to use") - for version in versions: - print(" {} : {}".format(version.id, version.version)) - - selection = -1 - while selection not in [v.id for v in versions]: - if selection != -1: - print("Please select a number in {}".format([v.id for v in versions])) - try: - selection = int(input("Please select a version: ")) - except ValueError: - print("Please select a number in {}".format([v.id for v in versions])) - - logging.info("Using dataset version {}".format(self.dataset_version)) - 
self.dataset_version = [v for v in versions if v.id == selection][0] - def _insert_coverage(self): """ Header columns are chromosome, position, mean coverage, median coverage, @@ -281,7 +205,10 @@ def _insert_variants(self): gq_mids = None with db.database.atomic(): for filename in self.settings.variant_file: - ref_set = get_reference_set_for_dataset(self.settings.dataset) + # Get reference set for the variant + ref_set = self.dataset_version.reference_set + + # Get all genes and transcripts for foreign keys ref_genes = {gene.gene_id: gene.id for gene in (db.Gene.select(db.Gene.id, db.Gene.gene_id) .where(db.Gene.reference_set == ref_set))} ref_transcripts = {tran.transcript_id: tran.id for tran in (db.Transcript @@ -426,7 +353,7 @@ def _insert_variants(self): curr_id = 0 db.Variant.insert_many(batch).execute() - + if not self.settings.beacon_only: last_id = db.Variant.select(db.Variant.id).order_by(db.Variant.id.desc()).limit(1).get().id if last_id-curr_id == len(batch): diff --git a/scripts/importer/data_importer/reference_set_importer.py b/scripts/importer/data_importer/reference_set_importer.py index 1cb1ea314..5ba879b5a 100644 --- a/scripts/importer/data_importer/reference_set_importer.py +++ b/scripts/importer/data_importer/reference_set_importer.py @@ -36,7 +36,6 @@ def __init__(self, settings): # file handlers for later self.gencode = None self.dbnsfp = None - self.omim = None self.ensembl = None def _insert_features(self): @@ -112,31 +111,12 @@ def _insert_genes(self): logging.info("Genes inserted in {}".format( self._time_since(start) )) def _insert_reference(self): - version_id = "{a.dbsnp_version}_{a.dbsnp_reference}".format(a=self.settings) - - if self.settings.dry_run: - try: - dbsnp_version = db.DbSNP_version.get(version_id = version_id) - logging.info("Using dbsnp_version '{}'".format(version_id)) - except db.DbSNP_version.DoesNotExist: - dbsnp_version = db.DbSNP_version.select(fn.Max(db.DbSNP_version.version_id)).get() - logging.info("Created dbsnp_version '{}'".format(version_id)) - else: - dbsnp_version, created = db.DbSNP_version.get_or_create(version_id = version_id) - if created: - logging.info("Created dbsnp_version '{}'".format(version_id)) - else: - logging.info("Using dbsnp_version '{}'".format(version_id)) - - omim_filename = self.settings.omim_file.split("/")[-1] logging.info("inserting reference header") self.db_reference = db.ReferenceSet(name = self.settings.ref_name, + reference_build = self.settings.assembly_id, ensembl_version = self.settings.ensembl_version, gencode_version = self.settings.gencode_version, - dbnsfp_version = self.settings.dbnsfp_version, - omim_version = omim_filename, - dbsnp_version = dbsnp_version.id) - + dbnsfp_version = self.settings.dbnsfp_version) if self.settings.dry_run: max_id = db.ReferenceSet.select(fn.Max(db.ReferenceSet.id)).get() @@ -146,7 +126,7 @@ def _insert_reference(self): self.db_reference.id = max_id.id + 1 else: self.db_reference.save() - logging.info("Reference {} created".format(self.db_reference.id)) + logging.info("Reference %s created", self.db_reference.id) def _insert_transcripts(self): logging.info("Inserting transcripts into database") @@ -241,13 +221,6 @@ def _open_gencode(self): except FileNotFoundError: self.gencode = self._download_and_open(url) - def _open_omim(self): - """ - We can't download OMIM files, so we just open the given OMIM file - """ - logging.info("----- Opening OMIM file -----") - self.omim = self._open( self.settings.omim_file ) - def _read_dbnsfp(self): start = time.time() header = 
None @@ -315,45 +288,6 @@ def _read_ensembl(self): self._tick(True) logging.info("Canonical transcript information from ensembl added in {}.".format( self._time_since(start) )) - def _read_omim(self): - start = time.time() - logging.info("Adding OMIM annotations") - - cache = {} - header = None - for line in self.omim: - raw = bytes(line).decode('utf8').strip().split("\t") - if not header: - header = [r.strip() for r in raw] - if header: - continue - - values = {} - for i, value in enumerate(raw): - values[header[i]] = value - - if 'MIM Gene Description' not in values: - continue - - if 'Ensembl Transcript ID' in cache: - logging.warning(("The Ensembl Transcript ID '{}' was found twice" - " in the OMIM file. this was not planned for.")) - cache[values['Ensembl Transcript ID']] = \ - {'mim_gene_accession':int(values['MIM Gene Accession']), - 'mim_annotation':values['MIM Gene Description'].strip().capitalize(), - } - - counter = 0 - empty = {'mim_gene_accession':None, 'mim_annotation':None} - for i, transcript in enumerate(self.transcripts): - if transcript['transcript_id'] in cache: - self.transcripts[i].update(cache[transcript['transcript_id']]) - counter += 1 - else: - self.transcripts[i].update(empty) - - logging.info("OMIM information added in {}.".format( self._time_since(start) )) - def count_entries(self): logging.info("Counting features in gencode file (for progress bar)") start = time.time() @@ -389,7 +323,6 @@ def count_entries(self): def prepare_data(self): self._open_gencode() self._open_dbnsfp() - self._open_omim() self._open_ensembl() def start_import(self): @@ -449,7 +382,6 @@ def start_import(self): logging.info("Gencode data read into buffers in {}.".format( self._time_since(start) )) self._read_ensembl() self._read_dbnsfp() - self._read_omim() self._insert_reference() self._insert_genes() self._insert_transcripts() diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index 048b0ad08..95f691a97 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -11,7 +11,6 @@ - peewee-2.10.2 """ -from data_importer.dbsnp_importer import DbSNPImporter from data_importer.reference_set_importer import ReferenceSetImporter from data_importer.old_db_importer import OldDbImporter from data_importer.raw_data_importer import RawDataImporter @@ -22,128 +21,108 @@ import argparse import logging - parser = argparse.ArgumentParser( description = __doc__ , - formatter_class = argparse.ArgumentDefaultsHelpFormatter ) + PARSER = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--batch_size", type=int, default=5000, - help = ("Where batch insertion is possible, use this number of" - " inserts per batch.")) - parser.add_argument("--limit_chrom", default=None, - help = "Limit chromosome to insert into the database.") - parser.add_argument("--data_dir", default=os.path.join(os.path.dirname(__file__), - "downloaded_files"), - help = "Default directory to download and look for files.") + PARSER.add_argument("--batch_size", type=int, default=5000, + help=("Where batch insertion is possible, use this " + "number of inserts per batch.")) + PARSER.add_argument("--limit_chrom", default=None, + help="Limit chromosome to insert into the database.") + PARSER.add_argument("--data_dir", + default=os.path.join(os.path.dirname(__file__), + "downloaded_files"), + help="Default directory to download and look for files." 
+ ) # Reference versions - parser.add_argument("--gencode_version", default=19, type=int, - help = "Gencode version to download and use.") - parser.add_argument("--ensembl_version", default="homo_sapiens_core_75_37", - help = "Ensembl database to connect to.") - parser.add_argument("--dbnsfp_version", default="2.9.3", - help = "dbNSFP version to download and use.") - parser.add_argument("--dbsnp_version", default="b150", - help = "dbSNP version to download and use.") - parser.add_argument("--dbsnp_reference", default="GRCh37p13", - help = "Which reference the dbSNP should be aligned to.") + PARSER.add_argument("--gencode_version", default=19, type=int, + help="Gencode version to download and use.") + PARSER.add_argument("--ensembl_version", default="homo_sapiens_core_75_37", + help="Ensembl database to connect to.") + PARSER.add_argument("--dbnsfp_version", default="2.9.3", + help="dbNSFP version to download and use.") # Dataset connections and naming - parser.add_argument("--dataset", default="", + PARSER.add_argument("--dataset", default="", help="Which dataset to connect imported data to.") - parser.add_argument("--version", default="latest", - help=("Which dataset version to connect imported data to. " - "This can be a text-string name, a date in on of the " - "formats yyyymmdd or yyyy-mm-dd, or 'latest' for the " - "last published dataset version, or 'next' for the " - "next coming dataset version.")) - parser.add_argument("--ref_name", default="", - help="Reference name to use when creating a reference set.") - - parser.add_argument("--dataset_size", type=int, default=0, - help = "Set dataset size for this dataset") - parser.add_argument("--set_vcf_sampleset_size", action="store_true", - help = "Set/update sampleset size to the value given in the VCF." - "This is either the NS value, or the number of stated samples") - parser.add_argument("--sampleset_size", type=int, default=0, - help = "Set sampleset size for this dataset") - parser.add_argument("--beacon_description", default="", + PARSER.add_argument("--version", default="", + help="Which dataset version to add imported data to.") + PARSER.add_argument("--ref_name", default="", + help=("Reference name to use when creating a reference " + "set.")) + + PARSER.add_argument("--dataset_size", type=int, default=0, + help="Set dataset size for this dataset") + PARSER.add_argument("--set_vcf_sampleset_size", action="store_true", + help=("Set/update sampleset size to the value given in " + "the VCF. This is either the NS value, or the " + "number of stated samples")) + PARSER.add_argument("--sampleset_size", type=int, default=0, + help="Set sampleset size for this dataset") + PARSER.add_argument("--beacon_description", default="", help="Set beacon description of the dataset.") - parser.add_argument("--assembly_id", default="", - help="Set reference assembly id (GRC notation, e.g. GRCh37)") - - # omim file, since we can't download or version them - parser.add_argument("--omim_file", default=os.path.join(os.path.dirname(__file__), - "downloaded_files", - "omim_info.txt.gz"), - help = "OMIM annotation file.") + PARSER.add_argument("--assembly_id", default="", + help=("Set reference assembly id (GRC notation, e.g. 
" + "GRCh37)")) # Raw data (coverage + variants) files - parser.add_argument("--coverage_file", nargs="*", - help = "Coverage file(s) to import.") - parser.add_argument("--variant_file", nargs="*", - help = "Variant file(s) to import.") + PARSER.add_argument("--coverage_file", nargs="*", + help="Coverage file(s) to import.") + PARSER.add_argument("--variant_file", nargs="*", + help="Variant file(s) to import.") # Actions - parser.add_argument("--add_reference", action="store_true", - help = "Insert new reference set.") - parser.add_argument("--add_raw_data", action="store_true", - help = "Adds a Coverage and Variants to the database.") - parser.add_argument("--add_dbsnp", action="store_true", - help = "Adds a new dbSNP version to the database.") - parser.add_argument("--move_studies", action="store_true", - help = ("Moves studies and datasets from an old database" - " to a new one.")) - parser.add_argument("--dry_run", action="store_true", - help = "Do not insert anything into the database") + PARSER.add_argument("--add_reference", action="store_true", + help="Insert new reference set.") + PARSER.add_argument("--add_raw_data", action="store_true", + help="Adds a Coverage and Variants to the database.") + PARSER.add_argument("--move_studies", action="store_true", + help=("Moves studies and datasets from an old database " + "to a new one.")) + PARSER.add_argument("--dry_run", action="store_true", + help="Do not insert anything into the database") # Logging and verbosity - parser.add_argument("--disable_progress", action="store_true", + PARSER.add_argument("--disable_progress", action="store_true", help="Do not show progress bars.") - parser.add_argument("-v", "--verbose", action = "count", default = 3, + PARSER.add_argument("-v", "--verbose", action="count", default=3, help="Increase output Verbosity.") - parser.add_argument("-q", "--quiet", action = "count", default = 0, + PARSER.add_argument("-q", "--quiet", action="count", default=0, help="Decrease output Verbosity.") # Beacon-only variants - parser.add_argument("--beacon-only", action="store_true", - help="Variants are intended only for Beacon, loosening the requirements") - - args = parser.parse_args() - - logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s", level = (5-args.verbose+args.quiet)*10, datefmt="%H:%M:%S") + PARSER.add_argument("--beacon-only", action="store_true", + help=("Variants are intended only for Beacon, loosening" + " the requirements")) - if args.add_dbsnp: - logging.info("Adding new dbSNP version") - logging.info(" - dbSNP version: {}".format(args.dbsnp_version)) - logging.info(" - dbSNP reference: {}".format(args.dbsnp_reference)) + ARGS = PARSER.parse_args() - importer = DbSNPImporter(args) - importer.prepare_data() - if not args.disable_progress: - importer.count_entries() - importer.start_import() + logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s", + level=(5-ARGS.verbose+ARGS.quiet)*10, + datefmt="%H:%M:%S") - if args.add_reference: + if ARGS.add_reference: logging.info("Adding a new reference set using these sources:") - logging.info(" - Gencode: {}".format(args.gencode_version)) - logging.info(" - Ensembl: {}".format(args.ensembl_version)) - logging.info(" - dbNSFP: {}".format(args.dbnsfp_version)) - logging.info(" - dbSNP: {}".format(args.dbsnp_version)) - - importer = ReferenceSetImporter(args) - importer.prepare_data() - if not args.disable_progress: - importer.count_entries() - importer.start_import() - - if args.move_studies: - importer = OldDbImporter(args) - 
importer.prepare_data() - importer.start_import() - - if args.add_raw_data: - logging.info("Adding raw data %s", "(dry run)" if args.dry_run else '') - importer = RawDataImporter(args) - importer.prepare_data() - if not args.disable_progress: - importer.count_entries() - importer.start_import() + logging.info(" - Gencode: %s", ARGS.gencode_version) + logging.info(" - Ensembl: %s", ARGS.ensembl_version) + logging.info(" - dbNSFP: %s", ARGS.dbnsfp_version) + + IMPORTER = ReferenceSetImporter(ARGS) + IMPORTER.prepare_data() + if not ARGS.disable_progress: + IMPORTER.count_entries() + IMPORTER.start_import() + + if ARGS.move_studies: + IMPORTER = OldDbImporter(ARGS) + IMPORTER.prepare_data() + IMPORTER.start_import() + + if ARGS.add_raw_data: + logging.info("Adding raw data %s", "(dry run)" if ARGS.dry_run else '') + IMPORTER = RawDataImporter(ARGS) + IMPORTER.prepare_data() + if not ARGS.disable_progress: + IMPORTER.count_entries() + IMPORTER.start_import() diff --git a/settings_sample.json b/settings_sample.json index 7eef537e3..d3310b007 100644 --- a/settings_sample.json +++ b/settings_sample.json @@ -15,7 +15,7 @@ "postgresPort" : 5432, "postgresUser" : "postgres", "postgresPass" : "", - "postgresName" : "", + "postgresName" : "swefreq", "mongoHost" : "mongodb host", "mongoPassword" : "password", diff --git a/sql/data_schema.sql b/sql/data_schema.sql index 0493fae64..9739fec11 100644 --- a/sql/data_schema.sql +++ b/sql/data_schema.sql @@ -7,41 +7,17 @@ -------------------------------------------------------------------------------- CREATE SCHEMA IF NOT EXISTS data; --------------------------------------------------------------------------------- --- dbSNP tables. --- --- dbSNP datasets are quite large (~200.000.000 entries) and completely separate --- from the rest of the reference data. In order to minimize the number of dbSNP --- sets that need to be stored, the dbsnp_version table (which links to the --- dataset table) allows multiple datasets to use the same dbSNP data. 
- -CREATE TABLE IF NOT EXISTS data.dbsnp_versions ( - id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, - version_id varchar(64) -); - -CREATE TABLE IF NOT EXISTS data.dbsnp ( - id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, - version_id integer REFERENCES data.dbsnp_versions, - rsid bigint, - chrom varchar(10), - pos integer, - UNIQUE(version_id, rsid) -); - -------------------------------------------------------------------------------- -- Reference Set tables -- CREATE TABLE IF NOT EXISTS data.reference_sets ( id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, - dbsnp_version integer REFERENCES data.dbsnp_versions, reference_build varchar UNIQUE, -- should be ^(GRCh[0-9]+([.]p[0-9]+)?)$ reference_name varchar, ensembl_version varchar, gencode_version varchar, - dbnsfp_version varchar, - omim_version varchar + dbnsfp_version varchar ); CREATE TABLE IF NOT EXISTS data.genes ( @@ -112,7 +88,6 @@ CREATE TABLE IF NOT EXISTS data.studies ( CREATE TABLE IF NOT EXISTS data.datasets ( id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, study integer NOT NULL REFERENCES data.studies, - reference_set integer NOT NULL REFERENCES data.reference_sets, short_name varchar(50) UNIQUE NOT NULL, full_name varchar(100) NOT NULL, browser_uri varchar(200) DEFAULT NULL, @@ -143,6 +118,7 @@ CREATE TABLE IF NOT EXISTS data.sample_sets ( CREATE TABLE IF NOT EXISTS data.dataset_versions ( id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, dataset integer NOT NULL REFERENCES data.datasets, + reference_set integer NOT NULL REFERENCES data.reference_sets, dataset_version varchar(20) NOT NULL, dataset_description text NOT NULL, terms text NOT NULL, @@ -152,7 +128,7 @@ CREATE TABLE IF NOT EXISTS data.dataset_versions ( data_contact_name varchar(100) DEFAULT NULL, data_contact_link varchar(100) DEFAULT NULL, num_variants integer DEFAULT NULL, - coverage_levels integer[] DEFAULT NULL + coverage_levels integer[] DEFAULT NULL -- Levels used for coverage.coverage ); CREATE TABLE IF NOT EXISTS data.dataset_files ( @@ -207,7 +183,7 @@ CREATE TABLE IF NOT EXISTS data.coverage ( pos integer, mean real, median real, - coverage real[] + coverage real[] -- These are the coverage values, for the levels defined in dataset_versions.coverage_levels ); CREATE TABLE IF NOT EXISTS data.metrics ( @@ -234,8 +210,6 @@ CREATE OR REPLACE VIEW data.dataset_version_current AS -- CREATE INDEX coverage_pos_chrom ON data.coverage (chrom, pos); -CREATE INDEX dbsnp_chrom_pos ON data.dbsnp (chrom, pos); -CREATE INDEX dbsnp_rsid ON data.dbsnp (rsid); CREATE INDEX features_gene ON data.features (gene); CREATE INDEX features_transcript ON data.features (transcript); CREATE INDEX genes_gene_id ON data.genes (gene_id); @@ -246,5 +220,3 @@ CREATE INDEX variant_genes_gene ON data.variant_genes (gene); CREATE INDEX variant_genes_variant ON data.variant_genes (variant); CREATE INDEX variant_transcripts_transcript ON data.variant_transcripts (transcript); CREATE INDEX variant_transcripts_variant ON data.variant_transcripts (variant); -CREATE INDEX beacon_data_chrpos ON beacon.beacon_data_table (chromosome,start); -CREATE INDEX beacon_data_chrref ON beacon.beacon_data_table (chromosome,reference); From 8346951c6f1958d8f7f7fbeb14bf39702cf8c633 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 18 Mar 2019 13:10:03 +0100 Subject: [PATCH 088/360] better hom_count? 
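
Parse AC_Hom once per VCF line and keep one homozygote count per
alternate allele, so that multi-allelic sites no longer collapse to a
single integer. A minimal sketch of the intended lookup (illustrative
only; 'info' is assumed to be the parsed INFO dict for one VCF line and
'i' the index of the current alternate allele):

    raw = info.get('AC_Hom')  # the INFO field may be absent entirely
    if raw is None:
        hom_counts = None     # null is better than 0, as 0 has a meaning
    else:
        # multi-allelic sites sometimes give e.g. "14,0"
        hom_counts = [int(count) for count in raw.split(',')]

    # later, for each alternate allele i:
    data['hom_count'] = hom_counts[i] if hom_counts else None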
--- .../importer/data_importer/raw_data_importer.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 71b1bb302..1cc3707a7 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -259,6 +259,13 @@ def _insert_variants(self): else: rsids = [None] + try: + hom_counts = tuple(int(info['AC_Hom'])) + except KeyError: + hom_counts = None # null is better than 0, as 0 has a meaning + except ValueError: + data['hom_count'] = tuple(int(count) for count in info['AC_Hom'].split(',')) # parsing Swegen sometimes give e.g. 14,0 + for i, alt in enumerate(alt_alleles): if not self.settings.beacon_only: vep_annotations = [ann for ann in annotations if int(ann['ALLELE_NUM']) == i + 1] @@ -296,12 +303,9 @@ def _insert_variants(self): data['orig_alt_alleles'] = [ '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles ] - try: - data['hom_count'] = int(info['AC_Hom']) - except KeyError: - pass # null is better than 0, as 0 has a meaning - except ValueError: - data['hom_count'] = int(info['AC_Hom'].split(',')[0]) # parsing Swegen sometimes give e.g. 14,0 + + data['hom_count'] = hom_counts[i] + data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt']) data['quality_metrics'] = dict([(x, info[x]) for x in METRICS if x in info]) batch += [data] From 6efde853a0d42b48b99ebbd604a5464cce280f3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 18 Mar 2019 13:43:47 +0100 Subject: [PATCH 089/360] use minimal representation for variants Also avoid recalculation of identical data structure --- scripts/importer/data_importer/raw_data_importer.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 1cc3707a7..a3aab6249 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -266,12 +266,16 @@ def _insert_variants(self): except ValueError: data['hom_count'] = tuple(int(count) for count in info['AC_Hom'].split(',')) # parsing Swegen sometimes give e.g. 
14,0 + data['orig_alt_alleles'] = [ + '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles + ] + for i, alt in enumerate(alt_alleles): if not self.settings.beacon_only: vep_annotations = [ann for ann in annotations if int(ann['ALLELE_NUM']) == i + 1] data = dict(base) - data['alt'] = alt + data['ref'], data['alt'] = get_minimal_representation(base['pos'], base['ref'], alt) if len(rsids) <= i: data['rsid'] = rsids[-1] # same id as the last alternate @@ -300,16 +304,12 @@ def _insert_variants(self): genes.append(list(set({annotation['Gene'] for annotation in vep_annotations if annotation['Gene'][:4] == 'ENSG'}))) transcripts.append(list(set({annotation['Feature'] for annotation in vep_annotations if annotation['Feature'][:4] == 'ENST'}))) - data['orig_alt_alleles'] = [ - '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles - ] - data['hom_count'] = hom_counts[i] data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt']) data['quality_metrics'] = dict([(x, info[x]) for x in METRICS if x in info]) batch += [data] - + print(batch) counter += 1 if len(batch) >= self.settings.batch_size: From bca9f8d6c534f16dcceb1a0deb3c59e963c29767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 18 Mar 2019 14:05:27 +0100 Subject: [PATCH 090/360] forgot to take care of unused return value --- scripts/importer/data_importer/raw_data_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index a3aab6249..2daeb6da4 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -275,7 +275,7 @@ def _insert_variants(self): vep_annotations = [ann for ann in annotations if int(ann['ALLELE_NUM']) == i + 1] data = dict(base) - data['ref'], data['alt'] = get_minimal_representation(base['pos'], base['ref'], alt) + _, data['ref'], data['alt'] = get_minimal_representation(base['pos'], base['ref'], alt) if len(rsids) <= i: data['rsid'] = rsids[-1] # same id as the last alternate From b5a2800b11364d48e1dfa3020e47ac9c4e12306c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 18 Mar 2019 14:54:46 +0100 Subject: [PATCH 091/360] the return value _is_ needed --- scripts/importer/data_importer/raw_data_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 2daeb6da4..bf27dbe3f 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -275,7 +275,7 @@ def _insert_variants(self): vep_annotations = [ann for ann in annotations if int(ann['ALLELE_NUM']) == i + 1] data = dict(base) - _, data['ref'], data['alt'] = get_minimal_representation(base['pos'], base['ref'], alt) + data['pos'], data['ref'], data['alt'] = get_minimal_representation(base['pos'], base['ref'], alt) if len(rsids) <= i: data['rsid'] = rsids[-1] # same id as the last alternate From ebdf393fded4537e2809130addfbde4701986e7c Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Mon, 18 Mar 2019 15:13:10 +0100 Subject: [PATCH 092/360] Attempt to get code to work: don't reference data before it is created --- scripts/importer/data_importer/raw_data_importer.py | 11 ++++++----- 1 file 
changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index bf27dbe3f..32ffe4b73 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -264,11 +264,8 @@ def _insert_variants(self): except KeyError: hom_counts = None # null is better than 0, as 0 has a meaning except ValueError: - data['hom_count'] = tuple(int(count) for count in info['AC_Hom'].split(',')) # parsing Swegen sometimes give e.g. 14,0 + hom_count = tuple(int(count) for count in info['AC_Hom'].split(',')) # parsing Swegen sometimes give e.g. 14,0 - data['orig_alt_alleles'] = [ - '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles - ] for i, alt in enumerate(alt_alleles): if not self.settings.beacon_only: @@ -276,6 +273,10 @@ def _insert_variants(self): data = dict(base) data['pos'], data['ref'], data['alt'] = get_minimal_representation(base['pos'], base['ref'], alt) + data['orig_alt_alleles'] = [ + '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles + ] + if len(rsids) <= i: data['rsid'] = rsids[-1] # same id as the last alternate @@ -304,7 +305,7 @@ def _insert_variants(self): genes.append(list(set({annotation['Gene'] for annotation in vep_annotations if annotation['Gene'][:4] == 'ENSG'}))) transcripts.append(list(set({annotation['Feature'] for annotation in vep_annotations if annotation['Feature'][:4] == 'ENST'}))) - data['hom_count'] = hom_counts[i] + data['hom_count'] = hom_counts[i] if hom_counts else None data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt']) data['quality_metrics'] = dict([(x, info[x]) for x in METRICS if x in info]) From fc8aee290d5006b01ce2ca04a164a536878c48c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 18 Mar 2019 20:16:24 +0100 Subject: [PATCH 093/360] fix hotfix --- scripts/importer/data_importer/raw_data_importer.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 32ffe4b73..5e27f50e0 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -264,8 +264,14 @@ def _insert_variants(self): except KeyError: hom_counts = None # null is better than 0, as 0 has a meaning except ValueError: - hom_count = tuple(int(count) for count in info['AC_Hom'].split(',')) # parsing Swegen sometimes give e.g. 
14,0 + hom_counts = tuple(int(count) for count in info['AC_Hom'].split(',')) + fmt_alleles = ['{}-{}-{}-{}' + .format(data['chrom'], + *get_minimal_representation(base['pos'], + base['ref'], + x)) + for x in alt_alleles] for i, alt in enumerate(alt_alleles): if not self.settings.beacon_only: @@ -273,10 +279,7 @@ def _insert_variants(self): data = dict(base) data['pos'], data['ref'], data['alt'] = get_minimal_representation(base['pos'], base['ref'], alt) - data['orig_alt_alleles'] = [ - '{}-{}-{}-{}'.format(data['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) for x in alt_alleles - ] - + data['orig_alt_alleles'] = fmt_alleles if len(rsids) <= i: data['rsid'] = rsids[-1] # same id as the last alternate From f56be4adda0ae7c3e0629149d864a6e686142cfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 18 Mar 2019 20:23:38 +0100 Subject: [PATCH 094/360] fix for int->tuple --- scripts/importer/data_importer/raw_data_importer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 5e27f50e0..1b037aa25 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -260,11 +260,11 @@ def _insert_variants(self): rsids = [None] try: - hom_counts = tuple(int(info['AC_Hom'])) + hom_counts = [int(info['AC_Hom'])] except KeyError: hom_counts = None # null is better than 0, as 0 has a meaning except ValueError: - hom_counts = tuple(int(count) for count in info['AC_Hom'].split(',')) + hom_counts = [int(count) for count in info['AC_Hom'].split(',')] fmt_alleles = ['{}-{}-{}-{}' .format(data['chrom'], From 46eb946765c8c82e319f5dfccc73551e2c6092bc Mon Sep 17 00:00:00 2001 From: Martin Norling Date: Tue, 19 Mar 2019 11:06:00 +0100 Subject: [PATCH 095/360] test: Fix travis tests for postgres update --- test/data/load_dummy_data.sql | 28 +++++++++++----------------- test/travis_script.sh | 1 + 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/test/data/load_dummy_data.sql b/test/data/load_dummy_data.sql index 1277d8796..f3342f431 100644 --- a/test/data/load_dummy_data.sql +++ b/test/data/load_dummy_data.sql @@ -1,14 +1,8 @@ --- dbSNP tables. 
- -INSERT INTO data.dbsnp_versions (id, version_id) - VALUES (1000001, 'dummy 1'), - (1000002, 'dummy 2'); - -- Reference Set tables -INSERT INTO data.reference_sets (id, dbsnp_version, reference_build, reference_name, ensembl_version, gencode_version, dbnsfp_version, omim_version) - VALUES (1000001, 1000002, 'GRCh1p2', 'Dummyman', 'homo_sapiens_core_0_3', '11', 'b142', 'ominfo'), - (1000002, 1000001, 'GRCh2p1', 'Mummydam', 'homo_sapiens_core_1_2', '23', 'b131', 'omimeme'); +INSERT INTO data.reference_sets (id, reference_build, reference_name, ensembl_version, gencode_version, dbnsfp_version) + VALUES (1000001, 'GRCh1p2', 'Dummyman', 'homo_sapiens_core_0_3', '11', 'b142'), + (1000002, 'GRCh2p1', 'Mummydam', 'homo_sapiens_core_1_2', '23', 'b131'); -- Study and Dataset fields @@ -21,20 +15,20 @@ INSERT INTO data.collections (id, study_name, ethnicity) VALUES (1000002, 'Collection2', 'CollEth2'), (1000003, 'Collection3', 'CollEth3'); -INSERT INTO data.datasets (id, study, reference_set, short_name, full_name, browser_uri, beacon_uri, beacon_description, avg_seq_depth, seq_type, seq_tech, seq_center, dataset_size) - VALUES (1000001, 1000001, 1000001, 'Dataset 1', 'Dataset 1 Long name', 'http://example.com/browser1', 'http://example.com/beacon1', 'Dummy Dataset 1', 1.0, 'SeqType1', 'SeqTech1', 'SeqCenter1', 1001), - (1000002, 1000002, 1000002, 'Dataset 2', 'Dataset 2 Long name', 'http://example.com/browser2', 'http://example.com/beacon2', 'Dummy Dataset 2', 2.0, 'SeqType2', 'SeqTech2', 'SeqCenter2', 1002); +INSERT INTO data.datasets (id, study, short_name, full_name, browser_uri, beacon_uri, beacon_description, avg_seq_depth, seq_type, seq_tech, seq_center, dataset_size) + VALUES (1000001, 1000001, 'Dataset 1', 'Dataset 1 Long name', 'http://example.com/browser1', 'http://example.com/beacon1', 'Dummy Dataset 1', 1.0, 'SeqType1', 'SeqTech1', 'SeqCenter1', 1001), + (1000002, 1000002, 'Dataset 2', 'Dataset 2 Long name', 'http://example.com/browser2', 'http://example.com/beacon2', 'Dummy Dataset 2', 2.0, 'SeqType2', 'SeqTech2', 'SeqCenter2', 1002); INSERT INTO data.sample_sets (id, dataset, "collection", sample_size, phenotype) VALUES (1000001, 1000001, 1000001, 10, 'SamplePheno1'), (1000002, 1000001, 1000002, 15, 'SamplePheno2 Coll1'), (1000003, 1000002, 1000003, 20, 'SamplePheno2 Coll2'); -INSERT INTO data.dataset_versions (id, dataset, dataset_version, dataset_description, terms, var_call_ref, available_from, ref_doi, data_contact_name, data_contact_link, num_variants, coverage_levels) - VALUES (1000001, 1000001, 'Version 1-1', 'Dataset 1-1, description', 'Dataset 1-1, terms', 'CallRef11', '2017-01-01', 'datset11DOI', 'Gunnar Green', 'gunnar.green@example.com', 10, ARRAY[1,5,10]), - (1000002, 1000002, 'Version 2-1', 'Dataset 2-1, description', 'Dataset 2-1, terms', 'CallRef21', '2017-02-01', 'datset21DOI', NULL, NULL, 100, ARRAY[1,5,10]), - (1000003, 1000002, 'Version 2-2', 'Dataset 2-2, description', 'Dataset 2-2, terms', 'CallRef22', '2017-02-02', 'datset22DOI', 'Strummer project', 'https://example.com/strummer', 1000, ARRAY[1,5,10]), - (1000004, 1000002, 'InvVer 2-3', 'Dataset 2-3, description', 'Dataset 2-3, terms', 'CallRef23', '2030-02-03', 'datset23DOI', 'Drummer project', 'https://example.com/drummer', 10000, ARRAY[1,5,10]); +INSERT INTO data.dataset_versions (id, dataset, reference_set, dataset_version, dataset_description, terms, var_call_ref, available_from, ref_doi, data_contact_name, data_contact_link, num_variants, coverage_levels) + VALUES (1000001, 1000001, 1000001, 'Version 
1-1', 'Dataset 1-1, description', 'Dataset 1-1, terms', 'CallRef11', '2017-01-01', 'datset11DOI', 'Gunnar Green', 'gunnar.green@example.com', 10, ARRAY[1,5,10]), + (1000002, 1000002, 1000001, 'Version 2-1', 'Dataset 2-1, description', 'Dataset 2-1, terms', 'CallRef21', '2017-02-01', 'datset21DOI', NULL, NULL, 100, ARRAY[1,5,10]), + (1000003, 1000002, 1000002, 'Version 2-2', 'Dataset 2-2, description', 'Dataset 2-2, terms', 'CallRef22', '2017-02-02', 'datset22DOI', 'Strummer project', 'https://example.com/strummer', 1000, ARRAY[1,5,10]), + (1000004, 1000002, 1000002, 'InvVer 2-3', 'Dataset 2-3, description', 'Dataset 2-3, terms', 'CallRef23', '2030-02-03', 'datset23DOI', 'Drummer project', 'https://example.com/drummer', 10000, ARRAY[1,5,10]); INSERT INTO data.dataset_files(id, dataset_version, basename, uri, file_size) VALUES (1000001, 1000001, 'File11-1', '/release/file111.txt', 100), diff --git a/test/travis_script.sh b/test/travis_script.sh index 5e7df3ccd..3c8692d73 100755 --- a/test/travis_script.sh +++ b/test/travis_script.sh @@ -4,6 +4,7 @@ cp settings_sample.json settings.json sed -i.tmp 's/"postgresHost" : "postgres host"/"postgresHost" : "127.0.0.1"/' settings.json sed -i.tmp 's/"postgresPort" : 5432/"postgresPort" : 5433/' settings.json +sed -i.tmp 's/"postgresName" : "swefreq"/"postgresName" : ""/' settings.json echo 'SETTINGS' cat settings.json From 406d59e5ca8d0b781a8efaa9dde80266d8a0c093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 19 Mar 2019 13:58:32 +0100 Subject: [PATCH 096/360] data->base; remove unintended debug line --- scripts/importer/data_importer/raw_data_importer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 1b037aa25..13060cf4c 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -267,7 +267,7 @@ def _insert_variants(self): hom_counts = [int(count) for count in info['AC_Hom'].split(',')] fmt_alleles = ['{}-{}-{}-{}' - .format(data['chrom'], + .format(base['chrom'], *get_minimal_representation(base['pos'], base['ref'], x)) @@ -313,7 +313,6 @@ def _insert_variants(self): data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], data['alt']) data['quality_metrics'] = dict([(x, info[x]) for x in METRICS if x in info]) batch += [data] - print(batch) counter += 1 if len(batch) >= self.settings.batch_size: From e35c55739367d06c1ca62def64b87749d74fcc4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 20 Mar 2019 09:59:26 +0100 Subject: [PATCH 097/360] perform coverage reformatting immediately --- .../data_importer/raw_data_importer.py | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 13060cf4c..d30c1a0ae 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -146,26 +146,25 @@ def _insert_coverage(self): data['dataset_version'] = self.dataset_version data[header[i][0]] = header[i][1](item) + # re-format coverage for batch + data['coverage'] = [item['cov1'], item['cov5'], item['cov10'], + item['cov15'], item['cov20'], item['cov25'], + item['cov30'], item['cov50'], item['cov100']] + del data['cov1'] + del data['cov5'] + del data['cov10'] + del data['cov15'] + del 
data['cov20'] + del data['cov25'] + del data['cov30'] + del data['cov50'] + del data['cov100'] + if self.counter['coverage'] != None: counter += 1 batch += [data] if len(batch) >= self.settings.batch_size: - # re-format coverage for batch - for i, item in enumerate(batch): - batch[i]['coverage'] = [item['cov1'], item['cov5'], item['cov10'], - item['cov15'], item['cov20'], item['cov25'], - item['cov30'], item['cov50'], item['cov100']] - del batch[i]['cov1'] - del batch[i]['cov5'] - del batch[i]['cov10'] - del batch[i]['cov15'] - del batch[i]['cov20'] - del batch[i]['cov25'] - del batch[i]['cov30'] - del batch[i]['cov50'] - del batch[i]['cov100'] - if not self.settings.dry_run: db.Coverage.insert_many(batch).execute() batch = [] From e618972449baf340d13d6790719aa849205cc8a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 20 Mar 2019 10:13:12 +0100 Subject: [PATCH 098/360] a few fixes for batch management - check batch size using >= - check if batch: instead of if len(batch): - no need to reset batch at end - a few formatting changes --- scripts/importer/data_importer/reference_set_importer.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/importer/data_importer/reference_set_importer.py b/scripts/importer/data_importer/reference_set_importer.py index 5ba879b5a..26c8b00c4 100644 --- a/scripts/importer/data_importer/reference_set_importer.py +++ b/scripts/importer/data_importer/reference_set_importer.py @@ -54,7 +54,7 @@ def _insert_features(self): 'strand':feature['strand'], 'feature_type':feature['feature_type']}] - if len(batch) % self.batch_size == 0: + if len(batch) >= self.batch_size: if not self.settings.dry_run: db.Feature.insert_many(batch).execute() batch = [] @@ -63,13 +63,12 @@ def _insert_features(self): while progress - last_progress > 0.01: last_progress += 0.01 self._tick() - if len(batch): + if batch: if not self.settings.dry_run: db.Feature.insert_many(batch).execute() - batch = [] self._tick(True) - logging.info("Features inserted in {}".format( self._time_since(start) )) + logging.info("Features inserted in {}".format(self._time_since(start))) def _insert_genes(self): logging.info("Inserting genes into database") From 74888386341272dba0babd58f93946268fb5c463 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 20 Mar 2019 10:24:06 +0100 Subject: [PATCH 099/360] clarify that the values are in data.* --- sql/data_schema.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/data_schema.sql b/sql/data_schema.sql index 9739fec11..7c518df12 100644 --- a/sql/data_schema.sql +++ b/sql/data_schema.sql @@ -128,7 +128,7 @@ CREATE TABLE IF NOT EXISTS data.dataset_versions ( data_contact_name varchar(100) DEFAULT NULL, data_contact_link varchar(100) DEFAULT NULL, num_variants integer DEFAULT NULL, - coverage_levels integer[] DEFAULT NULL -- Levels used for coverage.coverage + coverage_levels integer[] DEFAULT NULL -- Levels used for data.coverage.coverage ); CREATE TABLE IF NOT EXISTS data.dataset_files ( @@ -183,7 +183,7 @@ CREATE TABLE IF NOT EXISTS data.coverage ( pos integer, mean real, median real, - coverage real[] -- These are the coverage values, for the levels defined in dataset_versions.coverage_levels + coverage real[] -- These are the coverage values, for the levels defined in data.dataset_versions.coverage_levels ); CREATE TABLE IF NOT EXISTS data.metrics ( From 63c019ff278846b754754e982dcc97ff459fdb42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= 
Date: Wed, 20 Mar 2019 13:45:02 +0100 Subject: [PATCH 100/360] first implementation of replacement function for progress --- .../importer/data_importer/data_importer.py | 32 ++++++----- .../data_importer/raw_data_importer.py | 20 ++----- .../data_importer/reference_set_importer.py | 55 ++++++------------- 3 files changed, 39 insertions(+), 68 deletions(-) diff --git a/scripts/importer/data_importer/data_importer.py b/scripts/importer/data_importer/data_importer.py index e49a6e519..42071610f 100644 --- a/scripts/importer/data_importer/data_importer.py +++ b/scripts/importer/data_importer/data_importer.py @@ -89,20 +89,6 @@ def _open(self, filename): except IOError as e: logging.error("IOERROR: {}".format(e)) - def _print_progress_bar(self): - if logging.getLogger().getEffectiveLevel() < 30: - sys.stderr.write("".join(["{:<10}".format(i) for i in range(0,101,10)]) + "\n") - sys.stderr.write("| ------- "*10 + "|\n") - - def _tick(self, finished = False): - """ - Prints a single progress tick, and a newline if finished is True. - """ - sys.stderr.write("=") - if finished: - sys.stderr.write("\n") - sys.stderr.flush() - def _time_format(self, seconds): h, rem = divmod(seconds, 3600) mins, secs = divmod(rem, 60) @@ -119,3 +105,21 @@ def _time_since(self, start): def _time_to(self, start, progress = 0.01): return self._time_format( (time.time() - start)/progress ) + + def _update_progress_bar(self, current_count, total, last_progress, finished=False): + if not finished: + progress = current_count/total + else: + progress = 1.001 + if last_progress < 0: + if logging.getLogger().getEffectiveLevel() < 30: + sys.stderr.write("".join(["{:<10}".format(i) for i in range(0,101,10)]) + "\n") + sys.stderr.write("| ------- "*10 + "|\n") + last_progress = 0 + while progress > last_progress + 0.01: + last_progress += 0.01 + sys.stderr.write("=") + sys.stderr.flush() + if finished: + sys.stderr.write("\n") + return last_progress diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index d30c1a0ae..e4e5875c5 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -170,17 +170,11 @@ def _insert_coverage(self): batch = [] # Update progress if self.counter['coverage'] != None: - progress = counter / self.counter['coverage'] - while progress > last_progress + 0.01: - if not last_progress: - logging.info("Estimated time to completion: {}".format(self._time_to(start, progress))) - self._print_progress_bar() - self._tick() - last_progress += 0.01 + last_progress = self._update_progress_bar(counter, self.counter['coverage'], last_progress) if batch and not self.settings.dry_run: db.Coverage.insert_many(batch) if self.counter['coverage'] != None: - self._tick(True) + last_progress = self._update_progress_bar(counter, self.counter['coverage'], last_progress, finished=True) if not self.settings.dry_run: logging.info("Inserted {} coverage records in {}".format(counter, self._time_since(start))) @@ -341,13 +335,7 @@ def _insert_variants(self): batch = [] # Update progress if self.counter['variants'] != None: - progress = counter / self.counter['variants'] - while progress > last_progress + 0.01: - if not last_progress: - self._print_progress_bar() - self._tick() - last_progress += 0.01 - + last_progress = self._update_progress_bar(counter, self.counter['variants'], last_progress) if batch and not self.settings.dry_run: if not self.settings.dry_run: @@ -378,7 +366,7 @@ def 
_insert_variants(self): self.dataset_version.num_variants = counter self.dataset_version.save() if self.counter['variants'] != None: - self._tick(True) + last_progress = self._update_progress_bar(counter, self.counter['variants'], last_progress, finished=True) if not self.settings.dry_run: logging.info("Inserted {} variant records in {}".format(counter, self._time_since(start))) diff --git a/scripts/importer/data_importer/reference_set_importer.py b/scripts/importer/data_importer/reference_set_importer.py index 26c8b00c4..901783bbd 100644 --- a/scripts/importer/data_importer/reference_set_importer.py +++ b/scripts/importer/data_importer/reference_set_importer.py @@ -41,8 +41,7 @@ def __init__(self, settings): def _insert_features(self): logging.info("Inserting features into database") start = time.time() - self._print_progress_bar() - last_progress = 0 + last_progress = -1 batch = [] with db.database.atomic(): for i, feature in enumerate(self.features): @@ -59,14 +58,11 @@ def _insert_features(self): db.Feature.insert_many(batch).execute() batch = [] - progress = i / len(self.features) - while progress - last_progress > 0.01: - last_progress += 0.01 - self._tick() + last_progress = self._update_progress_bar(i, len(self.features), last_progress) if batch: if not self.settings.dry_run: db.Feature.insert_many(batch).execute() - self._tick(True) + last_progress = self._update_progress_bar(i, len(self.features), last_progress, finished=True) logging.info("Features inserted in {}".format(self._time_since(start))) @@ -74,8 +70,7 @@ def _insert_genes(self): logging.info("Inserting genes into database") start = time.time() self.gene_db_ids = {} - self._print_progress_bar() - last_progress = 0 + last_progress = -1 for i, gene in enumerate(self.genes): # As far as I know I can't batch insert these and still get the id's back db_gene = db.Gene( reference_set = self.db_reference, @@ -101,11 +96,9 @@ def _insert_genes(self): self.add_other_names(db_gene.id, other_names) except KeyError: pass - progress = i / len(self.genes) - while progress - last_progress > 0.01: - last_progress += 0.01 - self._tick() - self._tick(True) + + last_progress = self._update_progress_bar(i, len(self.genes), last_progress) + last_progress = self._update_progress_bar(i, len(self.genes), last_progress, finished=True) logging.info("Genes inserted in {}".format( self._time_since(start) )) @@ -132,8 +125,7 @@ def _insert_transcripts(self): start = time.time() self.transcript_db_ids = {} - self._print_progress_bar() - last_progress = 0 + last_progress = -1 for i, transcript in enumerate(self.transcripts): db_transcript = db.Transcript( transcript_id = transcript['transcript_id'], gene = self.gene_db_ids[transcript['gene_id']], @@ -152,11 +144,8 @@ def _insert_transcripts(self): db_transcript.save() self.transcript_db_ids[transcript['transcript_id']] = db_transcript.id - progress = i / len(self.transcripts) - while progress - last_progress > 0.01: - last_progress += 0.01 - self._tick() - self._tick(True) + last_progress = self._update_progress_bar(i, len(self.transcripts), last_progress) + last_progress = self._update_progress_bar(i, len(self.transcripts), last_progress, finished=True) logging.info("Transcripts inserted in {}".format( self._time_since(start) )) @@ -269,23 +258,17 @@ def _read_ensembl(self): for transcript in self.ensembl.fetchall(): canonical_dict[transcript[0]] = transcript[1] - last_progress = 0.0 - if self.numbers['genes'] != None: - self._print_progress_bar() - + last_progress = -1.0 for i, gene in 
enumerate(self.genes): if gene['gene_id'] in canonical_dict: self.genes[i]['canonical_transcript'] = canonical_dict[gene['gene_id']] self.counters['genes'] += 1 if self.numbers['genes'] != None: - progress = i / self.numbers['genes'] - while progress - last_progress > 0.01: - last_progress += 0.01 - self._tick() + last_progress = self._update_progress_bar(i, self.numbers['genes'], last_progress) if self.numbers['genes'] != None: - self._tick(True) - logging.info("Canonical transcript information from ensembl added in {}.".format( self._time_since(start) )) + last_progress = self._update_progress_bar(i, self.numbers['genes'], last_progress, finished=True) + logging.info("Canonical transcript information from ensembl added in {}.".format(self._time_since(start))) def count_entries(self): logging.info("Counting features in gencode file (for progress bar)") @@ -327,9 +310,7 @@ def prepare_data(self): def start_import(self): start = time.time() logging.info("Reading gencode data into buffers.") - last_progress = 0.0 - if self.numbers['genes'] != None: - self._print_progress_bar() + last_progress = -1.0 for line in self.gencode: line = bytes(line).decode('ascii').strip() if line.startswith("#"): @@ -352,9 +333,7 @@ def start_import(self): # only progress for genes to keep it simple if self.numbers['genes'] != None: progress = self.counters['genes'] / self.numbers['genes'] - while progress - last_progress > 0.01: - last_progress += 0.01 - self._tick() + last_progress = self._update_progress_bar(self.counters['genes'], self.numbers['genes'], last_progress) if values[2] == 'gene': data['name'] = info['gene_name'] self.genes += [data] @@ -377,7 +356,7 @@ def start_import(self): logging.error("{}".format(e)) break if self.numbers['genes'] != None: - self._tick(True) + last_progress = self._update_progress_bar(self.counters['genes'], self.numbers['genes'], last_progress, finished=True) logging.info("Gencode data read into buffers in {}.".format( self._time_since(start) )) self._read_ensembl() self._read_dbnsfp() From 3e53e4929abcb48f4788802d52be755c853b9ef8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 20 Mar 2019 14:03:52 +0100 Subject: [PATCH 101/360] negative last_progress in raw import as well --- scripts/importer/data_importer/raw_data_importer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index e4e5875c5..2179b93b6 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -131,7 +131,7 @@ def _insert_coverage(self): ('cov30', float), ('cov50', float), ('cov100', float)] logging.info("Inserting Coverage") batch = [] - last_progress = 0.0 + last_progress = -1.0 counter = 0 with db.database.atomic(): for filename in self.settings.coverage_file: @@ -190,7 +190,7 @@ def _insert_variants(self): genes = [] transcripts = [] - last_progress = 0.0 + last_progress = -1.0 counter = 0 samples = 0 vep_field_names = None From ed6632004f939de52901ae448f72a4f5b5c2b82b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 20 Mar 2019 14:18:19 +0100 Subject: [PATCH 102/360] added missing space --- scripts/importer/data_importer/old_db_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/importer/data_importer/old_db_importer.py b/scripts/importer/data_importer/old_db_importer.py index 971604619..a1beb077c 100644 --- 
a/scripts/importer/data_importer/old_db_importer.py +++ b/scripts/importer/data_importer/old_db_importer.py @@ -24,7 +24,7 @@ def __init__(self, settings): def _select_reference_set(self, short_name): if len(self.reference_sets) == 1: - logging.info(("Only one reference set is available, %s," + logging.info(("Only one reference set is available, %s, " "will default to this set for all datasets"), self.reference_sets[0].name) return self.reference_sets[0].id From f803efa7e5e5882cef9a53192fad6c0584199284 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 20 Mar 2019 14:33:52 +0100 Subject: [PATCH 103/360] another fix for coverage --- scripts/importer/data_importer/raw_data_importer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 2179b93b6..8cd395632 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -147,9 +147,9 @@ def _insert_coverage(self): data[header[i][0]] = header[i][1](item) # re-format coverage for batch - data['coverage'] = [item['cov1'], item['cov5'], item['cov10'], - item['cov15'], item['cov20'], item['cov25'], - item['cov30'], item['cov50'], item['cov100']] + data['coverage'] = [data['cov1'], data['cov5'], data['cov10'], + data['cov15'], data['cov20'], data['cov25'], + data['cov30'], data['cov50'], data['cov100']] del data['cov1'] del data['cov5'] del data['cov10'] From c673791d9e1e08c6946d2b08d049e183a2109217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 20 Mar 2019 14:42:36 +0100 Subject: [PATCH 104/360] possible to only import variants or coverage --- scripts/importer/data_importer/raw_data_importer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/importer/data_importer/raw_data_importer.py b/scripts/importer/data_importer/raw_data_importer.py index 8cd395632..27d8b9d25 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -401,8 +401,9 @@ def prepare_data(self): def start_import(self): self._set_dataset_info() - self._insert_variants() - if not self.settings.beacon_only: + if self.settings.variant_file: + self._insert_variants() + if not self.settings.beacon_only and self.settings.coverage_file: self._insert_coverage() def add_variant_genes(self, variant_indexes:list, genes_to_add:list, ref_genes:dict): From a6e75ed06a0c6cc169920fe72a2ec7a974ebcd2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 20 Mar 2019 15:21:26 +0100 Subject: [PATCH 105/360] removed need of the function --- backend/db.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/backend/db.py b/backend/db.py index 22068a8b6..d3e329c3b 100644 --- a/backend/db.py +++ b/backend/db.py @@ -493,19 +493,3 @@ def build_dict_from_row(row): continue d[field] = value return d - - -def get_reference_set_for_dataset(dataset): - """ - Get the reference set associated with a dataset - Args: - dataset (str): short name of the dataset - Returns: - ReferenceSet: the associated reference set; returns None if not available - """ - try: - return (Dataset.select() - .where(Dataset.short_name==dataset) - .get()).reference_set - except Dataset.DoesNotExist: - return None From 49eb56aa46ff8529b97831dcbd723e528eec125f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 4 Jan 2019 13:51:01 +0100 Subject: 
[PATCH 106/360] Autocomplete working with pgsql

---
 backend/modules/browser/browser_handlers.py |  3 ++-
 backend/modules/browser/pgsql.py            | 25 +++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 backend/modules/browser/pgsql.py

diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py
index 1d57f3988..7dff60210 100644
--- a/backend/modules/browser/browser_handlers.py
+++ b/backend/modules/browser/browser_handlers.py
@@ -2,6 +2,7 @@
 
 from . import lookups
 from . import mongodb
+from . import pgsql
 
 from .utils import get_xpos, add_consequence_to_variant, remove_extraneous_vep_annotations, \
     order_vep_by_csq, get_proper_hgvs
@@ -264,7 +265,7 @@ class Autocomplete(handlers.UnsafeHandler):
     def get(self, dataset, query):
         ret = {}
 
-        results = mongodb.get_autocomplete(dataset, query)
+        results = pgsql.get_autocomplete(dataset, query)
         ret = {'values': sorted(list(set(results)))[:20]}
 
         self.finish( ret )
diff --git a/backend/modules/browser/pgsql.py b/backend/modules/browser/pgsql.py
new file mode 100644
index 000000000..90b757784
--- /dev/null
+++ b/backend/modules/browser/pgsql.py
@@ -0,0 +1,25 @@
+"""
+Replaces mongodb.py
+"""
+
+import logging
+
+from . import db
+from . import lookups
+from .utils import get_xpos
+
+
+def get_autocomplete(dataset, query):
+    """
+    Provide autocomplete suggestions based on the query
+    NOTE: dataset is not used for sql
+    Args:
+        dataset (str): name of the dataset
+        query (str): the query to compare to the available gene names
+    Returns:
+        list: A list of gene names whose beginning matches the query
+    """
+    genes = db.Gene.select(db.Gene.name).where(db.Gene.name.startswith(query))
+    gene_names = [str(gene.name) for gene in genes]
+    logging.error('Autocomplete: {}'.format(gene_names))
+    return gene_names

From 010b761b6535d09fce55fe5a9abcbd61b8eb503b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20=C3=96stberg?=
Date: Fri, 4 Jan 2019 14:39:51 +0100
Subject: [PATCH 107/360] skeleton for get_variant_list

---
 backend/modules/browser/pgsql.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/backend/modules/browser/pgsql.py b/backend/modules/browser/pgsql.py
index 90b757784..a8d761377 100644
--- a/backend/modules/browser/pgsql.py
+++ b/backend/modules/browser/pgsql.py
@@ -23,3 +23,7 @@ def get_autocomplete(dataset, query):
     gene_names = [str(gene.name) for gene in genes]
     logging.error('Autocomplete: {}'.format(gene_names))
     return gene_names
+
+
+def get_variant_list():
+    pass

From e8c5e7e3f61924b4e93d0911700a1f9bbfaa7767 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20=C3=96stberg?=
Date: Fri, 4 Jan 2019 15:18:55 +0100
Subject: [PATCH 108/360] first function passing test

---
 backend/modules/browser/lookups.py      | 20 ++++++++++++++++----
 backend/modules/browser/test_lookups.py | 26 +++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 4 deletions(-)
 create mode 100644 backend/modules/browser/test_lookups.py

diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py
index 829dff050..430f461e6 100644
--- a/backend/modules/browser/lookups.py
+++ b/backend/modules/browser/lookups.py
@@ -1,15 +1,27 @@
 import re
 
-from .utils import METRICS, AF_BUCKETS, get_xpos, xpos_to_pos, add_consequence_to_variants, add_consequence_to_variant
+import db
+
+#from .utils import METRICS, AF_BUCKETS, get_xpos, xpos_to_pos, add_consequence_to_variants, add_consequence_to_variant
 
 SEARCH_LIMIT = 10000
 
-def get_gene(sdb, gene_id):
-    return sdb.genes.find_one({'gene_id': gene_id}, 
projection={'_id': False}) +def get_gene(gene_id): + """ + Retrieve gene by gene_id + Args: + gene_id: the id of the gene + + """ + try: + return db.Gene.select().where(db.Gene.gene_id==gene_id).dicts().get() + except db.Gene.DoesNotExist: + return {} -def get_gene_by_name(sdb, gene_name): +def get_gene_by_name(gene_name): # try gene_name field first + gene = db.Gene.select().where(db.Gene.gene_id==gene_id).dicts().get() gene = sdb.genes.find_one({'gene_name': gene_name}, projection={'_id': False}) if gene: return gene diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py new file mode 100644 index 000000000..fe1fa0a59 --- /dev/null +++ b/backend/modules/browser/test_lookups.py @@ -0,0 +1,26 @@ +import lookups + +def test_get_gene(): + expected = {'id': 1, + 'reference_set': 1, + 'gene_id': 'ENSG00000223972', + 'gene_name': 'DDX11L1', + 'full_name': 'DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1', + 'canonical_transcript': 'ENST00000456328', + 'chrom': '1', + 'start_pos': 11870, + 'strand': '+'} + result = lookups.get_gene('ENSG00000223972') + print(result) + assert result['id'] == expected['id'] + assert result['reference_set'] == expected['reference_set'] + assert result['gene_id'] == expected['gene_id'] + assert result['name'] == expected['gene_name'] + assert result['full_name'] == expected['full_name'] + assert result['canonical_transcript'] == expected['canonical_transcript'] + assert result['chrom'] == expected['chrom'] + assert result['start'] == expected['start_pos'] + assert result['strand'] == expected['strand'] + + result = lookups.get_gene('NOT_A_GENE') + assert not result From b85793e1a427ff48c96b73712948318cb087f8e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 4 Jan 2019 16:53:01 +0100 Subject: [PATCH 109/360] transcript retrieval working --- backend/modules/browser/lookups.py | 60 ++++++++++++----- backend/modules/browser/test_lookups.py | 88 +++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 16 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 430f461e6..f2754409b 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -8,10 +8,11 @@ def get_gene(gene_id): """ - Retrieve gene by gene_id + Retrieve gene by gene id Args: gene_id: the id of the gene - + Returns: + dict: values for the gene; empty if not found """ try: return db.Gene.select().where(db.Gene.gene_id==gene_id).dicts().get() @@ -20,16 +21,41 @@ def get_gene(gene_id): def get_gene_by_name(gene_name): + """ + Retrieve gene by gene_name. + First checks gene_name, then other_names. 
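+    (e.g. a query for 'NIR' is expected to fall through to the
+    other_names entry of NOC2L, as exercised in test_get_gene_by_name)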
+ Args: + gene_name: the id of the gene + Returns: + dict: values for the gene; empty if not found + """ # try gene_name field first - gene = db.Gene.select().where(db.Gene.gene_id==gene_id).dicts().get() - gene = sdb.genes.find_one({'gene_name': gene_name}, projection={'_id': False}) - if gene: - return gene - # if not, try gene['other_names'] - return sdb.genes.find_one({'other_names': gene_name}, projection={'_id': False}) + try: + return db.Gene.select().where(db.Gene.name==gene_name).dicts().get() + except db.Gene.DoesNotExist: + try: + # troubles with KeyError + return db.Gene.select().where(db.Gene.other_names.contains(gene_name)).dicts().get() + except db.Gene.DoesNotExist: + return {} -def get_transcript(sdb, transcript_id): +def get_transcript(transcript_id): + """ + Retrieve transcript by transcript id + Also includes exons as ['exons'] + Args: + transcript_id: the id of the transcript + Returns: + dict: values for the transcript, including exons; empty if not found + """ + try: + transcript = db.Transcript.select().where(db.Transcript.transcript_id==transcript_id).dicts().get() + transcript['exons'] = get_exons_in_transcript(transcript['id']) + return transcript + except db.Transcript.DoesNotExist: + return {} + transcript = sdb.transcripts.find_one({'transcript_id': transcript_id}, projection={'_id': False}) if not transcript: return None @@ -317,10 +343,12 @@ def get_variants_in_transcript(db, sdb, transcript_id): return variants -def get_exons_in_transcript(sdb, transcript_id): - # return sorted( - # [x for x in - # db.exons.find({'transcript_id': transcript_id}, projection={'_id': False}) - # if x['feature_type'] != 'exon'], - # key=lambda k: k['start']) - return sorted(list(sdb.exons.find({'transcript_id': transcript_id, 'feature_type': { "$in": ['CDS', 'UTR', 'exon'] }}, projection={'_id': False})), key=lambda k: k['start']) +def get_exons_in_transcript(transcript_dbid): + """ + Retrieve exons associated with the given transcript id + Args: + transcript_dbid: the id of the transcript in the database (Transcript.id; not transcript_id) + Returns: + list: dicts with values for each exon sorted by start position + """ + return sorted(list(db.Feature.select().where(db.Feature.transcript==transcript_dbid).dicts()), key=lambda k: k['start']) diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index fe1fa0a59..b4484cb6b 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -24,3 +24,91 @@ def test_get_gene(): result = lookups.get_gene('NOT_A_GENE') assert not result + + +def test_get_gene_by_name(): + expected = {'id': 1, + 'reference_set': 1, + 'gene_id': 'ENSG00000223972', + 'gene_name': 'DDX11L1', + 'full_name': 'DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1', + 'canonical_transcript': 'ENST00000456328', + 'chrom': '1', + 'start_pos': 11870, + 'strand': '+'} + result = lookups.get_gene_by_name('DDX11L1') + assert result['id'] == expected['id'] + assert result['reference_set'] == expected['reference_set'] + assert result['gene_id'] == expected['gene_id'] + assert result['name'] == expected['gene_name'] + assert result['full_name'] == expected['full_name'] + assert result['canonical_transcript'] == expected['canonical_transcript'] + assert result['chrom'] == expected['chrom'] + assert result['start'] == expected['start_pos'] + assert result['strand'] == expected['strand'] + + # crashing with other_names.contains() +# result = lookups.get_gene_by_name('NOT_A_GENE') +# assert not 
result + # NOC2L +# result = lookups.get_gene_by_name('NOC2L') +# result = lookups.get_gene_by_name('NIR') +# print(result) +# assert False +# result = lookups.get_gene_by_name('Z') + + +def test_get_transcript(): + expected = {'id': 5, + 'transcript_id': 'ENST00000438504', + 'gene': '2', + 'mim_annotation': 'Was protein family homolog 1; wash1', + 'chrom': '1', + 'mim_gene_accession': 613632, + 'start_pos': 14364, + 'stop_pos': 29371, + 'strand': '-'} + exp_exon = [{'id': 28, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14364, 'stop': 14830, 'strand': '-', 'feature_type': 'exon'}, + {'id': 27, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14971, 'stop': 15039, 'strand': '-', 'feature_type': 'exon'}, + {'id': 26, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 15797, 'stop': 15902, 'strand': '-', 'feature_type': 'exon'}, + {'id': 25, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 15905, 'stop': 15948, 'strand': '-', 'feature_type': 'exon'}, + {'id': 24, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 16608, 'stop': 16766, 'strand': '-', 'feature_type': 'exon'}, + {'id': 23, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 16855, 'stop': 17056, 'strand': '-', 'feature_type': 'exon'}, + {'id': 22, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17234, 'stop': 17365, 'strand': '-', 'feature_type': 'exon'}, + {'id': 21, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17603, 'stop': 17743, 'strand': '-', 'feature_type': 'exon'}, + {'id': 20, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17916, 'stop': 18062, 'strand': '-', 'feature_type': 'exon'}, + {'id': 19, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 18269, 'stop': 18380, 'strand': '-', 'feature_type': 'exon'}, + {'id': 18, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 24739, 'stop': 24892, 'strand': '-', 'feature_type': 'exon'}, + {'id': 17, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 29322, 'stop': 29371, 'strand': '-', 'feature_type': 'exon'}] + + result = lookups.get_transcript('ENST00000438504') + assert result['id'] == expected['id'] + assert result['mim_annotation'] == expected['mim_annotation'] + assert result['transcript_id'] == expected['transcript_id'] + assert result['mim_gene_accession'] == expected['mim_gene_accession'] + assert result['chrom'] == expected['chrom'] + assert result['start'] == expected['start_pos'] + assert result['stop'] == expected['stop_pos'] + assert result['strand'] == expected['strand'] + assert result['exons'] == exp_exon + + assert not lookups.get_transcript('INCORRECT') + + +def test_get_exons_in_transcript(): + result = lookups.get_exons_in_transcript(5) + expected = [{'id': 28, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14364, 'stop': 14830, 'strand': '-', 'feature_type': 'exon'}, + {'id': 27, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14971, 'stop': 15039, 'strand': '-', 'feature_type': 'exon'}, + {'id': 26, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 15797, 'stop': 15902, 'strand': '-', 'feature_type': 'exon'}, + {'id': 25, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 15905, 'stop': 15948, 'strand': '-', 'feature_type': 'exon'}, + {'id': 24, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 16608, 'stop': 16766, 'strand': '-', 'feature_type': 'exon'}, + {'id': 23, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 16855, 'stop': 17056, 'strand': '-', 'feature_type': 'exon'}, + {'id': 22, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17234, 'stop': 17365, 'strand': '-', 'feature_type': 'exon'}, + {'id': 21, 
'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17603, 'stop': 17743, 'strand': '-', 'feature_type': 'exon'}, + {'id': 20, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17916, 'stop': 18062, 'strand': '-', 'feature_type': 'exon'}, + {'id': 19, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 18269, 'stop': 18380, 'strand': '-', 'feature_type': 'exon'}, + {'id': 18, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 24739, 'stop': 24892, 'strand': '-', 'feature_type': 'exon'}, + {'id': 17, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 29322, 'stop': 29371, 'strand': '-', 'feature_type': 'exon'}] + print(result) + assert result == expected + From 370066ce48b9926bc8ea4dd20655d091057b3e39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 7 Jan 2019 13:10:40 +0100 Subject: [PATCH 110/360] some get_variant functions fixed --- backend/modules/browser/lookups.py | 93 ++++++++++++------------- backend/modules/browser/test_lookups.py | 59 ++++++++++++++-- 2 files changed, 97 insertions(+), 55 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index f2754409b..c2ef98043 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -1,5 +1,4 @@ import re - import db #from .utils import METRICS, AF_BUCKETS, get_xpos, xpos_to_pos, add_consequence_to_variants, add_consequence_to_variant @@ -10,7 +9,7 @@ def get_gene(gene_id): """ Retrieve gene by gene id Args: - gene_id: the id of the gene + gene_id (str): the id of the gene Returns: dict: values for the gene; empty if not found """ @@ -25,7 +24,7 @@ def get_gene_by_name(gene_name): Retrieve gene by gene_name. First checks gene_name, then other_names. Args: - gene_name: the id of the gene + gene_name (str): the id of the gene Returns: dict: values for the gene; empty if not found """ @@ -45,7 +44,7 @@ def get_transcript(transcript_id): Retrieve transcript by transcript id Also includes exons as ['exons'] Args: - transcript_id: the id of the transcript + transcript_id (str): the id of the transcript Returns: dict: values for the transcript, including exons; empty if not found """ @@ -63,54 +62,50 @@ def get_transcript(transcript_id): return transcript -def get_raw_variant(db, xpos, ref, alt, get_id=False): - return db.variants.find_one({'xpos': xpos, 'ref': ref, 'alt': alt}, projection={'_id': get_id}) - - -def get_variant(db, sdb, xpos, ref, alt): - variant = get_raw_variant(db, xpos, ref, alt, False) - if variant is None or 'rsid' not in variant: - return variant - if variant['rsid'] == '.' or variant['rsid'] is None: - rsid = sdb.dbsnp.find_one({'xpos': xpos}) - if rsid: - variant['rsid'] = 'rs%s' % rsid['rsid'] - return variant - - -def add_rsid_to_variant(sdb, variant): - if variant['rsid'] == '.' 
or variant['rsid'] is None: - rsid = sdb.dbsnp.find_one({'xpos': variant['xpos']}) - if rsid: - variant['rsid'] = 'rs%s' % rsid['rsid'] - - -def get_variants_by_rsid(db, rsid): - if not rsid.startswith('rs'): - return None +def get_raw_variant(pos, chrom, ref, alt): + """ + Retrieve variant by position and change + Args: + pos (int): position of the variant + chrom (str): name of the chromosome + ref (str): reference sequence + ref (str): variant sequence + Returns: + dict: values for the variant; empty if not found + """ try: - int(rsid.lstrip('rs')) - except ValueError: - return None - variants = list(db.variants.find({'rsid': rsid}, projection={'_id': False})) - add_consequence_to_variants(variants) - return variants + return db.Variant.select().where(db.Variant.pos == pos, + db.Variant.ref == ref, + db.Variant.alt == alt, + db.Variant.chrom == chrom).dicts().get() + except db.Variant.DoesNotExist: + return {} -def get_variants_from_dbsnp(db,sdb, rsid): - if not rsid.startswith('rs'): - return None - try: - rsid = int(rsid.lstrip('rs')) - except ValueError: - return None - position = sdb.dbsnp.find_one({'rsid': rsid}) - if position: - variants = list(db.variants.find({'xpos': {'$lte': position['xpos'], '$gte': position['xpos']}}, projection={'_id': False})) - if variants: - add_consequence_to_variants(variants) - return variants - return [] +def get_variant(pos, chrom, ref, alt): + """ + Retrieve variant by position and change + Retrieves rsid from db (if available) if not present in variant + Args: + pos (int): position of the variant + chrom (str): name of the chromosome + ref (str): reference sequence + ref (str): variant sequence + Returns: + dict: values for the variant; empty if not found + """ + try: + variant = get_raw_variant(pos, chrom, ref, alt) + if not variant or 'rsid' not in variant: + return variant + if variant['rsid'] == '.' 
or variant['rsid'] is None: + rsid = db.dbsnp.select().where(db.snp.pos==pos, + db.snp.chrom==chrom).dicts().get() + if rsid: + variant['rsid'] = 'rs{}'.format(rsid['rsid']) + return variant + except db.Variant.DoesNotExist: + return {} def get_coverage_for_bases(db, xstart, xstop=None): diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index b4484cb6b..e1cf5f83c 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -1,6 +1,14 @@ +''' +Tests for the functions available in lookups.py +''' + import lookups def test_get_gene(): + ''' + Test get_gene() + ''' + # normal entry expected = {'id': 1, 'reference_set': 1, 'gene_id': 'ENSG00000223972', @@ -21,12 +29,17 @@ def test_get_gene(): assert result['chrom'] == expected['chrom'] assert result['start'] == expected['start_pos'] assert result['strand'] == expected['strand'] - + + # non-existing result = lookups.get_gene('NOT_A_GENE') assert not result def test_get_gene_by_name(): + ''' + Test get_gene_by_name() + ''' + # normal entry expected = {'id': 1, 'reference_set': 1, 'gene_id': 'ENSG00000223972', @@ -51,14 +64,21 @@ def test_get_gene_by_name(): # result = lookups.get_gene_by_name('NOT_A_GENE') # assert not result # NOC2L -# result = lookups.get_gene_by_name('NOC2L') -# result = lookups.get_gene_by_name('NIR') -# print(result) -# assert False -# result = lookups.get_gene_by_name('Z') + result = lookups.get_gene_by_name('NOC2L') + assert result['gene_id'] == 'ENSG00000188976' + result = lookups.get_gene_by_name('NIR') + result = lookups.get_gene_by_name('Z') + # non-existing + assert not lookups.get_gene_by_name('INCORRECT') + + def test_get_transcript(): + ''' + Test get_transcript() + ''' + # normal entry expected = {'id': 5, 'transcript_id': 'ENST00000438504', 'gene': '2', @@ -92,10 +112,37 @@ def test_get_transcript(): assert result['strand'] == expected['strand'] assert result['exons'] == exp_exon + # non-existing assert not lookups.get_transcript('INCORRECT') +def test_get_raw_variant(): + ''' + Test get_raw_variant + ''' + result = lookups.get_raw_variant(55500283, '1', 'A', 'T') + assert result['genes'] == ['ENSG00000169174'] + assert result['transcripts'] == ['ENST00000302118'] + assert not lookups.get_raw_variant(55500281, '1', 'A', 'T') + + +def test_get_variant(): + ''' + Test get_variant() + ''' + result = lookups.get_variant(55500283, '1', 'A', 'T') + assert result['genes'] == ['ENSG00000169174'] + assert result['transcripts'] == ['ENST00000302118'] + assert result['rsid'] == [75050571] + # need to add test for entry with missing rsid + # too slow query atm + assert not lookups.get_variant(55500281, '1', 'A', 'T') + + def test_get_exons_in_transcript(): + ''' + Test get_exons_in_transcript() + ''' result = lookups.get_exons_in_transcript(5) expected = [{'id': 28, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14364, 'stop': 14830, 'strand': '-', 'feature_type': 'exon'}, {'id': 27, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14971, 'stop': 15039, 'strand': '-', 'feature_type': 'exon'}, From 1ba5c04106e92562dffab547a041d3194e8b92eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 7 Jan 2019 14:44:40 +0100 Subject: [PATCH 111/360] coverage --- backend/modules/browser/lookups.py | 97 +++++++++++-------------- backend/modules/browser/test_lookups.py | 25 ++++++- 2 files changed, 67 insertions(+), 55 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 
c2ef98043..ea37ab058 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -5,6 +5,44 @@ SEARCH_LIMIT = 10000 + +def get_coverage_for_bases(chrom, start_pos, stop_pos=None): + """ + Get the coverage for the list of bases given by start_pos->xstop_pos, inclusive + Args: + chrom (str): chromosome + start_pos (int): first position of interest + end_pos (int): last position of interest; if None it will be set to start_pos + Returns: + list: coverage dicts for the region of interest + """ + if stop_pos is None: + stop_pos = start_pos + + return [values for values in db.Coverage.select().where((db.Coverage.pos >= start_pos) & + (db.Coverage.pos <= stop_pos) & + (db.Coverage.chrom == chrom)).dicts()] + + +def get_coverage_for_transcript(chrom, start_pos, stop_pos=None): + """ + Get the coverage for the list of bases given by start_pos->xstop_pos, inclusive + Args: + chrom (str): chromosome + start_pos (int): first position of interest + end_pos (int): last position of interest; if None it will be set to start_pos + Returns: + list: coverage dicts for the region of interest + """ + coverage_array = get_coverage_for_bases(db, xstart, xstop) + # only return coverages that have coverage (if that makes any sense?) + # return coverage_array + covered = [c for c in coverage_array if c['has_coverage']] + for c in covered: + del c['has_coverage'] + return covered + + def get_gene(gene_id): """ Retrieve gene by gene id @@ -74,10 +112,10 @@ def get_raw_variant(pos, chrom, ref, alt): dict: values for the variant; empty if not found """ try: - return db.Variant.select().where(db.Variant.pos == pos, - db.Variant.ref == ref, - db.Variant.alt == alt, - db.Variant.chrom == chrom).dicts().get() + return db.Variant.select().where((db.Variant.pos == pos) & + (db.Variant.ref == ref) & + (db.Variant.alt == alt) & + (db.Variant.chrom == chrom)).dicts().get() except db.Variant.DoesNotExist: return {} @@ -99,8 +137,8 @@ def get_variant(pos, chrom, ref, alt): if not variant or 'rsid' not in variant: return variant if variant['rsid'] == '.' or variant['rsid'] is None: - rsid = db.dbsnp.select().where(db.snp.pos==pos, - db.snp.chrom==chrom).dicts().get() + rsid = db.dbsnp.select().where((db.snp.pos==pos) & + (db.snp.chrom==chrom)).dicts().get() if rsid: variant['rsid'] = 'rs{}'.format(rsid['rsid']) return variant @@ -108,53 +146,6 @@ def get_variant(pos, chrom, ref, alt): return {} -def get_coverage_for_bases(db, xstart, xstop=None): - """ - Get the coverage for the list of bases given by xstart->xstop, inclusive - Returns list of coverage dicts - xstop can be None if just one base, but you'll still get back a list - """ - if xstop is None: - xstop = xstart - - coverages = { - doc['xpos']: doc for doc in db.base_coverage.find( - {'xpos': {'$gte': xstart, '$lte': xstop}}, - projection={'_id': False} - ) - } - ret = [] - # We only store every 10'th base in the db, so we have to make the checks - # only then. - for i in range(xstart-xstart%10, xstop+1, 10): - if i in coverages: - ret.append(coverages[i]) - else: - ret.append({'xpos': i, 'pos': xpos_to_pos(i)}) - for item in ret: - item['has_coverage'] = 'mean' in item - del item['xpos'] - return ret - - -def get_coverage_for_transcript(db, xstart, xstop=None): - """ - - :param db: - :param genomic_coord_to_exon: - :param xstart: - :param xstop: - :return: - """ - coverage_array = get_coverage_for_bases(db, xstart, xstop) - # only return coverages that have coverage (if that makes any sense?) 
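+    # (with the postgres schema only positions that actually have coverage
+    #  are stored, so most of this filtering is likely to become unnecessary)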
- # return coverage_array - covered = [c for c in coverage_array if c['has_coverage']] - for c in covered: - del c['has_coverage'] - return covered - - def get_constraint_for_transcript(db, transcript): return db.constraint.find_one({'transcript': transcript}, projection={'_id': False}) diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index e1cf5f83c..303eaa478 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -4,6 +4,27 @@ import lookups + +def test_get_coverage_for_bases(): + ''' + Test get_coverage_for_bases() + ''' + coverage = lookups.get_coverage_for_bases('1', 55500283, 55500320) + expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', + 'pos': 55500290, 'mean': 40.66, 'median': 39.0, + 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]}, + {'id': 5474063, 'dataset_version': 4, 'chrom': '1', + 'pos': 55500300, 'mean': 40.7, 'median': 39.0, + 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.971, 0.878, 0.132, 0.001]}, + {'id': 5474064, 'dataset_version': 4, 'chrom': '1', + 'pos': 55500310, 'mean': 40.35, 'median': 39.0, + 'coverage': [1.0, 1.0, 1.0, 1.0, 0.995, 0.974, 0.859, 0.138, 0.001]}, + {'id': 5474065, 'dataset_version': 4, 'chrom': '1', + 'pos': 55500320, 'mean': 39.69, 'median': 38.0, + 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.961, 0.856, 0.117, 0.001]}] + assert coverage == expected + + def test_get_gene(): ''' Test get_gene() @@ -133,11 +154,11 @@ def test_get_variant(): result = lookups.get_variant(55500283, '1', 'A', 'T') assert result['genes'] == ['ENSG00000169174'] assert result['transcripts'] == ['ENST00000302118'] - assert result['rsid'] == [75050571] + assert result['rsid'] == 75050571 # need to add test for entry with missing rsid # too slow query atm assert not lookups.get_variant(55500281, '1', 'A', 'T') - + def test_get_exons_in_transcript(): ''' From 5313802674841490d0fb99575727fdd102d01e81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 7 Jan 2019 14:49:16 +0100 Subject: [PATCH 112/360] potentially no longer needed function added --- backend/modules/browser/lookups.py | 6 +++--- backend/modules/browser/test_lookups.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index ea37ab058..afea7a353 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -34,12 +34,12 @@ def get_coverage_for_transcript(chrom, start_pos, stop_pos=None): Returns: list: coverage dicts for the region of interest """ + # Is this function no longer relevant with postgres? + # Only entries with reported cov are in database coverage_array = get_coverage_for_bases(db, xstart, xstop) # only return coverages that have coverage (if that makes any sense?) 
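     # (each remaining entry is a dict like the rows asserted in
     #  test_get_coverage_for_bases: {'pos': ..., 'mean': ..., 'median': ..., 'coverage': [...]})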
# return coverage_array - covered = [c for c in coverage_array if c['has_coverage']] - for c in covered: - del c['has_coverage'] + covered = [c for c in coverage_array if c['mean']] return covered diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index 303eaa478..c2fbf7870 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -9,7 +9,7 @@ def test_get_coverage_for_bases(): ''' Test get_coverage_for_bases() ''' - coverage = lookups.get_coverage_for_bases('1', 55500283, 55500320) + # coverage = lookups.get_coverage_for_bases('1', 55500283, 55500320) expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', 'pos': 55500290, 'mean': 40.66, 'median': 39.0, 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]}, From 43bbf71522e5d8e4d8747e3c1af2bfa546316e09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 7 Jan 2019 16:23:05 +0100 Subject: [PATCH 113/360] in progress for variants_from_transcript --- backend/modules/browser/lookups.py | 76 ++++++++++++------------- backend/modules/browser/test_lookups.py | 72 +++++++++++++++-------- 2 files changed, 83 insertions(+), 65 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index afea7a353..fa5f05b13 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -36,13 +36,24 @@ def get_coverage_for_transcript(chrom, start_pos, stop_pos=None): """ # Is this function no longer relevant with postgres? # Only entries with reported cov are in database - coverage_array = get_coverage_for_bases(db, xstart, xstop) + coverage_array = get_coverage_for_bases(chrom, start_pos, stop_pos) # only return coverages that have coverage (if that makes any sense?) # return coverage_array covered = [c for c in coverage_array if c['mean']] return covered +def get_exons_in_transcript(transcript_dbid): + """ + Retrieve exons associated with the given transcript id + Args: + transcript_dbid: the id of the transcript in the database (Transcript.id; not transcript_id) + Returns: + list: dicts with values for each exon sorted by start position + """ + return sorted(list(db.Feature.select().where(db.Feature.transcript==transcript_dbid).dicts()), key=lambda k: k['start']) + + def get_gene(gene_id): """ Retrieve gene by gene id @@ -93,12 +104,6 @@ def get_transcript(transcript_id): except db.Transcript.DoesNotExist: return {} - transcript = sdb.transcripts.find_one({'transcript_id': transcript_id}, projection={'_id': False}) - if not transcript: - return None - transcript['exons'] = get_exons_in_transcript(sdb, transcript_id) - return transcript - def get_raw_variant(pos, chrom, ref, alt): """ @@ -132,13 +137,13 @@ def get_variant(pos, chrom, ref, alt): Returns: dict: values for the variant; empty if not found """ - try: + try: variant = get_raw_variant(pos, chrom, ref, alt) if not variant or 'rsid' not in variant: return variant if variant['rsid'] == '.' 
or variant['rsid'] is None: - rsid = db.dbsnp.select().where((db.snp.pos==pos) & - (db.snp.chrom==chrom)).dicts().get() + rsid = db.DbSNP.select().where((db.DbSNP.pos==pos) & + (db.DbSNP.chrom==chrom)).dicts().get() if rsid: variant['rsid'] = 'rs{}'.format(rsid['rsid']) return variant @@ -146,16 +151,27 @@ def get_variant(pos, chrom, ref, alt): return {} -def get_constraint_for_transcript(db, transcript): - return db.constraint.find_one({'transcript': transcript}, projection={'_id': False}) - - -def get_exons_cnvs(db, transcript_name): - return list(db.cnvs.find({'transcript': transcript_name}, projection={'_id': False})) - +def get_variants_in_transcript(transcript_id): + """ + Retrieve variants inside a transcript + Args: + pos (int): position of the variant + chrom (str): name of the chromosome + ref (str): reference sequence + ref (str): variant sequence + Returns: + dict: values for the variant; empty if not found + """ + variants = [] + for variant in db.Variant.select().where(db.Variant.transcripts.contains(transcript_id)).dicts(): + variants.append(variant) + return variants + variant['vep_annotations'] = [x for x in variant['vep_annotations'] if x['Feature'] == transcript_id] + add_consequence_to_variant(variant) + remove_extraneous_information(variant) + variants.append(variant) + return variants -def get_cnvs(db, gene_name): - return list(db.cnvgenes.find({'gene': gene_name}, projection={'_id': False})) REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') @@ -316,25 +332,3 @@ def get_number_of_variants_in_transcript(db, transcript_id): total = db.variants.count({'transcripts': transcript_id}) filtered = db.variants.count({'transcripts': transcript_id, 'filter': 'PASS'}) return {'filtered': filtered, 'total': total} - - -def get_variants_in_transcript(db, sdb, transcript_id): - variants = [] - for variant in db.variants.find({'transcripts': transcript_id}, projection={'_id': False}): - variant['vep_annotations'] = [x for x in variant['vep_annotations'] if x['Feature'] == transcript_id] - add_rsid_to_variant(sdb, variant) - add_consequence_to_variant(variant) - remove_extraneous_information(variant) - variants.append(variant) - return variants - - -def get_exons_in_transcript(transcript_dbid): - """ - Retrieve exons associated with the given transcript id - Args: - transcript_dbid: the id of the transcript in the database (Transcript.id; not transcript_id) - Returns: - list: dicts with values for each exon sorted by start position - """ - return sorted(list(db.Feature.select().where(db.Feature.transcript==transcript_dbid).dicts()), key=lambda k: k['start']) diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index c2fbf7870..72b400b59 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -9,7 +9,7 @@ def test_get_coverage_for_bases(): ''' Test get_coverage_for_bases() ''' - # coverage = lookups.get_coverage_for_bases('1', 55500283, 55500320) + coverage = lookups.get_coverage_for_bases('1', 55500283, 55500320) expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', 'pos': 55500290, 'mean': 40.66, 'median': 39.0, 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]}, @@ -25,6 +25,44 @@ def test_get_coverage_for_bases(): assert coverage == expected +def test_get_coverage_for_transcript(): + coverage = lookups.get_coverage_for_transcript('1', 55500283, 55500320) + expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', + 'pos': 
55500290, 'mean': 40.66, 'median': 39.0, + 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]}, + {'id': 5474063, 'dataset_version': 4, 'chrom': '1', + 'pos': 55500300, 'mean': 40.7, 'median': 39.0, + 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.971, 0.878, 0.132, 0.001]}, + {'id': 5474064, 'dataset_version': 4, 'chrom': '1', + 'pos': 55500310, 'mean': 40.35, 'median': 39.0, + 'coverage': [1.0, 1.0, 1.0, 1.0, 0.995, 0.974, 0.859, 0.138, 0.001]}, + {'id': 5474065, 'dataset_version': 4, 'chrom': '1', + 'pos': 55500320, 'mean': 39.69, 'median': 38.0, + 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.961, 0.856, 0.117, 0.001]}] + assert coverage == expected + + +def test_get_exons_in_transcript(): + ''' + Test get_exons_in_transcript() + ''' + result = lookups.get_exons_in_transcript(5) + expected = [{'id': 28, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14364, 'stop': 14830, 'strand': '-', 'feature_type': 'exon'}, + {'id': 27, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14971, 'stop': 15039, 'strand': '-', 'feature_type': 'exon'}, + {'id': 26, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 15797, 'stop': 15902, 'strand': '-', 'feature_type': 'exon'}, + {'id': 25, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 15905, 'stop': 15948, 'strand': '-', 'feature_type': 'exon'}, + {'id': 24, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 16608, 'stop': 16766, 'strand': '-', 'feature_type': 'exon'}, + {'id': 23, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 16855, 'stop': 17056, 'strand': '-', 'feature_type': 'exon'}, + {'id': 22, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17234, 'stop': 17365, 'strand': '-', 'feature_type': 'exon'}, + {'id': 21, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17603, 'stop': 17743, 'strand': '-', 'feature_type': 'exon'}, + {'id': 20, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17916, 'stop': 18062, 'strand': '-', 'feature_type': 'exon'}, + {'id': 19, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 18269, 'stop': 18380, 'strand': '-', 'feature_type': 'exon'}, + {'id': 18, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 24739, 'stop': 24892, 'strand': '-', 'feature_type': 'exon'}, + {'id': 17, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 29322, 'stop': 29371, 'strand': '-', 'feature_type': 'exon'}] + print(result) + assert result == expected + + def test_get_gene(): ''' Test get_gene() @@ -93,7 +131,6 @@ def test_get_gene_by_name(): # non-existing assert not lookups.get_gene_by_name('INCORRECT') - def test_get_transcript(): ''' @@ -145,7 +182,7 @@ def test_get_raw_variant(): assert result['genes'] == ['ENSG00000169174'] assert result['transcripts'] == ['ENST00000302118'] assert not lookups.get_raw_variant(55500281, '1', 'A', 'T') - + def test_get_variant(): ''' @@ -157,26 +194,13 @@ def test_get_variant(): assert result['rsid'] == 75050571 # need to add test for entry with missing rsid # too slow query atm - assert not lookups.get_variant(55500281, '1', 'A', 'T') - - -def test_get_exons_in_transcript(): + assert not lookups.get_variant(-1, '1', 'A', 'T') + + +def test_get_variants_in_transcript(): ''' - Test get_exons_in_transcript() + Test get_variants_in_transcript() ''' - result = lookups.get_exons_in_transcript(5) - expected = [{'id': 28, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14364, 'stop': 14830, 'strand': '-', 'feature_type': 'exon'}, - {'id': 27, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14971, 'stop': 15039, 'strand': '-', 'feature_type': 'exon'}, - {'id': 26, 'gene': 2, 
'transcript': 5, 'chrom': '1', 'start': 15797, 'stop': 15902, 'strand': '-', 'feature_type': 'exon'}, - {'id': 25, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 15905, 'stop': 15948, 'strand': '-', 'feature_type': 'exon'}, - {'id': 24, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 16608, 'stop': 16766, 'strand': '-', 'feature_type': 'exon'}, - {'id': 23, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 16855, 'stop': 17056, 'strand': '-', 'feature_type': 'exon'}, - {'id': 22, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17234, 'stop': 17365, 'strand': '-', 'feature_type': 'exon'}, - {'id': 21, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17603, 'stop': 17743, 'strand': '-', 'feature_type': 'exon'}, - {'id': 20, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17916, 'stop': 18062, 'strand': '-', 'feature_type': 'exon'}, - {'id': 19, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 18269, 'stop': 18380, 'strand': '-', 'feature_type': 'exon'}, - {'id': 18, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 24739, 'stop': 24892, 'strand': '-', 'feature_type': 'exon'}, - {'id': 17, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 29322, 'stop': 29371, 'strand': '-', 'feature_type': 'exon'}] - print(result) - assert result == expected - + res = lookups.get_variants_in_transcript('ENST00000302118') + + assert False From 989d860181ae17c580495d8137601b325400cfc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 8 Jan 2019 10:10:12 +0100 Subject: [PATCH 114/360] a bit of test fixing --- backend/modules/browser/test_lookups.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index 72b400b59..36d6c15a8 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -119,18 +119,16 @@ def test_get_gene_by_name(): assert result['start'] == expected['start_pos'] assert result['strand'] == expected['strand'] - # crashing with other_names.contains() -# result = lookups.get_gene_by_name('NOT_A_GENE') -# assert not result - # NOC2L + # non-exist + result = lookups.get_gene_by_name('NOT_A_GENE') + assert not result + + # waiting for fixed db result = lookups.get_gene_by_name('NOC2L') assert result['gene_id'] == 'ENSG00000188976' result = lookups.get_gene_by_name('NIR') result = lookups.get_gene_by_name('Z') - # non-existing - assert not lookups.get_gene_by_name('INCORRECT') - def test_get_transcript(): ''' @@ -202,5 +200,4 @@ def test_get_variants_in_transcript(): Test get_variants_in_transcript() ''' res = lookups.get_variants_in_transcript('ENST00000302118') - - assert False + assert len(res) == 426 From dd0b5a580e2dad9c631031092f0e9368bc080849 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 8 Jan 2019 16:10:50 +0100 Subject: [PATCH 115/360] multiple updates to tests and code --- backend/modules/browser/lookups.py | 309 +++++++++++++----------- backend/modules/browser/test_lookups.py | 134 ++++++---- 2 files changed, 252 insertions(+), 191 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index fa5f05b13..30ec43b03 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -1,12 +1,91 @@ import re import db +import logging #from .utils import METRICS, AF_BUCKETS, get_xpos, xpos_to_pos, add_consequence_to_variants, add_consequence_to_variant SEARCH_LIMIT = 10000 -def get_coverage_for_bases(chrom, 
start_pos, stop_pos=None): +REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') + +def get_awesomebar_result(dataset, query): + """ + Similar to the above, but this is after a user types enter + We need to figure out what they meant - could be gene, variant, region + + Where datatype is one of 'gene', 'variant', or 'region' + And identifier is one of: + - ensembl ID for gene + - variant ID string for variant (eg. 1-1000-A-T) + - region ID string for region (eg. 1-1000-2000) + + Follow these steps: + - if query is an ensembl ID, return it + - if a gene symbol, return that gene's ensembl ID + - if an RSID, return that variant's string + + Finally, note that we don't return the whole object here - only it's identifier. + This could be important for performance later + + Args: + dataset (str): short name of dataset + query (str): the search query + Returns: + tuple: (datatype, identifier) + """ + query = query.strip() + + # Parse Variant types + variant = get_variants_by_rsid(db, query.lower()) + if not variant: + variant = get_variants_from_dbsnp(db,sdb, query.lower()) + + if variant: + if len(variant) == 1: + retval = ('variant', variant[0]['variant_id']) + else: + retval = ('dbsnp_variant_set', variant[0]['rsid']) + return retval + + gene = get_gene_by_name(sdb, query) + # From here out, all should be uppercase (gene, tx, region, variant_id) + query = query.upper() + if not gene: + gene = get_gene_by_name(sdb, query) + if gene: + return 'gene', gene['gene_id'] + + # Ensembl formatted queries + if query.startswith('ENS'): + # Gene + gene = get_gene(sdb, query) + if gene: + return 'gene', gene['gene_id'] + + # Transcript + transcript = get_transcript(sdb, query) + if transcript: + return 'transcript', transcript['transcript_id'] + + # Region and variant queries + query = query[3:] if query.startswith('CHR') else query + + match = REGION_REGEX.match(query) + if match: + target = match.group(0) + target_type = 'region' + if match.group(2) == ":": + target = target.replace(":","-") + if match.group(5) and set(match.group(4)).issubset(set("ACGT")): + target_type = 'variant' + + return target_type, target + + return 'not_found', query + + +def get_coverage_for_bases(dataset, chrom, start_pos, stop_pos=None): """ Get the coverage for the list of bases given by start_pos->xstop_pos, inclusive Args: @@ -16,12 +95,16 @@ def get_coverage_for_bases(chrom, start_pos, stop_pos=None): Returns: list: coverage dicts for the region of interest """ - if stop_pos is None: - stop_pos = start_pos + dataset_version = db.get_dataset_version(dataset) + print(dataset_version) + return dict(dataset_version) +# if stop_pos is None: +# stop_pos = start_pos - return [values for values in db.Coverage.select().where((db.Coverage.pos >= start_pos) & - (db.Coverage.pos <= stop_pos) & - (db.Coverage.chrom == chrom)).dicts()] +# return [values for values in db.Coverage.select().where((db.Coverage.pos >= start_pos) & +# (db.Coverage.pos <= stop_pos) & +# (db.Coverage.chrom == chrom) & +# (db.Coverage.data)).dicts()] def get_coverage_for_transcript(chrom, start_pos, stop_pos=None): @@ -34,7 +117,7 @@ def get_coverage_for_transcript(chrom, start_pos, stop_pos=None): Returns: list: coverage dicts for the region of interest """ - # Is this function no longer relevant with postgres? + # Is this function still relevant with postgres? 
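     # NOTE: get_coverage_for_bases above now takes dataset as its first
     # argument, so the call below will likely need the same parameter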
# Only entries with reported cov are in database coverage_array = get_coverage_for_bases(chrom, start_pos, stop_pos) # only return coverages that have coverage (if that makes any sense?) @@ -51,24 +134,31 @@ def get_exons_in_transcript(transcript_dbid): Returns: list: dicts with values for each exon sorted by start position """ - return sorted(list(db.Feature.select().where(db.Feature.transcript==transcript_dbid).dicts()), key=lambda k: k['start']) + return sorted(list(db.Feature.select().where((db.Feature.transcript==transcript_dbid) & + (db.Feature.feature_type=='exon')).dicts()), + key=lambda k: k['start']) -def get_gene(gene_id): +def get_gene(dataset, gene_id): """ Retrieve gene by gene id Args: + dataset (str): short name of the dataset gene_id (str): the id of the gene Returns: dict: values for the gene; empty if not found """ + ref_dbid = db.get_reference_dbid_dataset(dataset) + if not ref_dbid: + return {} try: - return db.Gene.select().where(db.Gene.gene_id==gene_id).dicts().get() + return db.Gene.select().where((db.Gene.gene_id == gene_id) & + (db.Gene.reference_set == ref_dbid)).dicts().get() except db.Gene.DoesNotExist: return {} -def get_gene_by_name(gene_name): +def get_gene_by_name(dataset, gene_name): """ Retrieve gene by gene_name. First checks gene_name, then other_names. @@ -77,17 +167,42 @@ def get_gene_by_name(gene_name): Returns: dict: values for the gene; empty if not found """ - # try gene_name field first + ref_dbid = db.get_reference_dbid_dataset(dataset) + if not ref_dbid: + return {} try: return db.Gene.select().where(db.Gene.name==gene_name).dicts().get() except db.Gene.DoesNotExist: try: - # troubles with KeyError return db.Gene.select().where(db.Gene.other_names.contains(gene_name)).dicts().get() except db.Gene.DoesNotExist: return {} +def get_genes_in_region(chrom, start_pos, stop_pos): + """ + Retrieve genes located within a region + Args: + chrom (str): chromosome name + start_pos (int): start of region + stop_pos (int): end of region + Returns: + dict: values for the gene; empty if not found + """ + gene_query = db.Gene.select().where((((db.Gene.start >= start_pos) & + (db.Gene.start <= stop_pos)) | + ((db.Gene.stop >= start_pos) & + (db.Gene.stop <= stop_pos))) & + (db.Gene.chrom == chrom)).dicts() + return [gene for gene in gene_query] + + +def get_number_of_variants_in_transcript(db, transcript_id): + total = db.variants.count({'transcripts': transcript_id}) + filtered = db.variants.count({'transcripts': transcript_id, 'filter': 'PASS'}) + return {'filtered': filtered, 'total': total} + + def get_transcript(transcript_id): """ Retrieve transcript by transcript id @@ -125,6 +240,21 @@ def get_raw_variant(pos, chrom, ref, alt): return {} +def get_transcripts_in_gene(dataset, gene_id): + """ + Get the transcripts associated with a gene + Args: + dataset (str): short name of the reference set + gene_id (str): id of the gene + Returns: + list: transcripts (dict) associated with the gene + """ + ref_dbid = db.get_reference_dbid_dataset(dataset) + gene = db.Gene.select().where((db.Gene.reference_set == ref_dbid) & + (db.Gene.gene_id == gene_id)).dicts().get() + return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene['id']).dicts()] + + def get_variant(pos, chrom, ref, alt): """ Retrieve variant by position and change @@ -151,6 +281,26 @@ def get_variant(pos, chrom, ref, alt): return {} +def get_variants_in_gene(dataset, gene_id): + """ + Retrieve variants present inside a gene + Args: + dataset: short name of the dataset 
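+            (used to pick the matching reference set via get_reference_dbid_dataset)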
+ gene_id (str): id of the gene + Returns: + list: values for the variants + """ + ref_dbid = db.get_reference_dbid_dataset(dataset) +# db.Variant.select().where(db.Variant.gene.contains(re + variants = [] + for variant in db.variants.find({'genes': gene_id}, projection={'_id': False}): + variant['vep_annotations'] = [x for x in variant['vep_annotations'] if x['Gene'] == gene_id] + add_consequence_to_variant(variant) + remove_extraneous_information(variant) + variants.append(variant) + return variants + + def get_variants_in_transcript(transcript_id): """ Retrieve variants inside a transcript @@ -173,95 +323,7 @@ def get_variants_in_transcript(transcript_id): return variants -REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') - -def get_awesomebar_result(db,sdb, query): - """ - Similar to the above, but this is after a user types enter - We need to figure out what they meant - could be gene, variant, region - - Return tuple of (datatype, identifier) - Where datatype is one of 'gene', 'variant', or 'region' - And identifier is one of: - - ensembl ID for gene - - variant ID string for variant (eg. 1-1000-A-T) - - region ID string for region (eg. 1-1000-2000) - - Follow these steps: - - if query is an ensembl ID, return it - - if a gene symbol, return that gene's ensembl ID - - if an RSID, return that variant's string - - - Finally, note that we don't return the whole object here - only it's identifier. - This could be important for performance later - - """ - query = query.strip() - - # Parse Variant types - variant = get_variants_by_rsid(db, query.lower()) - if not variant: - variant = get_variants_from_dbsnp(db,sdb, query.lower()) - - if variant: - if len(variant) == 1: - retval = ('variant', variant[0]['variant_id']) - else: - retval = ('dbsnp_variant_set', variant[0]['rsid']) - return retval - - gene = get_gene_by_name(sdb, query) - # From here out, all should be uppercase (gene, tx, region, variant_id) - query = query.upper() - if not gene: - gene = get_gene_by_name(sdb, query) - if gene: - return 'gene', gene['gene_id'] - - # Ensembl formatted queries - if query.startswith('ENS'): - # Gene - gene = get_gene(sdb, query) - if gene: - return 'gene', gene['gene_id'] - - # Transcript - transcript = get_transcript(sdb, query) - if transcript: - return 'transcript', transcript['transcript_id'] - - # Region and variant queries - query = query[3:] if query.startswith('CHR') else query - - match = REGION_REGEX.match(query) - if match: - target = match.group(0) - target_type = 'region' - if match.group(2) == ":": - target = target.replace(":","-") - if match.group(5) and set(match.group(4)).issubset(set("ACGT")): - target_type = 'variant' - - return target_type, target - - return 'not_found', query - - -def get_genes_in_region(sdb, chrom, start, stop): - """ - Genes that overlap a region - """ - xstart = get_xpos(chrom, start) - xstop = get_xpos(chrom, stop) - genes = sdb.genes.find({ - 'xstart': {'$lte': xstop}, - 'xstop': {'$gte': xstart}, - }, projection={'_id': False}) - return list(genes) - - -def get_variants_in_region(db, sdb, chrom, start, stop): +def get_variants_in_region(db, chrom, start, stop): """ Variants that overlap a region Unclear if this will include CNVs @@ -278,28 +340,6 @@ def get_variants_in_region(db, sdb, chrom, start, stop): return list(variants) -def get_metrics(db, variant): - if 'allele_count' not in variant or variant['allele_num'] == 0: - return None - metrics = {} - for metric in METRICS: - metrics[metric] = 
db.metrics.find_one({'metric': metric}, projection={'_id': False}) - - metric = None - if variant['allele_count'] == 1: - metric = 'singleton' - elif variant['allele_count'] == 2: - metric = 'doubleton' - else: - for af in AF_BUCKETS: - if float(variant['allele_count'])/variant['allele_num'] < af: - metric = af - break - if metric is not None: - metrics['Site Quality'] = db.metrics.find_one({'metric': 'binned_%s' % metric}, projection={'_id': False}) - return metrics - - def remove_extraneous_information(variant): #del variant['genotype_depths'] #del variant['genotype_qualities'] @@ -311,24 +351,3 @@ def remove_extraneous_information(variant): del variant['xstop'] del variant['site_quality'] del variant['vep_annotations'] - - -def get_variants_in_gene(db, sdb, gene_id): - variants = [] - for variant in db.variants.find({'genes': gene_id}, projection={'_id': False}): - variant['vep_annotations'] = [x for x in variant['vep_annotations'] if x['Gene'] == gene_id] - add_rsid_to_variant(sdb, variant) - add_consequence_to_variant(variant) - remove_extraneous_information(variant) - variants.append(variant) - return variants - - -def get_transcripts_in_gene(sdb, gene_id): - return list(sdb.transcripts.find({'gene_id': gene_id}, projection={'_id': False})) - - -def get_number_of_variants_in_transcript(db, transcript_id): - total = db.variants.count({'transcripts': transcript_id}) - filtered = db.variants.count({'transcripts': transcript_id, 'filter': 'PASS'}) - return {'filtered': filtered, 'total': total} diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index 36d6c15a8..4680bc849 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -1,15 +1,23 @@ -''' +""" Tests for the functions available in lookups.py -''' +""" import lookups +def test_get_awesomebar_result(): + """ + Test get_awesomebar_result() + """ + pass + + def test_get_coverage_for_bases(): - ''' + """ Test get_coverage_for_bases() - ''' - coverage = lookups.get_coverage_for_bases('1', 55500283, 55500320) + """ + coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500320) + print(type(coverage)) expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', 'pos': 55500290, 'mean': 40.66, 'median': 39.0, 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]}, @@ -26,7 +34,7 @@ def test_get_coverage_for_bases(): def test_get_coverage_for_transcript(): - coverage = lookups.get_coverage_for_transcript('1', 55500283, 55500320) + # coverage = lookups.get_coverage_for_transcript('1', 55500283, 55500320) expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', 'pos': 55500290, 'mean': 40.66, 'median': 39.0, 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]}, @@ -43,30 +51,33 @@ def test_get_coverage_for_transcript(): def test_get_exons_in_transcript(): - ''' + """ Test get_exons_in_transcript() - ''' - result = lookups.get_exons_in_transcript(5) - expected = [{'id': 28, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14364, 'stop': 14830, 'strand': '-', 'feature_type': 'exon'}, - {'id': 27, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14971, 'stop': 15039, 'strand': '-', 'feature_type': 'exon'}, - {'id': 26, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 15797, 'stop': 15902, 'strand': '-', 'feature_type': 'exon'}, - {'id': 25, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 15905, 'stop': 15948, 'strand': '-', 'feature_type': 'exon'}, - {'id': 24, 'gene': 2, 'transcript': 5, 
'chrom': '1', 'start': 16608, 'stop': 16766, 'strand': '-', 'feature_type': 'exon'}, - {'id': 23, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 16855, 'stop': 17056, 'strand': '-', 'feature_type': 'exon'}, - {'id': 22, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17234, 'stop': 17365, 'strand': '-', 'feature_type': 'exon'}, - {'id': 21, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17603, 'stop': 17743, 'strand': '-', 'feature_type': 'exon'}, - {'id': 20, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17916, 'stop': 18062, 'strand': '-', 'feature_type': 'exon'}, - {'id': 19, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 18269, 'stop': 18380, 'strand': '-', 'feature_type': 'exon'}, - {'id': 18, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 24739, 'stop': 24892, 'strand': '-', 'feature_type': 'exon'}, - {'id': 17, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 29322, 'stop': 29371, 'strand': '-', 'feature_type': 'exon'}] - print(result) + """ + result = lookups.get_exons_in_transcript(28186) + expected = [{'id': 326403, 'gene': 8600, 'transcript': 28186, 'chrom': '2', + 'start': 202047893, 'stop': 202048032, 'strand': '+', 'feature_type': 'exon'}, + {'id': 326404, 'gene': 8600, 'transcript': 28186, 'chrom': '2', + 'start': 202050495, 'stop': 202050848, 'strand': '+', 'feature_type': 'exon'}, + {'id': 326406, 'gene': 8600, 'transcript': 28186, 'chrom': '2', + 'start': 202052430, 'stop': 202052523, 'strand': '+', 'feature_type': 'exon'}, + {'id': 326408, 'gene': 8600, 'transcript': 28186, 'chrom': '2', + 'start': 202057708, 'stop': 202057843, 'strand': '+', 'feature_type': 'exon'}, + {'id': 326410, 'gene': 8600, 'transcript': 28186, 'chrom': '2', + 'start': 202060566, 'stop': 202060672, 'strand': '+', 'feature_type': 'exon'}, + {'id': 326412, 'gene': 8600, 'transcript': 28186, 'chrom': '2', + 'start': 202072799, 'stop': 202072907, 'strand': '+', 'feature_type': 'exon'}, + {'id': 326414, 'gene': 8600, 'transcript': 28186, 'chrom': '2', + 'start': 202073794, 'stop': 202074286, 'strand': '+', 'feature_type': 'exon'}, + {'id': 326416, 'gene': 8600, 'transcript': 28186, 'chrom': '2', + 'start': 202082312, 'stop': 202084804, 'strand': '+', 'feature_type': 'exon'}] assert result == expected def test_get_gene(): - ''' + """ Test get_gene() - ''' + """ # normal entry expected = {'id': 1, 'reference_set': 1, @@ -77,7 +88,7 @@ def test_get_gene(): 'chrom': '1', 'start_pos': 11870, 'strand': '+'} - result = lookups.get_gene('ENSG00000223972') + result = lookups.get_gene('SweGen', 'ENSG00000223972') print(result) assert result['id'] == expected['id'] assert result['reference_set'] == expected['reference_set'] @@ -89,15 +100,19 @@ def test_get_gene(): assert result['start'] == expected['start_pos'] assert result['strand'] == expected['strand'] - # non-existing - result = lookups.get_gene('NOT_A_GENE') + # non-existing gene + result = lookups.get_gene('SweGen', 'NOT_A_GENE') + assert not result + + # non-existing dataset + result = lookups.get_gene('NoDataset', 'ENSG00000223972') assert not result def test_get_gene_by_name(): - ''' + """ Test get_gene_by_name() - ''' + """ # normal entry expected = {'id': 1, 'reference_set': 1, @@ -108,7 +123,7 @@ def test_get_gene_by_name(): 'chrom': '1', 'start_pos': 11870, 'strand': '+'} - result = lookups.get_gene_by_name('DDX11L1') + result = lookups.get_gene_by_name('SweGen', 'DDX11L1') assert result['id'] == expected['id'] assert result['reference_set'] == expected['reference_set'] assert result['gene_id'] == expected['gene_id'] @@ 
-119,21 +134,33 @@ def test_get_gene_by_name(): assert result['start'] == expected['start_pos'] assert result['strand'] == expected['strand'] - # non-exist - result = lookups.get_gene_by_name('NOT_A_GENE') + # non-existing gene + result = lookups.get_gene_by_name('SweGen', 'NOT_A_GENE') + assert not result + + # non-existing dataset + result = lookups.get_gene_by_name('NoDataset', 'ENSG00000223972') assert not result - # waiting for fixed db - result = lookups.get_gene_by_name('NOC2L') + # name in other_names + result = lookups.get_gene_by_name('SweGen', 'NIR') assert result['gene_id'] == 'ENSG00000188976' - result = lookups.get_gene_by_name('NIR') - result = lookups.get_gene_by_name('Z') + + +def test_get_genes_in_region(): + """ + Test get_genes_in_region() + """ + res = lookups.get_genes_in_region('4', 99080000, 99210000) + # stop_pos missing in db, so needs to be updated when available + # exp_names = + assert False def test_get_transcript(): - ''' + """ Test get_transcript() - ''' + """ # normal entry expected = {'id': 5, 'transcript_id': 'ENST00000438504', @@ -172,10 +199,24 @@ def test_get_transcript(): assert not lookups.get_transcript('INCORRECT') +def test_get_transcripts_in_gene(): + """ + Test get_transcripts_in_gene() + """ + res = lookups.get_transcripts_in_gene('SweGen', 'ENSG00000241670') + expected = [{'id': 39, 'transcript_id': 'ENST00000424429', 'gene': 19, + 'mim_gene_accession': None, 'mim_annotation': None, + 'chrom': '1', 'start': 228293, 'stop': 228655, 'strand': '-'}, + {'id': 40, 'transcript_id': 'ENST00000450734', 'gene': 19, + 'mim_gene_accession': None, 'mim_annotation': None, + 'chrom': '1', 'start': 228320, 'stop': 228776, 'strand': '-'}] + assert res == expected + + def test_get_raw_variant(): - ''' + """ Test get_raw_variant - ''' + """ result = lookups.get_raw_variant(55500283, '1', 'A', 'T') assert result['genes'] == ['ENSG00000169174'] assert result['transcripts'] == ['ENST00000302118'] @@ -183,9 +224,9 @@ def test_get_raw_variant(): def test_get_variant(): - ''' + """ Test get_variant() - ''' + """ result = lookups.get_variant(55500283, '1', 'A', 'T') assert result['genes'] == ['ENSG00000169174'] assert result['transcripts'] == ['ENST00000302118'] @@ -196,8 +237,9 @@ def test_get_variant(): def test_get_variants_in_transcript(): - ''' + """ Test get_variants_in_transcript() - ''' - res = lookups.get_variants_in_transcript('ENST00000302118') - assert len(res) == 426 + """ + # res = lookups.get_variants_in_transcript('ENST00000302118') + # assert len(res) == 426 + assert False From 267d083a84d84cb48367ce556ee57e4da2dcec28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 8 Jan 2019 16:54:05 +0100 Subject: [PATCH 116/360] keep on fixing tests and lookups --- backend/db.py | 33 ++++++++++++++++++++++--- backend/modules/browser/lookups.py | 29 +++++++++++++++++++--- backend/modules/browser/test_lookups.py | 26 ++++++++++++++++++- 3 files changed, 79 insertions(+), 9 deletions(-) diff --git a/backend/db.py b/backend/db.py index d3e329c3b..eac8e215c 100644 --- a/backend/db.py +++ b/backend/db.py @@ -464,20 +464,30 @@ def get_next_free_uid(): return next_uid + def get_admin_datasets(user): return DatasetAccess.select().where( DatasetAccess.user == user, DatasetAccess.is_admin) + def get_dataset(dataset): dataset = Dataset.select().where( Dataset.short_name == dataset).get() return dataset + def get_dataset_version(dataset, version=None): + """ + Given dataset get DatasetVersion + Args: + dataset (str): short name of the dataset + 
Returns: + DatasetVersionCurrent: the corresponding DatasetVersion entry + """ if version: dataset_version = (DatasetVersion - .select(DatasetVersion, Dataset) - .join(Dataset) - .where(DatasetVersion.version == version, - Dataset.short_name == dataset)).get() + .select(DatasetVersion, Dataset) + .join(Dataset) + .where(DatasetVersion.version == version, + Dataset.short_name == dataset)).get() else: dataset_version = (DatasetVersionCurrent .select(DatasetVersionCurrent, Dataset) @@ -485,6 +495,21 @@ def get_dataset_version(dataset, version=None): .where(Dataset.short_name == dataset)).get() return dataset_version + +def get_reference_dbid_dataset(dataset): + """ + Get the database id of the associated reference set for a dataset + Args: + dataset (str): short name of the dataset + Returns: + int: id of the associated reference set; returns None if not available + """ + try: + return Dataset.select().where(Dataset.short_name==dataset).dicts().get()['reference_set'] + except Dataset.DoesNotExist: + return None + + def build_dict_from_row(row): d = {} diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 30ec43b03..b4f53dd54 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -7,6 +7,18 @@ SEARCH_LIMIT = 10000 +def add_rsid_to_variant(variant): + """ + Add rsid to a variant in the database + Args: + variant (dict): values for a variant + """ + if variant['rsid'] == '.' or variant['rsid'] is None: + rsid = db.DbSNP.select().where(db.DbSNP.pos == variant['pos']).dicts().get() + if rsid: + variant['rsid'] = 'rs{}'.format(rsid['rsid']) + + REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') def get_awesomebar_result(dataset, query): @@ -272,15 +284,24 @@ def get_variant(pos, chrom, ref, alt): if not variant or 'rsid' not in variant: return variant if variant['rsid'] == '.' 
or variant['rsid'] is None: - rsid = db.DbSNP.select().where((db.DbSNP.pos==pos) & - (db.DbSNP.chrom==chrom)).dicts().get() - if rsid: - variant['rsid'] = 'rs{}'.format(rsid['rsid']) + add_rsid_to_variant(variant) return variant except db.Variant.DoesNotExist: return {} +def get_variants_by_rsid(db, rsid): + if not rsid.startswith('rs'): + return None + try: + int(rsid.lstrip('rs')) + except ValueError: + return None + variants = list(db.variants.find({'rsid': rsid}, projection={'_id': False})) + add_consequence_to_variants(variants) + return variants + + def get_variants_in_gene(dataset, gene_id): """ Retrieve variants present inside a gene diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index 4680bc849..233d71593 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -5,11 +5,19 @@ import lookups +def test_add_rsid_to_variant(): + """ + Test add_rsid_to_variant() + """ + variant = '' + assert False + + def test_get_awesomebar_result(): """ Test get_awesomebar_result() """ - pass + assert False def test_get_coverage_for_bases(): @@ -231,8 +239,24 @@ def test_get_variant(): assert result['genes'] == ['ENSG00000169174'] assert result['transcripts'] == ['ENST00000302118'] assert result['rsid'] == 75050571 + + # missing rsid in result, multiple transcripts + # slow, need to fix db + # result = lookups.get_variant(47730411, '21', 'TA', 'T') + assert result['genes'] == ['ENSG00000160298'] + assert result['transcripts'] == ['ENST00000417060', 'ENST00000397682', + 'ENST00000397683', 'ENST00000397680', + 'ENST00000397685', 'ENST00000397679', + 'ENST00000291691', 'ENST00000445935', + 'ENST00000491666', 'ENST00000472607', + 'ENST00000475776'] + assert result['rsid'] == 75050571 + + # need to add test for entry with missing rsid # too slow query atm + + # incorrect position assert not lookups.get_variant(-1, '1', 'A', 'T') From 20ef941f1e3ef5736a9c45031ecbf0eb4b9f9ab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 9 Jan 2019 16:41:48 +0100 Subject: [PATCH 117/360] more updates, focused on logging, adding version as a parameter, and many other things --- backend/modules/browser/lookups.py | 117 +++++++++++++++++------- backend/modules/browser/test_lookups.py | 25 +++-- 2 files changed, 101 insertions(+), 41 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index b4f53dd54..7924f1669 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -7,16 +7,32 @@ SEARCH_LIMIT = 10000 -def add_rsid_to_variant(variant): +def add_rsid_to_variant(dataset, variant): """ Add rsid to a variant in the database Args: + dataset (str): short name of the dataset variant (dict): values for a variant """ + refset = (db.Dataset + .select(db.ReferenceSet) + .join(db.ReferenceSet) + .where(db.Dataset.short_name == dataset) + .dicts() + .get()) + dbsnp_version = refset['dbsnp_version'] + if variant['rsid'] == '.' 
or variant['rsid'] is None: - rsid = db.DbSNP.select().where(db.DbSNP.pos == variant['pos']).dicts().get() + rsid = (db.DbSNP + .select() + .where((db.DbSNP.pos == variant['pos']) & + (db.DbSNP.version == dbsnp_version)) + .dicts() + .get()) if rsid: variant['rsid'] = 'rs{}'.format(rsid['rsid']) + else: + logging.error('add_rsid_to_variant({}, {}): unable to retrieve rsid'.format(dataset, variant)) REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') @@ -97,26 +113,35 @@ def get_awesomebar_result(dataset, query): return 'not_found', query -def get_coverage_for_bases(dataset, chrom, start_pos, stop_pos=None): +def get_coverage_for_bases(dataset, chrom, start_pos, end_pos=None, ds_version=None): """ - Get the coverage for the list of bases given by start_pos->xstop_pos, inclusive + Get the coverage for the list of bases given by start_pos->end_pos, inclusive Args: + dataset (str): short name for the dataset chrom (str): chromosome start_pos (int): first position of interest end_pos (int): last position of interest; if None it will be set to start_pos + ds_version (str): version of the dataset Returns: - list: coverage dicts for the region of interest + list: coverage dicts for the region of interest: None if unable to retrieve """ - dataset_version = db.get_dataset_version(dataset) - print(dataset_version) - return dict(dataset_version) -# if stop_pos is None: -# stop_pos = start_pos + dataset_version = db.get_dataset_version(dataset, ds_version) + if not dataset_version: + return -# return [values for values in db.Coverage.select().where((db.Coverage.pos >= start_pos) & -# (db.Coverage.pos <= stop_pos) & -# (db.Coverage.chrom == chrom) & -# (db.Coverage.data)).dicts()] + if end_pos is None: + end_pos = start_pos + try: + return [values for values in (db.Coverage + .select() + .where((db.Coverage.pos >= start_pos) & + (db.Coverage.pos <= end_pos) & + (db.Coverage.chrom == chrom) & + (db.Coverage.dataset_version == dataset_version.id)) + .dicts())] + except db.Coverage.DoesNotExist: + logging.error('get_coverage_for_bases({}, {}, {}, {}): '.format(dataset, chrom, start_pos, end_pos)) + return def get_coverage_for_transcript(chrom, start_pos, stop_pos=None): @@ -188,6 +213,7 @@ def get_gene_by_name(dataset, gene_name): try: return db.Gene.select().where(db.Gene.other_names.contains(gene_name)).dicts().get() except db.Gene.DoesNotExist: + logging.error('get_gene_by_name({}, {}): unable to retrieve gene'.format(dataset, gene_name)) return {} @@ -201,12 +227,15 @@ def get_genes_in_region(chrom, start_pos, stop_pos): Returns: dict: values for the gene; empty if not found """ - gene_query = db.Gene.select().where((((db.Gene.start >= start_pos) & - (db.Gene.start <= stop_pos)) | - ((db.Gene.stop >= start_pos) & - (db.Gene.stop <= stop_pos))) & - (db.Gene.chrom == chrom)).dicts() - return [gene for gene in gene_query] + try: + gene_query = db.Gene.select().where((((db.Gene.start >= start_pos) & + (db.Gene.start <= stop_pos)) | + ((db.Gene.stop >= start_pos) & + (db.Gene.stop <= stop_pos))) & + (db.Gene.chrom == chrom)).dicts() + return [gene for gene in gene_query] + except db.Gene.DoesNotExist: + logging.error('get_genes_in_region({}, {}, {}): no genes found'.format(chrom, start_pos, stop_pos)) def get_number_of_variants_in_transcript(db, transcript_id): @@ -232,23 +261,36 @@ def get_transcript(transcript_id): return {} -def get_raw_variant(pos, chrom, ref, alt): +def get_raw_variant(dataset, pos, chrom, ref, alt, ds_version=None): """ Retrieve variant by 
position and change Args: + dataset (str): short name of the reference set pos (int): position of the variant chrom (str): name of the chromosome ref (str): reference sequence - ref (str): variant sequence + alt (str): variant sequence + ds_version (str): dataset version Returns: dict: values for the variant; empty if not found """ + dataset_version = db.get_dataset_version(dataset, ds_version) + if not dataset_version: + return + try: - return db.Variant.select().where((db.Variant.pos == pos) & - (db.Variant.ref == ref) & - (db.Variant.alt == alt) & - (db.Variant.chrom == chrom)).dicts().get() + return (db.Variant + .select() + .where((db.Variant.pos == pos) & + (db.Variant.ref == ref) & + (db.Variant.alt == alt) & + (db.Variant.chrom == chrom) & + (db.Variant.dataset_version == dataset_version.id)) + .dicts() + .get()) except db.Variant.DoesNotExist: + logging.error(('get_raw_variant({}, {}, {}, {}, {}, {})'.format(dataset, pos, chrom, ref, alt, ds_version) + + ': unable to retrieve variant')) return {} @@ -259,19 +301,27 @@ def get_transcripts_in_gene(dataset, gene_id): dataset (str): short name of the reference set gene_id (str): id of the gene Returns: - list: transcripts (dict) associated with the gene + list: transcripts (dict) associated with the gene; empty if no hits """ ref_dbid = db.get_reference_dbid_dataset(dataset) - gene = db.Gene.select().where((db.Gene.reference_set == ref_dbid) & - (db.Gene.gene_id == gene_id)).dicts().get() - return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene['id']).dicts()] + if not ref_dbid: + logging.error('get_transcripts_in_gene({}, {}): unable to get referenceset dbid'.format(dataset, gene_id)) + return [] + try: + gene = db.Gene.select().where((db.Gene.reference_set == ref_dbid) & + (db.Gene.gene_id == gene_id)).dicts().get() + return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene['id']).dicts()] + except db.Gene.DoesNotExist or db.Transcript.DoesNotExist: + logging.error('get_transcripts_in_gene({}, {}): unable to retrieve gene or transcript'.format(dataset, gene_id)) + return [] -def get_variant(pos, chrom, ref, alt): +def get_variant(dataset, pos, chrom, ref, alt): """ Retrieve variant by position and change Retrieves rsid from db (if available) if not present in variant Args: + dataset (str): short name of the dataset pos (int): position of the variant chrom (str): name of the chromosome ref (str): reference sequence @@ -280,11 +330,14 @@ def get_variant(pos, chrom, ref, alt): dict: values for the variant; empty if not found """ try: - variant = get_raw_variant(pos, chrom, ref, alt) + variant = get_raw_variant(dataset, pos, chrom, ref, alt) if not variant or 'rsid' not in variant: return variant if variant['rsid'] == '.' or variant['rsid'] is None: add_rsid_to_variant(variant) + else: + if str(variant['rsid'])[:2] != 'rs': + variant['rsid'] = 'rs{}'.format(variant['rsid']) return variant except db.Variant.DoesNotExist: return {} diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index 233d71593..6b35e63e6 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -9,8 +9,16 @@ def test_add_rsid_to_variant(): """ Test add_rsid_to_variant() """ - variant = '' - assert False + # "with ." + variant = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') + rsid = variant['rsid'] + variant['rsid'] = '.' 
+ # lookups.add_rsid_to_variant('SweGen', variant) + assert variant['rsid'] == rsid + # "non-existing" + del variant['rsid'] + # lookups.add_rsid_to_variant(variant) + assert variant['rsid'] == rsid def test_get_awesomebar_result(): @@ -24,8 +32,7 @@ def test_get_coverage_for_bases(): """ Test get_coverage_for_bases() """ - coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500320) - print(type(coverage)) + # coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500320) expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', 'pos': 55500290, 'mean': 40.66, 'median': 39.0, 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]}, @@ -42,7 +49,7 @@ def test_get_coverage_for_bases(): def test_get_coverage_for_transcript(): - # coverage = lookups.get_coverage_for_transcript('1', 55500283, 55500320) + coverage = lookups.get_coverage_for_transcript('1', 55500283, 55500320) expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', 'pos': 55500290, 'mean': 40.66, 'median': 39.0, 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]}, @@ -225,20 +232,20 @@ def test_get_raw_variant(): """ Test get_raw_variant """ - result = lookups.get_raw_variant(55500283, '1', 'A', 'T') + result = lookups.get_raw_variant('SweGen', 55500283, '1', 'A', 'T') assert result['genes'] == ['ENSG00000169174'] assert result['transcripts'] == ['ENST00000302118'] - assert not lookups.get_raw_variant(55500281, '1', 'A', 'T') + assert not lookups.get_raw_variant('SweGen', 55500281, '1', 'A', 'T') def test_get_variant(): """ Test get_variant() """ - result = lookups.get_variant(55500283, '1', 'A', 'T') + result = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') assert result['genes'] == ['ENSG00000169174'] assert result['transcripts'] == ['ENST00000302118'] - assert result['rsid'] == 75050571 + assert result['rsid'] == 'rs75050571' # missing rsid in result, multiple transcripts # slow, need to fix db From b3e0305b4c7c068bb24966d25175dfbc175a55e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 10 Jan 2019 10:29:42 +0100 Subject: [PATCH 118/360] fixes and tests for get_variants_by_rsid --- backend/modules/browser/lookups.py | 44 ++++++++++++++++++------- backend/modules/browser/test_lookups.py | 24 +++++++++++++- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 7924f1669..ad7cde7cc 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -2,7 +2,7 @@ import db import logging -#from .utils import METRICS, AF_BUCKETS, get_xpos, xpos_to_pos, add_consequence_to_variants, add_consequence_to_variant +from .utils import METRICS, AF_BUCKETS, get_xpos, xpos_to_pos, add_consequence_to_variants, add_consequence_to_variant SEARCH_LIMIT = 10000 @@ -39,11 +39,10 @@ def add_rsid_to_variant(dataset, variant): def get_awesomebar_result(dataset, query): """ - Similar to the above, but this is after a user types enter - We need to figure out what they meant - could be gene, variant, region + Parse the search input - Where datatype is one of 'gene', 'variant', or 'region' - And identifier is one of: + Datatype is one of 'gene', 'variant', or 'region' + Identifier is one of: - ensembl ID for gene - variant ID string for variant (eg. 1-1000-A-T) - region ID string for region (eg. 1-1000-2000) @@ -336,22 +335,43 @@ def get_variant(dataset, pos, chrom, ref, alt): if variant['rsid'] == '.' 
or variant['rsid'] is None: add_rsid_to_variant(variant) else: - if str(variant['rsid'])[:2] != 'rs': + if not str(variant['rsid']).startswith('rs'): variant['rsid'] = 'rs{}'.format(variant['rsid']) return variant except db.Variant.DoesNotExist: return {} -def get_variants_by_rsid(db, rsid): +def get_variants_by_rsid(dataset, rsid, ds_version=None): + """ + Retrieve variants by their associated rsid + Args: + dataset (str): short name of dataset + rsid (str): rsid of the variant (starting with rs) + ds_version (str): version of the dataset + Returns: + list: variant dicts; no hits + """ + dataset_version = db.get_dataset_version(dataset, ds_version) + if not dataset_version: + return + if not rsid.startswith('rs'): - return None + logging.error('get_variants_by_rsid({}, {}): rsid not starting with rs'.format(dataset, rsid)) + return + try: - int(rsid.lstrip('rs')) + rsid = int(rsid.lstrip('rs')) except ValueError: - return None - variants = list(db.variants.find({'rsid': rsid}, projection={'_id': False})) - add_consequence_to_variants(variants) + logging.error('get_variants_by_rsid({}, {}): not an integer after rs'.format(dataset, rsid)) + return + query = (db.Variant + .select() + .where((db.Variant.rsid == rsid) & + (db.Variant.dataset_version == dataset_version)) + .dicts()) + variants = [variant for variant in query] + # add_consequence_to_variants(variants) return variants diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index 6b35e63e6..4e1fa629c 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -259,7 +259,6 @@ def test_get_variant(): 'ENST00000475776'] assert result['rsid'] == 75050571 - # need to add test for entry with missing rsid # too slow query atm @@ -267,6 +266,29 @@ def test_get_variant(): assert not lookups.get_variant(-1, '1', 'A', 'T') +def test_get_variants_by_rsid(caplog): + ''' + Test get_variants_by_rsid() + ''' + # normal + result = lookups.get_variants_by_rsid('SweGen', 'rs373706802') + assert result[0]['genes'] == ['ENSG00000229286', 'ENSG00000235265'] + assert result[0]['transcripts'] == ['ENST00000448070','ENST00000413156'] + + # errors + assert lookups.get_variants_by_rsid('incorrect_name', 'rs373706802') is None + assert lookups.get_variants_by_rsid('SweGen', '373706802') is None + assert lookups.get_variants_by_rsid('SweGen', 'rs3737o68o2') is None + expected = ('get_dataset_version(incorrect_name, version=None): cannot retrieve dataset version', + 'get_variants_by_rsid(SweGen, 373706802): rsid not starting with rs', + 'get_variants_by_rsid(SweGen, rs3737o68o2): not an integer after rs') + for comparison in zip(caplog.messages, expected): + assert comparison[0] == comparison[1] + + # no variants with rsid available + assert not lookups.get_variants_by_rsid('SweGen', 'rs1') + + def test_get_variants_in_transcript(): """ Test get_variants_in_transcript() From 8f73f677390a17e89e6c74675e05dfb71141ab68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 10 Jan 2019 13:59:13 +0100 Subject: [PATCH 119/360] get_variants_in region migrated, many other fixes in tests and code --- backend/modules/browser/lookups.py | 174 ++++++++++++++++-------- backend/modules/browser/test_lookups.py | 48 +++++-- 2 files changed, 150 insertions(+), 72 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index ad7cde7cc..2346cda1e 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ 
-2,14 +2,15 @@ import db import logging -from .utils import METRICS, AF_BUCKETS, get_xpos, xpos_to_pos, add_consequence_to_variants, add_consequence_to_variant +# from .utils import METRICS, AF_BUCKETS, get_xpos, xpos_to_pos, add_consequence_to_variants, add_consequence_to_variant SEARCH_LIMIT = 10000 def add_rsid_to_variant(dataset, variant): """ - Add rsid to a variant in the database + Add rsid to a variant in the database based on position + Note that this may be inaccurate Args: dataset (str): short name of the dataset variant (dict): values for a variant @@ -23,16 +24,20 @@ def add_rsid_to_variant(dataset, variant): dbsnp_version = refset['dbsnp_version'] if variant['rsid'] == '.' or variant['rsid'] is None: - rsid = (db.DbSNP - .select() - .where((db.DbSNP.pos == variant['pos']) & - (db.DbSNP.version == dbsnp_version)) - .dicts() - .get()) - if rsid: - variant['rsid'] = 'rs{}'.format(rsid['rsid']) - else: - logging.error('add_rsid_to_variant({}, {}): unable to retrieve rsid'.format(dataset, variant)) + try: + rsid = (db.DbSNP + .select() + .where((db.DbSNP.pos == variant['pos']) & + (db.DbSNP.chrom == variant['chrom']) & + (db.DbSNP.version == dbsnp_version)) + .dicts() + .get()) + if rsid: + variant['rsid'] = 'rs{}'.format(rsid['rsid']) + else: + logging.error('add_rsid_to_variant({}, variant[dbid: {}]): unable to retrieve rsid'.format(dataset, variant['id'])) + except db.DbSNP.DoesNotExist: + logging.error('add_rsid_to_variant({}, variant[dbid: {}]): unable to retrieve rsid'.format(dataset, variant['id'])) REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') @@ -143,35 +148,50 @@ def get_coverage_for_bases(dataset, chrom, start_pos, end_pos=None, ds_version=N return -def get_coverage_for_transcript(chrom, start_pos, stop_pos=None): +def get_coverage_for_transcript(dataset, chrom, start_pos, end_pos=None, ds_version=None): """ - Get the coverage for the list of bases given by start_pos->xstop_pos, inclusive + Get the coverage for the list of bases given by start_pos->end_pos, inclusive Args: + dataset (str): short name for the dataset chrom (str): chromosome start_pos (int): first position of interest end_pos (int): last position of interest; if None it will be set to start_pos + ds_version (str): version of the dataset Returns: list: coverage dicts for the region of interest """ # Is this function still relevant with postgres? # Only entries with reported cov are in database - coverage_array = get_coverage_for_bases(chrom, start_pos, stop_pos) + coverage_array = get_coverage_for_bases(dataset, chrom, start_pos, end_pos, ds_version) # only return coverages that have coverage (if that makes any sense?) 
# return coverage_array covered = [c for c in coverage_array if c['mean']] return covered -def get_exons_in_transcript(transcript_dbid): +def get_exons_in_transcript(dataset, transcript_id): """ Retrieve exons associated with the given transcript id Args: - transcript_dbid: the id of the transcript in the database (Transcript.id; not transcript_id) + dataset (str): short name of the dataset + transcript_id (str): the id of the transcript Returns: list: dicts with values for each exon sorted by start position """ - return sorted(list(db.Feature.select().where((db.Feature.transcript==transcript_dbid) & - (db.Feature.feature_type=='exon')).dicts()), + ref_dbid = db.get_reference_dbid_dataset(dataset) + + try: + transcript = (db.Transcript + .select() + .join(db.Gene) + .where((db.Transcript.transcript_id == transcript_id) & + (db.Gene.reference_set == ref_dbid)) + .get()) + except db.Transcript.DoesNotExist: + logging.error('get_exons_in_transcript({}, {}): unable to retrueve transcript'.format(dataset, transcript_id)) + return + return sorted(list(db.Feature.select().where((db.Feature.transcript == transcript) & + (db.Feature.feature_type == 'exon')).dicts()), key=lambda k: k['start']) @@ -237,27 +257,27 @@ def get_genes_in_region(chrom, start_pos, stop_pos): logging.error('get_genes_in_region({}, {}, {}): no genes found'.format(chrom, start_pos, stop_pos)) -def get_number_of_variants_in_transcript(db, transcript_id): - total = db.variants.count({'transcripts': transcript_id}) - filtered = db.variants.count({'transcripts': transcript_id, 'filter': 'PASS'}) - return {'filtered': filtered, 'total': total} - - -def get_transcript(transcript_id): +def get_number_of_variants_in_transcript(dataset, transcript_id, ds_version=None): """ - Retrieve transcript by transcript id - Also includes exons as ['exons'] + Get the total and filtered amount of variants in a transcript Args: - transcript_id (str): the id of the transcript + dataset (str): short name of the dataset + transcript_id (str): id of the transcript + ds_version (str): version of the dataset Returns: - dict: values for the transcript, including exons; empty if not found + dict: {filtered: nr_filtered, total: nr_total} """ - try: - transcript = db.Transcript.select().where(db.Transcript.transcript_id==transcript_id).dicts().get() - transcript['exons'] = get_exons_in_transcript(transcript['id']) - return transcript - except db.Transcript.DoesNotExist: - return {} + # will be implemented after database is updated + raise NotImplementedError + + dataset_version = db.get_dataset_version() + if not dataset_version: + return + + transcript = db.Transcript.select().where(db.Transcript.transcript_id) + total = db.variants.count({'transcripts': transcript_id}) + filtered = db.variants.count({'transcripts': transcript_id, 'filter': 'PASS'}) + return {'filtered': filtered, 'total': total} def get_raw_variant(dataset, pos, chrom, ref, alt, ds_version=None): @@ -293,6 +313,31 @@ def get_raw_variant(dataset, pos, chrom, ref, alt, ds_version=None): return {} +def get_transcript(dataset, transcript_id): + """ + Retrieve transcript by transcript id + Also includes exons as ['exons'] + Args: + dataset (str): short name of the dataset + transcript_id (str): the id of the transcript + Returns: + dict: values for the transcript, including exons; empty if not found + """ + ref_dbid = db.get_reference_dbid_dataset(dataset) + try: + transcript = (db.Transcript + .select() + .join(db.Gene) + .where((db.Transcript.transcript_id == transcript_id) & + 
(db.Gene.reference_set == ref_dbid)) + .dicts() + .get()) + transcript['exons'] = get_exons_in_transcript(dataset, transcript_id) + return transcript + except db.Transcript.DoesNotExist: + return {} + + def get_transcripts_in_gene(dataset, gene_id): """ Get the transcripts associated with a gene @@ -315,7 +360,7 @@ def get_transcripts_in_gene(dataset, gene_id): return [] -def get_variant(dataset, pos, chrom, ref, alt): +def get_variant(dataset, pos, chrom, ref, alt, ds_version=None): """ Retrieve variant by position and change Retrieves rsid from db (if available) if not present in variant @@ -324,16 +369,17 @@ def get_variant(dataset, pos, chrom, ref, alt): pos (int): position of the variant chrom (str): name of the chromosome ref (str): reference sequence - ref (str): variant sequence + alt (str): variant sequence + ds_version (str): version of the dataset Returns: dict: values for the variant; empty if not found """ try: - variant = get_raw_variant(dataset, pos, chrom, ref, alt) + variant = get_raw_variant(dataset, pos, chrom, ref, alt, ds_version) if not variant or 'rsid' not in variant: return variant if variant['rsid'] == '.' or variant['rsid'] is None: - add_rsid_to_variant(variant) + add_rsid_to_variant(dataset, variant) else: if not str(variant['rsid']).startswith('rs'): variant['rsid'] = 'rs{}'.format(variant['rsid']) @@ -375,6 +421,33 @@ def get_variants_by_rsid(dataset, rsid, ds_version=None): return variants +def get_variants_in_region(dataset, chrom, start_pos, end_pos, ds_version=None): + """ + Variants that overlap a region + Args: + dataset (str): short name of the dataset + chrom (str): name of the chromosom + start_pos (int): start of the region + end_pos (int): start of the region + ds_version (str): version of the dataset + """ + dataset_version = db.get_dataset_version(dataset, ds_version) + if not dataset_version: + return + query = (db.Variant + .select() + .where((db.Variant.pos >= start_pos) & + (db.Variant.pos <= end_pos) & + (db.Variant.chrom == chrom) & + (db.Variant.dataset_version == dataset_version)) + .dicts()) + variants = [variant for variant in query] + # add_consequence_to_variants(variants) + #for variant in variants: + # remove_extraneous_information(variant) + return variants + + def get_variants_in_gene(dataset, gene_id): """ Retrieve variants present inside a gene @@ -406,9 +479,7 @@ def get_variants_in_transcript(transcript_id): Returns: dict: values for the variant; empty if not found """ - variants = [] - for variant in db.Variant.select().where(db.Variant.transcripts.contains(transcript_id)).dicts(): - variants.append(variant) + variants = [variant for variant in db.Variant.select().where(db.Variant.transcripts.contains(transcript_id)).dicts()] return variants variant['vep_annotations'] = [x for x in variant['vep_annotations'] if x['Feature'] == transcript_id] add_consequence_to_variant(variant) @@ -417,23 +488,6 @@ def get_variants_in_transcript(transcript_id): return variants -def get_variants_in_region(db, chrom, start, stop): - """ - Variants that overlap a region - Unclear if this will include CNVs - """ - xstart = get_xpos(chrom, start) - xstop = get_xpos(chrom, stop) - variants = list(db.variants.find({ - 'xpos': {'$lte': xstop, '$gte': xstart} - }, projection={'_id': False}, limit=SEARCH_LIMIT)) - add_consequence_to_variants(variants) - for variant in variants: - add_rsid_to_variant(sdb, variant) - remove_extraneous_information(variant) - return list(variants) - - def remove_extraneous_information(variant): #del 
variant['genotype_depths']
     #del variant['genotype_qualities']
 diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py
index 4e1fa629c..c9ab76fd9 100644
--- a/backend/modules/browser/test_lookups.py
+++ b/backend/modules/browser/test_lookups.py
@@ -13,11 +13,11 @@ def test_add_rsid_to_variant():
     variant = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T')
     rsid = variant['rsid']
     variant['rsid'] = '.'
-    # lookups.add_rsid_to_variant('SweGen', variant)
+    lookups.add_rsid_to_variant('SweGen', variant)
     assert variant['rsid'] == rsid
     # "non-existing"
     del variant['rsid']
-    # lookups.add_rsid_to_variant(variant)
+    lookups.add_rsid_to_variant('SweGen', variant)
     assert variant['rsid'] == rsid
 
 
@@ -32,7 +32,7 @@ def test_get_coverage_for_bases():
     """
     Test get_coverage_for_bases()
     """
-    # coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500320)
+    coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500320)
     expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', 'pos': 55500290,
                  'mean': 40.66, 'median': 39.0,
                  'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]},
@@ -49,7 +49,7 @@ def test_get_coverage_for_bases():
 
 
 def test_get_coverage_for_transcript():
-    coverage = lookups.get_coverage_for_transcript('1', 55500283, 55500320)
+    coverage = lookups.get_coverage_for_transcript('SweGen', '1', 55500283, 55500320)
     expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', 'pos': 55500290,
                  'mean': 40.66, 'median': 39.0,
                  'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]},
@@ -69,7 +69,7 @@ def test_get_exons_in_transcript():
     """
     Test get_exons_in_transcript()
     """
-    result = lookups.get_exons_in_transcript(28186)
+    result = lookups.get_exons_in_transcript('SweGen', 'ENST00000346817')
     expected = [{'id': 326403, 'gene': 8600, 'transcript': 28186, 'chrom': '2',
                  'start': 202047893, 'stop': 202048032, 'strand': '+', 'feature_type': 'exon'},
                 {'id': 326404, 'gene': 8600, 'transcript': 28186, 'chrom': '2',
@@ -172,6 +172,14 @@ def test_get_genes_in_region():
     assert False
 
 
+def test_get_number_of_variants_in_transcript():
+    """
+    Test get_number_of_variants_in_transcript()
+    """
+    assert False
+    # lookups.get_number_of_variants_in_transcript('SweGen', 'ENST00000302118')
+
+
 def test_get_transcript():
     """
     Test get_transcript()
@@ -199,7 +207,7 @@ def test_get_transcript():
     {'id': 18, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 24739, 'stop': 24892, 'strand': '-', 'feature_type': 'exon'},
     {'id': 17, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 29322, 'stop': 29371, 'strand': '-', 'feature_type': 'exon'}]
-    result = lookups.get_transcript('ENST00000438504')
+    result = lookups.get_transcript('SweGen', 'ENST00000438504')
     assert result['id'] == expected['id']
     assert result['mim_annotation'] == expected['mim_annotation']
     assert result['transcript_id'] == expected['transcript_id']
@@ -211,7 +219,7 @@ def test_get_transcript():
     assert result['exons'] == exp_exon
 
     # non-existing
-    assert not lookups.get_transcript('INCORRECT')
+    assert not lookups.get_transcript('SweGen', 'INCORRECT')
 
 
 def test_get_transcripts_in_gene():
@@ -249,7 +257,7 @@ def 
test_get_variant(): 'ENST00000291691', 'ENST00000445935', 'ENST00000491666', 'ENST00000472607', 'ENST00000475776'] - assert result['rsid'] == 75050571 - - # need to add test for entry with missing rsid - # too slow query atm + assert result['rsid'] == 'rs75050571' + # TODO: add test for entry with missing rsid # incorrect position assert not lookups.get_variant(-1, '1', 'A', 'T') @@ -289,6 +295,24 @@ def test_get_variants_by_rsid(caplog): assert not lookups.get_variants_by_rsid('SweGen', 'rs1') +def test_get_variants_in_region(): + """ + Test get_variants_in_region() + """ + # normal + result = lookups.get_variants_in_region('SweGen', '22', 16079200, 16079400) + expected_pos = [16079227, 16079234, 16079289, 16079350] + assert [res['pos'] for res in result] == expected_pos + + # no positions covered + result = lookups.get_variants_in_region('SweGen', '22', 16079200, 16079000) + assert not result + + # incorrect dataset + result = lookups.get_variants_in_region('Incorrect_dataset', '22', 16079200, 16079400) + assert not result + + def test_get_variants_in_transcript(): """ Test get_variants_in_transcript() From 4fc466af503987f96e55e6fc793ba3c1df12aef5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 10 Jan 2019 15:34:17 +0100 Subject: [PATCH 120/360] added the functionality of get_variants_from_dbsnp to get_variants_by_rsid --- backend/modules/browser/lookups.py | 43 ++++++++++++++++++++----- backend/modules/browser/test_lookups.py | 7 ++++ 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 2346cda1e..a017c5486 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -42,7 +42,7 @@ def add_rsid_to_variant(dataset, variant): REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') -def get_awesomebar_result(dataset, query): +def get_awesomebar_result(dataset, query, ds_version=None): """ Parse the search input @@ -63,14 +63,16 @@ def get_awesomebar_result(dataset, query): Args: dataset (str): short name of dataset query (str): the search query + ds_version (str): the dataset version Returns: tuple: (datatype, identifier) """ query = query.strip() # Parse Variant types - variant = get_variants_by_rsid(db, query.lower()) + variant = get_variants_by_rsid(dataset, query.lower(), ds_version=ds_version) if not variant: + variant = get_variants_by_rsid(dataset, query.lower(), check_position=True, ds_version=ds_version) variant = get_variants_from_dbsnp(db,sdb, query.lower()) if variant: @@ -388,12 +390,14 @@ def get_variant(dataset, pos, chrom, ref, alt, ds_version=None): return {} -def get_variants_by_rsid(dataset, rsid, ds_version=None): +def get_variants_by_rsid(dataset, rsid, check_position=False, ds_version=None): """ Retrieve variants by their associated rsid + May also look up rsid and search for variants at the position Args: dataset (str): short name of dataset rsid (str): rsid of the variant (starting with rs) + check_position (bool): check for variants at the position of the rsid instead of by rsid ds_version (str): version of the dataset Returns: list: variant dicts; no hits @@ -411,11 +415,34 @@ def get_variants_by_rsid(dataset, rsid, ds_version=None): except ValueError: logging.error('get_variants_by_rsid({}, {}): not an integer after rs'.format(dataset, rsid)) return - query = (db.Variant - .select() - .where((db.Variant.rsid == rsid) & - (db.Variant.dataset_version == dataset_version)) - .dicts()) + if 
check_position: + refset = (db.Dataset + .select(db.ReferenceSet) + .join(db.ReferenceSet) + .where(db.Dataset.short_name == dataset) + .dicts() + .get()) + dbsnp_version = refset['dbsnp_version'] + + rsid_dbsnp = (db.DbSNP + .select() + .where((db.DbSNP.rsid == rsid) & + (db.DbSNP.version_id == dbsnp_version) ) + .dicts() + .get()) + query = (db.Variant + .select() + .where((db.Variant.pos == rsid_dbsnp['pos']) & + (db.Variant.chrom == rsid_dbsnp['chrom']) & + (db.Variant.dataset_version == dataset_version)) + .dicts()) + else: + query = (db.Variant + .select() + .where((db.Variant.rsid == rsid) & + (db.Variant.dataset_version == dataset_version)) + .dicts()) + variants = [variant for variant in query] # add_consequence_to_variants(variants) return variants diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index c9ab76fd9..613b26e1c 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -278,6 +278,13 @@ def test_get_variants_by_rsid(caplog): ''' # normal result = lookups.get_variants_by_rsid('SweGen', 'rs373706802') + assert result[0]['pos'] == 16080482 + assert result[0]['genes'] == ['ENSG00000229286', 'ENSG00000235265'] + assert result[0]['transcripts'] == ['ENST00000448070','ENST00000413156'] + + # by position + result = lookups.get_variants_by_rsid('SweGen', 'rs373706802', check_position=True) + assert result[0]['pos'] == 16080482 assert result[0]['genes'] == ['ENSG00000229286', 'ENSG00000235265'] assert result[0]['transcripts'] == ['ENST00000448070','ENST00000413156'] From 8ae076e7e733ca3ea4195c87509be901bfbf0fc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 10 Jan 2019 16:36:16 +0100 Subject: [PATCH 121/360] awesomebar seems to work --- backend/modules/browser/lookups.py | 20 +++++++++++--------- backend/modules/browser/test_lookups.py | 15 ++++++++++++++- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index a017c5486..3c7a44ef6 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -46,7 +46,13 @@ def get_awesomebar_result(dataset, query, ds_version=None): """ Parse the search input - Datatype is one of 'gene', 'variant', or 'region' + Datatype is one of: + - 'gene' + - 'transcript' + - 'variant' + - 'dbsnp_variant_set' + - 'region' + Identifier is one of: - ensembl ID for gene - variant ID string for variant (eg. 1-1000-A-T) @@ -57,9 +63,6 @@ def get_awesomebar_result(dataset, query, ds_version=None): - if a gene symbol, return that gene's ensembl ID - if an RSID, return that variant's string - Finally, note that we don't return the whole object here - only it's identifier. 
- This could be important for performance later - Args: dataset (str): short name of dataset query (str): the search query @@ -73,7 +76,6 @@ def get_awesomebar_result(dataset, query, ds_version=None): variant = get_variants_by_rsid(dataset, query.lower(), ds_version=ds_version) if not variant: variant = get_variants_by_rsid(dataset, query.lower(), check_position=True, ds_version=ds_version) - variant = get_variants_from_dbsnp(db,sdb, query.lower()) if variant: if len(variant) == 1: @@ -82,23 +84,23 @@ def get_awesomebar_result(dataset, query, ds_version=None): retval = ('dbsnp_variant_set', variant[0]['rsid']) return retval - gene = get_gene_by_name(sdb, query) + gene = get_gene_by_name(dataset, query) # From here out, all should be uppercase (gene, tx, region, variant_id) query = query.upper() if not gene: - gene = get_gene_by_name(sdb, query) + gene = get_gene_by_name(dataset, query) if gene: return 'gene', gene['gene_id'] # Ensembl formatted queries if query.startswith('ENS'): # Gene - gene = get_gene(sdb, query) + gene = get_gene(dataset, query) if gene: return 'gene', gene['gene_id'] # Transcript - transcript = get_transcript(sdb, query) + transcript = get_transcript(dataset, query) if transcript: return 'transcript', transcript['transcript_id'] diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index 613b26e1c..18cc2722c 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -25,7 +25,20 @@ def test_get_awesomebar_result(): """ Test get_awesomebar_result() """ - assert False + result = lookups.get_awesomebar_result('SweGen', 'rs373706802') + assert result == ('dbsnp_variant_set', 373706802) + result = lookups.get_awesomebar_result('SweGen', 'rs783') + assert result == ('variant', '22-29461622-G-A') + result = lookups.get_awesomebar_result('SweGen', 'ADH6') + assert result == ('gene', 'ENSG00000172955') + result = lookups.get_awesomebar_result('SweGen', 'ENSG00000172955') + assert result == ('gene', 'ENSG00000172955') + result = lookups.get_awesomebar_result('SweGen', 'ENST00000237653') + assert result == ('transcript', 'ENST00000237653') + result = lookups.get_awesomebar_result('SweGen', '22-46615715-46615880') + assert result == ('region', '22-46615715-46615880') + result = lookups.get_awesomebar_result('SweGen', 'CHR22:46615715-46615880') + assert result == ('region', '22-46615715-46615880') def test_get_coverage_for_bases(): From 8c8336537aefe5152671b2324dcebeef4446df42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 14 Jan 2019 19:44:32 +0100 Subject: [PATCH 122/360] Fix for Google style for docstrings, some other changes --- backend/modules/browser/lookups.py | 83 ++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 26 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 3c7a44ef6..84b10af77 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -2,7 +2,7 @@ import db import logging -# from .utils import METRICS, AF_BUCKETS, get_xpos, xpos_to_pos, add_consequence_to_variants, add_consequence_to_variant +import utils SEARCH_LIMIT = 10000 @@ -11,6 +11,7 @@ def add_rsid_to_variant(dataset, variant): """ Add rsid to a variant in the database based on position Note that this may be inaccurate + Args: dataset (str): short name of the dataset variant (dict): values for a variant @@ -67,6 +68,7 @@ def get_awesomebar_result(dataset, query, ds_version=None): dataset (str): 
short name of dataset query (str): the search query ds_version (str): the dataset version + Returns: tuple: (datatype, identifier) """ @@ -124,12 +126,14 @@ def get_awesomebar_result(dataset, query, ds_version=None): def get_coverage_for_bases(dataset, chrom, start_pos, end_pos=None, ds_version=None): """ Get the coverage for the list of bases given by start_pos->end_pos, inclusive + Args: dataset (str): short name for the dataset chrom (str): chromosome start_pos (int): first position of interest end_pos (int): last position of interest; if None it will be set to start_pos ds_version (str): version of the dataset + Returns: list: coverage dicts for the region of interest: None if unable to retrieve """ @@ -155,12 +159,14 @@ def get_coverage_for_bases(dataset, chrom, start_pos, end_pos=None, ds_version=N def get_coverage_for_transcript(dataset, chrom, start_pos, end_pos=None, ds_version=None): """ Get the coverage for the list of bases given by start_pos->end_pos, inclusive + Args: dataset (str): short name for the dataset chrom (str): chromosome start_pos (int): first position of interest end_pos (int): last position of interest; if None it will be set to start_pos ds_version (str): version of the dataset + Returns: list: coverage dicts for the region of interest """ @@ -176,9 +182,11 @@ def get_coverage_for_transcript(dataset, chrom, start_pos, end_pos=None, ds_vers def get_exons_in_transcript(dataset, transcript_id): """ Retrieve exons associated with the given transcript id + Args: dataset (str): short name of the dataset transcript_id (str): the id of the transcript + Returns: list: dicts with values for each exon sorted by start position """ @@ -202,9 +210,11 @@ def get_exons_in_transcript(dataset, transcript_id): def get_gene(dataset, gene_id): """ Retrieve gene by gene id + Args: dataset (str): short name of the dataset gene_id (str): the id of the gene + Returns: dict: values for the gene; empty if not found """ @@ -222,8 +232,10 @@ def get_gene_by_name(dataset, gene_name): """ Retrieve gene by gene_name. First checks gene_name, then other_names. 
+ Args: gene_name (str): the id of the gene + Returns: dict: values for the gene; empty if not found """ @@ -243,10 +255,12 @@ def get_gene_by_name(dataset, gene_name): def get_genes_in_region(chrom, start_pos, stop_pos): """ Retrieve genes located within a region + Args: chrom (str): chromosome name start_pos (int): start of region stop_pos (int): end of region + Returns: dict: values for the gene; empty if not found """ @@ -264,10 +278,12 @@ def get_genes_in_region(chrom, start_pos, stop_pos): def get_number_of_variants_in_transcript(dataset, transcript_id, ds_version=None): """ Get the total and filtered amount of variants in a transcript + Args: dataset (str): short name of the dataset transcript_id (str): id of the transcript ds_version (str): version of the dataset + Returns: dict: {filtered: nr_filtered, total: nr_total} """ @@ -287,6 +303,7 @@ def get_number_of_variants_in_transcript(dataset, transcript_id, ds_version=None def get_raw_variant(dataset, pos, chrom, ref, alt, ds_version=None): """ Retrieve variant by position and change + Args: dataset (str): short name of the reference set pos (int): position of the variant @@ -294,6 +311,7 @@ def get_raw_variant(dataset, pos, chrom, ref, alt, ds_version=None): ref (str): reference sequence alt (str): variant sequence ds_version (str): dataset version + Returns: dict: values for the variant; empty if not found """ @@ -321,9 +339,11 @@ def get_transcript(dataset, transcript_id): """ Retrieve transcript by transcript id Also includes exons as ['exons'] + Args: dataset (str): short name of the dataset transcript_id (str): the id of the transcript + Returns: dict: values for the transcript, including exons; empty if not found """ @@ -368,6 +388,7 @@ def get_variant(dataset, pos, chrom, ref, alt, ds_version=None): """ Retrieve variant by position and change Retrieves rsid from db (if available) if not present in variant + Args: dataset (str): short name of the dataset pos (int): position of the variant @@ -375,6 +396,7 @@ def get_variant(dataset, pos, chrom, ref, alt, ds_version=None): ref (str): reference sequence alt (str): variant sequence ds_version (str): version of the dataset + Returns: dict: values for the variant; empty if not found """ @@ -396,11 +418,13 @@ def get_variants_by_rsid(dataset, rsid, check_position=False, ds_version=None): """ Retrieve variants by their associated rsid May also look up rsid and search for variants at the position + Args: dataset (str): short name of dataset rsid (str): rsid of the variant (starting with rs) check_position (bool): check for variants at the position of the rsid instead of by rsid ds_version (str): version of the dataset + Returns: list: variant dicts; no hits """ @@ -450,15 +474,41 @@ def get_variants_by_rsid(dataset, rsid, check_position=False, ds_version=None): return variants +def get_variants_in_gene(dataset, gene_id): + """ + Retrieve variants present inside a gene + + Args: + dataset: short name of the dataset + gene_id (str): id of the gene + + Returns: + list: values for the variants + """ + ref_dbid = db.get_reference_dbid_dataset(dataset) +# db.Variant.select().where(db.Variant.gene.contains(re + variants = [] + for variant in db.variants.find({'genes': gene_id}, projection={'_id': False}): + variant['vep_annotations'] = [x for x in variant['vep_annotations'] if x['Gene'] == gene_id] + add_consequence_to_variant(variant) + remove_extraneous_information(variant) + variants.append(variant) + return variants + + def get_variants_in_region(dataset, chrom, start_pos, end_pos, 
ds_version=None):
     """
     Variants that overlap a region
+
     Args:
         dataset (str): short name of the dataset
         chrom (str): name of the chromosome
         start_pos (int): start of the region
         end_pos (int): end of the region
         ds_version (str): version of the dataset
+
+    Returns:
+        list: variant dicts
     """
     dataset_version = db.get_dataset_version(dataset, ds_version)
     if not dataset_version:
@@ -477,43 +527,24 @@ def get_variants_in_region(dataset, chrom, start_pos, end_pos, ds_version=None):
     return variants
 
 
-def get_variants_in_gene(dataset, gene_id):
-    """
-    Retrieve variants present inside a gene
-    Args:
-        dataset: short name of the dataset
-        gene_id (str): id of the gene
-    Returns:
-        list: values for the variants
-    """
-    ref_dbid = db.get_reference_dbid_dataset(dataset)
-#    db.Variant.select().where(db.Variant.gene.contains(re
-    variants = []
-    for variant in db.variants.find({'genes': gene_id}, projection={'_id': False}):
-        variant['vep_annotations'] = [x for x in variant['vep_annotations'] if x['Gene'] == gene_id]
-        add_consequence_to_variant(variant)
-        remove_extraneous_information(variant)
-        variants.append(variant)
-    return variants
-
-
 def get_variants_in_transcript(transcript_id):
     """
     Retrieve variants inside a transcript
+
     Args:
         transcript_id (str): id of the transcript
+
     Returns:
         list: values for the variants; empty if no hits
     """
     variants = [variant for variant in db.Variant.select().where(db.Variant.transcripts.contains(transcript_id)).dicts()]
-    return variants
-        variant['vep_annotations'] = [x for x in variant['vep_annotations'] if x['Feature'] == transcript_id]
-        add_consequence_to_variant(variant)
-        remove_extraneous_information(variant)
-        variants.append(variant)
+    for variant in variants:
+        variant['vep_annotations'] = [annotation for annotation in variant['vep_annotations'] if annotation['Feature'] == transcript_id]
+        add_consequence_to_variant(variant)
+        remove_extraneous_information(variant)
     return variants

From 7b45a8bc5578294928cc948ed85fc7a981a77a67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20=C3=96stberg?=
Date: Thu, 17 Jan 2019 15:00:16 +0100
Subject: [PATCH 123/360] starting to migrate the utils code

---
 backend/modules/browser/utils.py | 183 +++++++++++--------------------
 1 file changed, 63 insertions(+), 120 deletions(-)

diff --git a/backend/modules/browser/utils.py b/backend/modules/browser/utils.py
index 68350916c..b6bf5a890 100644
--- a/backend/modules/browser/utils.py
+++ b/backend/modules/browser/utils.py
@@ -15,61 +15,69 @@
     'VQSLOD'
 ]
 
+# Note that this is the current as of v81 with some included for backwards compatibility (VEP <= 75)
+CSQ_ORDER = ["transcript_ablation",
+"splice_acceptor_variant",
+"splice_donor_variant",
+"stop_gained",
+"frameshift_variant",
+"stop_lost",
+"start_lost", # new in v81
+"initiator_codon_variant", # deprecated
+"transcript_amplification",
+"inframe_insertion",
+"inframe_deletion",
+"missense_variant",
+"protein_altering_variant", # new in v79
+"splice_region_variant",
+"incomplete_terminal_codon_variant",
+"stop_retained_variant",
+"synonymous_variant",
+"coding_sequence_variant",
+"mature_miRNA_variant",
+"5_prime_UTR_variant",
+"3_prime_UTR_variant",
+"non_coding_transcript_exon_variant",
+"non_coding_exon_variant", # deprecated
+"intron_variant",
+"NMD_transcript_variant",
+"non_coding_transcript_variant",
+"nc_transcript_variant", # deprecated
+"upstream_gene_variant",
+"downstream_gene_variant",
+"TFBS_ablation",
+"TFBS_amplification",
+"TF_binding_site_variant", +"regulatory_region_ablation", +"regulatory_region_amplification", +"feature_elongation", +"regulatory_region_variant", +"feature_truncation", +"intergenic_variant", +""] +assert len(CSQ_ORDER) == len(set(CSQ_ORDER)) # No dupplicates -def add_transcript_coordinate_to_variants(sdb, variant_list, transcript_id): - """ - Each variant has a 'xpos' and 'pos' positional attributes. - This method takes a list of variants and adds a third position: the "transcript coordinates". - This is defined as the distance from the start of the transcript, in coding bases. - So a variant in the 7th base of the 6th exon of a transcript will have a transcript coordinate of - the sum of the size of the first 5 exons) + 7 - This is 0-based, so a variant in the first base of the first exon has a transcript coordinate of 0. - - You may want to add transcript coordinates for multiple transcripts, so this is stored in a variant as - variant['transcript_coordinates'][transcript_id] - - If a variant in variant_list does not have a `transcript_coordinates` dictionary, we create one - - If a variant start position for some reason does not fall in any exons in this transcript, its coordinate is 0. - This is perhaps logically inconsistent, - but it allows you to spot errors quickly if there's a pileup at the first base. - `None` would just break things. - - Consider the behavior if a 20 base deletion deletes parts of two exons. - I think the behavior in this method is consistent, but beware that it might break things downstream. - - Edits variant_list in place; no return val - """ - - import lookups - # make sure exons is sorted by (start, end) - exons = sorted(lookups.get_exons_in_transcript(sdb, transcript_id), key=itemgetter('start', 'stop')) - - # offset from start of base for exon in ith position (so first item in this list is always 0) - exon_offsets = [0 for i in range(len(exons))] - for i, exon in enumerate(exons): - for j in range(i+1, len(exons)): - exon_offsets[j] += exon['stop'] - exon['start'] - - for variant in variant_list: - if 'transcript_coordinates' not in variant: - variant['transcript_coordinates'] = {} - variant['transcript_coordinates'][transcript_id] = 0 - for i, exon in enumerate(exons): - if exon['start'] <= variant['pos'] <= exon['stop']: - variant['transcript_coordinates'][transcript_id] = exon_offsets[i] + variant['pos'] - exon['start'] - - -def xpos_to_pos(xpos): - return int(xpos % 1e9) +CSQ_ORDER_DICT = {csq:i for i,csq in enumerate(CSQ_ORDER)} +REV_CSQ_ORDER_DICT = dict(enumerate(CSQ_ORDER)) +assert all(csq == REV_CSQ_ORDER_DICT[CSQ_ORDER_DICT[csq]] for csq in CSQ_ORDER) def add_consequence_to_variants(variant_list): + """ + Add information about variant consequence to multiple variants + Args: + variant_list (list): list of variants + """ for variant in variant_list: add_consequence_to_variant(variant) def add_consequence_to_variant(variant): + """ + Add information about variant consequence to a variant + Args: + variant (dict): variant information + """ worst_csq = worst_csq_with_vep(variant['vep_annotations']) variant['major_consequence'] = '' if worst_csq is None: @@ -81,16 +89,16 @@ def add_consequence_to_variant(variant): variant['HGVS'] = get_proper_hgvs(worst_csq) variant['CANONICAL'] = worst_csq['CANONICAL'] - if csq_order_dict[variant['major_consequence']] <= csq_order_dict["frameshift_variant"]: + if CSQ_ORDER_DICT[variant['major_consequence']] <= CSQ_ORDER_DICT["frameshift_variant"]: variant['category'] = 'lof_variant' for annotation in 
variant['vep_annotations']: if annotation['LoF'] == '': annotation['LoF'] = 'NC' annotation['LoF_filter'] = 'Non-protein-coding gene' - elif csq_order_dict[variant['major_consequence']] <= csq_order_dict["missense_variant"]: + elif CSQ_ORDER_DICT[variant['major_consequence']] <= CSQ_ORDER_DICT["missense_variant"]: # Should be noted that this grabs inframe deletion, etc. variant['category'] = 'missense_variant' - elif csq_order_dict[variant['major_consequence']] <= csq_order_dict["synonymous_variant"]: + elif CSQ_ORDER_DICT[variant['major_consequence']] <= CSQ_ORDER_DICT["synonymous_variant"]: variant['category'] = 'synonymous_variant' else: variant['category'] = 'other_variant' @@ -145,55 +153,9 @@ def get_protein_hgvs(annotation): logging.error("Could not fetch protein hgvs - unknown amino acid") return annotation['HGVSp'].split(':')[-1] -# Note that this is the current as of v81 with some included for backwards compatibility (VEP <= 75) -csq_order = ["transcript_ablation", -"splice_acceptor_variant", -"splice_donor_variant", -"stop_gained", -"frameshift_variant", -"stop_lost", -"start_lost", # new in v81 -"initiator_codon_variant", # deprecated -"transcript_amplification", -"inframe_insertion", -"inframe_deletion", -"missense_variant", -"protein_altering_variant", # new in v79 -"splice_region_variant", -"incomplete_terminal_codon_variant", -"stop_retained_variant", -"synonymous_variant", -"coding_sequence_variant", -"mature_miRNA_variant", -"5_prime_UTR_variant", -"3_prime_UTR_variant", -"non_coding_transcript_exon_variant", -"non_coding_exon_variant", # deprecated -"intron_variant", -"NMD_transcript_variant", -"non_coding_transcript_variant", -"nc_transcript_variant", # deprecated -"upstream_gene_variant", -"downstream_gene_variant", -"TFBS_ablation", -"TFBS_amplification", -"TF_binding_site_variant", -"regulatory_region_ablation", -"regulatory_region_amplification", -"feature_elongation", -"regulatory_region_variant", -"feature_truncation", -"intergenic_variant", -""] -assert len(csq_order) == len(set(csq_order)) # No dupes! - -csq_order_dict = {csq:i for i,csq in enumerate(csq_order)} -rev_csq_order_dict = dict(enumerate(csq_order)) -assert all(csq == rev_csq_order_dict[csq_order_dict[csq]] for csq in csq_order) - def remove_extraneous_vep_annotations(annotation_list): - return [ann for ann in annotation_list if worst_csq_index(ann['Consequence'].split('&')) <= csq_order_dict['intron_variant']] + return [ann for ann in annotation_list if worst_csq_index(ann['Consequence'].split('&')) <= CSQ_ORDER_DICT['intron_variant']] def worst_csq_index(csq_list): @@ -202,7 +164,7 @@ def worst_csq_index(csq_list): Return index of the worst consequence (In this case, index of 'frameshift_variant', so 4) Works well with worst_csq_index('non_coding_exon_variant&nc_transcript_variant'.split('&')) """ - return min([csq_order_dict[csq] for csq in csq_list]) + return min([CSQ_ORDER_DICT[csq] for csq in csq_list]) def worst_csq_from_list(csq_list): @@ -211,7 +173,7 @@ def worst_csq_from_list(csq_list): Return the worst consequence (In this case, 'frameshift_variant') Works well with worst_csq_from_list('non_coding_exon_variant&nc_transcript_variant'.split('&')) """ - return rev_csq_order_dict[worst_csq_index(csq_list)] + return REV_CSQ_ORDER_DICT[worst_csq_index(csq_list)] def worst_csq_from_csq(csq): @@ -219,7 +181,7 @@ def worst_csq_from_csq(csq): Input possibly &-filled csq string (e.g. 
'non_coding_exon_variant&nc_transcript_variant') Return the worst consequence (In this case, 'non_coding_exon_variant') """ - return rev_csq_order_dict[worst_csq_index(csq.split('&'))] + return REV_CSQ_ORDER_DICT[worst_csq_index(csq.split('&'))] def order_vep_by_csq(annotation_list): @@ -229,7 +191,7 @@ def order_vep_by_csq(annotation_list): """ for ann in annotation_list: ann['major_consequence'] = worst_csq_from_csq(ann['Consequence']) - return sorted(annotation_list, key=(lambda ann:csq_order_dict[ann['major_consequence']])) + return sorted(annotation_list, key=(lambda ann:CSQ_ORDER_DICT[ann['major_consequence']])) def worst_csq_with_vep(annotation_list): @@ -247,7 +209,7 @@ def worst_csq_with_vep(annotation_list): def annotation_severity(annotation): "Bigger is more important." - rv = -csq_order_dict[worst_csq_from_csq(annotation['Consequence'])] + rv = -CSQ_ORDER_DICT[worst_csq_from_csq(annotation['Consequence'])] if annotation['CANONICAL'] == 'YES': rv += 0.1 return rv @@ -257,25 +219,6 @@ def annotation_severity(annotation): CHROMOSOME_TO_CODE = { item: i+1 for i, item in enumerate(CHROMOSOMES) } -def get_single_location(chrom, pos): - """ - Gets a single location from chromosome and position - chr must be actual chromosme code (chrY) and pos must be integer - - Borrowed from xbrowse - """ - return CHROMOSOME_TO_CODE[chrom] * int(1e9) + pos - - -def get_xpos(chrom, pos): - """ - Borrowed from xbrowse - """ - if not chrom.startswith('chr'): - chrom = 'chr{}'.format(chrom) - return get_single_location(chrom, int(pos)) - - def get_minimal_representation(pos, ref, alt): """ Get the minimal representation of a variant, based on the ref + alt alleles in a VCF From bc67b6ec2ee68886677878285b253cef8266899c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 17 Jan 2019 15:00:29 +0100 Subject: [PATCH 124/360] first tests --- backend/modules/browser/test_utils.py | 52 +++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 backend/modules/browser/test_utils.py diff --git a/backend/modules/browser/test_utils.py b/backend/modules/browser/test_utils.py new file mode 100644 index 000000000..c42db4c35 --- /dev/null +++ b/backend/modules/browser/test_utils.py @@ -0,0 +1,52 @@ +""" +Tests for utils.py +""" + +import lookups +import utils + +import json + + +def test_add_consequence_to_variants(): + """ + Test add_consequence_to_variants() + """ + assert False + + +def test_add_consequence_to_variant(): + """ + Test add_consequence_to_variant() + """ + # variant = lookups.get_variant('SweGen', 47730411, '21', 'TA', 'T') + variant2 = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') + # variant2['vep_annotations'] = + result = utils.add_consequence_to_variant(variant2) + # result = utils.add_consequence_to_variant(variant) + print(result) + print(result['major_consequence']) + print(result['category']) + + assert False + + +def test_annotation_severity(): + """ + Test annotation_severity() + """ + variant = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') + utils.annotation_severity(variant['vep_annotations']) + + +def test_worst_csq_from_csq(): + """ + Test worst_csq_from_csq() + """ + variant = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') + print(type(variant['vep_annotations'])) + print(variant['vep_annotations']) + vep = json.loads(variant['vep_annotations'])[0] + print(vep['Consequence']) + utils.worst_csq_from_csq(vep['Consequence']) + assert False From a2eba10df2af306b6632b801169eb3dbc838885c Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 23 Jan 2019 10:36:08 +0100 Subject: [PATCH 125/360] improved test coverage, also a few small bugs fixed --- backend/modules/browser/lookups.py | 31 ++++++++-------- backend/modules/browser/test_lookups.py | 47 ++++++++++++++++++++++--- 2 files changed, 56 insertions(+), 22 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 84b10af77..e0364054f 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -33,10 +33,7 @@ def add_rsid_to_variant(dataset, variant): (db.DbSNP.version == dbsnp_version)) .dicts() .get()) - if rsid: - variant['rsid'] = 'rs{}'.format(rsid['rsid']) - else: - logging.error('add_rsid_to_variant({}, variant[dbid: {}]): unable to retrieve rsid'.format(dataset, variant['id'])) + variant['rsid'] = 'rs{}'.format(rsid['rsid']) except db.DbSNP.DoesNotExist: logging.error('add_rsid_to_variant({}, variant[dbid: {}]): unable to retrieve rsid'.format(dataset, variant['id'])) @@ -143,17 +140,13 @@ def get_coverage_for_bases(dataset, chrom, start_pos, end_pos=None, ds_version=N if end_pos is None: end_pos = start_pos - try: - return [values for values in (db.Coverage - .select() - .where((db.Coverage.pos >= start_pos) & - (db.Coverage.pos <= end_pos) & - (db.Coverage.chrom == chrom) & - (db.Coverage.dataset_version == dataset_version.id)) - .dicts())] - except db.Coverage.DoesNotExist: - logging.error('get_coverage_for_bases({}, {}, {}, {}): '.format(dataset, chrom, start_pos, end_pos)) - return + return [values for values in (db.Coverage + .select() + .where((db.Coverage.pos >= start_pos) & + (db.Coverage.pos <= end_pos) & + (db.Coverage.chrom == chrom) & + (db.Coverage.dataset_version == dataset_version.id)) + .dicts())] def get_coverage_for_transcript(dataset, chrom, start_pos, end_pos=None, ds_version=None): @@ -175,6 +168,8 @@ def get_coverage_for_transcript(dataset, chrom, start_pos, end_pos=None, ds_vers coverage_array = get_coverage_for_bases(dataset, chrom, start_pos, end_pos, ds_version) # only return coverages that have coverage (if that makes any sense?) 
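+    # get_coverage_for_bases returns a falsy value (None or an empty list) for
+    # an unknown dataset or when there are no hits, hence the early return below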
# return coverage_array + if not coverage_array: + return covered = [c for c in coverage_array if c['mean']] return covered @@ -191,7 +186,9 @@ def get_exons_in_transcript(dataset, transcript_id): list: dicts with values for each exon sorted by start position """ ref_dbid = db.get_reference_dbid_dataset(dataset) - + if not ref_dbid: + logging.error('get_exons_in_transcript({}, {}): unable to find dataset dbid'.format(dataset, transcript_id)) + return try: transcript = (db.Transcript .select() @@ -200,7 +197,7 @@ def get_exons_in_transcript(dataset, transcript_id): (db.Gene.reference_set == ref_dbid)) .get()) except db.Transcript.DoesNotExist: - logging.error('get_exons_in_transcript({}, {}): unable to retrueve transcript'.format(dataset, transcript_id)) + logging.error('get_exons_in_transcript({}, {}): unable to retrieve transcript'.format(dataset, transcript_id)) return return sorted(list(db.Feature.select().where((db.Feature.transcript == transcript) & (db.Feature.feature_type == 'exon')).dicts()), diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index 18cc2722c..1a3f3fe46 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -39,12 +39,17 @@ def test_get_awesomebar_result(): assert result == ('region', '22-46615715-46615880') result = lookups.get_awesomebar_result('SweGen', 'CHR22:46615715-46615880') assert result == ('region', '22-46615715-46615880') + result = lookups.get_awesomebar_result('SweGen', 'CHR22-29461622-G-A') + assert result == ('variant', '22-29461622-G-A') + result = lookups.get_awesomebar_result('SweGen', 'DOES_NOT_EXIST') + assert result == ('not_found', 'DOES_NOT_EXIST') -def test_get_coverage_for_bases(): +def test_get_coverage_for_bases(caplog): """ Test get_coverage_for_bases() """ + # normal coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500320) expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', 'pos': 55500290, 'mean': 40.66, 'median': 39.0, @@ -60,8 +65,24 @@ def test_get_coverage_for_bases(): 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.961, 0.856, 0.117, 0.001]}] assert coverage == expected + # no end_pos + coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500290) + expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', + 'pos': 55500290, 'mean': 40.66, 'median': 39.0, + 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]}] + + # no hits + coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500285) + assert not coverage + + # incorrect dataset + assert not lookups.get_coverage_for_bases('BAD_DATASET', '1', 55500283, 55500320) + def test_get_coverage_for_transcript(): + """ + Test get_coverage_for_transcript() + """ coverage = lookups.get_coverage_for_transcript('SweGen', '1', 55500283, 55500320) expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', 'pos': 55500290, 'mean': 40.66, 'median': 39.0, @@ -76,9 +97,10 @@ def test_get_coverage_for_transcript(): 'pos': 55500320, 'mean': 39.69, 'median': 38.0, 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.961, 0.856, 0.117, 0.001]}] assert coverage == expected + assert not lookups.get_coverage_for_transcript('BAD_DATASET', '1', 55500283, 55500320) -def test_get_exons_in_transcript(): +def test_get_exons_in_transcript(caplog): """ Test get_exons_in_transcript() """ @@ -101,6 +123,16 @@ def test_get_exons_in_transcript(): 'start': 202082312, 'stop': 202084804, 'strand': '+', 'feature_type': 'exon'}] assert result == expected + # bad dataset + 
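+    # ('NO_DATASET' is a name for which no reference set dbid can be resolved)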
result = lookups.get_exons_in_transcript('NO_DATASET', 'ENST00000346817') + assert not result + assert caplog.messages[0] == 'get_exons_in_transcript(NO_DATASET, ENST00000346817): unable to find dataset dbid' + + # bad transcript + result = lookups.get_exons_in_transcript('SweGen', 'BAD_TRANSCRIPT') + assert not result + assert caplog.messages[1] == 'get_exons_in_transcript(SweGen, BAD_TRANSCRIPT): unable to retrieve transcript' + def test_get_gene(): """ @@ -131,13 +163,13 @@ def test_get_gene(): # non-existing gene result = lookups.get_gene('SweGen', 'NOT_A_GENE') assert not result - + # non-existing dataset result = lookups.get_gene('NoDataset', 'ENSG00000223972') assert not result -def test_get_gene_by_name(): +def test_get_gene_by_name(caplog): """ Test get_gene_by_name() """ @@ -165,7 +197,8 @@ def test_get_gene_by_name(): # non-existing gene result = lookups.get_gene_by_name('SweGen', 'NOT_A_GENE') assert not result - + assert caplog.messages[0] == 'get_gene_by_name(SweGen, NOT_A_GENE): unable to retrieve gene' + # non-existing dataset result = lookups.get_gene_by_name('NoDataset', 'ENSG00000223972') assert not result @@ -294,6 +327,9 @@ def test_get_variants_by_rsid(caplog): assert result[0]['pos'] == 16080482 assert result[0]['genes'] == ['ENSG00000229286', 'ENSG00000235265'] assert result[0]['transcripts'] == ['ENST00000448070','ENST00000413156'] + print(type(result[0]['vep_annotations'])) + print(result[0]['vep_annotations']) + assert False # by position result = lookups.get_variants_by_rsid('SweGen', 'rs373706802', check_position=True) @@ -305,6 +341,7 @@ def test_get_variants_by_rsid(caplog): assert lookups.get_variants_by_rsid('incorrect_name', 'rs373706802') is None assert lookups.get_variants_by_rsid('SweGen', '373706802') is None assert lookups.get_variants_by_rsid('SweGen', 'rs3737o68o2') is None + expected = ('get_dataset_version(incorrect_name, version=None): cannot retrieve dataset version', 'get_variants_by_rsid(SweGen, 373706802): rsid not starting with rs', 'get_variants_by_rsid(SweGen, rs3737o68o2): not an integer after rs') From 914442abe5fba71ae833f9db3f9d2079a53f7e1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 23 Jan 2019 13:36:33 +0100 Subject: [PATCH 126/360] adding tests and documentation --- backend/modules/browser/test_utils.py | 125 +++++++++++++-- backend/modules/browser/utils.py | 209 +++++++++++++++----------- 2 files changed, 232 insertions(+), 102 deletions(-) diff --git a/backend/modules/browser/test_utils.py b/backend/modules/browser/test_utils.py index c42db4c35..828464ec0 100644 --- a/backend/modules/browser/test_utils.py +++ b/backend/modules/browser/test_utils.py @@ -12,31 +12,89 @@ def test_add_consequence_to_variants(): """ Test add_consequence_to_variants() """ - assert False + variants = [] + variants.append(lookups.get_variant('SweGen', 47730411, '21', 'TA', 'T')) + variants.append(lookups.get_variant('SweGen', 55500283, '1', 'A', 'T')) + variants[0]['vep_annotations'] = json.loads(variants[0]['vep_annotations']) # remove when db is fixed + variants[1]['vep_annotations'] = json.loads(variants[1]['vep_annotations']) # remove when db is fixed + + utils.add_consequence_to_variants(variants) + assert variants[0]['major_consequence'] == 'intron_variant' + assert variants[1]['major_consequence'] == 'upstream_gene_variant' def test_add_consequence_to_variant(): """ Test add_consequence_to_variant() """ - # variant = lookups.get_variant('SweGen', 47730411, '21', 'TA', 'T') + variant = lookups.get_variant('SweGen', 
47730411, '21', 'TA', 'T') + variant['vep_annotations'] = json.loads(variant['vep_annotations']) # remove when db is fixed + utils.add_consequence_to_variant(variant) + assert variant['major_consequence'] == 'intron_variant' + variant2 = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') - # variant2['vep_annotations'] = - result = utils.add_consequence_to_variant(variant2) - # result = utils.add_consequence_to_variant(variant) - print(result) - print(result['major_consequence']) - print(result['category']) + variant2['vep_annotations'] = json.loads(variant2['vep_annotations']) # remove when db is fixed + utils.add_consequence_to_variant(variant2) + assert variant2['major_consequence'] == 'upstream_gene_variant' - assert False - def test_annotation_severity(): """ Test annotation_severity() """ variant = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') - utils.annotation_severity(variant['vep_annotations']) + variant['vep_annotations'] = json.loads(variant['vep_annotations']) # remove when db is fixed + res = utils.annotation_severity(variant['vep_annotations'][0]) + assert res == -26.9 + + +def test_get_flags_from_variant(): + """ + Test get_flags_from_variant() + """ + assert False + + +def test_get_minimal_representation(): + """ + Test get_minimal_representation() + """ + assert False + + +def test_get_proper_hgvs(): + """ + Test get_proper_hgvs() + """ + assert False + + +def test_get_protein_hgvs(): + """ + Test get_protein_hgvs() + """ + assert False + + +def test_get_transcript_hgvs(): + """ + Test get_transcript_hgvs() + """ + assert False + + +def test_order_vep_by_csq(): + """ + Test order_vep_by_csq() + """ + assert False + + +def test_remove_extraneous_vep_annotations(): + """ + Test remove_extraneous_vep_annotations() + """ + assert False def test_worst_csq_from_csq(): @@ -44,9 +102,44 @@ def test_worst_csq_from_csq(): Test worst_csq_from_csq() """ variant = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') - print(type(variant['vep_annotations'])) - print(variant['vep_annotations']) - vep = json.loads(variant['vep_annotations'])[0] - print(vep['Consequence']) - utils.worst_csq_from_csq(vep['Consequence']) + variant['vep_annotations'] = json.loads(variant['vep_annotations']) # remove when db is fixed + res = utils.worst_csq_from_csq(variant['vep_annotations'][0]['Consequence']) + assert res == 'upstream_gene_variant' + res = utils.worst_csq_from_csq('non_coding_exon_variant&nc_transcript_variant') + assert res == 'non_coding_exon_variant' + + +def test_worst_csq_from_list(): + """ + Test worst_csq_from_list() + """ assert False + + +def test_worst_csq_index(): + """ + Test worst_csq_index() + """ + csqs = ['frameshift_variant', 'missense_variant'] + assert utils.worst_csq_index(csqs) == 4 + + +def test_worst_csq_with_vep(): + """ + Test worst_csq_from_vep() + """ + veps = [{'SYMBOL': '1', 'Consequence': 'intergenic_variant', 'CANONICAL': ''}, + {'SYMBOL': '2', 'Consequence': 'frameshift_variant', 'CANONICAL': ''}, + {'SYMBOL': '3', 'Consequence': 'intron_variant', 'CANONICAL': ''}, + {'SYMBOL': '4', 'Consequence': 'stop_lost', 'CANONICAL': ''}] + res = utils.worst_csq_with_vep(veps) + assert res == {'SYMBOL': '2', 'Consequence': 'frameshift_variant', + 'CANONICAL': '', 'major_consequence': 'frameshift_variant'} + + veps = [{'SYMBOL': '1', 'Consequence': 'frameshift_variant', 'CANONICAL': 'YES'}, + {'SYMBOL': '2', 'Consequence': 'frameshift_variant', 'CANONICAL': ''}, + {'SYMBOL': '3', 'Consequence': 'intron_variant', 'CANONICAL': ''}, + {'SYMBOL': '4', 
'Consequence': 'stop_lost', 'CANONICAL': ''}] + res = utils.worst_csq_with_vep(veps) + assert res == {'SYMBOL': '1', 'Consequence': 'frameshift_variant', + 'CANONICAL': 'YES', 'major_consequence': 'frameshift_variant'} diff --git a/backend/modules/browser/utils.py b/backend/modules/browser/utils.py index b6bf5a890..8257db98b 100644 --- a/backend/modules/browser/utils.py +++ b/backend/modules/browser/utils.py @@ -2,20 +2,13 @@ from operator import itemgetter AF_BUCKETS = [0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1] -METRICS = [ - 'BaseQRankSum', - 'ClippingRankSum', - 'DP', - 'FS', - 'InbreedingCoeff', - 'MQ', - 'MQRankSum', - 'QD', - 'ReadPosRankSum', - 'VQSLOD' -] + +CHROMOSOMES = ['chr%s' % x for x in range(1, 23)] +CHROMOSOMES.extend(['chrX', 'chrY', 'chrM']) +CHROMOSOME_TO_CODE = { item: i+1 for i, item in enumerate(CHROMOSOMES) } # Note that this is the current as of v81 with some included for backwards compatibility (VEP <= 75) + CSQ_ORDER = ["transcript_ablation", "splice_acceptor_variant", "splice_donor_variant", @@ -61,10 +54,33 @@ REV_CSQ_ORDER_DICT = dict(enumerate(CSQ_ORDER)) assert all(csq == REV_CSQ_ORDER_DICT[CSQ_ORDER_DICT[csq]] for csq in CSQ_ORDER) +METRICS = [ + 'BaseQRankSum', + 'ClippingRankSum', + 'DP', + 'FS', + 'InbreedingCoeff', + 'MQ', + 'MQRankSum', + 'QD', + 'ReadPosRankSum', + 'VQSLOD' +] + +PROTEIN_LETTERS_1TO3 = { + 'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu', + 'F': 'Phe', 'G': 'Gly', 'H': 'His', 'I': 'Ile', + 'K': 'Lys', 'L': 'Leu', 'M': 'Met', 'N': 'Asn', + 'P': 'Pro', 'Q': 'Gln', 'R': 'Arg', 'S': 'Ser', + 'T': 'Thr', 'V': 'Val', 'W': 'Trp', 'Y': 'Tyr', + 'X': 'Ter', '*': 'Ter', 'U': 'Sec' +} + def add_consequence_to_variants(variant_list): """ Add information about variant consequence to multiple variants + Args: variant_list (list): list of variants """ @@ -75,6 +91,7 @@ def add_consequence_to_variants(variant_list): def add_consequence_to_variant(variant): """ Add information about variant consequence to a variant + Args: variant (dict): variant information """ @@ -105,7 +122,35 @@ def add_consequence_to_variant(variant): variant['flags'] = get_flags_from_variant(variant) +def annotation_severity(annotation): + """ + Evaluate severity of the consequences; "bigger is more important" + + Args: + annotation (dict): vep_annotation from a variant + + Returns: + float: severity score + """ + rv = -CSQ_ORDER_DICT[worst_csq_from_csq(annotation['Consequence'])] + if annotation['CANONICAL'] == 'YES': + rv += 0.1 + return rv + + def get_flags_from_variant(variant): + """ + Get flags from variant. + checks for: + - MNP (identical length of reference and variant) + - LoF (loss of function) + + Args: + variant (dict): a variant + + Returns: + list: flags for the variant + """ flags = [] if 'mnps' in variant: flags.append('MNP') @@ -119,14 +164,35 @@ def get_flags_from_variant(variant): return flags -protein_letters_1to3 = { - 'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu', - 'F': 'Phe', 'G': 'Gly', 'H': 'His', 'I': 'Ile', - 'K': 'Lys', 'L': 'Leu', 'M': 'Met', 'N': 'Asn', - 'P': 'Pro', 'Q': 'Gln', 'R': 'Arg', 'S': 'Ser', - 'T': 'Thr', 'V': 'Val', 'W': 'Trp', 'Y': 'Tyr', - 'X': 'Ter', '*': 'Ter', 'U': 'Sec' -} +def get_minimal_representation(pos, ref, alt): + """ + Get the minimal representation of a variant, based on the ref + alt alleles in a VCF + This is used to make sure that multiallelic variants in different datasets, + with different combinations of alternate alleles, can always be matched directly. 
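+
+    For example, (pos=1001, ref='CTCC', alt='CCC') reduces to (1001, 'CT', 'C'):
+    the shared trailing bases are stripped first, and shared leading bases would
+    then be stripped (incrementing pos) while both alleles are longer than one base.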
+ + Note that chromosome is ignored here - in xbrowse, we'll probably be dealing with 1D coordinates + Args: + pos (int): genomic position in a chromosome (1-based) + ref (str): ref allele string + alt (str): alt allele string + Returns: + tuple: (pos, ref, alt) of remapped coordinate + """ + pos = int(pos) + # If it's a simple SNV, don't remap anything + if len(ref) == 1 and len(alt) == 1: + return pos, ref, alt + + # strip off identical suffixes + while(alt[-1] == ref[-1] and min(len(alt),len(ref)) > 1): + alt = alt[:-1] + ref = ref[:-1] + # strip off identical prefixes and increment position + while(alt[0] == ref[0] and min(len(alt),len(ref)) > 1): + alt = alt[1:] + ref = ref[1:] + pos += 1 + return pos, ref, alt def get_proper_hgvs(csq): @@ -137,34 +203,35 @@ def get_proper_hgvs(csq): return get_protein_hgvs(csq) -def get_transcript_hgvs(csq): - return csq['HGVSc'].split(':')[-1] - - def get_protein_hgvs(annotation): """ Takes consequence dictionary, returns proper variant formatting for synonymous variants """ if '%3D' in annotation['HGVSp']: # "%3D" is "=" try: - amino_acids = ''.join([protein_letters_1to3[x] for x in annotation['Amino_acids']]) + amino_acids = ''.join([PROTEIN_LETTERS_1TO3[x] for x in annotation['Amino_acids']]) return "p." + amino_acids + annotation['Protein_position'] + amino_acids except KeyError: logging.error("Could not fetch protein hgvs - unknown amino acid") return annotation['HGVSp'].split(':')[-1] -def remove_extraneous_vep_annotations(annotation_list): - return [ann for ann in annotation_list if worst_csq_index(ann['Consequence'].split('&')) <= CSQ_ORDER_DICT['intron_variant']] +def get_transcript_hgvs(csq): + return csq['HGVSc'].split(':')[-1] -def worst_csq_index(csq_list): +def order_vep_by_csq(annotation_list): """ - Input list of consequences (e.g. ['frameshift_variant', 'missense_variant']) - Return index of the worst consequence (In this case, index of 'frameshift_variant', so 4) - Works well with worst_csq_index('non_coding_exon_variant&nc_transcript_variant'.split('&')) + Adds "major_consequence" to each annotation. + Returns them ordered from most deleterious to least. """ - return min([CSQ_ORDER_DICT[csq] for csq in csq_list]) + for ann in annotation_list: + ann['major_consequence'] = worst_csq_from_csq(ann['Consequence']) + return sorted(annotation_list, key=(lambda ann:CSQ_ORDER_DICT[ann['major_consequence']])) + + +def remove_extraneous_vep_annotations(annotation_list): + return [ann for ann in annotation_list if worst_csq_index(ann['Consequence'].split('&')) <= CSQ_ORDER_DICT['intron_variant']] def worst_csq_from_list(csq_list): @@ -178,73 +245,43 @@ def worst_csq_from_list(csq_list): def worst_csq_from_csq(csq): """ - Input possibly &-filled csq string (e.g. 'non_coding_exon_variant&nc_transcript_variant') - Return the worst consequence (In this case, 'non_coding_exon_variant') + Find worst consequence in a possibly &-filled consequence string + + Args: + csq (str): string of consequences, seperated with & (if multiple) + + Returns: + str: the worst consequence """ return REV_CSQ_ORDER_DICT[worst_csq_index(csq.split('&'))] -def order_vep_by_csq(annotation_list): +def worst_csq_index(csq_list): """ - Adds "major_consequence" to each annotation. - Returns them ordered from most deleterious to least. + Find the index of the worst consequence. 
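+    For example, worst_csq_index(['frameshift_variant', 'missense_variant']) returns 4,
+    the index of 'frameshift_variant'.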
+
+    Corresponds to the lowest value (index) from CSQ_ORDER_DICT
+
+    Args:
+        csq_list (list): consequences
+
+    Returns:
+        int: index in CSQ_ORDER_DICT of the worst consequence
     """
-    for ann in annotation_list:
-        ann['major_consequence'] = worst_csq_from_csq(ann['Consequence'])
-    return sorted(annotation_list, key=(lambda ann:CSQ_ORDER_DICT[ann['major_consequence']]))
+    return min([CSQ_ORDER_DICT[csq] for csq in csq_list])
 
 
 def worst_csq_with_vep(annotation_list):
     """
-    Takes list of VEP annotations [{'Consequence': 'frameshift', Feature: 'ENST'}, ...]
-    Returns most severe annotation (as full VEP annotation [{'Consequence': 'frameshift', Feature: 'ENST'}])
-    Also tacks on "major_consequence" for that annotation (i.e. worst_csq_from_csq)
+    Choose the VEP annotation with the most severe consequence
+
+    Args:
+        annotation_list (list): VEP annotations
+
+    Returns:
+        dict: the annotation with the most severe consequence; also adds "major_consequence" for that annotation
     """
     if not annotation_list:
         return None
     worst = max(annotation_list, key=annotation_severity)
     worst['major_consequence'] = worst_csq_from_csq(worst['Consequence'])
     return worst
-
-
-def annotation_severity(annotation):
-    "Bigger is more important."
-    rv = -CSQ_ORDER_DICT[worst_csq_from_csq(annotation['Consequence'])]
-    if annotation['CANONICAL'] == 'YES':
-        rv += 0.1
-    return rv
-
-CHROMOSOMES = ['chr%s' % x for x in range(1, 23)]
-CHROMOSOMES.extend(['chrX', 'chrY', 'chrM'])
-CHROMOSOME_TO_CODE = { item: i+1 for i, item in enumerate(CHROMOSOMES) }
-
-
-def get_minimal_representation(pos, ref, alt):
-    """
-    Get the minimal representation of a variant, based on the ref + alt alleles in a VCF
-    This is used to make sure that multiallelic variants in different datasets,
-    with different combinations of alternate alleles, can always be matched directly.
- - Note that chromosome is ignored here - in xbrowse, we'll probably be dealing with 1D coordinates - Args: - pos (int): genomic position in a chromosome (1-based) - ref (str): ref allele string - alt (str): alt allele string - Returns: - tuple: (pos, ref, alt) of remapped coordinate - """ - pos = int(pos) - # If it's a simple SNV, don't remap anything - if len(ref) == 1 and len(alt) == 1: - return pos, ref, alt - - # strip off identical suffixes - while(alt[-1] == ref[-1] and min(len(alt),len(ref)) > 1): - alt = alt[:-1] - ref = ref[:-1] - # strip off identical prefixes and increment position - while(alt[0] == ref[0] and min(len(alt),len(ref)) > 1): - alt = alt[1:] - ref = ref[1:] - pos += 1 - return pos, ref, alt From dcc489f846dbb45eedcddd70063e144ddf6d6568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 23 Jan 2019 14:45:03 +0100 Subject: [PATCH 127/360] tests for get_flags_from_variant(), worst_csq_from_list(); removed get_minimal_representation as it seems unused --- backend/modules/browser/test_utils.py | 21 ++++++++++++------ backend/modules/browser/utils.py | 31 --------------------------- 2 files changed, 14 insertions(+), 38 deletions(-) diff --git a/backend/modules/browser/test_utils.py b/backend/modules/browser/test_utils.py index 828464ec0..0ce0c39a3 100644 --- a/backend/modules/browser/test_utils.py +++ b/backend/modules/browser/test_utils.py @@ -52,14 +52,20 @@ def test_get_flags_from_variant(): """ Test get_flags_from_variant() """ - assert False + fake_variant = {'vep_annotations':[{'LoF': 'LC', 'LoF_flags': 'something'}, + {'LoF': '', 'LoF_flags': ''}, + {'LoF': 'LC', 'LoF_flags': 'something'}]} + flags = utils.get_flags_from_variant(fake_variant) + assert flags == ['LC LoF', 'LoF flag'] + fake_variant = {'vep_annotations':[{'LoF': 'LC', 'LoF_flags': 'something'}, + {'LoF': 'HC', 'LoF_flags': 'something'}]} + flags = utils.get_flags_from_variant(fake_variant) + assert flags == ['LoF flag'] -def test_get_minimal_representation(): - """ - Test get_minimal_representation() - """ - assert False + fake_variant = {'mnps': 'no idea', 'vep_annotations':[]} + flags = utils.get_flags_from_variant(fake_variant) + assert flags == ['MNP'] def test_get_proper_hgvs(): @@ -113,7 +119,8 @@ def test_worst_csq_from_list(): """ Test worst_csq_from_list() """ - assert False + csqs = ['frameshift_variant', 'missense_variant'] + assert utils.worst_csq_from_list(csqs) == 'frameshift_variant' def test_worst_csq_index(): diff --git a/backend/modules/browser/utils.py b/backend/modules/browser/utils.py index 8257db98b..833d14023 100644 --- a/backend/modules/browser/utils.py +++ b/backend/modules/browser/utils.py @@ -164,37 +164,6 @@ def get_flags_from_variant(variant): return flags -def get_minimal_representation(pos, ref, alt): - """ - Get the minimal representation of a variant, based on the ref + alt alleles in a VCF - This is used to make sure that multiallelic variants in different datasets, - with different combinations of alternate alleles, can always be matched directly. 
- - Note that chromosome is ignored here - in xbrowse, we'll probably be dealing with 1D coordinates - Args: - pos (int): genomic position in a chromosome (1-based) - ref (str): ref allele string - alt (str): alt allele string - Returns: - tuple: (pos, ref, alt) of remapped coordinate - """ - pos = int(pos) - # If it's a simple SNV, don't remap anything - if len(ref) == 1 and len(alt) == 1: - return pos, ref, alt - - # strip off identical suffixes - while(alt[-1] == ref[-1] and min(len(alt),len(ref)) > 1): - alt = alt[:-1] - ref = ref[:-1] - # strip off identical prefixes and increment position - while(alt[0] == ref[0] and min(len(alt),len(ref)) > 1): - alt = alt[1:] - ref = ref[1:] - pos += 1 - return pos, ref, alt - - def get_proper_hgvs(csq): # Needs major_consequence if csq['major_consequence'] in ('splice_donor_variant', 'splice_acceptor_variant', 'splice_region_variant'): From d40d7a1fdce01701e3b755ed3048cef4de412ee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 24 Jan 2019 08:29:33 +0100 Subject: [PATCH 128/360] didn't confirm that genes were retrieved from the correct reference set --- backend/modules/browser/lookups.py | 28 ++++++++++++++++--------- backend/modules/browser/test_lookups.py | 2 +- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index e0364054f..18004f30a 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -240,20 +240,23 @@ def get_gene_by_name(dataset, gene_name): if not ref_dbid: return {} try: - return db.Gene.select().where(db.Gene.name==gene_name).dicts().get() + return db.Gene.select().where((db.Gene.reference_set == ref_dbid) & + (db.Gene.name==gene_name)).dicts().get() except db.Gene.DoesNotExist: try: - return db.Gene.select().where(db.Gene.other_names.contains(gene_name)).dicts().get() + return db.Gene.select().where((db.Gene.reference_set == ref_dbid) & + (db.Gene.other_names.contains(gene_name))).dicts().get() except db.Gene.DoesNotExist: logging.error('get_gene_by_name({}, {}): unable to retrieve gene'.format(dataset, gene_name)) return {} -def get_genes_in_region(chrom, start_pos, stop_pos): +def get_genes_in_region(dataset, chrom, start_pos, stop_pos): """ Retrieve genes located within a region Args: + dataset (str): short name of the dataset chrom (str): chromosome name start_pos (int): start of region stop_pos (int): end of region @@ -261,12 +264,17 @@ def get_genes_in_region(chrom, start_pos, stop_pos): Returns: dict: values for the gene; empty if not found """ + ref_dbid = db.get_reference_dbid_dataset(dataset) + if not ref_dbid: + return {} + try: - gene_query = db.Gene.select().where((((db.Gene.start >= start_pos) & - (db.Gene.start <= stop_pos)) | - ((db.Gene.stop >= start_pos) & - (db.Gene.stop <= stop_pos))) & - (db.Gene.chrom == chrom)).dicts() + gene_query = db.Gene.select().where((db.Gene.reference_set == ref_dbid) & + ((((db.Gene.start >= start_pos) & + (db.Gene.start <= stop_pos)) | + ((db.Gene.stop >= start_pos) & + (db.Gene.stop <= stop_pos))) & + (db.Gene.chrom == chrom))).dicts() return [gene for gene in gene_query] except db.Gene.DoesNotExist: logging.error('get_genes_in_region({}, {}, {}): no genes found'.format(chrom, start_pos, stop_pos)) @@ -395,7 +403,7 @@ def get_variant(dataset, pos, chrom, ref, alt, ds_version=None): ds_version (str): version of the dataset Returns: - dict: values for the variant; empty if not found + dict: values for the variant; None if not found """ 
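+    # usage sketch (coordinates borrowed from the test suite):
+    #     get_variant('SweGen', 55500283, '1', 'A', 'T')
+    # returns the variant dict, with 'rsid' filled in from dbSNP when missing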
try: variant = get_raw_variant(dataset, pos, chrom, ref, alt, ds_version) @@ -408,7 +416,7 @@ def get_variant(dataset, pos, chrom, ref, alt, ds_version=None): variant['rsid'] = 'rs{}'.format(variant['rsid']) return variant except db.Variant.DoesNotExist: - return {} + return def get_variants_by_rsid(dataset, rsid, check_position=False, ds_version=None): diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/test_lookups.py index 1a3f3fe46..a6942d32f 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/test_lookups.py @@ -212,7 +212,7 @@ def test_get_genes_in_region(): """ Test get_genes_in_region() """ - res = lookups.get_genes_in_region('4', 99080000, 99210000) + res = lookups.get_genes_in_region('SweGen', '4', 99080000, 99210000) # stop_pos missing in db, so needs to be updated when available # exp_names = assert False From 7799446bd1ea1b65e664a4c95ce89abe2c26e8af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 24 Jan 2019 09:19:19 +0100 Subject: [PATCH 129/360] fixes to allow docker testing --- backend/modules/browser/lookups.py | 2 +- backend/modules/browser/pgsql.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 18004f30a..b834e6068 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -2,7 +2,7 @@ import db import logging -import utils +from . import utils SEARCH_LIMIT = 10000 diff --git a/backend/modules/browser/pgsql.py b/backend/modules/browser/pgsql.py index a8d761377..ddf696a42 100644 --- a/backend/modules/browser/pgsql.py +++ b/backend/modules/browser/pgsql.py @@ -4,9 +4,9 @@ import logging -from . import db +import db + from . import lookups -from .utils import get_xpos def get_autocomplete(dataset, query): @@ -25,5 +25,5 @@ def get_autocomplete(dataset, query): return gene_names -def get_variant_list: +def get_variant_list(): pass From 5f439784489a749a283d608c676eaff9923aa81a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 24 Jan 2019 09:19:48 +0100 Subject: [PATCH 130/360] start of conversion of handlers --- backend/modules/browser/browser_handlers.py | 148 ++++++++++++-------- 1 file changed, 91 insertions(+), 57 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 7dff60210..017a611c0 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -1,27 +1,38 @@ +import logging + +import db import handlers from . import lookups -from . import mongodb from . 
import pgsql -from .utils import get_xpos, add_consequence_to_variant, remove_extraneous_vep_annotations, \ - order_vep_by_csq, get_proper_hgvs +#from .utils import get_xpos, add_consequence_to_variant, remove_extraneous_vep_annotations, \ +# order_vep_by_csq, get_proper_hgvs +# maximum length of requested region (GetRegion) +REGION_LIMIT = 100000 class GetTranscript(handlers.UnsafeHandler): + """ + Request information about a transcript + """ def get(self, dataset, transcript): + """ + Request information about a transcript + + Args: + dataset (str): short name of the dataset + transcript (str): the transcript id + + Returns: + dict: transcript (transcript and exons), gene (gene information) + """ transcript_id = transcript ret = {'transcript':{}, 'gene':{}, } - db_shared = mongodb.connect_db(dataset, True) - if not db_shared: - self.set_user_msg("Could not connect to database.", "error") - self.finish( ret ) - return - # Add transcript information - transcript = lookups.get_transcript(db_shared, transcript_id) + transcript = lookups.get_transcript(dataset, transcript_id) ret['transcript']['id'] = transcript['transcript_id'] ret['transcript']['number_of_CDS'] = len([t for t in transcript['exons'] if t['feature_type'] == 'CDS']) @@ -31,30 +42,50 @@ def get(self, dataset, transcript): ret['exons'] += [{'start':exon['start'], 'stop':exon['stop'], 'type':exon['feature_type']}] # Add gene information - gene = lookups.get_gene(db_shared, transcript['gene_id']) + gene = lookups.get_gene(dataset, transcript['gene_id']) ret['gene']['id'] = gene['gene_id'] ret['gene']['name'] = gene['gene_name'] ret['gene']['full_name'] = gene['full_gene_name'] ret['gene']['canonical_transcript'] = gene['canonical_transcript'] - gene_transcripts = lookups.get_transcripts_in_gene(db_shared, transcript['gene_id']) + gene_transcripts = lookups.get_transcripts_in_gene(dataset, transcript['gene_id']) ret['gene']['transcripts'] = [g['transcript_id'] for g in gene_transcripts] - self.finish( ret ) + self.finish(ret) class GetRegion(handlers.UnsafeHandler): + """ + Request information about genes in a region + """ def get(self, dataset, region): + """ + Request information about genes in a region + + Args: + dataset (str): short name of the dataset + region (str): the region in the format chr-startpos-endpos + + Returns: + dict: information about the region and the genes found there + """ region = region.split('-') - REGION_LIMIT = 100000 chrom = region[0] start = None stop = None - if len(region) > 1: - start = int(region[1]) - if len(region) > 2: - stop = int(region[2]) + + try: + if len(region) > 1: + start = int(region[1]) + if len(region) > 2: + stop = int(region[2]) + except ValueError: + logging.error('GetRegion: unable to parse region ({})'.format(region)) + self.send_error(status_code=400) + self.set_user_msg('Unable to parse region', 'error') + return + if not start: start = 0 if not stop and start: @@ -69,13 +100,7 @@ def get(self, dataset, region): }, } - db_shared = mongodb.connect_db(dataset, True) - if not db_shared: - self.set_user_msg("Could not connect to database.", "error") - self.finish( ret ) - return - - genes_in_region = lookups.get_genes_in_region(db_shared, chrom, start, stop) + genes_in_region = lookups.get_genes_in_region(dataset, chrom, start, stop) if genes_in_region: ret['region']['genes'] = [] for gene in genes_in_region: @@ -84,69 +109,78 @@ def get(self, dataset, region): 'full_gene_name':gene['full_gene_name'], }] - self.finish( ret ) + self.finish(ret) class 
GetGene(handlers.UnsafeHandler):
-    def get(self, dataset, gene):
+    """
+    Request information about a gene
+    """
+    def get(self, dataset, gene, ds_version=None):
+        """
+        Request information about a gene
+
+        Args:
+            dataset (str): short name of the dataset
+            gene (str): the gene id
+        """
         gene_id = gene
         ret = {'gene':{'gene_id': gene_id}}
-        db = mongodb.connect_db(dataset, False)
-        db_shared = mongodb.connect_db(dataset, True)
-        if not db_shared or not db:
-            self.set_user_msg("Could not connect to database.", "error")
-            self.finish( ret )
-            return
 
         # Gene
-        gene = lookups.get_gene(db_shared, gene_id)
-        ret['gene'] = gene
+        gene = lookups.get_gene(dataset, gene_id)
+        if gene:
+            ret['gene'] = gene
 
         # Add exons from transcript
-        transcript = lookups.get_transcript(db_shared, gene['canonical_transcript'])
+        transcript = lookups.get_transcript(dataset, gene['canonical_transcript'])
         ret['exons'] = []
         for exon in sorted(transcript['exons'], key=lambda k: k['start']):
             ret['exons'] += [{'start':exon['start'], 'stop':exon['stop'], 'type':exon['feature_type']}]
 
         # Variants
-        ret['gene']['variants'] = lookups.get_number_of_variants_in_transcript(db, gene['canonical_transcript'])
+        ret['gene']['variants'] = lookups.get_number_of_variants_in_transcript(dataset, gene['canonical_transcript'], ds_version)
 
         # Transcripts
-        transcripts_in_gene = lookups.get_transcripts_in_gene(db_shared, gene_id)
+        transcripts_in_gene = lookups.get_transcripts_in_gene(dataset, gene_id)
         if transcripts_in_gene:
             ret['transcripts'] = []
             for transcript in transcripts_in_gene:
                 ret['transcripts'] += [{'transcript_id':transcript['transcript_id']}]
 
-        self.finish( ret )
+        self.finish(ret)
 
 
 class GetVariant(handlers.UnsafeHandler):
+    """
+    Request information about a variant
+    """
     def get(self, dataset, variant):
+        """
+        Request information about a variant
 
+        Args:
+            dataset (str): short name of the dataset
+            variant (str): variant in the format chrom-pos-ref-alt
+        """
         ret = {'variant':{}}
-        db = mongodb.connect_db(dataset, False)
-        db_shared = mongodb.connect_db(dataset, True)
-
-        if not db_shared or not db:
-            self.set_user_msg("Could not connect to database.", "error")
-            self.finish( ret )
-            return
-
         # Variant
         v = variant.split('-')
-        variant = lookups.get_variant(db, db_shared, get_xpos(v[0], int(v[1])), v[2], v[3])
-
-        if variant is None:
-            variant = {
-                'chrom': v[0],
-                'pos': int(v[1]),
-                'xpos': get_xpos(v[0], int(v[1])),
-                'ref': v[2],
-                'alt': v[3]
-            }
+        try:
+            v[1] = int(v[1])
+        except ValueError:
+            logging.error('GetVariant: unable to parse variant ({})'.format(variant))
+            self.send_error(status_code=400)
+            self.set_user_msg('Unable to parse variant', 'error')
+            return
+        variant = lookups.get_variant(dataset, v[0], v[1], v[2], v[3])
+
+        if not variant:
+            self.send_error(status_code=404)
+            self.set_user_msg('Variant not found', 'error')
+            return
 
         # Just get the information we need
         for item in ["variant_id", "chrom", "pos", "ref", "alt", "filter", "rsid", "allele_num",

From 8719831e413c08425d0bf20c57711fa42569f5f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20=C3=96stberg?=
Date: Thu, 24 Jan 2019 12:50:08 +0100
Subject: [PATCH 131/360] more fixes, especially in GetVariation

---
 backend/modules/browser/browser_handlers.py | 48 +++++++++++++--------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py
index 017a611c0..d104b78e2 100644
--- a/backend/modules/browser/browser_handlers.py
+++ b/backend/modules/browser/browser_handlers.py
@@ -1,3
+1,4 @@ +import json # remove when db is fixed import logging import db @@ -5,8 +6,9 @@ from . import lookups from . import pgsql -#from .utils import get_xpos, add_consequence_to_variant, remove_extraneous_vep_annotations, \ -# order_vep_by_csq, get_proper_hgvs + +from .utils import add_consequence_to_variant, remove_extraneous_vep_annotations, \ + order_vep_by_csq, get_proper_hgvs # maximum length of requested region (GetRegion) REGION_LIMIT = 100000 @@ -175,19 +177,22 @@ def get(self, dataset, variant): self.send_error(status_code=400) self.set_user_msg('Unable to parse variant', 'error') return - variant = lookups.get_variant(dataset, v[0], v[1], v[2], v[3]) + orig_variant = variant + variant = lookups.get_variant(dataset, v[1], v[0], v[2], v[3]) if not variant: + logging.error('Variant not found ({})'.format(orig_variant)) self.send_error(status_code=404) self.set_user_msg('Variant not found', 'error') return # Just get the information we need - for item in ["variant_id", "chrom", "pos", "ref", "alt", "filter", "rsid", "allele_num", + for item in ["variant_id", "chrom", "pos", "ref", "alt", "filter_string", "rsid", "allele_num", "allele_freq", "allele_count", "orig_alt_alleles", "site_quality", "quality_metrics", "transcripts", "genes"]: ret['variant'][item] = variant[item] + variant['vep_annotations'] = json.loads(variant['vep_annotations']) # remove when db is fixed # Variant Effect Predictor (VEP) annotations # https://www.ensembl.org/info/docs/tools/vep/vep_formats.html ret['variant']['consequences'] = [] @@ -229,11 +234,8 @@ def get(self, dataset, variant): ['Allele Frequency', 'freq']], 'datasets':{}, 'total':{}} - for item in ['ans', 'acs', 'freq', 'homs']: - key = 'pop_' + item - if key not in variant: - continue - for _dataset, value in variant[key].items(): + for item in ['ans', 'allele_count', 'allelle_freq', 'hom_count']: + for _dataset, value in variant['pop_' + item].items(): if _dataset not in frequencies['datasets']: frequencies['datasets'][_dataset] = {'pop':_dataset} frequencies['datasets'][_dataset][item] = value @@ -249,7 +251,13 @@ def get(self, dataset, variant): class GetVariants(handlers.UnsafeHandler): + """ + Retrieve variants + """ def get(self, dataset, datatype, item): + """ + Retrieve variants + """ ret = mongodb.get_variant_list(dataset, datatype, item) # inconvenient way of doing humpBack-conversion headers = [] @@ -273,18 +281,20 @@ def get(self, dataset, datatype, item): class Search(handlers.UnsafeHandler): + """ + Perform a search for the wanted object + """ def get(self, dataset, query): - ret = {"dataset": dataset, "value": None, "type": None} - - db = mongodb.connect_db(dataset, False) - db_shared = mongodb.connect_db(dataset, True) + """ + Perform a search for the wanted object - if not db_shared or not db: - self.set_user_msg("Could not connect to database.", "error") - self.finish( ret ) - return + Args: + dataset (str): short name of the dataset + query (str): search query + """ + ret = {"dataset": dataset, "value": None, "type": None} - datatype, identifier = lookups.get_awesomebar_result(db, db_shared, query) + datatype, identifier = lookups.get_awesomebar_result(dataset, query) if datatype == "dbsnp_variant_set": datatype = "dbsnp" @@ -292,7 +302,7 @@ def get(self, dataset, query): ret["type"] = datatype ret["value"] = identifier - self.finish( ret ) + self.finish(ret) class Autocomplete(handlers.UnsafeHandler): From 179b878a7c97d8e7548f00945e8908d65f150b1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 24 Jan 
2019 13:36:39 +0100 Subject: [PATCH 132/360] variation page is working, but with a few hacks to get around db problems --- backend/modules/browser/browser_handlers.py | 25 +++++++++++++-------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index d104b78e2..13504c4b0 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -187,10 +187,12 @@ def get(self, dataset, variant): return # Just get the information we need - for item in ["variant_id", "chrom", "pos", "ref", "alt", "filter_string", "rsid", "allele_num", + variant['quality_metrics'] = json.loads(variant['quality_metrics']) # remove when db is fixed + for item in ["variant_id", "chrom", "pos", "ref", "alt", "rsid", "allele_num", "allele_freq", "allele_count", "orig_alt_alleles", "site_quality", "quality_metrics", "transcripts", "genes"]: ret['variant'][item] = variant[item] + ret['variant']['filter'] = variant['filter_string'] variant['vep_annotations'] = json.loads(variant['vep_annotations']) # remove when db is fixed # Variant Effect Predictor (VEP) annotations @@ -234,18 +236,23 @@ def get(self, dataset, variant): ['Allele Frequency', 'freq']], 'datasets':{}, 'total':{}} - for item in ['ans', 'allele_count', 'allelle_freq', 'hom_count']: - for _dataset, value in variant['pop_' + item].items(): - if _dataset not in frequencies['datasets']: - frequencies['datasets'][_dataset] = {'pop':_dataset} - frequencies['datasets'][_dataset][item] = value - if item not in frequencies['total']: - frequencies['total'][item] = 0 - frequencies['total'][item] += value + term_map = {'allele_num':'ans', 'allele_count':'acs', 'allele_freq':'freq', 'hom_count':'homs'} + if dataset not in frequencies['datasets']: + frequencies['datasets'][dataset] = {'pop':dataset} + for item in term_map: + if item not in frequencies['total']: + frequencies['total'][term_map[item]] = 0 + if variant[item] is None: + frequencies['datasets'][dataset][term_map[item]] = 0 + frequencies['total'][term_map[item]] += 0 + else: + frequencies['datasets'][dataset][term_map[item]] = variant[item] + frequencies['total'][term_map[item]] += variant[item] if 'freq' in frequencies['total']: frequencies['total']['freq'] /= len(frequencies['datasets'].keys()) ret['variant']['pop_freq'] = frequencies + logging.error(ret) self.finish( ret ) From 416cfd0dc2a2f43350469244e1a1c17fcad0e122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 28 Jan 2019 11:03:39 +0100 Subject: [PATCH 133/360] new function for retrieving genes by dbid; mixed updates to get all functions running --- backend/modules/browser/browser_handlers.py | 22 +++++-- backend/modules/browser/lookups.py | 68 +++++++++++++++------ 2 files changed, 68 insertions(+), 22 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 13504c4b0..3be480ac1 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -8,7 +8,7 @@ from . 
import pgsql from .utils import add_consequence_to_variant, remove_extraneous_vep_annotations, \ - order_vep_by_csq, get_proper_hgvs + order_vep_by_csq, get_proper_hgvs # maximum length of requested region (GetRegion) REGION_LIMIT = 100000 @@ -110,7 +110,6 @@ def get(self, dataset, region): 'gene_name':gene['gene_name'], 'full_gene_name':gene['full_gene_name'], }] - self.finish(ret) @@ -264,8 +263,13 @@ class GetVariants(handlers.UnsafeHandler): def get(self, dataset, datatype, item): """ Retrieve variants + + Args: + dataset (str): short name of the dataset + datatype (str): gene, region, or transcript + item (str): item to query """ - ret = mongodb.get_variant_list(dataset, datatype, item) + ret = pgsql.get_variant_list(dataset, datatype, item) # inconvenient way of doing humpBack-conversion headers = [] for a, h in ret['headers']: @@ -276,14 +280,22 @@ def get(self, dataset, datatype, item): class GetCoverage(handlers.UnsafeHandler): + """ + Retrieve coverage + """ def get(self, dataset, datatype, item): - ret = mongodb.get_coverage(dataset, datatype, item) + # ret = mongodb.get_coverage(dataset, datatype, item) + ret = None self.finish( ret ) class GetCoveragePos(handlers.UnsafeHandler): + """ + Retrieve coverage + """ def get(self, dataset, datatype, item): - ret = mongodb.get_coverage_pos(dataset, datatype, item) + # ret = mongodb.get_coverage_pos(dataset, datatype, item) + ret = None self.finish( ret ) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index b834e6068..90562d147 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -1,6 +1,9 @@ + +import json # remove when db is fixed +import logging import re + import db -import logging from . import utils @@ -225,6 +228,26 @@ def get_gene(dataset, gene_id): return {} +def get_gene_by_dbid(dataset, gene_dbid): + """ + Retrieve gene by gene database id + + Args: + dataset (str): short name of the dataset + gene_dbid (str): the database id of the gene + + Returns: + dict: values for the gene; empty if not found + """ + ref_dbid = db.get_reference_dbid_dataset(dataset) + if not ref_dbid: + return {} + try: + return db.Gene.select().where(db.Gene.id == id) + except db.Gene.DoesNotExist: + return {} + + def get_gene_by_name(dataset, gene_name): """ Retrieve gene by gene_name. 
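A minimal sketch for orientation, not part of any patch: both gene helpers follow the same single-row peewee pattern, narrowing the SELECT, materialising the row as a dict, and treating a miss as an empty dict. Assuming the models from backend/db.py (the helper name here is hypothetical):

import db

def gene_by_primary_key(gene_dbid):
    # .dicts() turns the row into a plain dict; .get() raises
    # Gene.DoesNotExist on a miss, which is mapped to {} for the handlers.
    try:
        return db.Gene.select().where(db.Gene.id == gene_dbid).dicts().get()
    except db.Gene.DoesNotExist:
        return {}

As committed in this patch, get_gene_by_dbid still compares against the builtin id and returns the bare query; PATCH 135 below rewrites it to this form.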
@@ -292,16 +315,13 @@ def get_number_of_variants_in_transcript(dataset, transcript_id, ds_version=None Returns: dict: {filtered: nr_filtered, total: nr_total} """ - # will be implemented after database is updated - raise NotImplementedError - - dataset_version = db.get_dataset_version() + dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: return - transcript = db.Transcript.select().where(db.Transcript.transcript_id) - total = db.variants.count({'transcripts': transcript_id}) - filtered = db.variants.count({'transcripts': transcript_id, 'filter': 'PASS'}) + variants = get_variants_in_transcript(dataset, transcript_id) + total = len(variants) + filtered = len(tuple(variant for variant in variants if variant['filter_string'] == 'PASS')) return {'filtered': filtered, 'total': total} @@ -335,7 +355,7 @@ def get_raw_variant(dataset, pos, chrom, ref, alt, ds_version=None): .dicts() .get()) except db.Variant.DoesNotExist: - logging.error(('get_raw_variant({}, {}, {}, {}, {}, {})'.format(dataset, pos, chrom, ref, alt, ds_version) + + logging.error(('get_raw_variant({}, {}, {}, {}, {}, {})'.format(dataset, pos, chrom, ref, alt, dataset_version.id) + ': unable to retrieve variant')) return {} @@ -491,8 +511,14 @@ def get_variants_in_gene(dataset, gene_id): list: values for the variants """ ref_dbid = db.get_reference_dbid_dataset(dataset) + variants = [variant for variant in db.Variant.select().where(db.Variant.genes.contains(transcript_id)).dicts()] # db.Variant.select().where(db.Variant.gene.contains(re variants = [] + ##### remove when db is fixed + for variant in variants: + variant['vep_annotations'] = json.loads(variant['vep_annotations']) + ##### + for variant in db.variants.find({'genes': gene_id}, projection={'_id': False}): variant['vep_annotations'] = [x for x in variant['vep_annotations'] if x['Gene'] == gene_id] add_consequence_to_variant(variant) @@ -526,13 +552,19 @@ def get_variants_in_region(dataset, chrom, start_pos, end_pos, ds_version=None): (db.Variant.dataset_version == dataset_version)) .dicts()) variants = [variant for variant in query] - # add_consequence_to_variants(variants) - #for variant in variants: - # remove_extraneous_information(variant) + + ##### remove when db is fixed + for variant in variants: + variant['vep_annotations'] = json.loads(variant['vep_annotations']) + ##### + + utils.add_consequence_to_variants(variants) + for variant in variants: + remove_extraneous_information(variant) return variants -def get_variants_in_transcript(transcript_id): +def get_variants_in_transcript(dataset, transcript_id): """ Retrieve variants inside a transcript @@ -546,8 +578,13 @@ def get_variants_in_transcript(transcript_id): dict: values for the variant; empty if not found """ variants = [variant for variant in db.Variant.select().where(db.Variant.transcripts.contains(transcript_id)).dicts()] + ##### remove when db is fixed + for variant in variants: + variant['vep_annotations'] = json.loads(variant['vep_annotations']) + ##### + for variant in variants: - variant['vep_annotations'] = [annotation for annotation in variant['vep_annotations'] if x['Feature'] == transcript_id] + variant['vep_annotations'] = [anno for anno in variant['vep_annotations'] if anno['Feature'] == transcript_id] add_consequence_to_variant(variant) remove_extraneous_information(variant) return variants @@ -559,8 +596,5 @@ def remove_extraneous_information(variant): del variant['transcripts'] del variant['genes'] del variant['orig_alt_alleles'] - del variant['xpos'] - del 
variant['xstart'] - del variant['xstop'] del variant['site_quality'] del variant['vep_annotations'] From dccf4d9a0db6105a7e2e2fbe6a22372cfbdee6ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 28 Jan 2019 11:04:49 +0100 Subject: [PATCH 134/360] functions migrated from mongodb.py to pgsql.py, bug fixes --- backend/modules/browser/browser_handlers.py | 16 ++-- backend/modules/browser/pgsql.py | 97 ++++++++++++++++++++- 2 files changed, 102 insertions(+), 11 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 3be480ac1..087b0a672 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -44,7 +44,7 @@ def get(self, dataset, transcript): ret['exons'] += [{'start':exon['start'], 'stop':exon['stop'], 'type':exon['feature_type']}] # Add gene information - gene = lookups.get_gene(dataset, transcript['gene_id']) + gene = lookups.get_gene_by_dbid(dataset, transcript['gene']) ret['gene']['id'] = gene['gene_id'] ret['gene']['name'] = gene['gene_name'] ret['gene']['full_name'] = gene['full_gene_name'] @@ -283,20 +283,18 @@ class GetCoverage(handlers.UnsafeHandler): """ Retrieve coverage """ - def get(self, dataset, datatype, item): - # ret = mongodb.get_coverage(dataset, datatype, item) - ret = None - self.finish( ret ) + def get(self, dataset, datatype, item, ds_version=None): + ret = pgsql.get_coverage(dataset, datatype, item, ds_version) + self.finish(ret) class GetCoveragePos(handlers.UnsafeHandler): """ - Retrieve coverage + Retrieve coverage range """ def get(self, dataset, datatype, item): - # ret = mongodb.get_coverage_pos(dataset, datatype, item) - ret = None - self.finish( ret ) + ret = pgsql.get_coverage_pos(dataset, datatype, item) + self.finish(ret) class Search(handlers.UnsafeHandler): diff --git a/backend/modules/browser/pgsql.py b/backend/modules/browser/pgsql.py index ddf696a42..7298e3b7c 100644 --- a/backend/modules/browser/pgsql.py +++ b/backend/modules/browser/pgsql.py @@ -8,6 +8,7 @@ from . 
import lookups +EXON_PADDING = 50 def get_autocomplete(dataset, query): """ @@ -25,5 +26,97 @@ def get_autocomplete(dataset, query): return gene_names -def get_variant_list(): - pass +def get_variant_list(dataset, datatype, item): + headers = [['variant_id','Variant'], ['chrom','Chrom'], ['pos','Position'], + ['HGVS','Consequence'], ['filter','Filter'], ['major_consequence','Annotation'], + ['flags','Flags'], ['allele_count','Allele Count'], ['allele_num','Allele Number'], + ['hom_count','Number of Homozygous Alleles'], ['allele_freq','Allele Frequency']] + + if datatype == 'gene': + variants = lookups.get_variants_in_gene(dataset, item) + elif datatype == 'region': + chrom, start, stop = item.split('-') + variants = lookups.get_variants_in_region(dataset, chrom, start, stop) + elif datatype == 'transcript': + variants = lookups.get_variants_in_transcript(dataset, item) + + # Format output + def format_variant(variant): + if variant['rsid'] == '.': + variant['rsid'] = '' + variant['major_consequence'] = (variant['major_consequence'].replace('_variant','') + .replace('_prime_', '\'') + .replace('_', ' ')) + + # This is so an array values turns into a comma separated string instead + return {k: ", ".join(v) if isinstance(v,list) else v for k, v in variant.items()} + + variants = list(map(format_variant, variants)) + logging.error('VARIANTS_POST : ' + str(variants) + str(len(variants))) + return {'variants': variants, 'headers': headers} + + +def get_coverage(dataset, datatype, item, ds_version=None): + """ + Retrieve coverage for a gene/region/transcript + + Args: + dataset (str): short name of the dataset + datatype (str): type of "region" (gene/region/transcript) + item (str): the datatype item to look up + ds_version (str): the dataset version + """ + ret = {'coverage':[]} + + if datatype == 'gene': + gene = lookups.get_gene(dataset, item) + transcript = lookups.get_transcript(dataset, gene['canonical_transcript']) + start = transcript['start'] - EXON_PADDING + stop = transcript['stop'] + EXON_PADDING + ret['coverage'] = lookups.get_coverage_for_transcript(dataset, transcript['chrom'], start, stop, ds_version) + elif datatype == 'region': + chrom, start, stop = item.split('-') + start = int(start) + stop = int(stop) + ret['coverage'] = lookups.get_coverage_for_bases(dataset, chrom, start, stop, ds_version) + elif datatype == 'transcript': + transcript = lookups.get_transcript(dataset, item) + start = transcript['start'] - EXON_PADDING + stop = transcript['stop'] + EXON_PADDING + ret['coverage'] = lookups.get_coverage_for_transcript(dataset, start, stop, ds_version) + + return ret + + +def get_coverage_pos(dataset, datatype, item): + """ + Retrieve coverage range + + Args: + dataset (str): short name of the dataset + datatype (str): type of "region" (gene/region/transcript) + item (str): the datatype item to look up + ds_version (str): the dataset version + """ + ret = {'start':None, 'stop':None, 'chrom':None} + + if datatype == 'gene': + gene = lookups.get_gene(dataset, item) + transcript = lookups.get_transcript(dataset, gene['canonical_transcript']) + elif datatype == 'transcript': + transcript = lookups.get_transcript(dataset, item) + + if datatype == 'region': + chrom, start, stop = item.split('-') + start = int(start) + stop = int(stop) + else: + start = transcript['start'] - EXON_PADDING + stop = transcript['stop'] + EXON_PADDING + chrom = transcript['chrom'] + + ret['start'] = start + ret['stop'] = stop + ret['chrom'] = chrom + + return ret From 
a882929e963b753846435be6231379a428f883db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 28 Jan 2019 14:08:31 +0100 Subject: [PATCH 135/360] A couple of ugly hacks, but now seems to work for everything but gene. Most hacks can be removed once database is fixed --- backend/modules/browser/browser_handlers.py | 8 +-- backend/modules/browser/lookups.py | 72 +++++++++++++-------- backend/modules/browser/pgsql.py | 4 +- backend/modules/browser/settings.py | 16 ----- 4 files changed, 50 insertions(+), 50 deletions(-) delete mode 100644 backend/modules/browser/settings.py diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 087b0a672..26202361f 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -46,13 +46,14 @@ def get(self, dataset, transcript): # Add gene information gene = lookups.get_gene_by_dbid(dataset, transcript['gene']) ret['gene']['id'] = gene['gene_id'] - ret['gene']['name'] = gene['gene_name'] - ret['gene']['full_name'] = gene['full_gene_name'] + ret['gene']['name'] = gene['name'] + ret['gene']['full_name'] = gene['full_name'] ret['gene']['canonical_transcript'] = gene['canonical_transcript'] - gene_transcripts = lookups.get_transcripts_in_gene(dataset, transcript['gene_id']) + gene_transcripts = lookups.get_transcripts_in_gene_by_dbid(transcript['gene']) ret['gene']['transcripts'] = [g['transcript_id'] for g in gene_transcripts] + logging.error('Transcript with data {}'.format(ret)) self.finish(ret) @@ -251,7 +252,6 @@ def get(self, dataset, variant): frequencies['total']['freq'] /= len(frequencies['datasets'].keys()) ret['variant']['pop_freq'] = frequencies - logging.error(ret) self.finish( ret ) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 90562d147..f2bb48742 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -202,8 +202,9 @@ def get_exons_in_transcript(dataset, transcript_id): except db.Transcript.DoesNotExist: logging.error('get_exons_in_transcript({}, {}): unable to retrieve transcript'.format(dataset, transcript_id)) return + wanted_types = ('CDS', 'UTR', 'exon') return sorted(list(db.Feature.select().where((db.Feature.transcript == transcript) & - (db.Feature.feature_type == 'exon')).dicts()), + (db.Feature.feature_type in wanted_types)).dicts()), key=lambda k: k['start']) @@ -243,7 +244,7 @@ def get_gene_by_dbid(dataset, gene_dbid): if not ref_dbid: return {} try: - return db.Gene.select().where(db.Gene.id == id) + return db.Gene.select().where(db.Gene.id == gene_dbid).dicts().get() except db.Gene.DoesNotExist: return {} @@ -409,6 +410,21 @@ def get_transcripts_in_gene(dataset, gene_id): return [] +def get_transcripts_in_gene_by_dbid(gene_dbid): + """ + Get the transcripts associated with a gene + Args: + gene_dbid (str): database id of the gene + Returns: + list: transcripts (dict) associated with the gene; empty if no hits + """ + try: + return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene_dbid).dicts()] + except db.Gene.DoesNotExist or db.Transcript.DoesNotExist: + logging.error('get_transcripts_in_gene({}): no matching transcripts'.format(gene_dbid)) + return [] + + def get_variant(dataset, pos, chrom, ref, alt, ds_version=None): """ Retrieve variant by position and change @@ -511,19 +527,16 @@ def get_variants_in_gene(dataset, gene_id): list: values for the variants """ ref_dbid = 
db.get_reference_dbid_dataset(dataset) - variants = [variant for variant in db.Variant.select().where(db.Variant.genes.contains(transcript_id)).dicts()] -# db.Variant.select().where(db.Variant.gene.contains(re - variants = [] - ##### remove when db is fixed - for variant in variants: - variant['vep_annotations'] = json.loads(variant['vep_annotations']) - ##### - - for variant in db.variants.find({'genes': gene_id}, projection={'_id': False}): - variant['vep_annotations'] = [x for x in variant['vep_annotations'] if x['Gene'] == gene_id] - add_consequence_to_variant(variant) - remove_extraneous_information(variant) - variants.append(variant) + gene = get_gene(dataset, gene_id) + # temporary while waiting for db fix + variants = get_variants_in_region(dataset, gene['chrom'], gene['start'], gene['stop']) + # variants = [variant for variant in db.Variant.select().where(db.Variant.genes.contains(transcript_id)).dicts()] + +# for variant in variants: +# variant['vep_annotations'] = [anno for anno in variant['vep_annotations'] if anno['Gene'] == gene_id] +# add_consequence_to_variant(variant) +# remove_extraneous_information(variant) +# variants.append(variant) return variants @@ -555,11 +568,17 @@ def get_variants_in_region(dataset, chrom, start_pos, end_pos, ds_version=None): ##### remove when db is fixed for variant in variants: + variant['quality_metrics'] = json.loads(variant['quality_metrics']) variant['vep_annotations'] = json.loads(variant['vep_annotations']) + variant['hom_count'] = 0 + variant['filter'] = variant['filter_string'] ##### utils.add_consequence_to_variants(variants) for variant in variants: + if variant['rsid']: + variant['rsid'] = 'rs{}'.format(variant['rsid']) + # add_rsid_to_variant(dataset, variant) remove_extraneous_information(variant) return variants @@ -569,24 +588,21 @@ def get_variants_in_transcript(dataset, transcript_id): Retrieve variants inside a transcript Args: - pos (int): position of the variant - chrom (str): name of the chromosome - ref (str): reference sequence - ref (str): variant sequence + dataset (str): short name of the dataset + transcript_id (str): id of the transcript (ENST) Returns: dict: values for the variant; empty if not found """ - variants = [variant for variant in db.Variant.select().where(db.Variant.transcripts.contains(transcript_id)).dicts()] - ##### remove when db is fixed - for variant in variants: - variant['vep_annotations'] = json.loads(variant['vep_annotations']) - ##### + transcript = get_transcript(dataset, transcript_id) + # temporary while waiting for db fix + variants = get_variants_in_region(dataset, transcript['chrom'], transcript['start'], transcript['stop']) + # variants = [variant for variant in db.Variant.select().where(db.Variant.transcripts.contains(transcript_id)).dicts()] - for variant in variants: - variant['vep_annotations'] = [anno for anno in variant['vep_annotations'] if anno['Feature'] == transcript_id] - add_consequence_to_variant(variant) - remove_extraneous_information(variant) +# for variant in variants: +# variant['vep_annotations'] = [anno for anno in variant['vep_annotations'] if anno['Feature'] == transcript_id] +# add_consequence_to_variant(variant) +# remove_extraneous_information(variant) return variants diff --git a/backend/modules/browser/pgsql.py b/backend/modules/browser/pgsql.py index 7298e3b7c..b549a5c25 100644 --- a/backend/modules/browser/pgsql.py +++ b/backend/modules/browser/pgsql.py @@ -52,7 +52,6 @@ def format_variant(variant): return {k: ", ".join(v) if isinstance(v,list) else v for k, 
v in variant.items()} variants = list(map(format_variant, variants)) - logging.error('VARIANTS_POST : ' + str(variants) + str(len(variants))) return {'variants': variants, 'headers': headers} @@ -83,7 +82,7 @@ def get_coverage(dataset, datatype, item, ds_version=None): transcript = lookups.get_transcript(dataset, item) start = transcript['start'] - EXON_PADDING stop = transcript['stop'] + EXON_PADDING - ret['coverage'] = lookups.get_coverage_for_transcript(dataset, start, stop, ds_version) + ret['coverage'] = lookups.get_coverage_for_transcript(dataset, transcript['chrom'], start, stop, ds_version) return ret @@ -120,3 +119,4 @@ def get_coverage_pos(dataset, datatype, item): ret['chrom'] = chrom return ret + diff --git a/backend/modules/browser/settings.py b/backend/modules/browser/settings.py deleted file mode 100644 index 875a002bd..000000000 --- a/backend/modules/browser/settings.py +++ /dev/null @@ -1,16 +0,0 @@ -import json - -try: - json_settings_fh = open("settings.json") -except FileNotFoundError: - json_settings_fh = open("../settings.json") - -json_settings = json.load(json_settings_fh) -json_settings_fh.close() - -# Mongodb settings -mongo_host = json_settings["mongoHost"] -mongo_port = json_settings["mongoPort"] -mongo_user = json_settings["mongoUser"] -mongo_password = json_settings["mongoPassword"] -mongo_databases = json_settings["mongoDatabases"] From a8341eb6737565d4af21cd0daed0b1b5a7081b31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 28 Jan 2019 14:11:22 +0100 Subject: [PATCH 136/360] fixed class order --- backend/modules/browser/browser_handlers.py | 181 ++++++++++---------- 1 file changed, 90 insertions(+), 91 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 26202361f..345281470 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -13,47 +13,86 @@ # maximum length of requested region (GetRegion) REGION_LIMIT = 100000 -class GetTranscript(handlers.UnsafeHandler): +class Autocomplete(handlers.UnsafeHandler): + def get(self, dataset, query): + ret = {} + + results = pgsql.get_autocomplete(dataset, query) + ret = {'values': sorted(list(set(results)))[:20]} + + self.finish( ret ) + + +class GetCoverage(handlers.UnsafeHandler): """ - Request information about a transcript + Retrieve coverage """ - def get(self, dataset, transcript): + def get(self, dataset, datatype, item, ds_version=None): + ret = pgsql.get_coverage(dataset, datatype, item, ds_version) + self.finish(ret) + + +class GetCoveragePos(handlers.UnsafeHandler): + """ + Retrieve coverage range + """ + def get(self, dataset, datatype, item): + ret = pgsql.get_coverage_pos(dataset, datatype, item) + self.finish(ret) + + +class Download(handlers.UnsafeHandler): + def get(self, dataset, datatype, item): + filename = "{}_{}_{}.csv".format(dataset, datatype, item) + self.set_header('Content-Type','text/csv') + self.set_header('content-Disposition','attachement; filename={}'.format(filename)) + + data = mongodb.get_variant_list(dataset, datatype, item) + # Write header + self.write(','.join([h[1] for h in data['headers']]) + '\n') + + for variant in data['variants']: + headers = [h[0] for h in data['headers']] + self.write(','.join(map(str, [variant[h] for h in headers])) + '\n') + + +class GetGene(handlers.UnsafeHandler): + """ + Request information about a gene + """ + def get(self, dataset, gene, ds_version=None): """ - Request information about a transcript + 
Request information about a gene Args: dataset (str): short name of the dataset - transcript (str): the transcript id - - Returns: - dict: transcript (transcript and exons), gene (gene information) + gene (str): the gene id """ - transcript_id = transcript - ret = {'transcript':{}, - 'gene':{}, - } + gene_id = gene - # Add transcript information - transcript = lookups.get_transcript(dataset, transcript_id) - ret['transcript']['id'] = transcript['transcript_id'] - ret['transcript']['number_of_CDS'] = len([t for t in transcript['exons'] if t['feature_type'] == 'CDS']) + ret = {'gene':{'gene_id': gene_id}} - # Add exon information + # Gene + gene = lookups.get_gene(dataset, gene_id) + if gene: + ret['gene'] = gene + + # Add exons from transcript + transcript = lookups.get_transcript(dataset, gene['canonical_transcript']) ret['exons'] = [] for exon in sorted(transcript['exons'], key=lambda k: k['start']): ret['exons'] += [{'start':exon['start'], 'stop':exon['stop'], 'type':exon['feature_type']}] - # Add gene information - gene = lookups.get_gene_by_dbid(dataset, transcript['gene']) - ret['gene']['id'] = gene['gene_id'] - ret['gene']['name'] = gene['name'] - ret['gene']['full_name'] = gene['full_name'] - ret['gene']['canonical_transcript'] = gene['canonical_transcript'] + # Variants + ret['gene']['variants'] = lookups.get_number_of_variants_in_transcript(dataset, gene['canonical_transcript'], ds_version) - gene_transcripts = lookups.get_transcripts_in_gene_by_dbid(transcript['gene']) - ret['gene']['transcripts'] = [g['transcript_id'] for g in gene_transcripts] + # Transcripts + transcripts_in_gene = lookups.get_transcripts_in_gene(dataset, gene_id) + if transcripts_in_gene: + ret['transcripts'] = [] + for transcript in transcripts_in_gene: + ret['transcripts'] += [{'transcript_id':transcript['transcript_id']}] - logging.error('Transcript with data {}'.format(ret)) self.finish(ret) @@ -114,42 +153,45 @@ def get(self, dataset, region): self.finish(ret) -class GetGene(handlers.UnsafeHandler): +class GetTranscript(handlers.UnsafeHandler): """ - Request information about a gene + Request information about a transcript """ - def get(self, dataset, gene, ds_version=None): + def get(self, dataset, transcript): """ - Request information about a gene + Request information about a transcript Args: dataset (str): short name of the dataset - gene (str): the gene id - """ - gene_id = gene + transcript (str): the transcript id - ret = {'gene':{'gene_id': gene_id}} + Returns: + dict: transcript (transcript and exons), gene (gene information) + """ + transcript_id = transcript + ret = {'transcript':{}, + 'gene':{}, + } - # Gene - gene = lookups.get_gene(dataset, gene_id) - if gene: - ret['gene'] = gene + # Add transcript information + transcript = lookups.get_transcript(dataset, transcript_id) + ret['transcript']['id'] = transcript['transcript_id'] + ret['transcript']['number_of_CDS'] = len([t for t in transcript['exons'] if t['feature_type'] == 'CDS']) - # Add exons from transcript - transcript = lookups.get_transcript(dataset, gene['canonical_transcript']) + # Add exon information ret['exons'] = [] for exon in sorted(transcript['exons'], key=lambda k: k['start']): ret['exons'] += [{'start':exon['start'], 'stop':exon['stop'], 'type':exon['feature_type']}] - # Variants - ret['gene']['variants'] = lookups.get_number_of_variants_in_transcript(dataset, gene['canonical_transcript'], ds_version) + # Add gene information + gene = lookups.get_gene_by_dbid(dataset, transcript['gene']) + ret['gene']['id'] = 
gene['gene_id'] + ret['gene']['name'] = gene['name'] + ret['gene']['full_name'] = gene['full_name'] + ret['gene']['canonical_transcript'] = gene['canonical_transcript'] - # Transcripts - transcripts_in_gene = lookups.get_transcripts_in_gene(dataset, gene_id) - if transcripts_in_gene: - ret['transcripts'] = [] - for transcript in transcripts_in_gene: - ret['transcripts'] += [{'transcript_id':transcript['transcript_id']}] + gene_transcripts = lookups.get_transcripts_in_gene_by_dbid(transcript['gene']) + ret['gene']['transcripts'] = [g['transcript_id'] for g in gene_transcripts] self.finish(ret) @@ -279,24 +321,6 @@ def get(self, dataset, datatype, item): self.finish( ret ) -class GetCoverage(handlers.UnsafeHandler): - """ - Retrieve coverage - """ - def get(self, dataset, datatype, item, ds_version=None): - ret = pgsql.get_coverage(dataset, datatype, item, ds_version) - self.finish(ret) - - -class GetCoveragePos(handlers.UnsafeHandler): - """ - Retrieve coverage range - """ - def get(self, dataset, datatype, item): - ret = pgsql.get_coverage_pos(dataset, datatype, item) - self.finish(ret) - - class Search(handlers.UnsafeHandler): """ Perform a search for the wanted object @@ -320,28 +344,3 @@ def get(self, dataset, query): ret["value"] = identifier self.finish(ret) - - -class Autocomplete(handlers.UnsafeHandler): - def get(self, dataset, query): - ret = {} - - results = pgsql.get_autocomplete(dataset, query) - ret = {'values': sorted(list(set(results)))[:20]} - - self.finish( ret ) - - -class Download(handlers.UnsafeHandler): - def get(self, dataset, datatype, item): - filename = "{}_{}_{}.csv".format(dataset, datatype, item) - self.set_header('Content-Type','text/csv') - self.set_header('content-Disposition','attachement; filename={}'.format(filename)) - - data = mongodb.get_variant_list(dataset, datatype, item) - # Write header - self.write(','.join([h[1] for h in data['headers']]) + '\n') - - for variant in data['variants']: - headers = [h[0] for h in data['headers']] - self.write(','.join(map(str, [variant[h] for h in headers])) + '\n') From 02cb50f6464567aea57f2c9170d11edd4fa7a466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 28 Jan 2019 14:39:56 +0100 Subject: [PATCH 137/360] multiple fixes to get a working system with the current database --- backend/modules/browser/browser_handlers.py | 10 ++++ backend/modules/browser/lookups.py | 8 ++- backend/modules/browser/pgsql.py | 57 ++++++++++----------- 3 files changed, 45 insertions(+), 30 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 345281470..66319723c 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -74,6 +74,9 @@ def get(self, dataset, gene, ds_version=None): # Gene gene = lookups.get_gene(dataset, gene_id) + #### Remove when db is fixed + gene['stop'] = gene['start'] + 20000 + #### if gene: ret['gene'] = gene @@ -93,6 +96,11 @@ def get(self, dataset, gene, ds_version=None): for transcript in transcripts_in_gene: ret['transcripts'] += [{'transcript_id':transcript['transcript_id']}] + + # temporary fix for names + gene['gene_name'] = gene['name'] + gene['full_gene_name'] = gene['full_name'] + self.finish(ret) @@ -318,6 +326,8 @@ def get(self, dataset, datatype, item): n = a[0] + "".join([b[0].upper() + b[1:] for b in a.split("_")])[1:] headers += [[n, h]] ret['headers'] = headers + logging.error('Variant request {} items'.format(len(ret))) + logging.error('Variant request 
{} items'.format(ret)) self.finish( ret ) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index f2bb48742..639896429 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -529,7 +529,13 @@ def get_variants_in_gene(dataset, gene_id): ref_dbid = db.get_reference_dbid_dataset(dataset) gene = get_gene(dataset, gene_id) # temporary while waiting for db fix - variants = get_variants_in_region(dataset, gene['chrom'], gene['start'], gene['stop']) + logging.error('Found gene {}'.format(gene)) + #### remove when db is fixed + gene['stop'] = gene['start'] + 20000 + #### + + variants = get_variants_in_region(dataset, gene['chrom'], gene['start'], gene['stop']) + # variants = [variant for variant in db.Variant.select().where(db.Variant.genes.contains(transcript_id)).dicts()] # for variant in variants: diff --git a/backend/modules/browser/pgsql.py b/backend/modules/browser/pgsql.py index b549a5c25..b4c1cd53f 100644 --- a/backend/modules/browser/pgsql.py +++ b/backend/modules/browser/pgsql.py @@ -26,35 +26,6 @@ def get_autocomplete(dataset, query): return gene_names -def get_variant_list(dataset, datatype, item): - headers = [['variant_id','Variant'], ['chrom','Chrom'], ['pos','Position'], - ['HGVS','Consequence'], ['filter','Filter'], ['major_consequence','Annotation'], - ['flags','Flags'], ['allele_count','Allele Count'], ['allele_num','Allele Number'], - ['hom_count','Number of Homozygous Alleles'], ['allele_freq','Allele Frequency']] - - if datatype == 'gene': - variants = lookups.get_variants_in_gene(dataset, item) - elif datatype == 'region': - chrom, start, stop = item.split('-') - variants = lookups.get_variants_in_region(dataset, chrom, start, stop) - elif datatype == 'transcript': - variants = lookups.get_variants_in_transcript(dataset, item) - - # Format output - def format_variant(variant): - if variant['rsid'] == '.': - variant['rsid'] = '' - variant['major_consequence'] = (variant['major_consequence'].replace('_variant','') - .replace('_prime_', '\'') - .replace('_', ' ')) - - # This is so an array values turns into a comma separated string instead - return {k: ", ".join(v) if isinstance(v,list) else v for k, v in variant.items()} - - variants = list(map(format_variant, variants)) - return {'variants': variants, 'headers': headers} - - def get_coverage(dataset, datatype, item, ds_version=None): """ Retrieve coverage for a gene/region/transcript @@ -120,3 +91,31 @@ def get_coverage_pos(dataset, datatype, item): return ret + +def get_variant_list(dataset, datatype, item): + headers = [['variant_id','Variant'], ['chrom','Chrom'], ['pos','Position'], + ['HGVS','Consequence'], ['filter','Filter'], ['major_consequence','Annotation'], + ['flags','Flags'], ['allele_count','Allele Count'], ['allele_num','Allele Number'], + ['hom_count','Number of Homozygous Alleles'], ['allele_freq','Allele Frequency']] + + if datatype == 'gene': + variants = lookups.get_variants_in_gene(dataset, item) + elif datatype == 'region': + chrom, start, stop = item.split('-') + variants = lookups.get_variants_in_region(dataset, chrom, start, stop) + elif datatype == 'transcript': + variants = lookups.get_variants_in_transcript(dataset, item) + + # Format output + def format_variant(variant): + if variant['rsid'] == '.': + variant['rsid'] = '' + variant['major_consequence'] = (variant['major_consequence'].replace('_variant','') + .replace('_prime_', '\'') + .replace('_', ' ')) + + # This is so an array values turns into a comma separated string 
instead + return {k: ", ".join(v) if isinstance(v,list) else v for k, v in variant.items()} + + variants = list(map(format_variant, variants)) + return {'variants': variants, 'headers': headers} From 2b6aa28611239a22c12c95779878846ef15590f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 28 Jan 2019 16:12:39 +0100 Subject: [PATCH 138/360] test support added, mongodb removed --- backend/modules/browser/tests/__init__.py | 0 backend/modules/browser/{ => tests}/test_lookups.py | 2 +- backend/modules/browser/{ => tests}/test_utils.py | 6 ++++-- backend/run_pytest.sh | 5 +++++ backend/test_requirements.txt | 1 + 5 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 backend/modules/browser/tests/__init__.py rename backend/modules/browser/{ => tests}/test_lookups.py (99%) rename backend/modules/browser/{ => tests}/test_utils.py (75%) create mode 100755 backend/run_pytest.sh create mode 100644 backend/test_requirements.txt diff --git a/backend/modules/browser/tests/__init__.py b/backend/modules/browser/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/modules/browser/test_lookups.py b/backend/modules/browser/tests/test_lookups.py similarity index 99% rename from backend/modules/browser/test_lookups.py rename to backend/modules/browser/tests/test_lookups.py index a6942d32f..64e959ecb 100644 --- a/backend/modules/browser/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -2,7 +2,7 @@ Tests for the functions available in lookups.py """ -import lookups +from .. import lookups def test_add_rsid_to_variant(): diff --git a/backend/modules/browser/test_utils.py b/backend/modules/browser/tests/test_utils.py similarity index 75% rename from backend/modules/browser/test_utils.py rename to backend/modules/browser/tests/test_utils.py index 0ce0c39a3..df560c615 100644 --- a/backend/modules/browser/test_utils.py +++ b/backend/modules/browser/tests/test_utils.py @@ -2,8 +2,8 @@ Tests for utils.py """ -import lookups -import utils +from .. import lookups +from .. 
import utils import json @@ -79,6 +79,8 @@ def test_get_protein_hgvs(): """ Test get_protein_hgvs() """ + annotation = {'MAX_AF_POPS': 'AA&gnomAD_AMR&gnomAD_ASJ&gnomAD_EAS&gnomAD_OTH&gnomAD_SAS&AFR&AMR&EAS&EUR&SAS', 'TSL': '', 'APPRIS': '', 'gnomAD_ASJ_AF': '1', 'AMR_AF': '1', 'SYMBOL': 'ADH6', 'AFR_AF': '1', 'Feature': 'ENST00000237653', 'Codons': 'Tgt/Agt', 'MOTIF_NAME': '', 'DOMAINS': 'hmmpanther:PTHR11695:SF307&hmmpanther:PTHR11695&Gene3D:3.90.180.10', 'SIFT': 'tolerated(1)', 'VARIANT_CLASS': 'SNV', 'EA_AF': '0.9995', 'CDS_position': '4', 'CCDS': 'CCDS3647.1', 'Allele': 'T', 'PolyPhen': 'benign(0)', 'AA_AF': '1', 'gnomAD_EAS_AF': '1', 'IMPACT': 'MODERATE', 'HGVSp': '', 'ENSP': 'ENSP00000237653', 'MAX_AF': '1', 'LoF': '', 'INTRON': '', 'gnomAD_FIN_AF': '0.9999', 'Existing_variation': 'rs4699735', 'HGVSc': '', 'SOURCE': 'Ensembl', 'LoF_filter': '', 'gnomAD_AF': '0.9998', 'gnomAD_AMR_AF': '1', 'GENE_PHENO': '', 'gnomAD_OTH_AF': '1', 'LoF_flags': '', 'MOTIF_SCORE_CHANGE': '', 'UNIPARC': 'UPI00001AE69C', 'cDNA_position': '389', 'ALLELE_NUM': '1', 'EAS_AF': '1', 'Feature_type': 'Transcript', 'AF': '1', 'gnomAD_AFR_AF': '0.9999', 'HGNC_ID': '255', 'SAS_AF': '1', 'LoF_info': '', 'SWISSPROT': 'P28332', 'FLAGS': '', 'miRNA': '', 'Consequence': 'missense_variant', 'Protein_position': '2', 'Gene': 'ENSG00000172955', 'HIGH_INF_POS': '', 'STRAND': '-1', 'gnomAD_NFE_AF': '0.9995', 'EUR_AF': '1', 'DISTANCE': '', 'CLIN_SIG': '', 'PHENO': '', 'SYMBOL_SOURCE': 'HGNC', 'Amino_acids': 'C/S', 'TREMBL': '', 'gnomAD_SAS_AF': '1', 'REFSEQ_MATCH': '', 'PUBMED': '', 'BIOTYPE': 'protein_coding', 'EXON': '1/8', 'SOMATIC': '', 'MOTIF_POS': '', 'CANONICAL': ''} + print(utils.get_protein_hgvs(annotation)) assert False diff --git a/backend/run_pytest.sh b/backend/run_pytest.sh new file mode 100755 index 000000000..95bda78cb --- /dev/null +++ b/backend/run_pytest.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +BROWSER=modules/browser + +py.test . --cov==${BROWSER}/lookups --cov==${BROWSER}/utils diff --git a/backend/test_requirements.txt b/backend/test_requirements.txt new file mode 100644 index 000000000..c75c448bb --- /dev/null +++ b/backend/test_requirements.txt @@ -0,0 +1 @@ +pytest-cov From e6b3cd5d3cd7e90a3711c7486df45526d4924222 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 28 Jan 2019 16:15:25 +0100 Subject: [PATCH 139/360] fix slight error --- backend/run_pytest.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/run_pytest.sh b/backend/run_pytest.sh index 95bda78cb..cce3d98bf 100755 --- a/backend/run_pytest.sh +++ b/backend/run_pytest.sh @@ -1,5 +1,5 @@ #!/bin/sh -BROWSER=modules/browser +BROWSER=modules/browser/ -py.test . --cov==${BROWSER}/lookups --cov==${BROWSER}/utils +py.test . 
--cov=${BROWSER} From 7d73335266c816ad21cbb074c16504c01932b7fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 29 Jan 2019 14:02:19 +0100 Subject: [PATCH 140/360] more tests added, complete coverage in lookups; also some fixes due to problems found during testing --- backend/modules/browser/browser_handlers.py | 2 +- backend/modules/browser/lookups.py | 85 ++++++------ backend/modules/browser/tests/.coveragerc | 5 + backend/modules/browser/tests/test_lookups.py | 122 ++++++++++++------ backend/run_pytest.sh | 3 +- 5 files changed, 124 insertions(+), 93 deletions(-) create mode 100644 backend/modules/browser/tests/.coveragerc diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 66319723c..4e1db8a6d 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -192,7 +192,7 @@ def get(self, dataset, transcript): ret['exons'] += [{'start':exon['start'], 'stop':exon['stop'], 'type':exon['feature_type']}] # Add gene information - gene = lookups.get_gene_by_dbid(dataset, transcript['gene']) + gene = lookups.get_gene_by_dbid(transcript['gene']) ret['gene']['id'] = gene['gene_id'] ret['gene']['name'] = gene['name'] ret['gene']['full_name'] = gene['full_name'] diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 639896429..abf691a87 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -135,11 +135,11 @@ def get_coverage_for_bases(dataset, chrom, start_pos, end_pos=None, ds_version=N ds_version (str): version of the dataset Returns: - list: coverage dicts for the region of interest: None if unable to retrieve + list: coverage dicts for the region of interest. None if failed """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return + return None if end_pos is None: end_pos = start_pos @@ -172,7 +172,7 @@ def get_coverage_for_transcript(dataset, chrom, start_pos, end_pos=None, ds_vers # only return coverages that have coverage (if that makes any sense?) 
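# (Each row in coverage_array is a dict with a 'mean' key; a mean of 0 or
# None marks an uncovered position, so the comprehension below keeps only
# bases that actually have coverage data instead of the full, sparse array.)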
# return coverage_array if not coverage_array: - return + return None covered = [c for c in coverage_array if c['mean']] return covered @@ -191,7 +191,7 @@ def get_exons_in_transcript(dataset, transcript_id): ref_dbid = db.get_reference_dbid_dataset(dataset) if not ref_dbid: logging.error('get_exons_in_transcript({}, {}): unable to find dataset dbid'.format(dataset, transcript_id)) - return + return None try: transcript = (db.Transcript .select() @@ -201,7 +201,7 @@ def get_exons_in_transcript(dataset, transcript_id): .get()) except db.Transcript.DoesNotExist: logging.error('get_exons_in_transcript({}, {}): unable to retrieve transcript'.format(dataset, transcript_id)) - return + return None wanted_types = ('CDS', 'UTR', 'exon') return sorted(list(db.Feature.select().where((db.Feature.transcript == transcript) & (db.Feature.feature_type in wanted_types)).dicts()), @@ -229,24 +229,22 @@ def get_gene(dataset, gene_id): return {} -def get_gene_by_dbid(dataset, gene_dbid): +def get_gene_by_dbid(gene_dbid): """ Retrieve gene by gene database id Args: - dataset (str): short name of the dataset gene_dbid (str): the database id of the gene Returns: dict: values for the gene; empty if not found """ - ref_dbid = db.get_reference_dbid_dataset(dataset) - if not ref_dbid: - return {} try: return db.Gene.select().where(db.Gene.id == gene_dbid).dicts().get() except db.Gene.DoesNotExist: return {} + except ValueError: + return {} def get_gene_by_name(dataset, gene_name): @@ -292,16 +290,13 @@ def get_genes_in_region(dataset, chrom, start_pos, stop_pos): if not ref_dbid: return {} - try: - gene_query = db.Gene.select().where((db.Gene.reference_set == ref_dbid) & - ((((db.Gene.start >= start_pos) & - (db.Gene.start <= stop_pos)) | - ((db.Gene.stop >= start_pos) & - (db.Gene.stop <= stop_pos))) & - (db.Gene.chrom == chrom))).dicts() - return [gene for gene in gene_query] - except db.Gene.DoesNotExist: - logging.error('get_genes_in_region({}, {}, {}): no genes found'.format(chrom, start_pos, stop_pos)) + gene_query = db.Gene.select().where((db.Gene.reference_set == ref_dbid) & + ((((db.Gene.start >= start_pos) & + (db.Gene.start <= stop_pos)) | + ((db.Gene.stop >= start_pos) & + (db.Gene.stop <= stop_pos))) & + (db.Gene.chrom == chrom))).dicts() + return [gene for gene in gene_query] def get_number_of_variants_in_transcript(dataset, transcript_id, ds_version=None): @@ -314,11 +309,11 @@ def get_number_of_variants_in_transcript(dataset, transcript_id, ds_version=None ds_version (str): version of the dataset Returns: - dict: {filtered: nr_filtered, total: nr_total} + dict: {filtered: nr_filtered, total: nr_total}, None if error """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return + return None variants = get_variants_in_transcript(dataset, transcript_id) total = len(variants) @@ -344,7 +339,7 @@ def get_raw_variant(dataset, pos, chrom, ref, alt, ds_version=None): dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: return - + try: return (db.Variant .select() @@ -356,8 +351,8 @@ def get_raw_variant(dataset, pos, chrom, ref, alt, ds_version=None): .dicts() .get()) except db.Variant.DoesNotExist: - logging.error(('get_raw_variant({}, {}, {}, {}, {}, {})'.format(dataset, pos, chrom, ref, alt, dataset_version.id) + - ': unable to retrieve variant')) + logging.error('get_raw_variant({}, {}, {}, {}, {}, {}): unable to retrieve variant' + .format(dataset, pos, chrom, ref, alt, dataset_version.id)) return {} @@ -404,11 +399,12 @@ def 
get_transcripts_in_gene(dataset, gene_id): try: gene = db.Gene.select().where((db.Gene.reference_set == ref_dbid) & (db.Gene.gene_id == gene_id)).dicts().get() - return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene['id']).dicts()] - except db.Gene.DoesNotExist or db.Transcript.DoesNotExist: - logging.error('get_transcripts_in_gene({}, {}): unable to retrieve gene or transcript'.format(dataset, gene_id)) + except db.Gene.DoesNotExist: + logging.error('get_transcripts_in_gene({}, {}): unable to retrieve gene'.format(dataset, gene_id)) return [] + return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene['id']).dicts()] + def get_transcripts_in_gene_by_dbid(gene_dbid): """ @@ -418,11 +414,7 @@ def get_transcripts_in_gene_by_dbid(gene_dbid): Returns: list: transcripts (dict) associated with the gene; empty if no hits """ - try: - return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene_dbid).dicts()] - except db.Gene.DoesNotExist or db.Transcript.DoesNotExist: - logging.error('get_transcripts_in_gene({}): no matching transcripts'.format(gene_dbid)) - return [] + return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene_dbid).dicts()] def get_variant(dataset, pos, chrom, ref, alt, ds_version=None): @@ -441,18 +433,15 @@ def get_variant(dataset, pos, chrom, ref, alt, ds_version=None): Returns: dict: values for the variant; None if not found """ - try: - variant = get_raw_variant(dataset, pos, chrom, ref, alt, ds_version) - if not variant or 'rsid' not in variant: - return variant - if variant['rsid'] == '.' or variant['rsid'] is None: - add_rsid_to_variant(dataset, variant) - else: - if not str(variant['rsid']).startswith('rs'): - variant['rsid'] = 'rs{}'.format(variant['rsid']) + variant = get_raw_variant(dataset, pos, chrom, ref, alt, ds_version) + if not variant or 'rsid' not in variant: + return variant + if variant['rsid'] == '.' 
or variant['rsid'] is None: + add_rsid_to_variant(dataset, variant) + else: + if not str(variant['rsid']).startswith('rs'): + variant['rsid'] = 'rs{}'.format(variant['rsid']) return variant - except db.Variant.DoesNotExist: - return def get_variants_by_rsid(dataset, rsid, check_position=False, ds_version=None): @@ -528,14 +517,12 @@ def get_variants_in_gene(dataset, gene_id): """ ref_dbid = db.get_reference_dbid_dataset(dataset) gene = get_gene(dataset, gene_id) - # temporary while waiting for db fix - logging.error('Found gene {}'.format(gene)) #### remove when db is fixed gene['stop'] = gene['start'] + 20000 #### variants = get_variants_in_region(dataset, gene['chrom'], gene['start'], gene['stop']) - + # variants = [variant for variant in db.Variant.select().where(db.Variant.genes.contains(transcript_id)).dicts()] # for variant in variants: @@ -579,7 +566,7 @@ def get_variants_in_region(dataset, chrom, start_pos, end_pos, ds_version=None): variant['hom_count'] = 0 variant['filter'] = variant['filter_string'] ##### - + utils.add_consequence_to_variants(variants) for variant in variants: if variant['rsid']: @@ -601,6 +588,8 @@ def get_variants_in_transcript(dataset, transcript_id): dict: values for the variant; empty if not found """ transcript = get_transcript(dataset, transcript_id) + if not transcript: + return {} # temporary while waiting for db fix variants = get_variants_in_region(dataset, transcript['chrom'], transcript['start'], transcript['stop']) # variants = [variant for variant in db.Variant.select().where(db.Variant.transcripts.contains(transcript_id)).dicts()] diff --git a/backend/modules/browser/tests/.coveragerc b/backend/modules/browser/tests/.coveragerc new file mode 100644 index 000000000..fc2753f56 --- /dev/null +++ b/backend/modules/browser/tests/.coveragerc @@ -0,0 +1,5 @@ +[run] +omit = + # omit anything in a .local directory anywhere + */tests/* + */__init__.py \ No newline at end of file diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index 64e959ecb..ea559d809 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -104,29 +104,13 @@ def test_get_exons_in_transcript(caplog): """ Test get_exons_in_transcript() """ - result = lookups.get_exons_in_transcript('SweGen', 'ENST00000346817') - expected = [{'id': 326403, 'gene': 8600, 'transcript': 28186, 'chrom': '2', - 'start': 202047893, 'stop': 202048032, 'strand': '+', 'feature_type': 'exon'}, - {'id': 326404, 'gene': 8600, 'transcript': 28186, 'chrom': '2', - 'start': 202050495, 'stop': 202050848, 'strand': '+', 'feature_type': 'exon'}, - {'id': 326406, 'gene': 8600, 'transcript': 28186, 'chrom': '2', - 'start': 202052430, 'stop': 202052523, 'strand': '+', 'feature_type': 'exon'}, - {'id': 326408, 'gene': 8600, 'transcript': 28186, 'chrom': '2', - 'start': 202057708, 'stop': 202057843, 'strand': '+', 'feature_type': 'exon'}, - {'id': 326410, 'gene': 8600, 'transcript': 28186, 'chrom': '2', - 'start': 202060566, 'stop': 202060672, 'strand': '+', 'feature_type': 'exon'}, - {'id': 326412, 'gene': 8600, 'transcript': 28186, 'chrom': '2', - 'start': 202072799, 'stop': 202072907, 'strand': '+', 'feature_type': 'exon'}, - {'id': 326414, 'gene': 8600, 'transcript': 28186, 'chrom': '2', - 'start': 202073794, 'stop': 202074286, 'strand': '+', 'feature_type': 'exon'}, - {'id': 326416, 'gene': 8600, 'transcript': 28186, 'chrom': '2', - 'start': 202082312, 'stop': 202084804, 'strand': '+', 'feature_type': 'exon'}] 
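# (The exact-match fixture above is dropped in favour of a plain length check;
# the 'id' values look like auto-generated database keys, so a full-row
# fixture would presumably break whenever the reference data is reloaded.)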
- assert result == expected + result = lookups.get_exons_in_transcript('SweGen', 'ENST00000215855') + assert len(result) == 14 # bad dataset - result = lookups.get_exons_in_transcript('NO_DATASET', 'ENST00000346817') + result = lookups.get_exons_in_transcript('NO_DATASET', 'ENST00000215855') assert not result - assert caplog.messages[0] == 'get_exons_in_transcript(NO_DATASET, ENST00000346817): unable to find dataset dbid' + assert caplog.messages[0] == 'get_exons_in_transcript(NO_DATASET, ENST00000215855): unable to find dataset dbid' # bad transcript result = lookups.get_exons_in_transcript('SweGen', 'BAD_TRANSCRIPT') @@ -142,23 +126,15 @@ def test_get_gene(): expected = {'id': 1, 'reference_set': 1, 'gene_id': 'ENSG00000223972', - 'gene_name': 'DDX11L1', + 'name': 'DDX11L1', 'full_name': 'DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1', 'canonical_transcript': 'ENST00000456328', 'chrom': '1', - 'start_pos': 11870, + 'start': 11870, 'strand': '+'} result = lookups.get_gene('SweGen', 'ENSG00000223972') - print(result) - assert result['id'] == expected['id'] - assert result['reference_set'] == expected['reference_set'] - assert result['gene_id'] == expected['gene_id'] - assert result['name'] == expected['gene_name'] - assert result['full_name'] == expected['full_name'] - assert result['canonical_transcript'] == expected['canonical_transcript'] - assert result['chrom'] == expected['chrom'] - assert result['start'] == expected['start_pos'] - assert result['strand'] == expected['strand'] + for val in expected: + assert result[val] == expected[val] # non-existing gene result = lookups.get_gene('SweGen', 'NOT_A_GENE') @@ -169,6 +145,30 @@ def test_get_gene(): assert not result +def test_get_gene_by_dbid(): + """ + Test get_gene_by_dbid() + """ + # normal entry + expected = {'id': 53626, + 'reference_set': 1, + 'gene_id': 'ENSG00000226444', + 'name': 'ACTR3BP6', + 'full_name': 'ACTR3B pseudogene 6', + 'canonical_transcript': 'ENST00000421366', + 'chrom': '22', + 'start': 16967411, + 'strand': '+'} + result = lookups.get_gene_by_dbid(53626) + for val in expected: + assert result[val] == expected[val] + + # non-existing genes + result = lookups.get_gene_by_dbid('NOT_A_GENE') + assert not result + result = lookups.get_gene_by_dbid(-1) + assert not result + def test_get_gene_by_name(caplog): """ Test get_gene_by_name() @@ -212,18 +212,34 @@ def test_get_genes_in_region(): """ Test get_genes_in_region() """ - res = lookups.get_genes_in_region('SweGen', '4', 99080000, 99210000) # stop_pos missing in db, so needs to be updated when available - # exp_names = - assert False + # normal + res = lookups.get_genes_in_region('SweGen', '22', 25595800, 25615800) + expected_names = set(['ENSG00000100053', 'ENSG00000236641', 'ENSG00000244752']) + names = set(gene['gene_id'] for gene in res) + assert names == expected_names + # bad dataset + res = lookups.get_genes_in_region('bad_dataset', '22', 25595800, 25615800) + # nothing found + res = lookups.get_genes_in_region('SweGen', '22', 25595800, 25595801) + assert not res def test_get_number_of_variants_in_transcript(): """ Test get_number_of_variants_in_transcripts() """ - assert False - lookups.get_number_of_variants_in_transcripts() + # normal + res = lookups.get_number_of_variants_in_transcript('SweGen', 'ENST00000424770') + assert res == {'filtered': 1, 'total': 23} + + # bad transcript + res = lookups.get_number_of_variants_in_transcript('SweGen', 'ENSTASDSADA') + assert res == {'filtered': 0, 'total': 0} + + # bad dataset + res = 
lookups.get_number_of_variants_in_transcript('bad_dataset', 'ENST00000424770') + assert res is None def test_get_transcript(): @@ -281,6 +297,9 @@ def test_get_transcripts_in_gene(): 'chrom': '1', 'start': 228320, 'stop': 228776, 'strand': '-'}] assert res == expected + assert not lookups.get_transcripts_in_gene('bad_dataset', 'ENSG00000241670') + assert not lookups.get_transcripts_in_gene('SweGen', 'ENSGASDFG') + def test_get_raw_variant(): """ @@ -290,8 +309,19 @@ def test_get_raw_variant(): assert result['genes'] == ['ENSG00000169174'] assert result['transcripts'] == ['ENST00000302118'] assert not lookups.get_raw_variant('SweGen', 55500281, '1', 'A', 'T') + assert not lookups.get_raw_variant('bad_dataset', 55500283, '1', 'A', 'T') +def test_get_transcripts_in_gene_by_dbid(): + """ + Test get_transcripts_in_gene_by_dbid() + """ + res = lookups.get_transcripts_in_gene_by_dbid(53626) + assert len(res) == 2 + res = lookups.get_transcripts_in_gene_by_dbid(-1) + assert not res + + def test_get_variant(): """ Test get_variant() @@ -327,9 +357,6 @@ def test_get_variants_by_rsid(caplog): assert result[0]['pos'] == 16080482 assert result[0]['genes'] == ['ENSG00000229286', 'ENSG00000235265'] assert result[0]['transcripts'] == ['ENST00000448070','ENST00000413156'] - print(type(result[0]['vep_annotations'])) - print(result[0]['vep_annotations']) - assert False # by position result = lookups.get_variants_by_rsid('SweGen', 'rs373706802', check_position=True) @@ -352,6 +379,16 @@ def test_get_variants_by_rsid(caplog): assert not lookups.get_variants_by_rsid('SweGen', 'rs1') +def test_get_variants_in_gene(): + """ + Test get_variants_in_gene() + """ + res = lookups.get_variants_in_gene('SweGen', 'ENSG00000198062') + assert len(res) == 1185 + assert not lookups.get_variants_in_gene('bad_dataset', 'ENSG00000198062') + assert not lookups.get_variants_in_gene('bad_dataset', 'ENSGASDFG') + + def test_get_variants_in_region(): """ Test get_variants_in_region() @@ -374,6 +411,5 @@ def test_get_variants_in_transcript(): """ Test get_variants_in_transcript() """ - # res = lookups.get_variants_in_transcript('ENST00000302118') - # assert len(res) == 426 - assert False + res = lookups.get_variants_in_transcript('SweGen', 'ENST00000452800') + assert len(res) == 1414 diff --git a/backend/run_pytest.sh b/backend/run_pytest.sh index cce3d98bf..27152697b 100755 --- a/backend/run_pytest.sh +++ b/backend/run_pytest.sh @@ -2,4 +2,5 @@ BROWSER=modules/browser/ -py.test . --cov=${BROWSER} +py.test . 
--cov=${BROWSER} --cov-config=modules/browser/tests/.coveragerc + From e4841f32dd98d929314e92c2c6e947e891532835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 29 Jan 2019 15:55:25 +0100 Subject: [PATCH 141/360] More tests for utils, including some fixing in utils --- backend/modules/browser/tests/test_utils.py | 23 ++++++++++++++++++--- backend/modules/browser/utils.py | 20 +++++++++++++++--- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/backend/modules/browser/tests/test_utils.py b/backend/modules/browser/tests/test_utils.py index df560c615..85001c26d 100644 --- a/backend/modules/browser/tests/test_utils.py +++ b/backend/modules/browser/tests/test_utils.py @@ -48,6 +48,14 @@ def test_annotation_severity(): assert res == -26.9 +def test_data_structures(): + """ + Test the constants + """ + assert len(utils.CSQ_ORDER) == len(set(utils.CSQ_ORDER)) # No duplicates + assert all(csq == utils.REV_CSQ_ORDER_DICT[utils.CSQ_ORDER_DICT[csq]] for csq in utils.CSQ_ORDER) + + def test_get_flags_from_variant(): """ Test get_flags_from_variant() @@ -79,9 +87,18 @@ def test_get_protein_hgvs(): """ Test get_protein_hgvs() """ - annotation = {'MAX_AF_POPS': 'AA&gnomAD_AMR&gnomAD_ASJ&gnomAD_EAS&gnomAD_OTH&gnomAD_SAS&AFR&AMR&EAS&EUR&SAS', 'TSL': '', 'APPRIS': '', 'gnomAD_ASJ_AF': '1', 'AMR_AF': '1', 'SYMBOL': 'ADH6', 'AFR_AF': '1', 'Feature': 'ENST00000237653', 'Codons': 'Tgt/Agt', 'MOTIF_NAME': '', 'DOMAINS': 'hmmpanther:PTHR11695:SF307&hmmpanther:PTHR11695&Gene3D:3.90.180.10', 'SIFT': 'tolerated(1)', 'VARIANT_CLASS': 'SNV', 'EA_AF': '0.9995', 'CDS_position': '4', 'CCDS': 'CCDS3647.1', 'Allele': 'T', 'PolyPhen': 'benign(0)', 'AA_AF': '1', 'gnomAD_EAS_AF': '1', 'IMPACT': 'MODERATE', 'HGVSp': '', 'ENSP': 'ENSP00000237653', 'MAX_AF': '1', 'LoF': '', 'INTRON': '', 'gnomAD_FIN_AF': '0.9999', 'Existing_variation': 'rs4699735', 'HGVSc': '', 'SOURCE': 'Ensembl', 'LoF_filter': '', 'gnomAD_AF': '0.9998', 'gnomAD_AMR_AF': '1', 'GENE_PHENO': '', 'gnomAD_OTH_AF': '1', 'LoF_flags': '', 'MOTIF_SCORE_CHANGE': '', 'UNIPARC': 'UPI00001AE69C', 'cDNA_position': '389', 'ALLELE_NUM': '1', 'EAS_AF': '1', 'Feature_type': 'Transcript', 'AF': '1', 'gnomAD_AFR_AF': '0.9999', 'HGNC_ID': '255', 'SAS_AF': '1', 'LoF_info': '', 'SWISSPROT': 'P28332', 'FLAGS': '', 'miRNA': '', 'Consequence': 'missense_variant', 'Protein_position': '2', 'Gene': 'ENSG00000172955', 'HIGH_INF_POS': '', 'STRAND': '-1', 'gnomAD_NFE_AF': '0.9995', 'EUR_AF': '1', 'DISTANCE': '', 'CLIN_SIG': '', 'PHENO': '', 'SYMBOL_SOURCE': 'HGNC', 'Amino_acids': 'C/S', 'TREMBL': '', 'gnomAD_SAS_AF': '1', 'REFSEQ_MATCH': '', 'PUBMED': '', 'BIOTYPE': 'protein_coding', 'EXON': '1/8', 'SOMATIC': '', 'MOTIF_POS': '', 'CANONICAL': ''} - print(utils.get_protein_hgvs(annotation)) - assert False + annotation = {'HGVSc': 'ENST00000343518.6:c.35C>T', + 'HGVSp': 'ENSP00000340610.6:p.Ser12Phe'} + result = utils.get_protein_hgvs(annotation) + assert result == 'p.Ser12Phe' + annotation = {'HGVSc': 'ENST00000343518.6:c.27G>A', + 'HGVSp': 'ENST00000343518.6:c.27G>A(p.%3D)', + 'Protein_position': '9', + 'Amino_acids': 'P'} + result = utils.get_protein_hgvs(annotation) + assert result == 'p.Pro9Pro' + annotation['Amino_acids'] = 'Z' + assert not utils.get_protein_hgvs(annotation) def test_get_transcript_hgvs(): diff --git a/backend/modules/browser/utils.py b/backend/modules/browser/utils.py index 833d14023..8396ff643 100644 --- a/backend/modules/browser/utils.py +++ b/backend/modules/browser/utils.py @@ -48,11 +48,9 @@ "feature_truncation", 
"intergenic_variant", ""] -assert len(CSQ_ORDER) == len(set(CSQ_ORDER)) # No dupplicates CSQ_ORDER_DICT = {csq:i for i,csq in enumerate(CSQ_ORDER)} REV_CSQ_ORDER_DICT = dict(enumerate(CSQ_ORDER)) -assert all(csq == REV_CSQ_ORDER_DICT[CSQ_ORDER_DICT[csq]] for csq in CSQ_ORDER) METRICS = [ 'BaseQRankSum', @@ -165,6 +163,15 @@ def get_flags_from_variant(variant): def get_proper_hgvs(csq): + """ + Get HGVS for change, either at transcript or protein level + + Args: + annotation (dict): VEP annotation with HGVS information + + Returns: + str: variant effect at aa level in HGVS format (p.), None if parsing fails + """ # Needs major_consequence if csq['major_consequence'] in ('splice_donor_variant', 'splice_acceptor_variant', 'splice_region_variant'): return get_transcript_hgvs(csq) @@ -174,7 +181,13 @@ def get_proper_hgvs(csq): def get_protein_hgvs(annotation): """ - Takes consequence dictionary, returns proper variant formatting for synonymous variants + Aa changes in HGVS format + + Args: + annotation (dict): VEP annotation with HGVS information + + Returns: + str: variant effect at aa level in HGVS format (p.), None if parsing fails """ if '%3D' in annotation['HGVSp']: # "%3D" is "=" try: @@ -182,6 +195,7 @@ def get_protein_hgvs(annotation): return "p." + amino_acids + annotation['Protein_position'] + amino_acids except KeyError: logging.error("Could not fetch protein hgvs - unknown amino acid") + return None return annotation['HGVSp'].split(':')[-1] From 309bcdbd8a691d96cedd860243c3e5a7bab5f697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 30 Jan 2019 10:46:46 +0100 Subject: [PATCH 142/360] incorrect indentation of a return statement --- backend/modules/browser/lookups.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index abf691a87..f03f651fb 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -13,7 +13,6 @@ def add_rsid_to_variant(dataset, variant): """ Add rsid to a variant in the database based on position - Note that this may be inaccurate Args: dataset (str): short name of the dataset @@ -441,7 +440,7 @@ def get_variant(dataset, pos, chrom, ref, alt, ds_version=None): else: if not str(variant['rsid']).startswith('rs'): variant['rsid'] = 'rs{}'.format(variant['rsid']) - return variant + return variant def get_variants_by_rsid(dataset, rsid, check_position=False, ds_version=None): From 2325da3431169dc41888e1d75fbead1fe2d9c4b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 30 Jan 2019 10:47:11 +0100 Subject: [PATCH 143/360] 100% coverage, all passing in utils --- backend/modules/browser/tests/test_utils.py | 34 ++++++- backend/modules/browser/utils.py | 101 ++++++++++++++------ 2 files changed, 99 insertions(+), 36 deletions(-) diff --git a/backend/modules/browser/tests/test_utils.py b/backend/modules/browser/tests/test_utils.py index 85001c26d..fe5be6dbc 100644 --- a/backend/modules/browser/tests/test_utils.py +++ b/backend/modules/browser/tests/test_utils.py @@ -80,8 +80,14 @@ def test_get_proper_hgvs(): """ Test get_proper_hgvs() """ - assert False - + annotation = {'HGVSc': 'ENST00000343518.6:c.35C>T', + 'HGVSp': 'ENSP00000340610.6:p.Ser12Phe', + 'major_consequence': 'splice_donor_variant'} + assert utils.get_proper_hgvs(annotation) == 'c.35C>T' + annotation['major_consequence'] = 'coding_sequence_variant' + assert utils.get_proper_hgvs(annotation) == 'p.Ser12Phe' + assert not 
utils.get_proper_hgvs(dict()) + def test_get_protein_hgvs(): """ @@ -99,27 +105,45 @@ def test_get_protein_hgvs(): assert result == 'p.Pro9Pro' annotation['Amino_acids'] = 'Z' assert not utils.get_protein_hgvs(annotation) + assert not utils.get_protein_hgvs(dict()) def test_get_transcript_hgvs(): """ Test get_transcript_hgvs() + """ - assert False + annotation = {'HGVSc': 'ENST00000343518.6:c.35C>T', + 'HGVSp': 'ENSP00000340610.6:p.Ser12Phe'} + assert utils.get_transcript_hgvs(annotation) == 'c.35C>T' + assert not utils.get_transcript_hgvs(dict()) def test_order_vep_by_csq(): """ Test order_vep_by_csq() """ - assert False + annotation = [{'Consequence': 'frameshift_variant'}, + {'Consequence': 'transcript_ablation'}, + {'Consequence': 'mature_miRNA_variant'}] + expected = [{'Consequence': 'transcript_ablation', + 'major_consequence': 'transcript_ablation'}, + {'Consequence': 'frameshift_variant', + 'major_consequence': 'frameshift_variant'}, + {'Consequence': 'mature_miRNA_variant', + 'major_consequence': 'mature_miRNA_variant'}] + result = utils.order_vep_by_csq(annotation) + assert result == expected + assert utils.order_vep_by_csq([dict()]) == [{'major_consequence': ''}] def test_remove_extraneous_vep_annotations(): """ Test remove_extraneous_vep_annotations() """ - assert False + annotation = [{'Consequence': 'frameshift_variant'}, + {'Consequence': 'feature_elongation&TF_binding_site_variant'}] + assert utils.remove_extraneous_vep_annotations(annotation) == [{'Consequence': 'frameshift_variant'}] def test_worst_csq_from_csq(): diff --git a/backend/modules/browser/utils.py b/backend/modules/browser/utils.py index 8396ff643..b2b896d09 100644 --- a/backend/modules/browser/utils.py +++ b/backend/modules/browser/utils.py @@ -122,7 +122,7 @@ def add_consequence_to_variant(variant): def annotation_severity(annotation): """ - Evaluate severity of the consequences; "bigger is more important" + Evaluate severity of the consequences; "bigger is more important". Args: annotation (dict): vep_annotation from a variant @@ -139,9 +139,9 @@ def annotation_severity(annotation): def get_flags_from_variant(variant): """ Get flags from variant. - checks for: - - MNP (identical length of reference and variant) - - LoF (loss of function) + Checks for: + - MNP (identical length of reference and variant) + - LoF (loss of function) Args: variant (dict): a variant @@ -162,9 +162,9 @@ def get_flags_from_variant(variant): return flags -def get_proper_hgvs(csq): +def get_proper_hgvs(annotation): """ - Get HGVS for change, either at transcript or protein level + Get HGVS for change, either at transcript or protein level. Args: annotation (dict): VEP annotation with HGVS information @@ -173,15 +173,19 @@ def get_proper_hgvs(csq): str: variant effect at aa level in HGVS format (p.), None if parsing fails """ # Needs major_consequence - if csq['major_consequence'] in ('splice_donor_variant', 'splice_acceptor_variant', 'splice_region_variant'): - return get_transcript_hgvs(csq) - - return get_protein_hgvs(csq) + try: + if annotation['major_consequence'] in ('splice_donor_variant', + 'splice_acceptor_variant', + 'splice_region_variant'): + return get_transcript_hgvs(annotation) + return get_protein_hgvs(annotation) + except KeyError: + return None def get_protein_hgvs(annotation): """ - Aa changes in HGVS format + Aa changes in HGVS format. 
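    For synonymous variants VEP escapes "=" as "%3D" in HGVSp, so the
    protein notation is rebuilt from Protein_position and Amino_acids
    instead; for example (values as in test_get_protein_hgvs above):

        get_protein_hgvs({'HGVSp': 'ENST00000343518.6:c.27G>A(p.%3D)',
                          'Protein_position': '9',
                          'Amino_acids': 'P'})   # returns 'p.Pro9Pro'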
Args: annotation (dict): VEP annotation with HGVS information @@ -189,39 +193,73 @@ def get_protein_hgvs(annotation): Returns: str: variant effect at aa level in HGVS format (p.), None if parsing fails """ - if '%3D' in annotation['HGVSp']: # "%3D" is "=" - try: - amino_acids = ''.join([PROTEIN_LETTERS_1TO3[x] for x in annotation['Amino_acids']]) - return "p." + amino_acids + annotation['Protein_position'] + amino_acids - except KeyError: - logging.error("Could not fetch protein hgvs - unknown amino acid") - return None - return annotation['HGVSp'].split(':')[-1] + try: + if '%3D' in annotation['HGVSp']: # "%3D" is "=" + amino_acids = ''.join([PROTEIN_LETTERS_1TO3[aa] for aa in annotation['Amino_acids']]) + return "p." + amino_acids + annotation['Protein_position'] + amino_acids + return annotation['HGVSp'].split(':')[-1] + except KeyError: + logging.error("Could not fetch protein hgvs") + return None -def get_transcript_hgvs(csq): - return csq['HGVSc'].split(':')[-1] +def get_transcript_hgvs(annotation): + """ + Nucleotide change in HGVS format. + Args: + annotation (dict): VEP annotation with HGVS information -def order_vep_by_csq(annotation_list): + Returns: + str: variant effect at nucleotide level in HGVS format (c.), None if parsing fails + """ + try: + return annotation['HGVSc'].split(':')[-1] + except KeyError: + return None + + +def order_vep_by_csq(annotation_list: list): """ - Adds "major_consequence" to each annotation. - Returns them ordered from most deleterious to least. + Adds "major_consequence" to each annotation, orders by severity. + + Args: + annotation_list (list): VEP annotations (as dict) + + Returns: + list: annotations ordered by major consequence severity """ for ann in annotation_list: - ann['major_consequence'] = worst_csq_from_csq(ann['Consequence']) + try: + ann['major_consequence'] = worst_csq_from_csq(ann['Consequence']) + except KeyError: + ann['major_consequence'] = '' return sorted(annotation_list, key=(lambda ann:CSQ_ORDER_DICT[ann['major_consequence']])) -def remove_extraneous_vep_annotations(annotation_list): - return [ann for ann in annotation_list if worst_csq_index(ann['Consequence'].split('&')) <= CSQ_ORDER_DICT['intron_variant']] +def remove_extraneous_vep_annotations(annotation_list: list): + """ + Remove annotations with low-impact consequences (less than intron variant) + + Args: + annotation_list (list): VEP annotations (as dict) + + Returns: + list: VEP annotations with higher impact + """ + return [ann for ann in annotation_list + if worst_csq_index(ann['Consequence'].split('&')) <= CSQ_ORDER_DICT['intron_variant']] def worst_csq_from_list(csq_list): """ - Input list of consequences (e.g. 
['frameshift_variant', 'missense_variant']) - Return the worst consequence (In this case, 'frameshift_variant') - Works well with worst_csq_from_list('non_coding_exon_variant&nc_transcript_variant'.split('&')) + Choose the worst consequence + + Args: + csq_list (list): list of consequences + + Returns: + str: the worst consequence """ return REV_CSQ_ORDER_DICT[worst_csq_index(csq_list)] @@ -256,12 +294,13 @@ def worst_csq_index(csq_list): def worst_csq_with_vep(annotation_list): """ Choose the vep annotation with the most severe consequence + Adds a"major_consequence" field for that annotation Args: annotation_list (list): VEP annotations Returns: - dict: the annotation with the most severe consequence; also adds "major_consequence" for that annotation + dict: the annotation with the most severe consequence """ if not annotation_list: return None From b14ba703a3803e8bb4d2e52004de13312d0b18b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 31 Jan 2019 08:45:30 +0100 Subject: [PATCH 144/360] pylint fixes in lookups --- backend/modules/browser/lookups.py | 23 +++++++++---------- backend/modules/browser/tests/test_lookups.py | 13 +++++------ 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index f03f651fb..f162ed0a8 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -1,4 +1,3 @@ - import json # remove when db is fixed import logging import re @@ -333,11 +332,11 @@ def get_raw_variant(dataset, pos, chrom, ref, alt, ds_version=None): ds_version (str): dataset version Returns: - dict: values for the variant; empty if not found + dict: values for the variant; None if not found """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return + return None try: return (db.Variant @@ -352,7 +351,7 @@ def get_raw_variant(dataset, pos, chrom, ref, alt, ds_version=None): except db.Variant.DoesNotExist: logging.error('get_raw_variant({}, {}, {}, {}, {}, {}): unable to retrieve variant' .format(dataset, pos, chrom, ref, alt, dataset_version.id)) - return {} + return None def get_transcript(dataset, transcript_id): @@ -455,21 +454,21 @@ def get_variants_by_rsid(dataset, rsid, check_position=False, ds_version=None): ds_version (str): version of the dataset Returns: - list: variant dicts; no hits + list: variants as dict; no hits returns None """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return + return None if not rsid.startswith('rs'): logging.error('get_variants_by_rsid({}, {}): rsid not starting with rs'.format(dataset, rsid)) - return + return None try: rsid = int(rsid.lstrip('rs')) except ValueError: logging.error('get_variants_by_rsid({}, {}): not an integer after rs'.format(dataset, rsid)) - return + return None if check_position: refset = (db.Dataset .select(db.ReferenceSet) @@ -544,11 +543,11 @@ def get_variants_in_region(dataset, chrom, start_pos, end_pos, ds_version=None): ds_version (str): version of the dataset Returns: - list: variant dicts + list: variant dicts, None if no hits """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return + return None query = (db.Variant .select() .where((db.Variant.pos >= start_pos) & @@ -584,11 +583,11 @@ def get_variants_in_transcript(dataset, transcript_id): transcript_id (str): id of the transcript (ENST) Returns: - dict: values for the variant; empty if not found + dict: values for the 
variant; None if not found """ transcript = get_transcript(dataset, transcript_id) if not transcript: - return {} + return None # temporary while waiting for db fix variants = get_variants_in_region(dataset, transcript['chrom'], transcript['start'], transcript['stop']) # variants = [variant for variant in db.Variant.select().where(db.Variant.transcripts.contains(transcript_id)).dicts()] diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index ea559d809..c4d34c768 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -17,7 +17,7 @@ def test_add_rsid_to_variant(): assert variant['rsid'] == rsid # "non-existing" del variant['rsid'] - lookups.add_rsid_to_variant(variant) + lookups.add_rsid_to_variant('SweGen', variant) assert variant['rsid'] == rsid @@ -45,7 +45,7 @@ def test_get_awesomebar_result(): assert result == ('not_found', 'DOES_NOT_EXIST') -def test_get_coverage_for_bases(caplog): +def test_get_coverage_for_bases(): """ Test get_coverage_for_bases() """ @@ -206,7 +206,7 @@ def test_get_gene_by_name(caplog): # name in other_names result = lookups.get_gene_by_name('SweGen', 'NIR') assert result['gene_id'] == 'ENSG00000188976' - + def test_get_genes_in_region(): """ @@ -299,7 +299,7 @@ def test_get_transcripts_in_gene(): assert not lookups.get_transcripts_in_gene('bad_dataset', 'ENSG00000241670') assert not lookups.get_transcripts_in_gene('SweGen', 'ENSGASDFG') - + def test_get_raw_variant(): """ @@ -321,7 +321,7 @@ def test_get_transcripts_in_gene_by_dbid(): res = lookups.get_transcripts_in_gene_by_dbid(-1) assert not res - + def test_get_variant(): """ Test get_variant() @@ -342,10 +342,9 @@ def test_get_variant(): 'ENST00000491666', 'ENST00000472607', 'ENST00000475776'] assert result['rsid'] == 'rs75050571' - # TODO: add test for entry with missing rsid # incorrect position - assert not lookups.get_variant(-1, '1', 'A', 'T') + assert not lookups.get_variant('SweGen', -1, '1', 'A', 'T') def test_get_variants_by_rsid(caplog): From cc2fe67a774346d245b7e7b60167d93eb4da8bd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 31 Jan 2019 08:50:03 +0100 Subject: [PATCH 145/360] pylint fixes in utils --- backend/modules/browser/tests/test_utils.py | 10 ++++----- backend/modules/browser/utils.py | 25 ++++++++++----------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/backend/modules/browser/tests/test_utils.py b/backend/modules/browser/tests/test_utils.py index fe5be6dbc..bb74f5693 100644 --- a/backend/modules/browser/tests/test_utils.py +++ b/backend/modules/browser/tests/test_utils.py @@ -31,12 +31,12 @@ def test_add_consequence_to_variant(): variant['vep_annotations'] = json.loads(variant['vep_annotations']) # remove when db is fixed utils.add_consequence_to_variant(variant) assert variant['major_consequence'] == 'intron_variant' - + variant2 = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') variant2['vep_annotations'] = json.loads(variant2['vep_annotations']) # remove when db is fixed utils.add_consequence_to_variant(variant2) assert variant2['major_consequence'] == 'upstream_gene_variant' - + def test_annotation_severity(): """ @@ -46,7 +46,7 @@ def test_annotation_severity(): variant['vep_annotations'] = json.loads(variant['vep_annotations']) # remove when db is fixed res = utils.annotation_severity(variant['vep_annotations'][0]) assert res == -26.9 - + def test_data_structures(): """ @@ -55,7 +55,7 @@ def 
test_data_structures(): assert len(utils.CSQ_ORDER) == len(set(utils.CSQ_ORDER)) # No duplicates assert all(csq == utils.REV_CSQ_ORDER_DICT[utils.CSQ_ORDER_DICT[csq]] for csq in utils.CSQ_ORDER) - + def test_get_flags_from_variant(): """ Test get_flags_from_variant() @@ -87,7 +87,7 @@ def test_get_proper_hgvs(): annotation['major_consequence'] = 'coding_sequence_variant' assert utils.get_proper_hgvs(annotation) == 'p.Ser12Phe' assert not utils.get_proper_hgvs(dict()) - + def test_get_protein_hgvs(): """ diff --git a/backend/modules/browser/utils.py b/backend/modules/browser/utils.py index b2b896d09..c045f6536 100644 --- a/backend/modules/browser/utils.py +++ b/backend/modules/browser/utils.py @@ -1,5 +1,4 @@ import logging -from operator import itemgetter AF_BUCKETS = [0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1] @@ -139,7 +138,7 @@ def annotation_severity(annotation): def get_flags_from_variant(variant): """ Get flags from variant. - Checks for: + Checks for: - MNP (identical length of reference and variant) - LoF (loss of function) @@ -186,7 +185,7 @@ def get_proper_hgvs(annotation): def get_protein_hgvs(annotation): """ Aa changes in HGVS format. - + Args: annotation (dict): VEP annotation with HGVS information @@ -195,8 +194,8 @@ def get_protein_hgvs(annotation): """ try: if '%3D' in annotation['HGVSp']: # "%3D" is "=" - amino_acids = ''.join([PROTEIN_LETTERS_1TO3[aa] for aa in annotation['Amino_acids']]) - return "p." + amino_acids + annotation['Protein_position'] + amino_acids + amino_acids = ''.join([PROTEIN_LETTERS_1TO3[aa] for aa in annotation['Amino_acids']]) + return "p." + amino_acids + annotation['Protein_position'] + amino_acids return annotation['HGVSp'].split(':')[-1] except KeyError: logging.error("Could not fetch protein hgvs") @@ -212,7 +211,7 @@ def get_transcript_hgvs(annotation): Returns: str: variant effect at nucleotide level in HGVS format (c.), None if parsing fails - """ + """ try: return annotation['HGVSc'].split(':')[-1] except KeyError: @@ -222,7 +221,7 @@ def get_transcript_hgvs(annotation): def order_vep_by_csq(annotation_list: list): """ Adds "major_consequence" to each annotation, orders by severity. 
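    For example (the ordering asserted in test_order_vep_by_csq), as a
    doctest-style sketch:

        >>> order_vep_by_csq([{'Consequence': 'frameshift_variant'},
        ...                   {'Consequence': 'transcript_ablation'}])
        [{'Consequence': 'transcript_ablation', 'major_consequence': 'transcript_ablation'},
         {'Consequence': 'frameshift_variant', 'major_consequence': 'frameshift_variant'}]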
- + Args: annotation_list (list): VEP annotations (as dict) @@ -240,7 +239,7 @@ def order_vep_by_csq(annotation_list: list): def remove_extraneous_vep_annotations(annotation_list: list): """ Remove annotations with low-impact consequences (less than intron variant) - + Args: annotation_list (list): VEP annotations (as dict) @@ -254,7 +253,7 @@ def remove_extraneous_vep_annotations(annotation_list: list): def worst_csq_from_list(csq_list): """ Choose the worst consequence - + Args: csq_list (list): list of consequences @@ -266,11 +265,11 @@ def worst_csq_from_list(csq_list): def worst_csq_from_csq(csq): """ - Find worst consequence in a possibly &-filled consequence string + Find worst consequence in a possibly &-filled consequence string Args: csq (str): string of consequences, seperated with & (if multiple) - + Returns: str: the worst consequence """ @@ -297,8 +296,8 @@ def worst_csq_with_vep(annotation_list): Adds a"major_consequence" field for that annotation Args: - annotation_list (list): VEP annotations - + annotation_list (list): VEP annotations + Returns: dict: the annotation with the most severe consequence """ From baaf98274894378c379acacb79967bda69ecdd94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 31 Jan 2019 09:23:46 +0100 Subject: [PATCH 146/360] type hints added --- backend/modules/browser/lookups.py | 44 ++++++++++++++++-------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index f162ed0a8..6fd698d65 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -9,7 +9,7 @@ SEARCH_LIMIT = 10000 -def add_rsid_to_variant(dataset, variant): +def add_rsid_to_variant(dataset:str, variant:str): """ Add rsid to a variant in the database based on position @@ -41,7 +41,7 @@ def add_rsid_to_variant(dataset, variant): REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') -def get_awesomebar_result(dataset, query, ds_version=None): +def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): """ Parse the search input @@ -121,7 +121,7 @@ def get_awesomebar_result(dataset, query, ds_version=None): return 'not_found', query -def get_coverage_for_bases(dataset, chrom, start_pos, end_pos=None, ds_version=None): +def get_coverage_for_bases(dataset:str, chrom:str, start_pos:int, end_pos:int=None, ds_version:str=None): """ Get the coverage for the list of bases given by start_pos->end_pos, inclusive @@ -150,7 +150,7 @@ def get_coverage_for_bases(dataset, chrom, start_pos, end_pos=None, ds_version=N .dicts())] -def get_coverage_for_transcript(dataset, chrom, start_pos, end_pos=None, ds_version=None): +def get_coverage_for_transcript(dataset:str, chrom:str, start_pos:int, end_pos:int=None, ds_version:str=None): """ Get the coverage for the list of bases given by start_pos->end_pos, inclusive @@ -175,7 +175,7 @@ def get_coverage_for_transcript(dataset, chrom, start_pos, end_pos=None, ds_vers return covered -def get_exons_in_transcript(dataset, transcript_id): +def get_exons_in_transcript(dataset:str, transcript_id:str): """ Retrieve exons associated with the given transcript id @@ -206,7 +206,7 @@ def get_exons_in_transcript(dataset, transcript_id): key=lambda k: k['start']) -def get_gene(dataset, gene_id): +def get_gene(dataset:str, gene_id:str): """ Retrieve gene by gene id @@ -227,7 +227,7 @@ def get_gene(dataset, gene_id): return {} -def get_gene_by_dbid(gene_dbid): +def 
get_gene_by_dbid(gene_dbid:str): """ Retrieve gene by gene database id @@ -245,7 +245,7 @@ def get_gene_by_dbid(gene_dbid): return {} -def get_gene_by_name(dataset, gene_name): +def get_gene_by_name(dataset:str, gene_name:str): """ Retrieve gene by gene_name. First checks gene_name, then other_names. @@ -271,7 +271,7 @@ def get_gene_by_name(dataset, gene_name): return {} -def get_genes_in_region(dataset, chrom, start_pos, stop_pos): +def get_genes_in_region(dataset:str, chrom:str, start_pos:int, stop_pos:int): """ Retrieve genes located within a region @@ -297,7 +297,7 @@ def get_genes_in_region(dataset, chrom, start_pos, stop_pos): return [gene for gene in gene_query] -def get_number_of_variants_in_transcript(dataset, transcript_id, ds_version=None): +def get_number_of_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=None): """ Get the total and filtered amount of variants in a transcript @@ -314,12 +314,14 @@ def get_number_of_variants_in_transcript(dataset, transcript_id, ds_version=None return None variants = get_variants_in_transcript(dataset, transcript_id) + if not variants: + return None total = len(variants) filtered = len(tuple(variant for variant in variants if variant['filter_string'] == 'PASS')) return {'filtered': filtered, 'total': total} -def get_raw_variant(dataset, pos, chrom, ref, alt, ds_version=None): +def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_version:str=None): """ Retrieve variant by position and change @@ -354,7 +356,7 @@ def get_raw_variant(dataset, pos, chrom, ref, alt, ds_version=None): return None -def get_transcript(dataset, transcript_id): +def get_transcript(dataset:str, transcript_id:str): """ Retrieve transcript by transcript id Also includes exons as ['exons'] @@ -381,7 +383,7 @@ def get_transcript(dataset, transcript_id): return {} -def get_transcripts_in_gene(dataset, gene_id): +def get_transcripts_in_gene(dataset:str, gene_id:str): """ Get the transcripts associated with a gene Args: @@ -404,18 +406,18 @@ def get_transcripts_in_gene(dataset, gene_id): return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene['id']).dicts()] -def get_transcripts_in_gene_by_dbid(gene_dbid): +def get_transcripts_in_gene_by_dbid(gene_dbid:int): """ Get the transcripts associated with a gene Args: - gene_dbid (str): database id of the gene + gene_dbid (int): database id of the gene Returns: list: transcripts (dict) associated with the gene; empty if no hits """ return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene_dbid).dicts()] -def get_variant(dataset, pos, chrom, ref, alt, ds_version=None): +def get_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_version:str=None): """ Retrieve variant by position and change Retrieves rsid from db (if available) if not present in variant @@ -442,7 +444,7 @@ def get_variant(dataset, pos, chrom, ref, alt, ds_version=None): return variant -def get_variants_by_rsid(dataset, rsid, check_position=False, ds_version=None): +def get_variants_by_rsid(dataset:str, rsid:str, check_position:str=False, ds_version:str=None): """ Retrieve variants by their associated rsid May also look up rsid and search for variants at the position @@ -502,12 +504,12 @@ def get_variants_by_rsid(dataset, rsid, check_position=False, ds_version=None): return variants -def get_variants_in_gene(dataset, gene_id): +def get_variants_in_gene(dataset:str, gene_id:str): """ Retrieve variants present inside a gene Args: - dataset: short name 
of the dataset + dataset (str): short name of the dataset gene_id (str): id of the gene Returns: @@ -531,7 +533,7 @@ def get_variants_in_gene(dataset, gene_id): return variants -def get_variants_in_region(dataset, chrom, start_pos, end_pos, ds_version=None): +def get_variants_in_region(dataset:str, chrom:str, start_pos:int, end_pos:int, ds_version:str=None): """ Variants that overlap a region @@ -574,7 +576,7 @@ def get_variants_in_region(dataset, chrom, start_pos, end_pos, ds_version=None): return variants -def get_variants_in_transcript(dataset, transcript_id): +def get_variants_in_transcript(dataset:str, transcript_id:str): """ Retrieve variants inside a transcript From 3c0f9042f65b24e8ee9fb8d57df1717714338031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 1 Feb 2019 13:50:16 +0100 Subject: [PATCH 147/360] migrate the download function --- backend/modules/browser/browser_handlers.py | 24 ++++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 4e1db8a6d..b518e5d88 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -1,7 +1,6 @@ import json # remove when db is fixed import logging -import db import handlers from . import lookups @@ -17,7 +16,7 @@ class Autocomplete(handlers.UnsafeHandler): def get(self, dataset, query): ret = {} - results = pgsql.get_autocomplete(dataset, query) + results = pgsql.get_autocomplete(query) ret = {'values': sorted(list(set(results)))[:20]} self.finish( ret ) @@ -42,12 +41,21 @@ def get(self, dataset, datatype, item): class Download(handlers.UnsafeHandler): - def get(self, dataset, datatype, item): + def get(self, dataset: str, datatype, item, ds_version=None): + """ + Download variants as csv + + Args: + dataset (str): dataset short name + datatype (str): type of data + item (str): query item + ds_version (str): dataset version + """ filename = "{}_{}_{}.csv".format(dataset, datatype, item) self.set_header('Content-Type','text/csv') self.set_header('content-Disposition','attachement; filename={}'.format(filename)) - data = mongodb.get_variant_list(dataset, datatype, item) + data = pgsql.get_variant_list(dataset, datatype, item) # Write header self.write(','.join([h[1] for h in data['headers']]) + '\n') @@ -111,7 +119,7 @@ class GetRegion(handlers.UnsafeHandler): def get(self, dataset, region): """ Request information about genes in a region - + Args: dataset (str): short name of the dataset region (str): the region in the format chr-startpos-endpos @@ -135,7 +143,7 @@ def get(self, dataset, region): self.send_error(status_code=400) self.set_user_msg('Unable to parse region', 'error') return - + if not start: start = 0 if not stop and start: @@ -207,7 +215,7 @@ def get(self, dataset, transcript): class GetVariant(handlers.UnsafeHandler): """ Request information about a gene - """ + """ def get(self, dataset, variant): """ Request information about a gene @@ -229,7 +237,7 @@ def get(self, dataset, variant): return orig_variant = variant variant = lookups.get_variant(dataset, v[1], v[0], v[2], v[3]) - + if not variant: logging.error('Variant not found ({})'.format(orig_variant)) self.send_error(status_code=404) From 6dbccd40b1d11fab77b7c78e0698c62da7148117 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 1 Feb 2019 13:51:26 +0100 Subject: [PATCH 148/360] some fixes for tests, documentation additions --- 
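get_autocomplete() no longer takes the dataset argument, since the gene-name
prefix match never used it. For reference, a minimal sketch of the same lookup
(assumes the db.Gene peewee model and an initialized database connection;
autocomplete_gene_names is an illustrative name, not part of this patch):

    import db

    def autocomplete_gene_names(query: str, limit: int = 20) -> list:
        """Gene names starting with `query`, deduplicated and sorted."""
        # peewee compiles .startswith(query) to: WHERE name LIKE 'query%'
        genes = db.Gene.select(db.Gene.name).where(db.Gene.name.startswith(query))
        return sorted(set(gene.name for gene in genes))[:limit]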
backend/modules/browser/pgsql.py | 34 +++++++++++++------ backend/modules/browser/tests/test_lookups.py | 5 +-- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/backend/modules/browser/pgsql.py b/backend/modules/browser/pgsql.py index b4c1cd53f..9a7be7f38 100644 --- a/backend/modules/browser/pgsql.py +++ b/backend/modules/browser/pgsql.py @@ -10,23 +10,20 @@ EXON_PADDING = 50 -def get_autocomplete(dataset, query): +def get_autocomplete(query:str): """ Provide autocomplete suggestions based on the query - NOTE: dataset is not used for sql Args: - dataset (str): name of the dataset query (str): the query to compare to the available gene names Returns: list: A list of genes names whose beginning matches the query """ genes = db.Gene.select(db.Gene.name).where(db.Gene.name.startswith(query)) gene_names = [str(gene.name) for gene in genes] - logging.error('Autocomplete: {}'.format(gene_names)) return gene_names -def get_coverage(dataset, datatype, item, ds_version=None): +def get_coverage(dataset:str, datatype:str, item:str, ds_version:str=None): """ Retrieve coverage for a gene/region/transcript @@ -35,6 +32,9 @@ def get_coverage(dataset, datatype, item, ds_version=None): datatype (str): type of "region" (gene/region/transcript) item (str): the datatype item to look up ds_version (str): the dataset version + + Returns: + dict: start, stop, coverage list """ ret = {'coverage':[]} @@ -58,7 +58,7 @@ def get_coverage(dataset, datatype, item, ds_version=None): return ret -def get_coverage_pos(dataset, datatype, item): +def get_coverage_pos(dataset:str, datatype:str, item:str): """ Retrieve coverage range @@ -66,7 +66,9 @@ def get_coverage_pos(dataset, datatype, item): dataset (str): short name of the dataset datatype (str): type of "region" (gene/region/transcript) item (str): the datatype item to look up - ds_version (str): the dataset version + + Returns: + dict: start, stop, chromosome """ ret = {'start':None, 'stop':None, 'chrom':None} @@ -90,9 +92,21 @@ def get_coverage_pos(dataset, datatype, item): ret['chrom'] = chrom return ret - -def get_variant_list(dataset, datatype, item): + +def get_variant_list(dataset:str, datatype:str, item:str, ds_version:str=None): + """ + Retrieve variants for a datatype + + Args: + dataset (str): dataset short name + datatype (str): type of data + item (str): query item + ds_version (str): dataset version + + Returns: + dict: {variants:list, headers:list} + """ headers = [['variant_id','Variant'], ['chrom','Chrom'], ['pos','Position'], ['HGVS','Consequence'], ['filter','Filter'], ['major_consequence','Annotation'], ['flags','Flags'], ['allele_count','Allele Count'], ['allele_num','Allele Number'], @@ -116,6 +130,6 @@ def format_variant(variant): # This is so an array values turns into a comma separated string instead return {k: ", ".join(v) if isinstance(v,list) else v for k, v in variant.items()} - + variants = list(map(format_variant, variants)) return {'variants': variants, 'headers': headers} diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index c4d34c768..043707a01 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -64,6 +64,7 @@ def test_get_coverage_for_bases(): 'pos': 55500320, 'mean': 39.69, 'median': 38.0, 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.961, 0.856, 0.117, 0.001]}] assert coverage == expected + assert len(lookups.get_coverage_for_bases('SweGen', '22', 46615715, 46615880)) == 17 # no end_pos coverage = 
lookups.get_coverage_for_bases('SweGen', '1', 55500290) @@ -231,11 +232,11 @@ def test_get_number_of_variants_in_transcript(): """ # normal res = lookups.get_number_of_variants_in_transcript('SweGen', 'ENST00000424770') - assert res == {'filtered': 1, 'total': 23} + assert res == {'filtered': 243, 'total': 309} # bad transcript res = lookups.get_number_of_variants_in_transcript('SweGen', 'ENSTASDSADA') - assert res == {'filtered': 0, 'total': 0} + assert res is None # bad dataset res = lookups.get_number_of_variants_in_transcript('bad_dataset', 'ENST00000424770') From 400bc177241f987de77fabad46f2a72a28c2f301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 4 Feb 2019 09:08:32 +0100 Subject: [PATCH 149/360] Api routes updated to support dataset version as well. Thanks to @kusalananda for help with regex. --- backend/modules/browser/browser_handlers.py | 14 +++++++------- backend/modules/browser/route.py | 20 ++++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index b518e5d88..7ddc3c664 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -13,7 +13,7 @@ REGION_LIMIT = 100000 class Autocomplete(handlers.UnsafeHandler): - def get(self, dataset, query): + def get(self, dataset, query, ds_version=None): ret = {} results = pgsql.get_autocomplete(query) @@ -35,7 +35,7 @@ class GetCoveragePos(handlers.UnsafeHandler): """ Retrieve coverage range """ - def get(self, dataset, datatype, item): + def get(self, dataset, datatype, item, ds_version=None): ret = pgsql.get_coverage_pos(dataset, datatype, item) self.finish(ret) @@ -116,7 +116,7 @@ class GetRegion(handlers.UnsafeHandler): """ Request information about genes in a region """ - def get(self, dataset, region): + def get(self, dataset, region, ds_version=None): """ Request information about genes in a region @@ -173,7 +173,7 @@ class GetTranscript(handlers.UnsafeHandler): """ Request information about a transcript """ - def get(self, dataset, transcript): + def get(self, dataset, transcript, ds_version=None): """ Request information about a transcript @@ -216,7 +216,7 @@ class GetVariant(handlers.UnsafeHandler): """ Request information about a gene """ - def get(self, dataset, variant): + def get(self, dataset, variant, ds_version=None): """ Request information about a gene @@ -318,7 +318,7 @@ class GetVariants(handlers.UnsafeHandler): """ Retrieve variants """ - def get(self, dataset, datatype, item): + def get(self, dataset, datatype, item, ds_version=None): """ Retrieve variants @@ -343,7 +343,7 @@ class Search(handlers.UnsafeHandler): """ Perform a search for the wanted object """ - def get(self, dataset, query): + def get(self, dataset, query, ds_version=None): """ Perform a search for the wanted object diff --git a/backend/modules/browser/route.py b/backend/modules/browser/route.py index df6913617..350c74f30 100755 --- a/backend/modules/browser/route.py +++ b/backend/modules/browser/route.py @@ -1,14 +1,14 @@ from . 
import browser_handlers as handlers
 
 # Browser links
-routes = [ (r"/api/datasets/(?P<dataset>[^\/]+)/browser/gene/(?P<gene>[^\/]+)", handlers.GetGene),
-           (r"/api/datasets/(?P<dataset>[^\/]+)/browser/region/(?P<region>[^\/]+)", handlers.GetRegion),
-           (r"/api/datasets/(?P<dataset>[^\/]+)/browser/transcript/(?P<transcript>[^\/]+)", handlers.GetTranscript),
-           (r"/api/datasets/(?P<dataset>[^\/]+)/browser/variant/(?P<variant>[^\/]+)", handlers.GetVariant),
-           (r"/api/datasets/(?P<dataset>[^\/]+)/browser/variants/(?P<datatype>[^\/]+)/(?P<item>[^\/]+)", handlers.GetVariants),
-           (r"/api/datasets/(?P<dataset>[^\/]+)/browser/coverage/(?P<datatype>[^\/]+)/(?P<item>[^\/]+)", handlers.GetCoverage),
-           (r"/api/datasets/(?P<dataset>[^\/]+)/browser/coverage_pos/(?P<datatype>[^\/]+)/(?P<item>[^\/]+)", handlers.GetCoveragePos),
-           (r"/api/datasets/(?P<dataset>[^\/]+)/browser/autocomplete/(?P<query>[^\/]+)", handlers.Autocomplete),
-           (r"/api/datasets/(?P<dataset>[^\/]+)/browser/search/(?P<query>[^\/]+)", handlers.Search),
-           (r"/api/datasets/(?P<dataset>[^\/]+)/browser/download/(?P<datatype>[^\/]+)/(?P<item>[^\/]+)", handlers.Download),
+routes = [(r"/api/datasets/(?P<dataset>[^/]+)/(?:version/(?P<ds_version>[^/]+)/)?browser/gene/(?P<gene>[^/]+)" , handlers.GetGene),
+          (r"/api/datasets/(?P<dataset>[^/]+)/(?:version/(?P<ds_version>[^/]+)/)?browser/region/(?P<region>[^\/]+)", handlers.GetRegion),
+          (r"/api/datasets/(?P<dataset>[^/]+)/(?:version/(?P<ds_version>[^/]+)/)?browser/transcript/(?P<transcript>[^/]+)", handlers.GetTranscript),
+          (r"/api/datasets/(?P<dataset>[^/]+)/(?:version/(?P<ds_version>[^/]+)/)?browser/variant/(?P<variant>[^/]+)", handlers.GetVariant),
+          (r"/api/datasets/(?P<dataset>[^/]+)/(?:version/(?P<ds_version>[^/]+)/)?browser/variants/(?P<datatype>[^/]+)/(?P<item>[^/]+)", handlers.GetVariants),
+          (r"/api/datasets/(?P<dataset>[^/]+)/(?:version/(?P<ds_version>[^/]+)/)?browser/coverage/(?P<datatype>[^/]+)/(?P<item>[^/]+)", handlers.GetCoverage),
+          (r"/api/datasets/(?P<dataset>[^/]+)/(?:version/(?P<ds_version>[^/]+)/)?browser/coverage_pos/(?P<datatype>[^/]+)/(?P<item>[^/]+)", handlers.GetCoveragePos),
+          (r"/api/datasets/(?P<dataset>[^/]+)/(?:version/(?P<ds_version>[^/]+)/)?browser/autocomplete/(?P<query>[^/]+)", handlers.Autocomplete),
+          (r"/api/datasets/(?P<dataset>[^/]+)/(?:version/(?P<ds_version>[^/]+)/)?browser/search/(?P<query>[^/]+)", handlers.Search),
+          (r"/api/datasets/(?P<dataset>[^/]+)/(?:version/(?P<ds_version>[^/]+)/)?browser/download/(?P<datatype>[^/]+)/(?P<item>[^/]+)", handlers.Download),
           ]

From 7bc68c20394b1e7d5686e991f0c6f504123d436d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20=C3=96stberg?=
Date: Fri, 8 Feb 2019 14:47:24 +0100
Subject: [PATCH 150/360] updated get_gene_by_name for new schema

---
 backend/modules/browser/lookups.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py
index 6fd698d65..900c76a9c 100644
--- a/backend/modules/browser/lookups.py
+++ b/backend/modules/browser/lookups.py
@@ -264,9 +264,9 @@ def get_gene_by_name(dataset:str, gene_name:str):
                                    (db.Gene.name==gene_name)).dicts().get()
     except db.Gene.DoesNotExist:
         try:
-            return db.Gene.select().where((db.Gene.reference_set == ref_dbid) &
-                                          (db.Gene.other_names.contains(gene_name))).dicts().get()
-        except db.Gene.DoesNotExist:
+            return db.GeneOtherNames.select().join(db.Gene).where((db.GeneOtherNames.name == gene_name) &
+                                                                  (db.Gene.reference_set == ref_dbid)).dicts().get()
+        except db.GeneOtherNames.DoesNotExist:
             logging.error('get_gene_by_name({}, {}): unable to retrieve gene'.format(dataset, gene_name))
             return {}
 

From c3ae8e4a8d4c7262d4920deb5461a5b028d537ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20=C3=96stberg?=
Date: Fri, 8 Feb 2019 15:00:55 +0100
Subject: [PATCH 151/360] should fix get_variants_in_gene for new db schema

---
 backend/modules/browser/lookups.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py
index
900c76a9c..ff56cc815 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -504,32 +504,32 @@ def get_variants_by_rsid(dataset:str, rsid:str, check_position:str=False, ds_ver return variants -def get_variants_in_gene(dataset:str, gene_id:str): +def get_variants_in_gene(dataset:str, gene_id:str, ds_version=None): """ Retrieve variants present inside a gene Args: dataset (str): short name of the dataset gene_id (str): id of the gene + ds_version (str): version of the dataset Returns: list: values for the variants """ ref_dbid = db.get_reference_dbid_dataset(dataset) - gene = get_gene(dataset, gene_id) - #### remove when db is fixed - gene['stop'] = gene['start'] + 20000 - #### + if not ref_dbid: + return None + dataset_version = db.get_dataset_version(dataset, ds_version) + if not dataset_version: + return None - variants = get_variants_in_region(dataset, gene['chrom'], gene['start'], gene['stop']) + gene = get_gene(dataset, gene_id) - # variants = [variant for variant in db.Variant.select().where(db.Variant.genes.contains(transcript_id)).dicts()] + variants = [variant for variant in db.Variant.select() + .join(VariantGenes) + .where((db.VariantGenes.name == gene_id) & + (db.Variant.dataset_version == dataset_version)).dicts()] -# for variant in variants: -# variant['vep_annotations'] = [anno for anno in variant['vep_annotations'] if anno['Gene'] == gene_id] -# add_consequence_to_variant(variant) -# remove_extraneous_information(variant) -# variants.append(variant) return variants From 40ffcf177b7503112a490edb1f5dec8aeb5b4feb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 8 Feb 2019 15:15:13 +0100 Subject: [PATCH 152/360] should fix get_variants_in_transcripts for new db schema, also some other fixes in the get_variants_in functions --- backend/modules/browser/lookups.py | 43 +++++++++++++++++++----------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index ff56cc815..24754640a 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -504,7 +504,7 @@ def get_variants_by_rsid(dataset:str, rsid:str, check_position:str=False, ds_ver return variants -def get_variants_in_gene(dataset:str, gene_id:str, ds_version=None): +def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): """ Retrieve variants present inside a gene @@ -527,9 +527,13 @@ def get_variants_in_gene(dataset:str, gene_id:str, ds_version=None): variants = [variant for variant in db.Variant.select() .join(VariantGenes) - .where((db.VariantGenes.name == gene_id) & + .where((db.VariantGenes.gene == gene['id']) & (db.Variant.dataset_version == dataset_version)).dicts()] + utils.add_consequence_to_variants(variants) + for variant in variants: + add_rsid_to_variant(dataset, variant) + remove_extraneous_information(variant) return variants @@ -571,33 +575,42 @@ def get_variants_in_region(dataset:str, chrom:str, start_pos:int, end_pos:int, d for variant in variants: if variant['rsid']: variant['rsid'] = 'rs{}'.format(variant['rsid']) - # add_rsid_to_variant(dataset, variant) + add_rsid_to_variant(dataset, variant) remove_extraneous_information(variant) return variants -def get_variants_in_transcript(dataset:str, transcript_id:str): +def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=None): """ Retrieve variants inside a transcript Args: dataset (str): short name of the dataset transcript_id (str): id of the 
transcript (ENST) + ds_version (str): version of the dataset Returns: dict: values for the variant; None if not found """ - transcript = get_transcript(dataset, transcript_id) - if not transcript: - return None - # temporary while waiting for db fix - variants = get_variants_in_region(dataset, transcript['chrom'], transcript['start'], transcript['stop']) - # variants = [variant for variant in db.Variant.select().where(db.Variant.transcripts.contains(transcript_id)).dicts()] - -# for variant in variants: -# variant['vep_annotations'] = [anno for anno in variant['vep_annotations'] if anno['Feature'] == transcript_id] -# add_consequence_to_variant(variant) -# remove_extraneous_information(variant) + ref_dbid = db.get_reference_dbid_dataset(dataset) + if not ref_dbid: + return None + dataset_version = db.get_dataset_version(dataset, ds_version) + if not dataset_version: + return None + + transcript = get_transcript(dataset, gene_id) + + variants = [variant for variant in db.Variant.select() + .join(VariantTranscripts) + .where((db.VariantTranscripts.transcript == transcript['id']) & + (db.Variant.dataset_version == dataset_version)).dicts()] + + utils.add_consequence_to_variants(variants) + for variant in variants: + variant['vep_annotations'] = [anno for anno in variant['vep_annotations'] if anno['Feature'] == transcript_id] + add_rsid_to_variant(dataset, variant) + remove_extraneous_information(variant) return variants From 2d7c4f5a21b7b082ae014aea45da35948b51e06d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 11 Feb 2019 12:46:59 +0100 Subject: [PATCH 153/360] a couple of small fixes --- backend/modules/browser/lookups.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 24754640a..4ed40f8f7 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -526,7 +526,7 @@ def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): gene = get_gene(dataset, gene_id) variants = [variant for variant in db.Variant.select() - .join(VariantGenes) + .join(db.VariantGenes) .where((db.VariantGenes.gene == gene['id']) & (db.Variant.dataset_version == dataset_version)).dicts()] @@ -599,10 +599,10 @@ def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=No if not dataset_version: return None - transcript = get_transcript(dataset, gene_id) + transcript = get_transcript(dataset, transcript_id) variants = [variant for variant in db.Variant.select() - .join(VariantTranscripts) + .join(db.VariantTranscripts) .where((db.VariantTranscripts.transcript == transcript['id']) & (db.Variant.dataset_version == dataset_version)).dicts()] From c407e9feeac1d36bf7045a7425914e5caad7902d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 12 Feb 2019 10:39:13 +0100 Subject: [PATCH 154/360] remove when db is fixed --- backend/modules/browser/browser_handlers.py | 6 ------ backend/modules/browser/tests/test_utils.py | 8 -------- 2 files changed, 14 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 7ddc3c664..00135db70 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -1,4 +1,3 @@ -import json # remove when db is fixed import logging import handlers @@ -82,9 +81,6 @@ def get(self, dataset, gene, ds_version=None): # Gene gene = lookups.get_gene(dataset, gene_id) - #### Remove when db is 
fixed - gene['stop'] = gene['start'] + 20000 - #### if gene: ret['gene'] = gene @@ -245,14 +241,12 @@ def get(self, dataset, variant, ds_version=None): return # Just get the information we need - variant['quality_metrics'] = json.loads(variant['quality_metrics']) # remove when db is fixed for item in ["variant_id", "chrom", "pos", "ref", "alt", "rsid", "allele_num", "allele_freq", "allele_count", "orig_alt_alleles", "site_quality", "quality_metrics", "transcripts", "genes"]: ret['variant'][item] = variant[item] ret['variant']['filter'] = variant['filter_string'] - variant['vep_annotations'] = json.loads(variant['vep_annotations']) # remove when db is fixed # Variant Effect Predictor (VEP) annotations # https://www.ensembl.org/info/docs/tools/vep/vep_formats.html ret['variant']['consequences'] = [] diff --git a/backend/modules/browser/tests/test_utils.py b/backend/modules/browser/tests/test_utils.py index bb74f5693..96c80db98 100644 --- a/backend/modules/browser/tests/test_utils.py +++ b/backend/modules/browser/tests/test_utils.py @@ -5,8 +5,6 @@ from .. import lookups from .. import utils -import json - def test_add_consequence_to_variants(): """ @@ -15,8 +13,6 @@ def test_add_consequence_to_variants(): variants = [] variants.append(lookups.get_variant('SweGen', 47730411, '21', 'TA', 'T')) variants.append(lookups.get_variant('SweGen', 55500283, '1', 'A', 'T')) - variants[0]['vep_annotations'] = json.loads(variants[0]['vep_annotations']) # remove when db is fixed - variants[1]['vep_annotations'] = json.loads(variants[1]['vep_annotations']) # remove when db is fixed utils.add_consequence_to_variants(variants) assert variants[0]['major_consequence'] == 'intron_variant' @@ -28,12 +24,10 @@ def test_add_consequence_to_variant(): Test add_consequence_to_variant() """ variant = lookups.get_variant('SweGen', 47730411, '21', 'TA', 'T') - variant['vep_annotations'] = json.loads(variant['vep_annotations']) # remove when db is fixed utils.add_consequence_to_variant(variant) assert variant['major_consequence'] == 'intron_variant' variant2 = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') - variant2['vep_annotations'] = json.loads(variant2['vep_annotations']) # remove when db is fixed utils.add_consequence_to_variant(variant2) assert variant2['major_consequence'] == 'upstream_gene_variant' @@ -43,7 +37,6 @@ def test_annotation_severity(): Test annotation_severity() """ variant = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') - variant['vep_annotations'] = json.loads(variant['vep_annotations']) # remove when db is fixed res = utils.annotation_severity(variant['vep_annotations'][0]) assert res == -26.9 @@ -151,7 +144,6 @@ def test_worst_csq_from_csq(): Test worst_csq_from_csq() """ variant = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') - variant['vep_annotations'] = json.loads(variant['vep_annotations']) # remove when db is fixed res = utils.worst_csq_from_csq(variant['vep_annotations'][0]['Consequence']) assert res == 'upstream_gene_variant' res = utils.worst_csq_from_csq('non_coding_exon_variant&nc_transcript_variant') From e9faf47ebe67a8d5c9f530a4296e63185b6a7700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 12 Feb 2019 13:06:01 +0100 Subject: [PATCH 155/360] adding genes and transcripts to variants after db schema update --- backend/modules/browser/lookups.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 4ed40f8f7..a531ef1ef 
100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -341,15 +341,23 @@ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_versio return None try: - return (db.Variant - .select() - .where((db.Variant.pos == pos) & - (db.Variant.ref == ref) & - (db.Variant.alt == alt) & - (db.Variant.chrom == chrom) & - (db.Variant.dataset_version == dataset_version.id)) - .dicts() - .get()) + variant = (db.Variant + .select() + .where((db.Variant.pos == pos) & + (db.Variant.ref == ref) & + (db.Variant.alt == alt) & + (db.Variant.chrom == chrom) & + (db.Variant.dataset_version == dataset_version.id)) + .dicts() + .get()) + variant['genes'] = [gene for gene in + db.VariantGenes.select(db.VariantGenes.gene) + .where(db.VariantGenes.variant == variant['id']) + .dicts()] + variant['transcripts'] = [transcript for transcript in + db.VariantTranscripts.select(db.VariantTranscripts.transcript) + .where(db.VariantTranscripts.variant == variant['id']) + .dicts()] except db.Variant.DoesNotExist: logging.error('get_raw_variant({}, {}, {}, {}, {}, {}): unable to retrieve variant' .format(dataset, pos, chrom, ref, alt, dataset_version.id)) From e48d54cca1c54381f156d68a80dffd891cb20480 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 13 Feb 2019 10:54:56 +0100 Subject: [PATCH 156/360] seems to work with new db schema, but speed is not optimal --- backend/modules/browser/lookups.py | 41 ++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index a531ef1ef..34f2c00a0 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -358,6 +358,7 @@ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_versio db.VariantTranscripts.select(db.VariantTranscripts.transcript) .where(db.VariantTranscripts.variant == variant['id']) .dicts()] + return variant except db.Variant.DoesNotExist: logging.error('get_raw_variant({}, {}, {}, {}, {}, {}): unable to retrieve variant' .format(dataset, pos, chrom, ref, alt, dataset_version.id)) @@ -537,10 +538,18 @@ def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): .join(db.VariantGenes) .where((db.VariantGenes.gene == gene['id']) & (db.Variant.dataset_version == dataset_version)).dicts()] + ##### remove when db is fixed + for variant in variants: + variant['hom_count'] = 0 + variant['filter'] = variant['filter_string'] + ##### utils.add_consequence_to_variants(variants) for variant in variants: - add_rsid_to_variant(dataset, variant) + if variant['rsid'] and variant['rsid'] != '.': + variant['rsid'] = 'rs{}'.format(variant['rsid']) + else: + add_rsid_to_variant(dataset, variant) remove_extraneous_information(variant) return variants @@ -569,21 +578,21 @@ def get_variants_in_region(dataset:str, chrom:str, start_pos:int, end_pos:int, d (db.Variant.chrom == chrom) & (db.Variant.dataset_version == dataset_version)) .dicts()) + variants = [variant for variant in query] ##### remove when db is fixed for variant in variants: - variant['quality_metrics'] = json.loads(variant['quality_metrics']) - variant['vep_annotations'] = json.loads(variant['vep_annotations']) variant['hom_count'] = 0 variant['filter'] = variant['filter_string'] ##### utils.add_consequence_to_variants(variants) for variant in variants: - if variant['rsid']: + if variant['rsid'] and variant['rsid'] != '.': variant['rsid'] = 'rs{}'.format(variant['rsid']) - 
add_rsid_to_variant(dataset, variant) + else: + add_rsid_to_variant(dataset, variant) remove_extraneous_information(variant) return variants @@ -610,14 +619,24 @@ def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=No transcript = get_transcript(dataset, transcript_id) variants = [variant for variant in db.Variant.select() - .join(db.VariantTranscripts) - .where((db.VariantTranscripts.transcript == transcript['id']) & - (db.Variant.dataset_version == dataset_version)).dicts()] + .join(db.VariantTranscripts) + .where((db.VariantTranscripts.transcript == transcript['id']) & + (db.Variant.dataset_version == dataset_version)) + .dicts()] + + ##### remove when db is fixed + for variant in variants: + variant['hom_count'] = 0 + variant['filter'] = variant['filter_string'] + ##### utils.add_consequence_to_variants(variants) for variant in variants: variant['vep_annotations'] = [anno for anno in variant['vep_annotations'] if anno['Feature'] == transcript_id] - add_rsid_to_variant(dataset, variant) + if variant['rsid'] and variant['rsid'] != '.': + variant['rsid'] = 'rs{}'.format(variant['rsid']) + else: + add_rsid_to_variant(dataset, variant) remove_extraneous_information(variant) return variants @@ -625,8 +644,8 @@ def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=No def remove_extraneous_information(variant): #del variant['genotype_depths'] #del variant['genotype_qualities'] - del variant['transcripts'] - del variant['genes'] +# del variant['transcripts'] +# del variant['genes'] del variant['orig_alt_alleles'] del variant['site_quality'] del variant['vep_annotations'] From 19f16988487179f8b002649ab9f9026481bf99f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 13 Feb 2019 13:43:56 +0100 Subject: [PATCH 157/360] decreased logging --- backend/modules/browser/browser_handlers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 00135db70..7b5b2b17f 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -328,9 +328,7 @@ def get(self, dataset, datatype, item, ds_version=None): n = a[0] + "".join([b[0].upper() + b[1:] for b in a.split("_")])[1:] headers += [[n, h]] ret['headers'] = headers - logging.error('Variant request {} items'.format(len(ret))) - logging.error('Variant request {} items'.format(ret)) - self.finish( ret ) + self.finish(ret) class Search(handlers.UnsafeHandler): From 85644cb2c49e242f86e96f031297a1c35f2c03cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 13 Feb 2019 13:44:48 +0100 Subject: [PATCH 158/360] rsid fixing; they can't be '.' with new db --- backend/modules/browser/lookups.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 34f2c00a0..0cfd57d2e 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -25,7 +25,7 @@ def add_rsid_to_variant(dataset:str, variant:str): .get()) dbsnp_version = refset['dbsnp_version'] - if variant['rsid'] == '.' 
or variant['rsid'] is None: + if not variant['rsid']: try: rsid = (db.DbSNP .select() @@ -36,7 +36,8 @@ def add_rsid_to_variant(dataset:str, variant:str): .get()) variant['rsid'] = 'rs{}'.format(rsid['rsid']) except db.DbSNP.DoesNotExist: - logging.error('add_rsid_to_variant({}, variant[dbid: {}]): unable to retrieve rsid'.format(dataset, variant['id'])) + pass + # logging.error('add_rsid_to_variant({}, variant[dbid: {}]): unable to retrieve rsid'.format(dataset, variant['id'])) REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') @@ -546,7 +547,7 @@ def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): utils.add_consequence_to_variants(variants) for variant in variants: - if variant['rsid'] and variant['rsid'] != '.': + if variant['rsid']: variant['rsid'] = 'rs{}'.format(variant['rsid']) else: add_rsid_to_variant(dataset, variant) @@ -589,7 +590,7 @@ def get_variants_in_region(dataset:str, chrom:str, start_pos:int, end_pos:int, d utils.add_consequence_to_variants(variants) for variant in variants: - if variant['rsid'] and variant['rsid'] != '.': + if variant['rsid']: variant['rsid'] = 'rs{}'.format(variant['rsid']) else: add_rsid_to_variant(dataset, variant) @@ -633,7 +634,7 @@ def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=No utils.add_consequence_to_variants(variants) for variant in variants: variant['vep_annotations'] = [anno for anno in variant['vep_annotations'] if anno['Feature'] == transcript_id] - if variant['rsid'] and variant['rsid'] != '.': + if variant['rsid']: variant['rsid'] = 'rs{}'.format(variant['rsid']) else: add_rsid_to_variant(dataset, variant) From 428b12e3b9a5370197458f014e889b0a80336851 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 13 Feb 2019 13:45:23 +0100 Subject: [PATCH 159/360] converting tests to chromosome 22 --- backend/modules/browser/tests/test_lookups.py | 92 +++++++++---------- 1 file changed, 42 insertions(+), 50 deletions(-) diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index 043707a01..3160418da 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -9,16 +9,14 @@ def test_add_rsid_to_variant(): """ Test add_rsid_to_variant() """ - # "with ." - variant = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') - rsid = variant['rsid'] - variant['rsid'] = '.' 
+ variant = lookups.get_variant('SweGen', 34730985, '22', 'G', 'A') lookups.add_rsid_to_variant('SweGen', variant) - assert variant['rsid'] == rsid - # "non-existing" - del variant['rsid'] + assert variant['rsid'] == 'rs924645261' + variant = lookups.get_variant('SweGen', 16113980, '22', 'C', 'T') + rsid = variant['rsid'] + variant['rsid'] = '' lookups.add_rsid_to_variant('SweGen', variant) - assert variant['rsid'] == rsid + assert variant['rsid'] == 'rs9680543' def test_get_awesomebar_result(): @@ -50,27 +48,17 @@ def test_get_coverage_for_bases(): Test get_coverage_for_bases() """ # normal - coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500320) - expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', - 'pos': 55500290, 'mean': 40.66, 'median': 39.0, - 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]}, - {'id': 5474063, 'dataset_version': 4, 'chrom': '1', - 'pos': 55500300, 'mean': 40.7, 'median': 39.0, - 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.971, 0.878, 0.132, 0.001]}, - {'id': 5474064, 'dataset_version': 4, 'chrom': '1', - 'pos': 55500310, 'mean': 40.35, 'median': 39.0, - 'coverage': [1.0, 1.0, 1.0, 1.0, 0.995, 0.974, 0.859, 0.138, 0.001]}, - {'id': 5474065, 'dataset_version': 4, 'chrom': '1', - 'pos': 55500320, 'mean': 39.69, 'median': 38.0, - 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.961, 0.856, 0.117, 0.001]}] - assert coverage == expected + coverage = lookups.get_coverage_for_bases('SweGen', '22', 46546423, 46549652) + assert len(coverage) == 323 + assert coverage[0] == {'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], + 'dataset_version': 4, 'id': 2954967, 'mean': 24.94, 'median': 24.0, 'pos': 46546430} assert len(lookups.get_coverage_for_bases('SweGen', '22', 46615715, 46615880)) == 17 # no end_pos - coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500290) - expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', - 'pos': 55500290, 'mean': 40.66, 'median': 39.0, - 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]}] + coverage = lookups.get_coverage_for_bases('SweGen', '22', 46546430) + assert coverage == [{'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], + 'dataset_version': 4, 'id': 2954967, 'mean': 24.94, 'median': 24.0, 'pos': 46546430}] + assert len(lookups.get_coverage_for_bases('SweGen', '22', 46615715, 46615880)) == 17 # no hits coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500285) @@ -84,21 +72,25 @@ def test_get_coverage_for_transcript(): """ Test get_coverage_for_transcript() """ - coverage = lookups.get_coverage_for_transcript('SweGen', '1', 55500283, 55500320) - expected = [{'id': 5474062, 'dataset_version': 4, 'chrom': '1', - 'pos': 55500290, 'mean': 40.66, 'median': 39.0, - 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.97, 0.867, 0.127, 0.001]}, - {'id': 5474063, 'dataset_version': 4, 'chrom': '1', - 'pos': 55500300, 'mean': 40.7, 'median': 39.0, - 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.971, 0.878, 0.132, 0.001]}, - {'id': 5474064, 'dataset_version': 4, 'chrom': '1', - 'pos': 55500310, 'mean': 40.35, 'median': 39.0, - 'coverage': [1.0, 1.0, 1.0, 1.0, 0.995, 0.974, 0.859, 0.138, 0.001]}, - {'id': 5474065, 'dataset_version': 4, 'chrom': '1', - 'pos': 55500320, 'mean': 39.69, 'median': 38.0, - 'coverage': [1.0, 1.0, 1.0, 1.0, 0.996, 0.961, 0.856, 0.117, 0.001]}] - assert coverage == expected - assert not lookups.get_coverage_for_transcript('BAD_DATASET', '1', 55500283, 55500320) + # normal + 
coverage = lookups.get_coverage_for_bases('SweGen', '22', 46546423, 46549652) + assert len(coverage) == 323 + assert coverage[0] == {'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], + 'dataset_version': 4, 'id': 2954967, 'mean': 24.94, 'median': 24.0, 'pos': 46546430} + assert len(lookups.get_coverage_for_bases('SweGen', '22', 46615715, 46615880)) == 17 + + # no end_pos + coverage = lookups.get_coverage_for_bases('SweGen', '22', 46546430) + assert coverage == [{'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], + 'dataset_version': 4, 'id': 2954967, 'mean': 24.94, 'median': 24.0, 'pos': 46546430}] + assert len(lookups.get_coverage_for_bases('SweGen', '22', 46615715, 46615880)) == 17 + + # no hits + coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500285) + assert not coverage + + # incorrect dataset + assert not lookups.get_coverage_for_bases('BAD_DATASET', '1', 55500283, 55500320) def test_get_exons_in_transcript(caplog): @@ -124,16 +116,16 @@ def test_get_gene(): Test get_gene() """ # normal entry - expected = {'id': 1, - 'reference_set': 1, - 'gene_id': 'ENSG00000223972', - 'name': 'DDX11L1', - 'full_name': 'DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1', - 'canonical_transcript': 'ENST00000456328', - 'chrom': '1', - 'start': 11870, + expected = {'gene_id': 'ENSG00000223972', + 'name': 'SNORA15', + 'full_name': '', + 'canonical_transcript': 'ENST00000516131', + 'chrom': '22', + 'start': 19237396, + 'stop': 19237489, 'strand': '+'} - result = lookups.get_gene('SweGen', 'ENSG00000223972') + + result = lookups.get_gene('SweGen', 'ENSG00000251940') for val in expected: assert result[val] == expected[val] From 314eb20753faf9478af3f7d308858788691a058c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 13 Feb 2019 14:03:34 +0100 Subject: [PATCH 160/360] fix for finding other names; return the db.Gene part --- backend/modules/browser/lookups.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 0cfd57d2e..011737ff6 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -261,12 +261,19 @@ def get_gene_by_name(dataset:str, gene_name:str): if not ref_dbid: return {} try: - return db.Gene.select().where((db.Gene.reference_set == ref_dbid) & - (db.Gene.name==gene_name)).dicts().get() + return (db.Gene.select() + .where((db.Gene.reference_set == ref_dbid) & + (db.Gene.name==gene_name)) + .dicts() + .get()) except db.Gene.DoesNotExist: try: - return db.GeneOtherNames.select().join(db.Gene).where((db.GeneOtherNames.name == gene_name) & - (db.Gene.reference_set == ref_dbid)).dicts().get() + return (db.GeneOtherNames.select(db.Gene) + .join(db.Gene) + .where((db.GeneOtherNames.name == gene_name) & + (db.Gene.reference_set == ref_dbid)) + .dicts() + .get()) except db.GeneOtherNames.DoesNotExist: logging.error('get_gene_by_name({}, {}): unable to retrieve gene'.format(dataset, gene_name)) return {} From dccc240cd5c3128c0b7d028444bf32c9bdea94a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 13 Feb 2019 14:42:35 +0100 Subject: [PATCH 161/360] handling a few cases of missing data observed during testing --- backend/modules/browser/lookups.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 011737ff6..243c46b0c 100644 --- 
a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -325,6 +325,7 @@ def get_number_of_variants_in_transcript(dataset:str, transcript_id:str, ds_vers if not variants: return None total = len(variants) + filtered = len(tuple(variant for variant in variants if variant['filter_string'] == 'PASS')) return {'filtered': filtered, 'total': total} @@ -383,9 +384,11 @@ def get_transcript(dataset:str, transcript_id:str): transcript_id (str): the id of the transcript Returns: - dict: values for the transcript, including exons; empty if not found + dict: values for the transcript, including exons; None if not found """ ref_dbid = db.get_reference_dbid_dataset(dataset) + if not ref_dbid: + return None try: transcript = (db.Transcript .select() @@ -397,7 +400,7 @@ def get_transcript(dataset:str, transcript_id:str): transcript['exons'] = get_exons_in_transcript(dataset, transcript_id) return transcript except db.Transcript.DoesNotExist: - return {} + return None def get_transcripts_in_gene(dataset:str, gene_id:str): @@ -625,6 +628,8 @@ def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=No return None transcript = get_transcript(dataset, transcript_id) + if not transcript: + return None variants = [variant for variant in db.Variant.select() .join(db.VariantTranscripts) From 887baed71cb8f5ca7a298d2cd7a03927c8a3a052 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 13 Feb 2019 15:03:34 +0100 Subject: [PATCH 162/360] fix incorrect naming of returned genes/transcripts --- backend/modules/browser/lookups.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 243c46b0c..2d94255e1 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -359,12 +359,14 @@ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_versio (db.Variant.dataset_version == dataset_version.id)) .dicts() .get()) - variant['genes'] = [gene for gene in - db.VariantGenes.select(db.VariantGenes.gene) + variant['genes'] = [gene['gene_id'] for gene in + db.VariantGenes.select(db.Gene.gene_id) + .join(db.Gene) .where(db.VariantGenes.variant == variant['id']) .dicts()] - variant['transcripts'] = [transcript for transcript in - db.VariantTranscripts.select(db.VariantTranscripts.transcript) + variant['transcripts'] = [transcript['transcript_id'] for transcript in + db.VariantTranscripts.select(db.Transcript.transcript_id) + .join(db.Transcript) .where(db.VariantTranscripts.variant == variant['id']) .dicts()] return variant From f8337395f0691ad2719d7a8155666aba31398bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 13 Feb 2019 15:04:14 +0100 Subject: [PATCH 163/360] further migration of test to chromosome 22 --- backend/modules/browser/tests/test_lookups.py | 113 ++++++------------ 1 file changed, 38 insertions(+), 75 deletions(-) diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index 3160418da..82d077eea 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -116,9 +116,9 @@ def test_get_gene(): Test get_gene() """ # normal entry - expected = {'gene_id': 'ENSG00000223972', + expected = {'gene_id': 'ENSG00000251940', 'name': 'SNORA15', - 'full_name': '', + 'full_name': None, 'canonical_transcript': 'ENST00000516131', 'chrom': '22', 'start': 19237396, @@ -143,14 +143,13 @@ def 
test_get_gene_by_dbid(): Test get_gene_by_dbid() """ # normal entry - expected = {'id': 53626, - 'reference_set': 1, - 'gene_id': 'ENSG00000226444', + expected = {'gene_id': 'ENSG00000226444', 'name': 'ACTR3BP6', 'full_name': 'ACTR3B pseudogene 6', 'canonical_transcript': 'ENST00000421366', 'chrom': '22', - 'start': 16967411, + 'start': 16967410, + 'stop': 16969212, 'strand': '+'} result = lookups.get_gene_by_dbid(53626) for val in expected: @@ -162,30 +161,23 @@ def test_get_gene_by_dbid(): result = lookups.get_gene_by_dbid(-1) assert not result + def test_get_gene_by_name(caplog): """ Test get_gene_by_name() """ # normal entry - expected = {'id': 1, - 'reference_set': 1, - 'gene_id': 'ENSG00000223972', - 'gene_name': 'DDX11L1', - 'full_name': 'DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1', - 'canonical_transcript': 'ENST00000456328', - 'chrom': '1', - 'start_pos': 11870, + expected = {'gene_id': 'ENSG00000226444', + 'name': 'ACTR3BP6', + 'full_name': 'ACTR3B pseudogene 6', + 'canonical_transcript': 'ENST00000421366', + 'chrom': '22', + 'start': 16967410, + 'stop': 16969212, 'strand': '+'} - result = lookups.get_gene_by_name('SweGen', 'DDX11L1') - assert result['id'] == expected['id'] - assert result['reference_set'] == expected['reference_set'] - assert result['gene_id'] == expected['gene_id'] - assert result['name'] == expected['gene_name'] - assert result['full_name'] == expected['full_name'] - assert result['canonical_transcript'] == expected['canonical_transcript'] - assert result['chrom'] == expected['chrom'] - assert result['start'] == expected['start_pos'] - assert result['strand'] == expected['strand'] + result = lookups.get_gene_by_name('SweGen', 'ACTR3BP6') + for val in expected: + assert result[val] == expected[val] # non-existing gene result = lookups.get_gene_by_name('SweGen', 'NOT_A_GENE') @@ -197,8 +189,9 @@ def test_get_gene_by_name(caplog): assert not result # name in other_names - result = lookups.get_gene_by_name('SweGen', 'NIR') - assert result['gene_id'] == 'ENSG00000188976' + result = lookups.get_gene_by_name('SweGen', 'BCL8C') + print(result) + assert result['gene_id'] == 'ENSG00000223875' def test_get_genes_in_region(): @@ -224,7 +217,7 @@ def test_get_number_of_variants_in_transcript(): """ # normal res = lookups.get_number_of_variants_in_transcript('SweGen', 'ENST00000424770') - assert res == {'filtered': 243, 'total': 309} + assert res == {'filtered': 66, 'total': 309} # bad transcript res = lookups.get_number_of_variants_in_transcript('SweGen', 'ENSTASDSADA') @@ -240,38 +233,16 @@ def test_get_transcript(): Test get_transcript() """ # normal entry - expected = {'id': 5, - 'transcript_id': 'ENST00000438504', - 'gene': '2', - 'mim_annotation': 'Was protein family homolog 1; wash1', - 'chrom': '1', - 'mim_gene_accession': 613632, - 'start_pos': 14364, - 'stop_pos': 29371, - 'strand': '-'} - exp_exon = [{'id': 28, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14364, 'stop': 14830, 'strand': '-', 'feature_type': 'exon'}, - {'id': 27, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 14971, 'stop': 15039, 'strand': '-', 'feature_type': 'exon'}, - {'id': 26, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 15797, 'stop': 15902, 'strand': '-', 'feature_type': 'exon'}, - {'id': 25, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 15905, 'stop': 15948, 'strand': '-', 'feature_type': 'exon'}, - {'id': 24, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 16608, 'stop': 16766, 'strand': '-', 'feature_type': 'exon'}, - {'id': 23, 'gene': 2, 'transcript': 
5, 'chrom': '1', 'start': 16855, 'stop': 17056, 'strand': '-', 'feature_type': 'exon'}, - {'id': 22, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17234, 'stop': 17365, 'strand': '-', 'feature_type': 'exon'}, - {'id': 21, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17603, 'stop': 17743, 'strand': '-', 'feature_type': 'exon'}, - {'id': 20, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 17916, 'stop': 18062, 'strand': '-', 'feature_type': 'exon'}, - {'id': 19, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 18269, 'stop': 18380, 'strand': '-', 'feature_type': 'exon'}, - {'id': 18, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 24739, 'stop': 24892, 'strand': '-', 'feature_type': 'exon'}, - {'id': 17, 'gene': 2, 'transcript': 5, 'chrom': '1', 'start': 29322, 'stop': 29371, 'strand': '-', 'feature_type': 'exon'}] - - result = lookups.get_transcript('SweGen', 'ENST00000438504') - assert result['id'] == expected['id'] - assert result['mim_annotation'] == expected['mim_annotation'] - assert result['transcript_id'] == expected['transcript_id'] - assert result['mim_gene_accession'] == expected['mim_gene_accession'] - assert result['chrom'] == expected['chrom'] - assert result['start'] == expected['start_pos'] - assert result['stop'] == expected['stop_pos'] - assert result['strand'] == expected['strand'] - assert result['exons'] == exp_exon + expected = {'transcript_id': 'ENST00000398242', + 'chrom': '22', + 'start': 16122720, + 'stop': 16123768, + 'strand': '+'} + + result = lookups.get_transcript('SweGen', 'ENST00000398242') + for val in expected: + assert result[val] == expected[val] + assert len(result['exons']) == 1 # non-existing assert not lookups.get_transcript('SweGen', 'INCORRECT') @@ -281,14 +252,8 @@ def test_get_transcripts_in_gene(): """ Test get_transcripts_in_gene() """ - res = lookups.get_transcripts_in_gene('SweGen', 'ENSG00000241670') - expected = [{'id': 39, 'transcript_id': 'ENST00000424429', 'gene': 19, - 'mim_gene_accession': None, 'mim_annotation': None, - 'chrom': '1', 'start': 228293, 'stop': 228655, 'strand': '-'}, - {'id': 40, 'transcript_id': 'ENST00000450734', 'gene': 19, - 'mim_gene_accession': None, 'mim_annotation': None, - 'chrom': '1', 'start': 228320, 'stop': 228776, 'strand': '-'}] - assert res == expected + res = lookups.get_transcripts_in_gene('SweGen', 'ENSG00000103197') + assert len(res) == 27 assert not lookups.get_transcripts_in_gene('bad_dataset', 'ENSG00000241670') assert not lookups.get_transcripts_in_gene('SweGen', 'ENSGASDFG') @@ -298,9 +263,9 @@ def test_get_raw_variant(): """ Test get_raw_variant """ - result = lookups.get_raw_variant('SweGen', 55500283, '1', 'A', 'T') - assert result['genes'] == ['ENSG00000169174'] - assert result['transcripts'] == ['ENST00000302118'] + result = lookups.get_variant('SweGen', 16057464, '22', 'G', 'A') + assert result['genes'] == ['ENSG00000233866'] + assert result['transcripts'] == ['ENST00000424770'] assert not lookups.get_raw_variant('SweGen', 55500281, '1', 'A', 'T') assert not lookups.get_raw_variant('bad_dataset', 55500283, '1', 'A', 'T') @@ -319,13 +284,11 @@ def test_get_variant(): """ Test get_variant() """ - result = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') - assert result['genes'] == ['ENSG00000169174'] - assert result['transcripts'] == ['ENST00000302118'] - assert result['rsid'] == 'rs75050571' + result = lookups.get_variant('SweGen', 16057464, '22', 'G', 'A') + assert result['genes'] == ['ENSG00000233866'] + assert result['transcripts'] == ['ENST00000424770'] # missing 
rsid in result, multiple transcripts - # slow, need to fix db result = lookups.get_variant('SweGen', 47730411, '21', 'TA', 'T') assert result['genes'] == ['ENSG00000160298'] assert result['transcripts'] == ['ENST00000417060', 'ENST00000397682', From 862f0c81dc8322de3dd28c167773aeca62ac3661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 19 Feb 2019 13:21:07 +0100 Subject: [PATCH 164/360] lookups updated for testing only chr22 --- backend/modules/browser/lookups.py | 18 +++++++++-- backend/modules/browser/tests/test_lookups.py | 31 +++++++------------ 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 2d94255e1..e23cc2be8 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -500,12 +500,14 @@ def get_variants_by_rsid(dataset:str, rsid:str, check_position:str=False, ds_ver .where(db.Dataset.short_name == dataset) .dicts() .get()) + if not refset: + return [] dbsnp_version = refset['dbsnp_version'] rsid_dbsnp = (db.DbSNP .select() .where((db.DbSNP.rsid == rsid) & - (db.DbSNP.version_id == dbsnp_version) ) + (db.DbSNP.version_id == dbsnp_version)) .dicts() .get()) query = (db.Variant @@ -522,7 +524,19 @@ def get_variants_by_rsid(dataset:str, rsid:str, check_position:str=False, ds_ver .dicts()) variants = [variant for variant in query] - # add_consequence_to_variants(variants) + for variant in variants: + variant['genes'] = [gene['gene_id'] for gene in + db.VariantGenes.select(db.Gene.gene_id) + .join(db.Gene) + .where(db.VariantGenes.variant == variant['id']) + .dicts()] + variant['transcripts'] = [transcript['transcript_id'] for transcript in + db.VariantTranscripts.select(db.Transcript.transcript_id) + .join(db.Transcript) + .where(db.VariantTranscripts.variant == variant['id']) + .dicts()] + + utils.add_consequence_to_variants(variants) return variants diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index 82d077eea..4ab9674d5 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -288,17 +288,6 @@ def test_get_variant(): assert result['genes'] == ['ENSG00000233866'] assert result['transcripts'] == ['ENST00000424770'] - # missing rsid in result, multiple transcripts - result = lookups.get_variant('SweGen', 47730411, '21', 'TA', 'T') - assert result['genes'] == ['ENSG00000160298'] - assert result['transcripts'] == ['ENST00000417060', 'ENST00000397682', - 'ENST00000397683', 'ENST00000397680', - 'ENST00000397685', 'ENST00000397679', - 'ENST00000291691', 'ENST00000445935', - 'ENST00000491666', 'ENST00000472607', - 'ENST00000475776'] - assert result['rsid'] == 'rs75050571' - # incorrect position assert not lookups.get_variant('SweGen', -1, '1', 'A', 'T') @@ -308,16 +297,18 @@ def test_get_variants_by_rsid(caplog): Test get_variants_by_rsid() ''' # normal - result = lookups.get_variants_by_rsid('SweGen', 'rs373706802') - assert result[0]['pos'] == 16080482 - assert result[0]['genes'] == ['ENSG00000229286', 'ENSG00000235265'] - assert result[0]['transcripts'] == ['ENST00000448070','ENST00000413156'] + result = lookups.get_variants_by_rsid('SweGen', 'rs185758992') + assert result[0]['pos'] == 38481311 + assert set(result[0]['genes']) == set(['ENSG00000100156', 'ENSG00000128298', 'ENSG00000272720']) + assert len(result[0]['genes']) == 3 + assert len(result[0]['transcripts']) == 6 # by position - result = 
lookups.get_variants_by_rsid('SweGen', 'rs373706802', check_position=True) - assert result[0]['pos'] == 16080482 - assert result[0]['genes'] == ['ENSG00000229286', 'ENSG00000235265'] - assert result[0]['transcripts'] == ['ENST00000448070','ENST00000413156'] + result = lookups.get_variants_by_rsid('SweGen', 'rs185758992', check_position=True) + assert result[0]['pos'] == 38481311 + assert set(result[0]['genes']) == set(['ENSG00000100156', 'ENSG00000128298', 'ENSG00000272720']) + assert len(result[0]['genes']) == 3 + assert len(result[0]['transcripts']) == 6 # errors assert lookups.get_variants_by_rsid('incorrect_name', 'rs373706802') is None @@ -367,4 +358,4 @@ def test_get_variants_in_transcript(): Test get_variants_in_transcript() """ res = lookups.get_variants_in_transcript('SweGen', 'ENST00000452800') - assert len(res) == 1414 + assert len(res) == 1174 From 667a8a5f728295ff42059ead10cfe823b32989fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 19 Feb 2019 13:21:30 +0100 Subject: [PATCH 165/360] pgsql updated for testing only chr22 --- backend/modules/browser/pgsql.py | 45 ++++++++------- backend/modules/browser/tests/test_pgsql.py | 64 +++++++++++++++++++++ 2 files changed, 87 insertions(+), 22 deletions(-) create mode 100644 backend/modules/browser/tests/test_pgsql.py diff --git a/backend/modules/browser/pgsql.py b/backend/modules/browser/pgsql.py index 9a7be7f38..2289ad22d 100644 --- a/backend/modules/browser/pgsql.py +++ b/backend/modules/browser/pgsql.py @@ -40,10 +40,12 @@ def get_coverage(dataset:str, datatype:str, item:str, ds_version:str=None): if datatype == 'gene': gene = lookups.get_gene(dataset, item) - transcript = lookups.get_transcript(dataset, gene['canonical_transcript']) - start = transcript['start'] - EXON_PADDING - stop = transcript['stop'] + EXON_PADDING - ret['coverage'] = lookups.get_coverage_for_transcript(dataset, transcript['chrom'], start, stop, ds_version) + if gene: + transcript = lookups.get_transcript(dataset, gene['canonical_transcript']) + if transcript: + start = transcript['start'] - EXON_PADDING + stop = transcript['stop'] + EXON_PADDING + ret['coverage'] = lookups.get_coverage_for_transcript(dataset, transcript['chrom'], start, stop, ds_version) elif datatype == 'region': chrom, start, stop = item.split('-') start = int(start) @@ -51,9 +53,10 @@ def get_coverage(dataset:str, datatype:str, item:str, ds_version:str=None): ret['coverage'] = lookups.get_coverage_for_bases(dataset, chrom, start, stop, ds_version) elif datatype == 'transcript': transcript = lookups.get_transcript(dataset, item) - start = transcript['start'] - EXON_PADDING - stop = transcript['stop'] + EXON_PADDING - ret['coverage'] = lookups.get_coverage_for_transcript(dataset, transcript['chrom'], start, stop, ds_version) + if transcript: + start = transcript['start'] - EXON_PADDING + stop = transcript['stop'] + EXON_PADDING + ret['coverage'] = lookups.get_coverage_for_transcript(dataset, transcript['chrom'], start, stop, ds_version) return ret @@ -72,24 +75,22 @@ def get_coverage_pos(dataset:str, datatype:str, item:str): """ ret = {'start':None, 'stop':None, 'chrom':None} - if datatype == 'gene': - gene = lookups.get_gene(dataset, item) - transcript = lookups.get_transcript(dataset, gene['canonical_transcript']) - elif datatype == 'transcript': - transcript = lookups.get_transcript(dataset, item) - if datatype == 'region': chrom, start, stop = item.split('-') - start = int(start) - stop = int(stop) + if start and stop and chrom: + ret['start'] = int(start) + 
ret['stop'] = int(stop) + ret['chrom'] = chrom else: - start = transcript['start'] - EXON_PADDING - stop = transcript['stop'] + EXON_PADDING - chrom = transcript['chrom'] - - ret['start'] = start - ret['stop'] = stop - ret['chrom'] = chrom + if datatype == 'gene': + gene = lookups.get_gene(dataset, item) + transcript = lookups.get_transcript(dataset, gene['canonical_transcript']) + elif datatype == 'transcript': + transcript = lookups.get_transcript(dataset, item) + if transcript: + ret['start'] = transcript['start'] - EXON_PADDING + ret['stop'] = transcript['stop'] + EXON_PADDING + ret['chrom'] = transcript['chrom'] return ret diff --git a/backend/modules/browser/tests/test_pgsql.py b/backend/modules/browser/tests/test_pgsql.py new file mode 100644 index 000000000..ac0014602 --- /dev/null +++ b/backend/modules/browser/tests/test_pgsql.py @@ -0,0 +1,64 @@ +""" +Tests for the functions available in pgsql.py +""" + +from .. import pgsql + + +def test_get_autocomplete(): + """ + Test get_autocomplete() + """ + res = pgsql.get_autocomplete('ADH') + expected = set(['ADH1A', 'ADH1B', 'ADH1C', 'ADH4', + 'ADH5', 'ADH6', 'ADH7', 'ADH5P2', + 'ADH5P3', 'ADH5P4', 'ADHFE1']) + assert set(res) == expected + + +def test_get_coverage(): + """ + Test get_coverage() + """ + res = pgsql.get_coverage('SweGen', 'gene', 'ENSG00000231565') + assert len(res['coverage']) == 144 + res = pgsql.get_coverage('SweGen', 'region', '22-46615715-46615880') + assert len(res['coverage']) == 17 + res = pgsql.get_coverage('SweGen', 'transcript', 'ENST00000438441') + assert len(res['coverage']) == 144 + + assert not pgsql.get_coverage('BAD_SET', 'transcript', 'ENST00000438441')['coverage'] + + +def test_get_coverage_pos(): + """ + Test get_coverage_pos() + """ + res = pgsql.get_coverage_pos('SweGen', 'gene', 'ENSG00000231565') + assert res['chrom'] == '22' + assert res['start'] == 16364817 + assert res['stop'] == 16366254 + res = pgsql.get_coverage_pos('SweGen', 'region', '22-46615715-46615880') + assert res['chrom'] == '22' + assert res['start'] == 46615715 + assert res['stop'] == 46615880 + res = pgsql.get_coverage_pos('SweGen', 'transcript', 'ENST00000438441') + assert res['chrom'] == '22' + assert res['start'] == 16364817 + assert res['stop'] == 16366254 + + res = pgsql.get_coverage_pos('BAD_SET', 'transcript', 'ENST00000438441') + for value in res.values(): + assert not value + + +def test_get_variant_list(): + """ + Test get_variant_list() + """ + res = pgsql.get_variant_list('SweGen', 'gene', 'ENSG00000231565') + assert len(res['variants']) == 405 + res = pgsql.get_variant_list('SweGen', 'region', '22-46615715-46615880') + assert len(res['variants']) == 3 + res = pgsql.get_variant_list('SweGen', 'transcript', 'ENST00000438441') + assert len(res['variants']) == 405 From 133b6d6404fec984590aa80e7a4ce52b7e0da1ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 19 Feb 2019 13:30:49 +0100 Subject: [PATCH 166/360] utils updated to test chr 22 --- backend/modules/browser/tests/test_utils.py | 24 ++++++++++++--------- backend/modules/browser/utils.py | 2 ++ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/backend/modules/browser/tests/test_utils.py b/backend/modules/browser/tests/test_utils.py index 96c80db98..aabfe1685 100644 --- a/backend/modules/browser/tests/test_utils.py +++ b/backend/modules/browser/tests/test_utils.py @@ -11,11 +11,11 @@ def test_add_consequence_to_variants(): Test add_consequence_to_variants() """ variants = [] - variants.append(lookups.get_variant('SweGen', 
47730411, '21', 'TA', 'T')) - variants.append(lookups.get_variant('SweGen', 55500283, '1', 'A', 'T')) + variants.append(lookups.get_variant('SweGen', 38481311, '22', 'C', 'T')) + variants.append(lookups.get_variant('SweGen', 38480546, '22', 'TG', 'TGG')) utils.add_consequence_to_variants(variants) - assert variants[0]['major_consequence'] == 'intron_variant' + assert variants[0]['major_consequence'] == 'missense_variant' assert variants[1]['major_consequence'] == 'upstream_gene_variant' @@ -23,20 +23,24 @@ def test_add_consequence_to_variant(): """ Test add_consequence_to_variant() """ - variant = lookups.get_variant('SweGen', 47730411, '21', 'TA', 'T') + variant = lookups.get_variant('SweGen', 38481311, '22', 'C', 'T') utils.add_consequence_to_variant(variant) - assert variant['major_consequence'] == 'intron_variant' + assert variant['major_consequence'] == 'missense_variant' - variant2 = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') - utils.add_consequence_to_variant(variant2) - assert variant2['major_consequence'] == 'upstream_gene_variant' + variant = lookups.get_variant('SweGen', 38480546, '22', 'TG', 'TGG') + utils.add_consequence_to_variant(variant) + assert variant['major_consequence'] == 'upstream_gene_variant' + + # bad variant + variant = lookups.get_variant('SweGen', 38481311, '444', 'C', 'T') + assert not variant def test_annotation_severity(): """ Test annotation_severity() """ - variant = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') + variant = lookups.get_variant('SweGen', 38481311, '22', 'C', 'T') res = utils.annotation_severity(variant['vep_annotations'][0]) assert res == -26.9 @@ -143,7 +147,7 @@ def test_worst_csq_from_csq(): """ Test worst_csq_from_csq() """ - variant = lookups.get_variant('SweGen', 55500283, '1', 'A', 'T') + variant = lookups.get_variant('SweGen', 38481311, '22', 'C', 'T') res = utils.worst_csq_from_csq(variant['vep_annotations'][0]['Consequence']) assert res == 'upstream_gene_variant' res = utils.worst_csq_from_csq('non_coding_exon_variant&nc_transcript_variant') diff --git a/backend/modules/browser/utils.py b/backend/modules/browser/utils.py index c045f6536..79f63a180 100644 --- a/backend/modules/browser/utils.py +++ b/backend/modules/browser/utils.py @@ -92,6 +92,8 @@ def add_consequence_to_variant(variant): Args: variant (dict): variant information """ + if not variant: + return dict() worst_csq = worst_csq_with_vep(variant['vep_annotations']) variant['major_consequence'] = '' if worst_csq is None: From 816f84271c936656bfeb2bf409092bcad8385b7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 19 Feb 2019 14:06:21 +0100 Subject: [PATCH 167/360] rsid != '.' 
in postgres --- backend/modules/browser/pgsql.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/modules/browser/pgsql.py b/backend/modules/browser/pgsql.py index 2289ad22d..f339b3687 100644 --- a/backend/modules/browser/pgsql.py +++ b/backend/modules/browser/pgsql.py @@ -123,8 +123,6 @@ def get_variant_list(dataset:str, datatype:str, item:str, ds_version:str=None): # Format output def format_variant(variant): - if variant['rsid'] == '.': - variant['rsid'] = '' variant['major_consequence'] = (variant['major_consequence'].replace('_variant','') .replace('_prime_', '\'') .replace('_', ' ')) From 9a9e5187977fb6bf07e04d936580887bdd87b0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 19 Feb 2019 14:13:19 +0100 Subject: [PATCH 168/360] increased coverage --- backend/modules/browser/lookups.py | 14 ++++---------- backend/modules/browser/tests/test_lookups.py | 17 ++++++++--------- backend/modules/browser/tests/test_utils.py | 1 + 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index e23cc2be8..37f3be895 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -138,7 +138,7 @@ def get_coverage_for_bases(dataset:str, chrom:str, start_pos:int, end_pos:int=No """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + return [] if end_pos is None: end_pos = start_pos @@ -171,7 +171,7 @@ def get_coverage_for_transcript(dataset:str, chrom:str, start_pos:int, end_pos:i # only return coverages that have coverage (if that makes any sense?) # return coverage_array if not coverage_array: - return None + return [] covered = [c for c in coverage_array if c['mean']] return covered @@ -189,7 +189,7 @@ def get_exons_in_transcript(dataset:str, transcript_id:str): """ ref_dbid = db.get_reference_dbid_dataset(dataset) if not ref_dbid: - logging.error('get_exons_in_transcript({}, {}): unable to find dataset dbid'.format(dataset, transcript_id)) + logging.info('get_exons_in_transcript({}, {}): unable to find dataset dbid'.format(dataset, transcript_id)) return None try: transcript = (db.Transcript @@ -199,7 +199,7 @@ def get_exons_in_transcript(dataset:str, transcript_id:str): (db.Gene.reference_set == ref_dbid)) .get()) except db.Transcript.DoesNotExist: - logging.error('get_exons_in_transcript({}, {}): unable to retrieve transcript'.format(dataset, transcript_id)) + logging.info('get_exons_in_transcript({}, {}): unable to retrieve transcript'.format(dataset, transcript_id)) return None wanted_types = ('CDS', 'UTR', 'exon') return sorted(list(db.Feature.select().where((db.Feature.transcript == transcript) & @@ -500,8 +500,6 @@ def get_variants_by_rsid(dataset:str, rsid:str, check_position:str=False, ds_ver .where(db.Dataset.short_name == dataset) .dicts() .get()) - if not refset: - return [] dbsnp_version = refset['dbsnp_version'] rsid_dbsnp = (db.DbSNP @@ -556,8 +554,6 @@ def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): if not ref_dbid: return None dataset_version = db.get_dataset_version(dataset, ds_version) - if not dataset_version: - return None gene = get_gene(dataset, gene_id) @@ -640,8 +636,6 @@ def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=No if not ref_dbid: return None dataset_version = db.get_dataset_version(dataset, ds_version) - if not dataset_version: - return None transcript = get_transcript(dataset, transcript_id) if not transcript: diff 
--git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index 4ab9674d5..154729805 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -73,27 +73,27 @@ def test_get_coverage_for_transcript(): Test get_coverage_for_transcript() """ # normal - coverage = lookups.get_coverage_for_bases('SweGen', '22', 46546423, 46549652) + coverage = lookups.get_coverage_for_transcript('SweGen', '22', 46546423, 46549652) assert len(coverage) == 323 assert coverage[0] == {'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], 'dataset_version': 4, 'id': 2954967, 'mean': 24.94, 'median': 24.0, 'pos': 46546430} - assert len(lookups.get_coverage_for_bases('SweGen', '22', 46615715, 46615880)) == 17 + assert len(lookups.get_coverage_for_transcript('SweGen', '22', 46615715, 46615880)) == 17 # no end_pos - coverage = lookups.get_coverage_for_bases('SweGen', '22', 46546430) + coverage = lookups.get_coverage_for_transcript('SweGen', '22', 46546430) assert coverage == [{'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], 'dataset_version': 4, 'id': 2954967, 'mean': 24.94, 'median': 24.0, 'pos': 46546430}] - assert len(lookups.get_coverage_for_bases('SweGen', '22', 46615715, 46615880)) == 17 + assert len(lookups.get_coverage_for_transcript('SweGen', '22', 46615715, 46615880)) == 17 # no hits - coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500285) + coverage = lookups.get_coverage_for_transcript('SweGen', '1', 55500283, 55500285) assert not coverage # incorrect dataset - assert not lookups.get_coverage_for_bases('BAD_DATASET', '1', 55500283, 55500320) + assert not lookups.get_coverage_for_transcript('BAD_DATASET', '1', 55500283, 55500320) -def test_get_exons_in_transcript(caplog): +def test_get_exons_in_transcript(): """ Test get_exons_in_transcript() """ @@ -103,12 +103,10 @@ def test_get_exons_in_transcript(caplog): # bad dataset result = lookups.get_exons_in_transcript('NO_DATASET', 'ENST00000215855') assert not result - assert caplog.messages[0] == 'get_exons_in_transcript(NO_DATASET, ENST00000215855): unable to find dataset dbid' # bad transcript result = lookups.get_exons_in_transcript('SweGen', 'BAD_TRANSCRIPT') assert not result - assert caplog.messages[1] == 'get_exons_in_transcript(SweGen, BAD_TRANSCRIPT): unable to retrieve transcript' def test_get_gene(): @@ -359,3 +357,4 @@ def test_get_variants_in_transcript(): """ res = lookups.get_variants_in_transcript('SweGen', 'ENST00000452800') assert len(res) == 1174 + res = lookups.get_variants_in_transcript('BAD_DATASET', 'ENST00000452800') diff --git a/backend/modules/browser/tests/test_utils.py b/backend/modules/browser/tests/test_utils.py index aabfe1685..5e704505a 100644 --- a/backend/modules/browser/tests/test_utils.py +++ b/backend/modules/browser/tests/test_utils.py @@ -33,6 +33,7 @@ def test_add_consequence_to_variant(): # bad variant variant = lookups.get_variant('SweGen', 38481311, '444', 'C', 'T') + utils.add_consequence_to_variant(variant) assert not variant From aeef79523fc305957b98ef223e738bb919c4b3d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 19 Feb 2019 14:14:08 +0100 Subject: [PATCH 169/360] no longer needs json --- backend/modules/browser/lookups.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 37f3be895..9555f77fa 100644 --- 
a/backend/modules/browser/lookups.py
+++ b/backend/modules/browser/lookups.py
@@ -1,4 +1,3 @@
-import json # remove when db is fixed
 import logging
 import re
 

From ecec24c14572ad2ee49a7d0ea9ab49bfbb191d4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20=C3=96stberg?=
Date: Tue, 19 Feb 2019 14:18:29 +0100
Subject: [PATCH 170/360] pylint fixes

---
 backend/modules/browser/pgsql.py |  3 ---
 backend/modules/browser/utils.py | 26 +++++++++++++-------------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/backend/modules/browser/pgsql.py b/backend/modules/browser/pgsql.py
index f339b3687..94aa9c3ad 100644
--- a/backend/modules/browser/pgsql.py
+++ b/backend/modules/browser/pgsql.py
@@ -1,9 +1,6 @@
 """
 Replaces mongodb.py
 """
-
-import logging
-
 import db
 
 from . import lookups
diff --git a/backend/modules/browser/utils.py b/backend/modules/browser/utils.py
index 79f63a180..73e5e7a5a 100644
--- a/backend/modules/browser/utils.py
+++ b/backend/modules/browser/utils.py
@@ -74,7 +74,7 @@
 }
 
 
-def add_consequence_to_variants(variant_list):
+def add_consequence_to_variants(variant_list:list):
     """
     Add information about variant consequence to multiple variants
 
@@ -85,7 +85,7 @@ def add_consequence_to_variants(variant_list):
         add_consequence_to_variant(variant)
 
 
-def add_consequence_to_variant(variant):
+def add_consequence_to_variant(variant:dict):
     """
     Add information about variant consequence to a variant
 
@@ -93,7 +93,7 @@ def add_consequence_to_variant(variant):
         variant (dict): variant information
     """
     if not variant:
-        return dict()
+        return
     worst_csq = worst_csq_with_vep(variant['vep_annotations'])
     variant['major_consequence'] = ''
     if worst_csq is None:
@@ -121,7 +121,7 @@ def add_consequence_to_variant(variant):
     variant['flags'] = get_flags_from_variant(variant)
 
 
-def annotation_severity(annotation):
+def annotation_severity(annotation:dict):
     """
     Evaluate severity of the consequences; "bigger is more important".
 
@@ -137,7 +137,7 @@ def annotation_severity(annotation):
     return rv
 
 
-def get_flags_from_variant(variant):
+def get_flags_from_variant(variant:dict):
     """
     Get flags from variant.
     Checks for:
@@ -163,7 +163,7 @@ def get_flags_from_variant(variant):
     return flags
 
 
-def get_proper_hgvs(annotation):
+def get_proper_hgvs(annotation:dict):
     """
     Get HGVS for change, either at transcript or protein level.
 
@@ -204,7 +204,7 @@ def get_protein_hgvs(annotation):
     return None
 
 
-def get_transcript_hgvs(annotation):
+def get_transcript_hgvs(annotation:dict):
     """
     Nucleotide change in HGVS format.
 
@@ -220,7 +220,7 @@ def get_transcript_hgvs(annotation):
     return None
 
 
-def order_vep_by_csq(annotation_list: list):
+def order_vep_by_csq(annotation_list:list):
     """
     Adds "major_consequence" to each annotation, orders by severity.
@@ -238,7 +238,7 @@ def order_vep_by_csq(annotation_list: list):
     return sorted(annotation_list, key=(lambda ann:CSQ_ORDER_DICT[ann['major_consequence']]))
 
 
-def remove_extraneous_vep_annotations(annotation_list: list):
+def remove_extraneous_vep_annotations(annotation_list:list):
     """
     Remove annotations with low-impact consequences (less than intron variant)
 
@@ -252,7 +252,7 @@ def remove_extraneous_vep_annotations(annotation_list: list):
             if worst_csq_index(ann['Consequence'].split('&')) <= CSQ_ORDER_DICT['intron_variant']]
 
 
-def worst_csq_from_list(csq_list):
+def worst_csq_from_list(csq_list:list):
     """
     Choose the worst consequence
 
@@ -265,7 +265,7 @@ def worst_csq_from_list(csq_list):
     return REV_CSQ_ORDER_DICT[worst_csq_index(csq_list)]
 
 
-def worst_csq_from_csq(csq):
+def worst_csq_from_csq(csq:str):
     """
     Find worst consequence in a possibly &-filled consequence string
 
@@ -278,7 +278,7 @@ def worst_csq_from_csq(csq):
     return REV_CSQ_ORDER_DICT[worst_csq_index(csq.split('&'))]
 
 
-def worst_csq_index(csq_list):
+def worst_csq_index(csq_list:list):
     """
     Find the index of the worst consequence.
     Corresponds to the lowest value (index) from CSQ_ORDER_DICT
 
@@ -292,7 +292,7 @@ def worst_csq_index(csq_list):
     return min([CSQ_ORDER_DICT[csq] for csq in csq_list])
 
 
-def worst_csq_with_vep(annotation_list):
+def worst_csq_with_vep(annotation_list:list):
     """
     Choose the vep annotation with the most severe consequence
     Adds a"major_consequence" field for that annotation
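The functions touched in the commit above all lean on the module's consequence-ranking tables. A small illustrative sketch of that machinery follows — the three consequence names are a made-up subset, not the module's real CSQ_ORDER list, but the mechanics match the code shown: consequences are ranked by list position, a lower index means more severe, and '&'-joined strings are split before taking the minimum.

CSQ_ORDER = ['missense_variant', 'intron_variant', 'upstream_gene_variant']  # made-up subset
CSQ_ORDER_DICT = {csq: index for index, csq in enumerate(CSQ_ORDER)}
REV_CSQ_ORDER_DICT = dict(enumerate(CSQ_ORDER))

def worst_csq_from_csq(csq: str):
    # Split an '&'-joined consequence string and keep the most severe entry
    return REV_CSQ_ORDER_DICT[min(CSQ_ORDER_DICT[c] for c in csq.split('&'))]

assert worst_csq_from_csq('intron_variant&missense_variant') == 'missense_variant'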
From 0d202c20ab6b59025569af01b5eda6f8721a3152 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20=C3=96stberg?=
Date: Tue, 19 Feb 2019 14:21:11 +0100
Subject: [PATCH 171/360] pylint fixes for tests

---
 backend/modules/browser/tests/test_lookups.py | 3 +--
 backend/modules/browser/tests/test_pgsql.py   | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py
index 154729805..4b1d1ef9f 100644
--- a/backend/modules/browser/tests/test_lookups.py
+++ b/backend/modules/browser/tests/test_lookups.py
@@ -13,7 +13,6 @@ def test_add_rsid_to_variant():
     lookups.add_rsid_to_variant('SweGen', variant)
     assert variant['rsid'] == 'rs924645261'
     variant = lookups.get_variant('SweGen', 16113980, '22', 'C', 'T')
-    rsid = variant['rsid']
     variant['rsid'] = ''
     lookups.add_rsid_to_variant('SweGen', variant)
     assert variant['rsid'] == 'rs9680543'
@@ -122,7 +121,7 @@ def test_get_gene():
                 'start': 19237396,
                 'stop': 19237489,
                 'strand': '+'}
-    
+
     result = lookups.get_gene('SweGen', 'ENSG00000251940')
     for val in expected:
         assert result[val] == expected[val]
diff --git a/backend/modules/browser/tests/test_pgsql.py b/backend/modules/browser/tests/test_pgsql.py
index ac0014602..34e41baf4 100644
--- a/backend/modules/browser/tests/test_pgsql.py
+++ b/backend/modules/browser/tests/test_pgsql.py
@@ -14,7 +14,7 @@ def test_get_autocomplete():
                     'ADH5', 'ADH6', 'ADH7', 'ADH5P2',
                     'ADH5P3', 'ADH5P4', 'ADHFE1'])
     assert set(res) == expected
-    
+
 
 def test_get_coverage():
     """
@@ -37,7 +37,7 @@ def test_get_coverage_pos():
     res = pgsql.get_coverage_pos('SweGen', 'gene', 'ENSG00000231565')
     assert res['chrom'] == '22'
     assert res['start'] == 16364817
-    assert res['stop'] == 16366254 
+    assert res['stop'] == 16366254
     res = pgsql.get_coverage_pos('SweGen', 'region', '22-46615715-46615880')
     assert res['chrom'] == '22'
     assert res['start'] == 46615715
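The two commits that follow introduce HTTP-level tests for the browser handlers using requests. A hedged sketch of the pattern they converge on — parsing the JSON body and asserting on individual fields rather than comparing raw response text, which breaks on any change in serialisation order. The dataset, gene id and expected values are taken from the tests themselves; check_get_gene is an illustrative name rather than one of the test functions, and a backend running on localhost:4000 is assumed.

import json
import requests

BASE_URL = 'http://localhost:4000'  # assumes a locally running backend

def check_get_gene(dataset='SweGen', gene_id='ENSG00000015475'):
    # Parse the payload and check selected fields instead of the raw text
    response = requests.get('{}/api/datasets/{}/browser/gene/{}'.format(BASE_URL, dataset, gene_id))
    gene = json.loads(response.text)
    assert gene['gene']['name'] == 'BID'
    assert len(gene['exons']) == 14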
From 8d9a45e587e40e7528c99272e30086f33e7148da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20=C3=96stberg?=
Date: Thu, 21 Feb 2019 14:10:44 +0100
Subject: [PATCH 172/360] start of api testing for the handlers using requests

---
 .../browser/tests/test_browser_handlers.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 backend/modules/browser/tests/test_browser_handlers.py

diff --git a/backend/modules/browser/tests/test_browser_handlers.py b/backend/modules/browser/tests/test_browser_handlers.py
new file mode 100644
index 000000000..4bfba7ed2
--- /dev/null
+++ b/backend/modules/browser/tests/test_browser_handlers.py
@@ -0,0 +1,18 @@
+"""
+Test the browser handlers
+"""
+
+import requests
+
+BASE_URL="http://localhost:4000"
+
+def test_get_gene():
+    """
+    Test GetGene.get()
+    """
+    dataset = 'SweGen'
+    gene = 'ENSG00000172955'
+    response = requests.get('{}/api/datasets/{}/browser/gene/{}'.format(BASE_URL, dataset, gene))
+    expected = '{"gene": {"id": 13918, "referenceSet": 1, "geneId": "ENSG00000172955", "name": "ADH6", "fullName": "alcohol dehydrogenase 6 (class V)", "canonicalTranscript": "ENST00000394899", "chrom": "4", "start": 100123795, "stop": 100140694, "strand": "-", "variants": null, "geneName": "ADH6", "fullGeneName": "alcohol dehydrogenase 6 (class V)"}, "exons": [{"start": 100123796, "stop": 100125400, "type": "exon"}, {"start": 100123796, "stop": 100125378, "type": "UTR"}, {"start": 100125379, "stop": 100125400, "type": "CDS"}, {"start": 100126082, "stop": 100126220, "type": "exon"}, {"start": 100126082, "stop": 100126220, "type": "CDS"}, {"start": 100128603, "stop": 100128738, "type": "exon"}, {"start": 100128603, "stop": 100128738, "type": "CDS"}, {"start": 100129825, "stop": 100130085, "type": "exon"}, {"start": 100129825, "stop": 100130085, "type": "CDS"}, {"start": 100131239, "stop": 100131455, "type": "exon"}, {"start": 100131239, "stop": 100131455, "type": "CDS"}, {"start": 100131572, "stop": 100131659, "type": "exon"}, {"start": 100131572, "stop": 100131659, "type": "CDS"}, {"start": 100134763, "stop": 100134904, "type": "exon"}, {"start": 100134763, "stop": 100134904, "type": "CDS"}, {"start": 100137318, "stop": 100137419, "type": "exon"}, {"start": 100137318, "stop": 100137419, "type": "CDS"}, {"start": 100140292, "stop": 100140403, "type": "exon"}, {"start": 100140292, "stop": 100140309, "type": "CDS"}, {"start": 100140310, "stop": 100140403, "type": "UTR"}], "transcripts": [{"transcriptId": "ENST00000394897"}, {"transcriptId": "ENST00000394899"}, {"transcriptId": "ENST00000512708"}, {"transcriptId": "ENST00000507484"}, {"transcriptId": "ENST00000407820"}, {"transcriptId": "ENST00000237653"}, {"transcriptId": "ENST00000508558"}, {"transcriptId": "ENST00000504257"}, {"transcriptId": "ENST00000513262"}]}'
+    assert response.text == expected
+

From 3c0f69503a223c7578a35597ad8390acdbf3591f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20=C3=96stberg?=
Date: Thu, 21 Feb 2019 14:56:44 +0100
Subject: [PATCH 173/360] skeleton for all tests, initial test for region and transcript

---
 .../browser/tests/test_browser_handlers.py | 86 ++++++++++++++++++-
 1 file changed, 82 insertions(+), 4 deletions(-)

diff --git a/backend/modules/browser/tests/test_browser_handlers.py b/backend/modules/browser/tests/test_browser_handlers.py
index 4bfba7ed2..f25a81f62 100644
--- a/backend/modules/browser/tests/test_browser_handlers.py
+++ b/backend/modules/browser/tests/test_browser_handlers.py
@@ -3,16 +3,94 @@
 """
 
 import requests
+import json
 
 BASE_URL="http://localhost:4000"
 
+def test_get_autocomplete():
+    """
+    Test GetAutocomplete.get()
+    """
+    assert False
+
+
+def test_download():
+    """
+    Test Download.get()
+    
""" + assert False + + +def test_get_coverage(): + """ + Test GetCoverage.get() + """ + assert False + + +def test_get_coverage_pos(): + """ + Test GetCoveragePos.get() + """ + assert False + + def test_get_gene(): """ Test GetGene.get() """ dataset = 'SweGen' - gene = 'ENSG00000172955' - response = requests.get('{}/api/datasets/{}/browser/gene/{}'.format(BASE_URL, dataset, gene)) - expected = '{"gene": {"id": 13918, "referenceSet": 1, "geneId": "ENSG00000172955", "name": "ADH6", "fullName": "alcohol dehydrogenase 6 (class V)", "canonicalTranscript": "ENST00000394899", "chrom": "4", "start": 100123795, "stop": 100140694, "strand": "-", "variants": null, "geneName": "ADH6", "fullGeneName": "alcohol dehydrogenase 6 (class V)"}, "exons": [{"start": 100123796, "stop": 100125400, "type": "exon"}, {"start": 100123796, "stop": 100125378, "type": "UTR"}, {"start": 100125379, "stop": 100125400, "type": "CDS"}, {"start": 100126082, "stop": 100126220, "type": "exon"}, {"start": 100126082, "stop": 100126220, "type": "CDS"}, {"start": 100128603, "stop": 100128738, "type": "exon"}, {"start": 100128603, "stop": 100128738, "type": "CDS"}, {"start": 100129825, "stop": 100130085, "type": "exon"}, {"start": 100129825, "stop": 100130085, "type": "CDS"}, {"start": 100131239, "stop": 100131455, "type": "exon"}, {"start": 100131239, "stop": 100131455, "type": "CDS"}, {"start": 100131572, "stop": 100131659, "type": "exon"}, {"start": 100131572, "stop": 100131659, "type": "CDS"}, {"start": 100134763, "stop": 100134904, "type": "exon"}, {"start": 100134763, "stop": 100134904, "type": "CDS"}, {"start": 100137318, "stop": 100137419, "type": "exon"}, {"start": 100137318, "stop": 100137419, "type": "CDS"}, {"start": 100140292, "stop": 100140403, "type": "exon"}, {"start": 100140292, "stop": 100140309, "type": "CDS"}, {"start": 100140310, "stop": 100140403, "type": "UTR"}], "transcripts": [{"transcriptId": "ENST00000394897"}, {"transcriptId": "ENST00000394899"}, {"transcriptId": "ENST00000512708"}, {"transcriptId": "ENST00000507484"}, {"transcriptId": "ENST00000407820"}, {"transcriptId": "ENST00000237653"}, {"transcriptId": "ENST00000508558"}, {"transcriptId": "ENST00000504257"}, {"transcriptId": "ENST00000513262"}]}' - assert response.text == expected + gene_id = 'ENSG00000015475' + response = requests.get('{}/api/datasets/{}/browser/gene/{}'.format(BASE_URL, dataset, gene_id)) + expected = {"name": "BID", "canonicalTranscript": "ENST00000317361", "chrom": "22", "strand": "-", "geneName": "BID"} + gene = json.loads(response.text) + + for value in expected: + assert gene['gene'][value] == expected[value] + assert len(gene['exons']) == 14 + assert len(gene['transcripts']) == 10 + +def test_get_region(): + """ + Test GetRegion.get() + """ + dataset = 'SweGen' + region_def = '22-46615715-46615880' + response = requests.get('{}/api/datasets/{}/browser/region/{}'.format(BASE_URL, dataset, region_def)) + region = json.loads(response.text) + assert region == {'region': {'chrom': '22', 'start': 46615715, 'stop': 46615880, 'limit': 100000}} + + +def test_get_transcript(): + """ + Test GetTranscript.get() + """ + dataset = 'SweGen' + transcript_id = 'ENST00000317361' + response = requests.get('{}/api/datasets/{}/browser/transcript/{}'.format(BASE_URL, dataset, transcript_id)) + transcript = json.loads(response.text) + + assert transcript['gene']['id'] == 'ENSG00000015475' + assert len(transcript['exons']) == 14 + + +def test_get_variant(): + """ + Test GetVariant.get() + """ + assert False + + +def test_get_variants(): + """ + 
Test GetVariants.get() + """ + assert False + + +def test_searhc(): + """ + Test Search.get() + """ + assert False From cf3b5f45f4e58a1a4b871fa1fdfd9123d6ce0f93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 22 Feb 2019 08:50:23 +0100 Subject: [PATCH 174/360] added version to browser requests in frontend --- .../src/js/controller.browserController.js | 20 ++++---- frontend/src/js/factory.browser.js | 47 +++++++++++-------- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/frontend/src/js/controller.browserController.js b/frontend/src/js/controller.browserController.js index 6537e6c4c..95986c814 100644 --- a/frontend/src/js/controller.browserController.js +++ b/frontend/src/js/controller.browserController.js @@ -54,7 +54,7 @@ if ($routeParams.transcript) { localThis.itemType = "transcript"; localThis.item = $routeParams.transcript; - Browser.getTranscript($routeParams.dataset, $routeParams.transcript).then( function(data) { + Browser.getTranscript($routeParams.dataset, $routeParams.version, $routeParams.transcript).then( function(data) { localThis.transcript = data.transcript; localThis.gene = data.gene; localThis.coverage.region.exons = data.exons; @@ -63,21 +63,21 @@ if ($routeParams.region) { localThis.itemType = "region"; localThis.item = $routeParams.region; - Browser.getRegion($routeParams.dataset, $routeParams.region).then( function(data) { + Browser.getRegion($routeParams.dataset, $routeParams.version, $routeParams.region).then( function(data) { localThis.region = data.region; }); } if ($routeParams.gene) { localThis.itemType = "gene"; localThis.item = $routeParams.gene; - Browser.getGene($routeParams.dataset, $routeParams.gene).then( function(data) { + Browser.getGene($routeParams.dataset, $routeParams.version, $routeParams.gene).then( function(data) { localThis.gene = data.gene; localThis.transcripts = data.transcripts; localThis.coverage.region.exons = data.exons; }); } if (localThis.itemType) { - Browser.getVariants($routeParams.dataset, localThis.itemType, localThis.item).then( function(data) { + Browser.getVariants($routeParams.dataset, $routeParams.version, localThis.itemType, localThis.item).then( function(data) { localThis.variants = data.variants; localThis.headers = data.headers; @@ -92,12 +92,12 @@ localThis.filterVariants(); }); - Browser.getCoveragePos($routeParams.dataset, localThis.itemType, localThis.item).then( function(data) { + Browser.getCoveragePos($routeParams.dataset, $routeParams.version, localThis.itemType, localThis.item).then( function(data) { localThis.coverage.region.start = data.start; localThis.coverage.region.stop = data.stop; localThis.coverage.region.chrom = data.chrom; }); - Browser.getCoverage($routeParams.dataset, localThis.itemType, localThis.item).then(function(data) { + Browser.getCoverage($routeParams.dataset, $routeParams.version, localThis.itemType, localThis.item).then(function(data) { localThis.coverage.data = data.coverage; localThis.coverage.loaded = true; }, function() { @@ -105,11 +105,11 @@ }); } if ($routeParams.variant) { - Browser.getVariant($routeParams.dataset, $routeParams.variant).then( function(data) { + Browser.getVariant($routeParams.dataset, $routeParams.version, $routeParams.variant).then( function(data) { localThis.variant = data.variant; }); } - Dataset.getDataset($routeParams.dataset, $routeParams.version) + Dataset.getDataset($routeParams.dataset, $routeParams.version, $routeParams.version) .then(function(data) { localThis.dataset = data.dataset; }, @@ -131,7 +131,7 @@ 
localThis.query = query; } if (localThis.query) { - Browser.search($routeParams.dataset, localThis.query).then( function(data) { + Browser.search($routeParams.dataset, $routeParams.version, localThis.query).then( function(data) { var url = browserLink(`${data.type}/${data.value}`); if ( data.type == "error" || data.type == "not_found" ) { @@ -145,7 +145,7 @@ function autocomplete() { localThis.activeSuggestion = -1; if (localThis.query) { - Browser.autocomplete($routeParams.dataset, localThis.query) + Browser.autocomplete($routeParams.dataset, $routeParams.version, localThis.query) .then( function(data) { localThis.suggestions = data.values; }); diff --git a/frontend/src/js/factory.browser.js b/frontend/src/js/factory.browser.js index 66959bcd0..7c125bee2 100644 --- a/frontend/src/js/factory.browser.js +++ b/frontend/src/js/factory.browser.js @@ -13,56 +13,65 @@ getCoveragePos:getCoveragePos, }; - function getGene(dataset, gene) { - return $http.get("/api/datasets/" + dataset + "/browser/gene/" + gene).then(function(data) { + function baseUrl(dataset, version) { + var url = "/api/datasets/" + dataset + "/"; + if ( version ) { + url += "version/" + version + "/" + } + url += 'browser/'; + return url; + } + + function getGene(dataset, version, gene) { + return $http.get(baseUrl(dataset, version) + "/gene/" + gene).then(function(data) { return data.data; }); } - function getRegion(dataset, region) { - return $http.get("/api/datasets/" + dataset + "/browser/region/" + region).then(function(data) { + function getRegion(dataset, version, region) { + return $http.get(baseUrl(dataset, version) + "/region/" + region).then(function(data) { return data.data; }); } - function getTranscript(dataset, transcript) { - return $http.get("/api/datasets/" + dataset + "/browser/transcript/" + transcript).then(function(data) { + function getTranscript(dataset, version, transcript) { + return $http.get(baseUrl(dataset, version) + "/transcript/" + transcript).then(function(data) { return data.data; }); } - function getVariant(dataset, variant) { - return $http.get("/api/datasets/" + dataset + "/browser/variant/" + variant).then(function(data) { + function getVariant(dataset, version, variant) { + return $http.get(baseUrl(dataset, version) + "variant/" + variant).then(function(data) { return data.data; - }); + }); } - function search(dataset, query) { - return $http.get("/api/datasets/" + dataset + "/browser/search/" + query).then(function(data) { + function search(dataset, version, query) { + return $http.get(baseUrl(dataset, version) + "/search/" + query).then(function(data) { return data.data; }); } - function autocomplete(dataset, query) { - return $http.get("/api/datasets/" + dataset + "/browser/autocomplete/" + query).then(function(data) { + function autocomplete(dataset, version, query) { + return $http.get(baseUrl(dataset, version) + "/autocomplete/" + query).then(function(data) { return data.data; }); } - function getVariants(dataset, datatype, item) { - return $http.get("api/datasets/" + dataset + "/browser/variants/" + datatype + "/" + item).then(function(data) { + function getVariants(dataset, version, datatype, item) { + return $http.get(baseUrl(dataset, version) + "/variants/" + datatype + "/" + item).then(function(data) { return data.data; }); } - function getCoverage(dataset, datatype, item) { - return $http.get("api/datasets/" + dataset + "/browser/coverage/" + datatype + "/" + item).then(function(data) { + function getCoverage(dataset, version, datatype, item) { + return 
$http.get(baseUrl(dataset, version) + "/coverage/" + datatype + "/" + item).then(function(data) { return data.data; }); } - function getCoveragePos(dataset, datatype, item) { - return $http.get("api/datasets/" + dataset + "/browser/coverage_pos/" + datatype + "/" + item).then(function(data) { + function getCoveragePos(dataset, version, datatype, item) { + return $http.get(baseUrl(dataset, version) + "/coverage_pos/" + datatype + "/" + item).then(function(data) { return data.data; }); } From 2fefda85f09c9f857417ea78520ccc72a17cb7cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 22 Feb 2019 09:53:58 +0100 Subject: [PATCH 175/360] updated tests with version --- backend/modules/browser/tests/.coveragerc | 12 ++++++++++- backend/modules/browser/tests/test_lookups.py | 20 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/backend/modules/browser/tests/.coveragerc b/backend/modules/browser/tests/.coveragerc index fc2753f56..cea6a995c 100644 --- a/backend/modules/browser/tests/.coveragerc +++ b/backend/modules/browser/tests/.coveragerc @@ -2,4 +2,14 @@ omit = # omit anything in a .local directory anywhere */tests/* - */__init__.py \ No newline at end of file + */__init__.py + */venv/* + */virtualenv/* + +[report] +omit = + # omit anything in a .local directory anywhere + */tests/* + */__init__.py + */venv/* + */virtualenv/* diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index 4b1d1ef9f..cb8cefb5e 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -282,12 +282,21 @@ def test_get_variant(): Test get_variant() """ result = lookups.get_variant('SweGen', 16057464, '22', 'G', 'A') + assert result['variant_id'] == '22-16057464-G-A' assert result['genes'] == ['ENSG00000233866'] assert result['transcripts'] == ['ENST00000424770'] + result = lookups.get_variant('SweGen', 9435852, '21', 'T', 'C') + assert not result # incorrect position assert not lookups.get_variant('SweGen', -1, '1', 'A', 'T') + # with version + result = lookups.get_variant('SweGen', 16057464, '22', 'G', 'A', "20161223") + assert not result + result = lookups.get_variant('SweGen', 9435852, '21', 'T', 'C', "20161223") + assert result['variant_id'] == '21-9435852-T-C' + def test_get_variants_by_rsid(caplog): ''' @@ -299,6 +308,11 @@ def test_get_variants_by_rsid(caplog): assert set(result[0]['genes']) == set(['ENSG00000100156', 'ENSG00000128298', 'ENSG00000272720']) assert len(result[0]['genes']) == 3 assert len(result[0]['transcripts']) == 6 + assert not lookups.get_variants_by_rsid('SweGen', 'rs76676778') + # with version + assert not lookups.get_variants_by_rsid('SweGen', 'rs185758992', '20161223') + result = lookups.get_variants_by_rsid('SweGen', 'rs76676778', '20161223') + assert result[0]['variant_id'] == '21-9411609-G-T' # by position result = lookups.get_variants_by_rsid('SweGen', 'rs185758992', check_position=True) @@ -306,9 +320,15 @@ def test_get_variants_by_rsid(caplog): assert set(result[0]['genes']) == set(['ENSG00000100156', 'ENSG00000128298', 'ENSG00000272720']) assert len(result[0]['genes']) == 3 assert len(result[0]['transcripts']) == 6 + assert not lookups.get_variants_by_rsid('SweGen', 'rs76676778', check_position=True) + # with version + assert not lookups.get_variants_by_rsid('SweGen', 'rs185758992', '20161223', check_position=True) + result = lookups.get_variants_by_rsid('SweGen', 'rs76676778', '20161223', check_position=True) + assert 
result[0]['variant_id'] == '21-9411609-G-T' # errors assert lookups.get_variants_by_rsid('incorrect_name', 'rs373706802') is None + assert lookups.get_variants_by_rsid('SweGen', 'rs37356766700', check_position=True) is None assert lookups.get_variants_by_rsid('SweGen', '373706802') is None assert lookups.get_variants_by_rsid('SweGen', 'rs3737o68o2') is None From b19a54ccde4ecc9472dca36987253c7536a5ac17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 22 Feb 2019 09:54:25 +0100 Subject: [PATCH 176/360] fix for exception due to rsid not in db --- backend/modules/browser/lookups.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 9555f77fa..adc4f2a52 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -465,7 +465,7 @@ def get_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_version:st return variant -def get_variants_by_rsid(dataset:str, rsid:str, check_position:str=False, ds_version:str=None): +def get_variants_by_rsid(dataset:str, rsid:str, ds_version:str=None, check_position:str=False): """ Retrieve variants by their associated rsid May also look up rsid and search for variants at the position @@ -500,13 +500,16 @@ def get_variants_by_rsid(dataset:str, rsid:str, check_position:str=False, ds_ver .dicts() .get()) dbsnp_version = refset['dbsnp_version'] - - rsid_dbsnp = (db.DbSNP - .select() - .where((db.DbSNP.rsid == rsid) & - (db.DbSNP.version_id == dbsnp_version)) - .dicts() - .get()) + try: + rsid_dbsnp = (db.DbSNP + .select() + .where((db.DbSNP.rsid == rsid) & + (db.DbSNP.version_id == dbsnp_version)) + .dicts() + .get()) + except db.DbSNP.DoesNotExist: + logging.error('get_variants_by_rsid({}, {}): rsid not in dbsnp'.format(dataset, rsid)) + return None query = (db.Variant .select() .where((db.Variant.pos == rsid_dbsnp['pos']) & From 19102cd58b10b29238fee78147db1fd5e1993a59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 22 Feb 2019 09:56:19 +0100 Subject: [PATCH 177/360] functions in alphabetical order; variant call using version as well --- backend/modules/browser/browser_handlers.py | 42 +++++++++++---------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 7b5b2b17f..4e32ca83c 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -1,3 +1,7 @@ +""" +Request handlers for the browser +""" + import logging import handlers @@ -18,24 +22,6 @@ def get(self, dataset, query, ds_version=None): results = pgsql.get_autocomplete(query) ret = {'values': sorted(list(set(results)))[:20]} - self.finish( ret ) - - -class GetCoverage(handlers.UnsafeHandler): - """ - Retrieve coverage - """ - def get(self, dataset, datatype, item, ds_version=None): - ret = pgsql.get_coverage(dataset, datatype, item, ds_version) - self.finish(ret) - - -class GetCoveragePos(handlers.UnsafeHandler): - """ - Retrieve coverage range - """ - def get(self, dataset, datatype, item, ds_version=None): - ret = pgsql.get_coverage_pos(dataset, datatype, item) self.finish(ret) @@ -63,6 +49,24 @@ def get(self, dataset: str, datatype, item, ds_version=None): self.write(','.join(map(str, [variant[h] for h in headers])) + '\n') +class GetCoverage(handlers.UnsafeHandler): + """ + Retrieve coverage + """ + def get(self, dataset, datatype, item, 
ds_version=None): + ret = pgsql.get_coverage(dataset, datatype, item, ds_version) + self.finish(ret) + + +class GetCoveragePos(handlers.UnsafeHandler): + """ + Retrieve coverage range + """ + def get(self, dataset, datatype, item, ds_version=None): + ret = pgsql.get_coverage_pos(dataset, datatype, item) + self.finish(ret) + + class GetGene(handlers.UnsafeHandler): """ Request information about a gene @@ -232,7 +236,7 @@ def get(self, dataset, variant, ds_version=None): self.set_user_msg('Unable to parse variant', 'error') return orig_variant = variant - variant = lookups.get_variant(dataset, v[1], v[0], v[2], v[3]) + variant = lookups.get_variant(dataset, v[1], v[0], v[2], v[3], ds_version) if not variant: logging.error('Variant not found ({})'.format(orig_variant)) From efa91e5e87b18409a8e5a3dc15d5e89499bb3ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 22 Feb 2019 12:13:19 +0100 Subject: [PATCH 178/360] updated coveragerc --- .coveragerc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.coveragerc b/.coveragerc index 8db5982de..fd89de1f0 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,4 +2,4 @@ omit = /usr/local/*,/home/travis/virtualenv/* [report] -omit = /usr/local/*,/home/travis/virtualenv/* +omit = /usr/local/*,/home/travis/virtualenv/*, */__init.py__, */test*.py From 185b1274f26d487ab7e6ce334c0e13607ee4ff1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 22 Feb 2019 12:48:34 +0100 Subject: [PATCH 179/360] function parameter type hints added --- backend/modules/browser/browser_handlers.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 4e32ca83c..bb9dc7b62 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -16,7 +16,7 @@ REGION_LIMIT = 100000 class Autocomplete(handlers.UnsafeHandler): - def get(self, dataset, query, ds_version=None): + def get(self, dataset:str, query:str, ds_version:str=None): ret = {} results = pgsql.get_autocomplete(query) @@ -26,7 +26,7 @@ def get(self, dataset, query, ds_version=None): class Download(handlers.UnsafeHandler): - def get(self, dataset: str, datatype, item, ds_version=None): + def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): """ Download variants as csv @@ -53,7 +53,7 @@ class GetCoverage(handlers.UnsafeHandler): """ Retrieve coverage """ - def get(self, dataset, datatype, item, ds_version=None): + def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): ret = pgsql.get_coverage(dataset, datatype, item, ds_version) self.finish(ret) @@ -62,7 +62,7 @@ class GetCoveragePos(handlers.UnsafeHandler): """ Retrieve coverage range """ - def get(self, dataset, datatype, item, ds_version=None): + def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): ret = pgsql.get_coverage_pos(dataset, datatype, item) self.finish(ret) @@ -71,7 +71,7 @@ class GetGene(handlers.UnsafeHandler): """ Request information about a gene """ - def get(self, dataset, gene, ds_version=None): + def get(self, dataset:str, gene:str, ds_version:str=None): """ Request information about a gene @@ -116,7 +116,7 @@ class GetRegion(handlers.UnsafeHandler): """ Request information about genes in a region """ - def get(self, dataset, region, ds_version=None): + def get(self, dataset:str, region:str, ds_version:str=None): """ Request information about genes in a region @@ 
-173,7 +173,7 @@ class GetTranscript(handlers.UnsafeHandler): """ Request information about a transcript """ - def get(self, dataset, transcript, ds_version=None): + def get(self, dataset:str, transcript:str, ds_version:str=None): """ Request information about a transcript @@ -216,7 +216,7 @@ class GetVariant(handlers.UnsafeHandler): """ Request information about a gene """ - def get(self, dataset, variant, ds_version=None): + def get(self, dataset:str, variant:str, ds_version:str=None): """ Request information about a gene @@ -316,7 +316,7 @@ class GetVariants(handlers.UnsafeHandler): """ Retrieve variants """ - def get(self, dataset, datatype, item, ds_version=None): + def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): """ Retrieve variants @@ -339,7 +339,7 @@ class Search(handlers.UnsafeHandler): """ Perform a search for the wanted object """ - def get(self, dataset, query, ds_version=None): + def get(self, dataset:str, query:str, ds_version:str=None): """ Perform a search for the wanted object From 36f16dc2bdbf5af1605002211c8f292b4b8d1575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 22 Feb 2019 14:19:00 +0100 Subject: [PATCH 180/360] updates to test to limit everything to chromosome 21 and 22 --- .coveragerc | 4 +- .../browser/tests/test_browser_handlers.py | 89 ++++++++++++++++--- backend/modules/browser/tests/test_lookups.py | 57 ++++++------ backend/modules/browser/tests/test_pgsql.py | 7 +- 4 files changed, 115 insertions(+), 42 deletions(-) diff --git a/.coveragerc b/.coveragerc index fd89de1f0..c18574f4f 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,5 +1,5 @@ [run] -omit = /usr/local/*,/home/travis/virtualenv/* +omit = /usr/local/*,/home/travis/virtualenv/*,*venv* [report] -omit = /usr/local/*,/home/travis/virtualenv/*, */__init.py__, */test*.py +omit = /usr/local/*,/home/travis/virtualenv/*, */__init.py__, */test*.py, *venv* diff --git a/backend/modules/browser/tests/test_browser_handlers.py b/backend/modules/browser/tests/test_browser_handlers.py index f25a81f62..302914f4f 100644 --- a/backend/modules/browser/tests/test_browser_handlers.py +++ b/backend/modules/browser/tests/test_browser_handlers.py @@ -5,34 +5,58 @@ import requests import json -BASE_URL="http://localhost:4000" +BASE_URL="http://localhost:4001" def test_get_autocomplete(): """ Test GetAutocomplete.get() """ - assert False + dataset = 'SweGen' + + query = 'PA' + response = requests.get('{}/api/datasets/{}/browser/autocomplete/{}'.format(BASE_URL, dataset, query)) + data = json.loads(response.text) + assert set(data["values"]) == set(["PABPC1P9", "PACSIN2", "PANX2", "PARP4P3", + "PARVB", "PARVG", "PATZ1", "PAXBP1", "PAXBP1-AS1"]) def test_download(): """ - Test GetCoveragePos.get() + Test Download.get() """ - assert False + dataset = 'SweGen' + + data_type = 'transcript' + data_item = 'ENST00000438441' + response = requests.get('{}/api/datasets/{}/browser/download/{}/{}'.format(BASE_URL, dataset, data_type, data_item)) + assert len(response.text.split('\n')) == 407 def test_get_coverage(): """ Test GetCoverage.get() """ - assert False + dataset = 'SweGen' + + data_type = 'transcript' + data_item = 'ENST00000438441' + response = requests.get('{}/api/datasets/{}/browser/coverage/{}/{}'.format(BASE_URL, dataset, data_type, data_item)) + data = json.loads(response.text) + assert len(data['coverage']) == 144 def test_get_coverage_pos(): """ Test GetCoveragePos.get() """ - assert False + dataset = 'SweGen' + data_type = 'region' + data_item = '22-100001-100101' + 
response = requests.get('{}/api/datasets/{}/browser/coverage_pos/{}/{}'.format(BASE_URL, dataset, data_type, data_item)) + cov_pos = json.loads(response.text) + assert cov_pos['start'] == 100001 + assert cov_pos['stop'] == 100101 + assert cov_pos['chrom'] == '22' def test_get_gene(): @@ -50,6 +74,12 @@ def test_get_gene(): assert len(gene['exons']) == 14 assert len(gene['transcripts']) == 10 + dataset = 'SweGen' + gene_id = 'ENSG00000015475' + response = requests.get('{}/api/datasets/{}/browser/gene/{}'.format(BASE_URL, dataset, gene_id)) + expected = {"name": "BID", "canonicalTranscript": "ENST00000317361", "chrom": "22", "strand": "-", "geneName": "BID"} + gene = json.loads(response.text) + def test_get_region(): """ @@ -79,18 +109,57 @@ def test_get_variant(): """ Test GetVariant.get() """ - assert False + dataset = 'SweGen' + variant_id = '22-16057464-G-A' + response = requests.get('{}/api/datasets/{}/browser/variant/{}'.format(BASE_URL, dataset, variant_id)) + variant = json.loads(response.text) + + assert variant['variant']['variantId'] == '22-16057464-G-A' + assert variant['variant']['genes'] == ['ENSG00000233866'] + assert variant['variant']['transcripts'] == ['ENST00000424770'] + + variant_id = '21-9435852-T-C' + version = '20161223' + response = requests.get('{}/api/datasets/{}/browser/variant/{}'.format(BASE_URL, dataset, variant_id)) + assert response.status_code == 404 + response = requests.get('{}/api/datasets/{}/version/{}/browser/variant/{}'.format(BASE_URL, dataset, version, variant_id)) + variant = json.loads(response.text) + assert variant['variant']['variantId'] == '21-9435852-T-C' def test_get_variants(): """ Test GetVariants.get() """ - assert False + dataset = 'SweGen' + data_type = 'gene' + data_item = 'ENSG00000231565' + response = requests.get('{}/api/datasets/{}/browser/variants/{}/{}'.format(BASE_URL, dataset, data_type, data_item)) + data = json.loads(response.text) + assert len(data['variants']) == 405 -def test_searhc(): + data_type = 'region' + data_item = '22-46615715-46615880' + response = requests.get('{}/api/datasets/{}/browser/variants/{}/{}'.format(BASE_URL, dataset, data_type, data_item)) + data = json.loads(response.text) + assert len(data['variants']) == 3 + + data_type = 'transcript' + data_item = 'ENST00000438441' + response = requests.get('{}/api/datasets/{}/browser/variants/{}/{}'.format(BASE_URL, dataset, data_type, data_item)) + data = json.loads(response.text) + assert len(data['variants']) == 405 + + +def test_search(): """ Test Search.get() """ - assert False + dataset = 'SweGen' + + query = 'NF1P3' + response = requests.get('{}/api/datasets/{}/browser/search/{}'.format(BASE_URL, dataset, query)) + data = json.loads(response.text) + assert data['type'] == 'gene' + assert data['value'] == 'ENSG00000183249' diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index cb8cefb5e..f584ea87e 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -26,12 +26,12 @@ def test_get_awesomebar_result(): assert result == ('dbsnp_variant_set', 373706802) result = lookups.get_awesomebar_result('SweGen', 'rs783') assert result == ('variant', '22-29461622-G-A') - result = lookups.get_awesomebar_result('SweGen', 'ADH6') - assert result == ('gene', 'ENSG00000172955') - result = lookups.get_awesomebar_result('SweGen', 'ENSG00000172955') - assert result == ('gene', 'ENSG00000172955') - result = lookups.get_awesomebar_result('SweGen', 'ENST00000237653') - 
assert result == ('transcript', 'ENST00000237653') + result = lookups.get_awesomebar_result('SweGen', 'NF1P3') + assert result == ('gene', 'ENSG00000183249') + result = lookups.get_awesomebar_result('SweGen', 'ENSG00000183249') + assert result == ('gene', 'ENSG00000183249') + result = lookups.get_awesomebar_result('SweGen', 'ENST00000457709') + assert result == ('transcript', 'ENST00000457709') result = lookups.get_awesomebar_result('SweGen', '22-46615715-46615880') assert result == ('region', '22-46615715-46615880') result = lookups.get_awesomebar_result('SweGen', 'CHR22:46615715-46615880') @@ -49,14 +49,19 @@ def test_get_coverage_for_bases(): # normal coverage = lookups.get_coverage_for_bases('SweGen', '22', 46546423, 46549652) assert len(coverage) == 323 - assert coverage[0] == {'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], - 'dataset_version': 4, 'id': 2954967, 'mean': 24.94, 'median': 24.0, 'pos': 46546430} + expected = {'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], + 'dataset_version': 4, 'mean': 24.94, 'median': 24.0, 'pos': 46546430} + for val in expected: + assert coverage[0][val] == expected[val] + assert len(lookups.get_coverage_for_bases('SweGen', '22', 46615715, 46615880)) == 17 # no end_pos coverage = lookups.get_coverage_for_bases('SweGen', '22', 46546430) - assert coverage == [{'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], - 'dataset_version': 4, 'id': 2954967, 'mean': 24.94, 'median': 24.0, 'pos': 46546430}] + expected = {'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], + 'dataset_version': 4, 'mean': 24.94, 'median': 24.0, 'pos': 46546430} + for val in expected: + assert coverage[0][val] == expected[val] assert len(lookups.get_coverage_for_bases('SweGen', '22', 46615715, 46615880)) == 17 # no hits @@ -74,14 +79,18 @@ def test_get_coverage_for_transcript(): # normal coverage = lookups.get_coverage_for_transcript('SweGen', '22', 46546423, 46549652) assert len(coverage) == 323 - assert coverage[0] == {'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], - 'dataset_version': 4, 'id': 2954967, 'mean': 24.94, 'median': 24.0, 'pos': 46546430} + expected = {'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], + 'dataset_version': 4, 'mean': 24.94, 'median': 24.0, 'pos': 46546430} + for val in expected: + assert coverage[0][val] == expected[val] assert len(lookups.get_coverage_for_transcript('SweGen', '22', 46615715, 46615880)) == 17 # no end_pos coverage = lookups.get_coverage_for_transcript('SweGen', '22', 46546430) - assert coverage == [{'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], - 'dataset_version': 4, 'id': 2954967, 'mean': 24.94, 'median': 24.0, 'pos': 46546430}] + expected = {'chrom': '22', 'coverage': [1.0, 1.0, 0.993, 0.91, 0.697, 0.426, 0.2, 0.009, 0.0], + 'dataset_version': 4, 'mean': 24.94, 'median': 24.0, 'pos': 46546430} + for val in expected: + assert coverage[0][val] == expected[val] assert len(lookups.get_coverage_for_transcript('SweGen', '22', 46615715, 46615880)) == 17 # no hits @@ -148,7 +157,8 @@ def test_get_gene_by_dbid(): 'start': 16967410, 'stop': 16969212, 'strand': '+'} - result = lookups.get_gene_by_dbid(53626) + gene = lookups.get_gene('SweGen', 'ENSG00000226444') + result = lookups.get_gene_by_dbid(gene['id']) for val in expected: assert result[val] == expected[val] @@ -249,8 +259,8 @@ def 
test_get_transcripts_in_gene(): """ Test get_transcripts_in_gene() """ - res = lookups.get_transcripts_in_gene('SweGen', 'ENSG00000103197') - assert len(res) == 27 + res = lookups.get_transcripts_in_gene('SweGen', 'ENSG00000228314') + assert len(res) == 3 assert not lookups.get_transcripts_in_gene('bad_dataset', 'ENSG00000241670') assert not lookups.get_transcripts_in_gene('SweGen', 'ENSGASDFG') @@ -271,8 +281,9 @@ def test_get_transcripts_in_gene_by_dbid(): """ Test get_transcripts_in_gene_by_dbid() """ - res = lookups.get_transcripts_in_gene_by_dbid(53626) - assert len(res) == 2 + gene = lookups.get_gene('SweGen', 'ENSG00000228314') + res = lookups.get_transcripts_in_gene_by_dbid(gene['id']) + assert len(res) == 3 res = lookups.get_transcripts_in_gene_by_dbid(-1) assert not res @@ -298,7 +309,7 @@ def test_get_variant(): assert result['variant_id'] == '21-9435852-T-C' -def test_get_variants_by_rsid(caplog): +def test_get_variants_by_rsid(): ''' Test get_variants_by_rsid() ''' @@ -332,12 +343,6 @@ def test_get_variants_by_rsid(caplog): assert lookups.get_variants_by_rsid('SweGen', '373706802') is None assert lookups.get_variants_by_rsid('SweGen', 'rs3737o68o2') is None - expected = ('get_dataset_version(incorrect_name, version=None): cannot retrieve dataset version', - 'get_variants_by_rsid(SweGen, 373706802): rsid not starting with rs', - 'get_variants_by_rsid(SweGen, rs3737o68o2): not an integer after rs') - for comparison in zip(caplog.messages, expected): - assert comparison[0] == comparison[1] - # no variants with rsid available assert not lookups.get_variants_by_rsid('SweGen', 'rs1') diff --git a/backend/modules/browser/tests/test_pgsql.py b/backend/modules/browser/tests/test_pgsql.py index 34e41baf4..b5d677127 100644 --- a/backend/modules/browser/tests/test_pgsql.py +++ b/backend/modules/browser/tests/test_pgsql.py @@ -9,10 +9,9 @@ def test_get_autocomplete(): """ Test get_autocomplete() """ - res = pgsql.get_autocomplete('ADH') - expected = set(['ADH1A', 'ADH1B', 'ADH1C', 'ADH4', - 'ADH5', 'ADH6', 'ADH7', 'ADH5P2', - 'ADH5P3', 'ADH5P4', 'ADHFE1']) + res = pgsql.get_autocomplete('PA') + expected = set(["PABPC1P9", "PACSIN2", "PANX2", "PARP4P3", + "PARVB", "PARVG", "PATZ1", "PAXBP1", "PAXBP1-AS1"]) assert set(res) == expected From 2c66d717ce965402255202d83f5ba113ef35e85b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 22 Feb 2019 14:55:23 +0100 Subject: [PATCH 181/360] removed double / --- frontend/src/js/factory.browser.js | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/frontend/src/js/factory.browser.js b/frontend/src/js/factory.browser.js index 7c125bee2..4cb4ae1cc 100644 --- a/frontend/src/js/factory.browser.js +++ b/frontend/src/js/factory.browser.js @@ -23,19 +23,19 @@ } function getGene(dataset, version, gene) { - return $http.get(baseUrl(dataset, version) + "/gene/" + gene).then(function(data) { + return $http.get(baseUrl(dataset, version) + "gene/" + gene).then(function(data) { return data.data; }); } function getRegion(dataset, version, region) { - return $http.get(baseUrl(dataset, version) + "/region/" + region).then(function(data) { + return $http.get(baseUrl(dataset, version) + "region/" + region).then(function(data) { return data.data; }); } function getTranscript(dataset, version, transcript) { - return $http.get(baseUrl(dataset, version) + "/transcript/" + transcript).then(function(data) { + return $http.get(baseUrl(dataset, version) + "transcript/" + transcript).then(function(data) { return data.data; }); 
} @@ -47,31 +47,31 @@ } function search(dataset, version, query) { - return $http.get(baseUrl(dataset, version) + "/search/" + query).then(function(data) { + return $http.get(baseUrl(dataset, version) + "search/" + query).then(function(data) { return data.data; }); } function autocomplete(dataset, version, query) { - return $http.get(baseUrl(dataset, version) + "/autocomplete/" + query).then(function(data) { + return $http.get(baseUrl(dataset, version) + "autocomplete/" + query).then(function(data) { return data.data; }); } function getVariants(dataset, version, datatype, item) { - return $http.get(baseUrl(dataset, version) + "/variants/" + datatype + "/" + item).then(function(data) { + return $http.get(baseUrl(dataset, version) + "variants/" + datatype + "/" + item).then(function(data) { return data.data; }); } function getCoverage(dataset, version, datatype, item) { - return $http.get(baseUrl(dataset, version) + "/coverage/" + datatype + "/" + item).then(function(data) { + return $http.get(baseUrl(dataset, version) + "coverage/" + datatype + "/" + item).then(function(data) { return data.data; }); } function getCoveragePos(dataset, version, datatype, item) { - return $http.get(baseUrl(dataset, version) + "/coverage_pos/" + datatype + "/" + item).then(function(data) { + return $http.get(baseUrl(dataset, version) + "coverage_pos/" + datatype + "/" + item).then(function(data) { return data.data; }); } From 883406f458f4feb5fcb70fa4ce96515274cfa391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 22 Feb 2019 19:48:06 +0100 Subject: [PATCH 182/360] give search support for versions --- backend/modules/browser/browser_handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index bb9dc7b62..ed1de23be 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -349,7 +349,7 @@ def get(self, dataset:str, query:str, ds_version:str=None): """ ret = {"dataset": dataset, "value": None, "type": None} - datatype, identifier = lookups.get_awesomebar_result(dataset, query) + datatype, identifier = lookups.get_awesomebar_result(dataset, query, ds_version) if datatype == "dbsnp_variant_set": datatype = "dbsnp" From 1d532e515133540ef09305f8d35307d14f048b47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 22 Feb 2019 19:54:55 +0100 Subject: [PATCH 183/360] id is implicit --- backend/modules/browser/lookups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index adc4f2a52..83b00b72b 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -355,7 +355,7 @@ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_versio (db.Variant.ref == ref) & (db.Variant.alt == alt) & (db.Variant.chrom == chrom) & - (db.Variant.dataset_version == dataset_version.id)) + (db.Variant.dataset_version == dataset_version)) .dicts() .get()) variant['genes'] = [gene['gene_id'] for gene in From d659d9a3adc633a63b514af9e44db623d6563940 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 22 Feb 2019 19:57:11 +0100 Subject: [PATCH 184/360] increase coverage of tests --- .../browser/tests/test_browser_handlers.py | 15 +++++++++++++++ backend/modules/browser/tests/test_utils.py | 1 + 2 files changed, 16 insertions(+) diff --git 
a/backend/modules/browser/tests/test_browser_handlers.py b/backend/modules/browser/tests/test_browser_handlers.py index 302914f4f..73a6f5ede 100644 --- a/backend/modules/browser/tests/test_browser_handlers.py +++ b/backend/modules/browser/tests/test_browser_handlers.py @@ -91,6 +91,14 @@ def test_get_region(): region = json.loads(response.text) assert region == {'region': {'chrom': '22', 'start': 46615715, 'stop': 46615880, 'limit': 100000}} + region_def = '22-46A1615715-46615880' + response = requests.get('{}/api/datasets/{}/browser/region/{}'.format(BASE_URL, dataset, region_def)) + assert response.status_code == 400 + + region_def = '22-46A1615715-46615880' + response = requests.get('{}/api/datasets/{}/browser/region/{}'.format(BASE_URL, dataset, region_def)) + assert response.status_code == 400 + def test_get_transcript(): """ @@ -163,3 +171,10 @@ def test_search(): data = json.loads(response.text) assert data['type'] == 'gene' assert data['value'] == 'ENSG00000183249' + + query = '21-9411281-T-C' + version = '20161223' + response = requests.get('{}/api/datasets/{}/version/{}/browser/search/{}'.format(BASE_URL, dataset, version, query)) + data = json.loads(response.text) + assert data['type'] == 'variant' + assert data['value'] == '21-9411281-T-C' diff --git a/backend/modules/browser/tests/test_utils.py b/backend/modules/browser/tests/test_utils.py index 5e704505a..d987e8c93 100644 --- a/backend/modules/browser/tests/test_utils.py +++ b/backend/modules/browser/tests/test_utils.py @@ -190,3 +190,4 @@ def test_worst_csq_with_vep(): res = utils.worst_csq_with_vep(veps) assert res == {'SYMBOL': '1', 'Consequence': 'frameshift_variant', 'CANONICAL': 'YES', 'major_consequence': 'frameshift_variant'} + assert not utils.worst_csq_with_vep([]) From 89ca2762964d2c75025e621794512a5b370bea5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 25 Feb 2019 15:43:43 +0100 Subject: [PATCH 185/360] remove add_rsid_to_variant() for now to avoid giving incorrect rsids --- backend/modules/browser/lookups.py | 42 +----------------------------- 1 file changed, 1 insertion(+), 41 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 83b00b72b..fa412dd4c 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -7,38 +7,6 @@ SEARCH_LIMIT = 10000 - -def add_rsid_to_variant(dataset:str, variant:str): - """ - Add rsid to a variant in the database based on position - - Args: - dataset (str): short name of the dataset - variant (dict): values for a variant - """ - refset = (db.Dataset - .select(db.ReferenceSet) - .join(db.ReferenceSet) - .where(db.Dataset.short_name == dataset) - .dicts() - .get()) - dbsnp_version = refset['dbsnp_version'] - - if not variant['rsid']: - try: - rsid = (db.DbSNP - .select() - .where((db.DbSNP.pos == variant['pos']) & - (db.DbSNP.chrom == variant['chrom']) & - (db.DbSNP.version == dbsnp_version)) - .dicts() - .get()) - variant['rsid'] = 'rs{}'.format(rsid['rsid']) - except db.DbSNP.DoesNotExist: - pass - # logging.error('add_rsid_to_variant({}, variant[dbid: {}]): unable to retrieve rsid'.format(dataset, variant['id'])) - - REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): @@ -457,9 +425,7 @@ def get_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_version:st variant = get_raw_variant(dataset, pos, chrom, ref, alt, ds_version) if not variant or 'rsid' not 
in variant: return variant - if variant['rsid'] == '.' or variant['rsid'] is None: - add_rsid_to_variant(dataset, variant) - else: + if variant['rsid']: if not str(variant['rsid']).startswith('rs'): variant['rsid'] = 'rs{}'.format(variant['rsid']) return variant @@ -573,8 +539,6 @@ def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): for variant in variants: if variant['rsid']: variant['rsid'] = 'rs{}'.format(variant['rsid']) - else: - add_rsid_to_variant(dataset, variant) remove_extraneous_information(variant) return variants @@ -616,8 +580,6 @@ def get_variants_in_region(dataset:str, chrom:str, start_pos:int, end_pos:int, d for variant in variants: if variant['rsid']: variant['rsid'] = 'rs{}'.format(variant['rsid']) - else: - add_rsid_to_variant(dataset, variant) remove_extraneous_information(variant) return variants @@ -660,8 +622,6 @@ def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=No variant['vep_annotations'] = [anno for anno in variant['vep_annotations'] if anno['Feature'] == transcript_id] if variant['rsid']: variant['rsid'] = 'rs{}'.format(variant['rsid']) - else: - add_rsid_to_variant(dataset, variant) remove_extraneous_information(variant) return variants From 3a740ffbbf124ceff9b2f63e94cd3c87c11dcdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 26 Feb 2019 10:01:47 +0100 Subject: [PATCH 186/360] remove add_rsid_to_variant tests --- backend/modules/browser/tests/test_lookups.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index f584ea87e..a22496fbe 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -5,19 +5,6 @@ from .. 
import lookups -def test_add_rsid_to_variant(): - """ - Test add_rsid_to_variant() - """ - variant = lookups.get_variant('SweGen', 34730985, '22', 'G', 'A') - lookups.add_rsid_to_variant('SweGen', variant) - assert variant['rsid'] == 'rs924645261' - variant = lookups.get_variant('SweGen', 16113980, '22', 'C', 'T') - variant['rsid'] = '' - lookups.add_rsid_to_variant('SweGen', variant) - assert variant['rsid'] == 'rs9680543' - - def test_get_awesomebar_result(): """ Test get_awesomebar_result() @@ -354,6 +341,8 @@ def test_get_variants_in_gene(): res = lookups.get_variants_in_gene('SweGen', 'ENSG00000198062') assert len(res) == 1185 assert not lookups.get_variants_in_gene('bad_dataset', 'ENSG00000198062') + res = lookups.get_variants_in_gene('ACpop', 'ENSG00000040608') + assert len(res) == 260 assert not lookups.get_variants_in_gene('bad_dataset', 'ENSGASDFG') From 5c4e4fee23bba00ee1de7429ed2eb41f9ba79f26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 27 Feb 2019 07:04:41 +0100 Subject: [PATCH 187/360] correct port --- backend/modules/browser/tests/test_browser_handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/modules/browser/tests/test_browser_handlers.py b/backend/modules/browser/tests/test_browser_handlers.py index 73a6f5ede..c4f09d59a 100644 --- a/backend/modules/browser/tests/test_browser_handlers.py +++ b/backend/modules/browser/tests/test_browser_handlers.py @@ -5,7 +5,7 @@ import requests import json -BASE_URL="http://localhost:4001" +BASE_URL="http://localhost:4000" def test_get_autocomplete(): """ From 8c9ef5fa6f463a4ae55e4176de8ad9b99bd8f89b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 27 Feb 2019 07:58:50 +0100 Subject: [PATCH 188/360] no reason to keep the mongo settings --- backend/settings.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/backend/settings.py b/backend/settings.py index 249cd91a9..ff925bb3a 100644 --- a/backend/settings.py +++ b/backend/settings.py @@ -32,13 +32,6 @@ ## Generated with base64.b64encode(uuid.uuid4().bytes + uuid.uuid4().bytes) cookie_secret = json_settings["cookieSecret"] -# Mongodb settings -mongo_host = json_settings["mongoHost"] -mongo_port = json_settings["mongoPort"] -mongo_user = json_settings["mongoUser"] -mongo_password = json_settings["mongoPassword"] -mongo_databases = json_settings["mongoDatabases"] - # PostgreSQL settings psql_host = json_settings["postgresHost"] psql_port = json_settings["postgresPort"] From 09dce469c7fcfd73c18f0dd22fbc84d66d3a8e79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 27 Feb 2019 13:34:42 +0100 Subject: [PATCH 189/360] remove number_of_variants_in_transcript --- backend/modules/browser/browser_handlers.py | 3 -- backend/modules/browser/lookups.py | 34 +++---------------- backend/modules/browser/tests/test_lookups.py | 19 ----------- 3 files changed, 5 insertions(+), 51 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index ed1de23be..911d72cef 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -94,9 +94,6 @@ def get(self, dataset:str, gene:str, ds_version:str=None): for exon in sorted(transcript['exons'], key=lambda k: k['start']): ret['exons'] += [{'start':exon['start'], 'stop':exon['stop'], 'type':exon['feature_type']}] - # Variants - ret['gene']['variants'] = lookups.get_number_of_variants_in_transcript(dataset, gene['canonical_transcript'], 
ds_version) - # Transcripts transcripts_in_gene = lookups.get_transcripts_in_gene(dataset, gene_id) if transcripts_in_gene: diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index fa412dd4c..07e684ed6 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -183,16 +183,16 @@ def get_gene(dataset:str, gene_id:str): gene_id (str): the id of the gene Returns: - dict: values for the gene; empty if not found + dict: values for the gene; None if not found """ ref_dbid = db.get_reference_dbid_dataset(dataset) if not ref_dbid: - return {} + return None try: return db.Gene.select().where((db.Gene.gene_id == gene_id) & (db.Gene.reference_set == ref_dbid)).dicts().get() except db.Gene.DoesNotExist: - return {} + return None def get_gene_by_dbid(gene_dbid:str): @@ -272,31 +272,6 @@ def get_genes_in_region(dataset:str, chrom:str, start_pos:int, stop_pos:int): return [gene for gene in gene_query] -def get_number_of_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=None): - """ - Get the total and filtered amount of variants in a transcript - - Args: - dataset (str): short name of the dataset - transcript_id (str): id of the transcript - ds_version (str): version of the dataset - - Returns: - dict: {filtered: nr_filtered, total: nr_total}, None if error - """ - dataset_version = db.get_dataset_version(dataset, ds_version) - if not dataset_version: - return None - - variants = get_variants_in_transcript(dataset, transcript_id) - if not variants: - return None - total = len(variants) - - filtered = len(tuple(variant for variant in variants if variant['filter_string'] == 'PASS')) - return {'filtered': filtered, 'total': total} - - def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_version:str=None): """ Retrieve variant by position and change @@ -522,7 +497,8 @@ def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): if not ref_dbid: return None dataset_version = db.get_dataset_version(dataset, ds_version) - + if not dataset_version: + return None gene = get_gene(dataset, gene_id) variants = [variant for variant in db.Variant.select() diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index a22496fbe..6f9863d3b 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -205,23 +205,6 @@ def test_get_genes_in_region(): assert not res -def test_get_number_of_variants_in_transcript(): - """ - Test get_number_of_variants_in_transcripts() - """ - # normal - res = lookups.get_number_of_variants_in_transcript('SweGen', 'ENST00000424770') - assert res == {'filtered': 66, 'total': 309} - - # bad transcript - res = lookups.get_number_of_variants_in_transcript('SweGen', 'ENSTASDSADA') - assert res is None - - # bad dataset - res = lookups.get_number_of_variants_in_transcript('bad_dataset', 'ENST00000424770') - assert res is None - - def test_get_transcript(): """ Test get_transcript() @@ -341,8 +324,6 @@ def test_get_variants_in_gene(): res = lookups.get_variants_in_gene('SweGen', 'ENSG00000198062') assert len(res) == 1185 assert not lookups.get_variants_in_gene('bad_dataset', 'ENSG00000198062') - res = lookups.get_variants_in_gene('ACpop', 'ENSG00000040608') - assert len(res) == 260 assert not lookups.get_variants_in_gene('bad_dataset', 'ENSGASDFG') From c3fac5721ee0df9cd6d6b2f7ed12322b2002e723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 27 Feb 2019 
21:39:38 +0100 Subject: [PATCH 190/360] variant counting added to variant list --- frontend/src/js/controller.browserController.js | 2 +- frontend/templates/ng-templates/browser-variant-list.jj2 | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/frontend/src/js/controller.browserController.js b/frontend/src/js/controller.browserController.js index 95986c814..9f4cb4059 100644 --- a/frontend/src/js/controller.browserController.js +++ b/frontend/src/js/controller.browserController.js @@ -88,9 +88,9 @@ variant.isMissense = variant.majorConsequence == "missense"; }; localThis.variants.map(mapFunction); + localThis.passed = localThis.variants.filter(v => v.isPass).length; localThis.filterVariants(); - }); Browser.getCoveragePos($routeParams.dataset, $routeParams.version, localThis.itemType, localThis.item).then( function(data) { localThis.coverage.region.start = data.start; diff --git a/frontend/templates/ng-templates/browser-variant-list.jj2 b/frontend/templates/ng-templates/browser-variant-list.jj2 index 1ee8a3d9a..815ff2a19 100644 --- a/frontend/templates/ng-templates/browser-variant-list.jj2 +++ b/frontend/templates/ng-templates/browser-variant-list.jj2 @@ -1,4 +1,7 @@

+<div>
+    Variants: {{ctrl.passed}} (including filtered: {{ctrl.variants.length}})
+</div>
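
Patch 189 dropped the server-side get_number_of_variants_in_transcript() lookup; this patch recreates the same figure on the client by counting PASS entries in the variant list the browser has already fetched (localThis.passed). A minimal sketch of that count in Python, using the 'filter_string' convention from lookups.py and assumed sample data:

```python
# Sketch only: the PASS/total count from patch 190, in Python terms.
# The variant list is sample data, not database content.
variants = [{'filter_string': 'PASS'},
            {'filter_string': 'LowQual'},
            {'filter_string': 'PASS'}]

passed = sum(1 for variant in variants if variant['filter_string'] == 'PASS')
print('Variants: {} (including filtered: {})'.format(passed, len(variants)))
# -> Variants: 2 (including filtered: 3)
```
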
From 4ab495be509a1fb4252004b8b583bad9d02c8b4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Thu, 28 Feb 2019 07:22:43 +0100 Subject: [PATCH 191/360] remove number of variants from gene part --- frontend/templates/ng-templates/browser-gene.html | 3 --- 1 file changed, 3 deletions(-) diff --git a/frontend/templates/ng-templates/browser-gene.html b/frontend/templates/ng-templates/browser-gene.html index 15439acc4..50e128a5b 100644 --- a/frontend/templates/ng-templates/browser-gene.html +++ b/frontend/templates/ng-templates/browser-gene.html @@ -15,9 +15,6 @@

     <h1>Gene: {{ ctrl.gene.geneName }}</h1>
     <dl>
         <dt>{{ ctrl.gene.geneName }}</dt>
         <dd>{{ ctrl.gene.fullGeneName }}</dd>
-        <dt>Number of variants</dt>
-        <dd>{{ ctrl.gene.variants.filtered }} (Including filtered: {{ ctrl.gene.variants.total }})</dd>
-
         <dt>UCSC Browser</dt>
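
Taken together, patches 174, 180, 181, and 184 put every browser route behind an optional dataset version, so both baseUrl() in factory.browser.js and the handler tests build URLs of the form /api/datasets/<dataset>[/version/<version>]/browser/<endpoint>. A minimal Python sketch of the same construction; browser_url() is a hypothetical helper, not part of the repository, and it assumes a backend on localhost:4000 with the SweGen test data loaded, exactly as test_browser_handlers.py does:

```python
"""Sketch only: versioned browser-API URLs, mirroring baseUrl() in factory.browser.js."""

import requests

BASE_URL = 'http://localhost:4000'


def browser_url(dataset, version=None):
    """Build the browser API prefix, with or without a dataset version."""
    url = '{}/api/datasets/{}/'.format(BASE_URL, dataset)
    if version:
        url += 'version/{}/'.format(version)
    return url + 'browser/'


def test_variant_with_and_without_version():
    """This chr21 variant only exists in the older 20161223 release."""
    variant_id = '21-9435852-T-C'

    # No version segment: the latest release is served, so the old
    # variant is missing (404), as test_get_variant() above checks.
    response = requests.get(browser_url('SweGen') + 'variant/' + variant_id)
    assert response.status_code == 404

    # Explicit version segment: the variant is found.
    response = requests.get(browser_url('SweGen', '20161223') + 'variant/' + variant_id)
    assert response.json()['variant']['variantId'] == variant_id
```

Keeping the version segment optional matches the backend handlers, whose ds_version parameter defaults to None; as test_get_variant() shows, the unversioned route does not serve variants that exist only in the older 20161223 release.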