diff --git a/.travis.yml b/.travis.yml index 7dc29446c..df42263ba 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,10 @@ language: python -python: - - "3.6" +matrix: + include: + - python: 3.6 + dist: bionic + - python: 3.7 + dist: bionic services: - docker before_install: diff --git a/backend/application.py b/backend/application.py index 22069299d..5641c31a4 100644 --- a/backend/application.py +++ b/backend/application.py @@ -63,12 +63,14 @@ class GetSchema(handlers.UnsafeHandler): def get(self): dataset = None version = None + beacon = None try: url = self.get_argument('url') match = re.match(".*/dataset/([^/]+)(/version/([^/]+))?", url) if match: dataset = match.group(1) version = match.group(3) + beacon = re.match(".*/dataset/.*/beacon", url) except tornado.web.MissingArgumentError: pass @@ -126,6 +128,21 @@ def get(self): except db.DatasetVersionCurrent.DoesNotExist as e: logging.error("Dataset does not exist: {}".format(e)) + if beacon: + base = {"@context": "http://schema.org", + "@id": "https://swefreq.nbis.se/api/beacon-elixir/", # or maybe "se.nbis.swefreq" as in the beacon api? 
+ "@type": "Beacon", + "dataset": [dataset_schema], + "dct:conformsTo": "https://bioschemas.org/specifications/drafts/Beacon/", + "name": "Swefreq Beacon", + "provider": base["provider"], + "supportedRefs": ["GRCh37"], + "description": "Beacon API Web Server based on the GA4GH Beacon API", + "version": "1.1.0", # beacon api version + "aggregator": False, + "url": "https://swefreq.nbis.se/api/beacon-elixir/" + } + self.finish(base) @@ -307,7 +324,6 @@ def get(self): 'email': user.email, 'affiliation': user.affiliation, 'country': user.country, - 'login_type': self.get_secure_cookie('identity_type').decode('utf-8'), } self.finish(ret) @@ -522,7 +538,7 @@ def get(self, dataset): )) query = peewee.prefetch(users, access) - self.finish({'data': _build_json_response(query, lambda u: u.access_pending_prefetch)}) + self.finish({'data': _build_json_response(query, lambda u: u.access_pending)}) class DatasetUsersCurrent(handlers.AdminHandler): @@ -537,7 +553,7 @@ def get(self, dataset): )) query = peewee.prefetch(users, access) self.finish({'data': _build_json_response( - query, lambda u: u.access_current_prefetch)}) + query, lambda u: u.access_current)}) class UserDatasetAccess(handlers.SafeHandler): diff --git a/backend/auth.py b/backend/auth.py index c9514d025..7f1586b1e 100644 --- a/backend/auth.py +++ b/backend/auth.py @@ -18,7 +18,6 @@ def get(self): self.set_secure_cookie('user', self.get_argument("user")) self.set_secure_cookie('email', self.get_argument("email")) self.set_secure_cookie('identity', self.get_argument("email")) - self.set_secure_cookie('identity_type', 'google') self.finish() @@ -56,27 +55,10 @@ async def get(self): user_token = await self.get_user_token(self.get_argument('code')) user = await self.get_user(user_token["access_token"]) - extra_login = None - try: # check if the user is already logged in - extra_login = self.get_secure_cookie('identity_type').decode('utf-8') - - # Store other login in separate cookies (elixir is main login) - # This is 
hardcoded for google right now, as that is the only option - if extra_login == 'google': - google_identity = self.get_secure_cookie('identity').decode('utf-8') - self.set_secure_cookie('google_identity', google_identity) - - except AttributeError: # if the user isn't logged in - pass - self.set_secure_cookie('access_token', user_token["access_token"]) self.set_secure_cookie('user', user["name"]) self.set_secure_cookie('email', user["email"]) self.set_secure_cookie('identity', user["sub"]) - self.set_secure_cookie('identity_type', 'elixir') - - if extra_login: - self.set_secure_cookie('identity_type', 'elixir_%s' % extra_login) redirect = self.get_secure_cookie("login_redirect") self.clear_cookie("login_redirect") @@ -162,164 +144,3 @@ def get(self): self.redirect(redirect) -class GoogleLoginHandler(BaseHandler, tornado.auth.GoogleOAuth2Mixin): - """ - See http://www.tornadoweb.org/en/stable/auth.html#google for documentation - on this. Here I have copied the example more or less verbatim. - """ - @tornado.gen.coroutine - def get(self): - if self.get_argument("code", False): - logging.debug("Requesting user token") - user_token = yield self.get_authenticated_user( - redirect_uri=self.application.settings['redirect_uri'], - code=self.get_argument('code'), - callback = lambda *_, **__: None) - - logging.debug("Requesting user info") - user = yield self.oauth2_request( - "https://www.googleapis.com/plus/v1/people/me", - access_token=user_token["access_token"], - callback = lambda *_, **__: None) - - try: - # Check if there is the user is already in the database. 
- # This will generate an exception if the user does not exist, preventing login - db.User.select().where(db.User.identity == self._get_google_email(user)).get() - - extra_login = None - try: # check if the user is already logged in - extra_login = self.get_secure_cookie('identity_type').decode('utf-8') - - # Store this login in separate cookies (elixir is main login) - # This is hardcoded for elixir right now, as that is the only option - if extra_login == 'elixir': - google_identity = self._get_google_email(user) - self.set_secure_cookie('google_identity', google_identity) - - self.set_secure_cookie('identity_type', '%s_google' % extra_login) - - except AttributeError: # if the user isn't logged in - self.set_secure_cookie('user', user["displayName"]) - self.set_secure_cookie('access_token', user_token["access_token"]) - self.set_secure_cookie('email', self._get_google_email(user)) - self.set_secure_cookie('identity', self._get_google_email(user)) - self.set_secure_cookie('identity_type', 'google') - - except db.User.DoesNotExist: - msg = "You have no user information logged in our database, so you may directly log in using elixir without updating." - self.set_user_msg(msg, "success") - - url = self.get_secure_cookie("login_redirect") - self.clear_cookie("login_redirect") - if url is None: - url = '/' - self.redirect(url) - - else: - logging.debug("Redirecting to google for login") - self.set_secure_cookie('login_redirect', self.get_argument("next", '/'), 1) - self.authorize_redirect( - redirect_uri=self.application.settings['redirect_uri'], - client_id=self.application.oauth_key, - scope=['profile', 'email'], - response_type='code', - extra_params={'approval_prompt': 'auto'}) - - def _get_google_email(self, user): #pylint: disable=no-self-use - email = '' - # There can be several emails registered for a user. 
- for email in user["emails"]: - if email.get('type', '') == 'account': - return email['value'] - - return user['emails'][0]['value'] - - -class GoogleLogoutHandler(BaseHandler, tornado.auth.GoogleOAuth2Mixin): - def get(self): - def handle_request(response): - if response.error: - logging.info("Error, failed in logout") - logging.info(response.error) - else: - logging.info("User logged out") - - sAccessToken = self.get_secure_cookie("access_token") - sLogoutUrl = "https://accounts.google.com/o/oauth2/revoke?token=" + str(sAccessToken) - http_client = tornado.httpclient.AsyncHTTPClient() - http_client.fetch(sLogoutUrl, handle_request) - - self.clear_all_cookies() - - redirect = self.get_argument("next", '/') - self.redirect(redirect) - - -class UpdateUserHandler(handlers.SafeHandler): - def post(self): - """ - If a user is logged in to elixir, and also has google login cookies, the - google users information in the database will be updated with the elixir - users information. - """ - # set redirect - try: - redirect = self.get_argument("next") - except tornado.web.MissingArgumentError: - redirect = self.get_cookie("login_redirect", '/') - self.clear_cookie("login_redirect") - - try: - # Double check so that the elixir user isn't already have any credentials - # in the database. - - elixir_identity = self.get_secure_cookie('user') - - (db.User.select() - .join(db.DatasetAccess) - .where( - db.User.user == db.DatasetAccess.user, - db.User.identity == elixir_identity) - .get()) - msg = "This elixir account already has its own credentials. Sadly, you will have to contact us directly to merge your accounts." 
- self.set_user_msg(msg, "error") - self.finish({'redirect':'/login'}) - return - except db.User.DoesNotExist: - # This is what we want - pass - - try: - # Check if we have a google login, will throw an AttributeError - # if the cookie isn't available - google_identity = self.get_secure_cookie('google_identity').decode('utf-8') - - # Try to update the google user in the database with the elixir information - # This throws a peewee.IntegrityError if the elixir account is already in - # the database - db.User.update( name = self.get_secure_cookie('user').decode('utf-8'), - email = self.get_secure_cookie('email').decode('utf-8'), - identity = self.get_secure_cookie('identity').decode('utf-8'), - identity_type = 'elixir' - ).where( db.User.identity == google_identity ).execute() - - self.set_secure_cookie('identity_type', 'updated') - except AttributeError: - # This will happen when we don't have a google cookie - msg = "You need to log in to a google account to be able to transfer credentials" - self.set_user_msg(msg, "info") - - self.finish({'redirect':'/login'}) - return - except peewee.IntegrityError: - # This will happen if the elixir account is already in the database - msg = "This elixir account is already in our database, so it can't be used to update another google account." - self.set_user_msg(msg, "error") - self.finish({'redirect':'/login'}) - return - - msg = "Your account has been updated! You may now use the site as you used to, using your Elixir account." - self.set_user_msg(msg, "success") - - self.finish({'redirect':redirect}) diff --git a/backend/db.py b/backend/db.py index f9b429b0f..08cc6a998 100644 --- a/backend/db.py +++ b/backend/db.py @@ -61,10 +61,10 @@ class ReferenceSet(BaseModel): shared between reference sets, so it uses a foreign key instead. 
""" class Meta: - db_table = 'reference_sets' + table_name = 'reference_sets' schema = 'data' - name = CharField(db_column="reference_name", null=True) + name = CharField(column_name="reference_name", null=True) ensembl_version = CharField() gencode_version = CharField() dbnsfp_version = CharField() @@ -73,52 +73,52 @@ class Meta: class Gene(BaseModel): class Meta: - db_table = 'genes' + table_name = 'genes' schema = 'data' - reference_set = ForeignKeyField(ReferenceSet, db_column="reference_set", related_name="genes") + reference_set = ForeignKeyField(ReferenceSet, column_name="reference_set", backref="genes") gene_id = CharField(unique=True, max_length=15) - name = CharField(db_column="gene_name", null=True) + name = CharField(column_name="gene_name", null=True) full_name = CharField(null=True) canonical_transcript = CharField(null=True, max_length=15) chrom = CharField(max_length=10) - start = IntegerField(db_column="start_pos") - stop = IntegerField(db_column="end_pos") + start = IntegerField(column_name="start_pos") + stop = IntegerField(column_name="end_pos") strand = EnumField(choices=['+','-']) class GeneOtherNames(BaseModel): class Meta: - db_table = 'gene_other_names' + table_name = 'gene_other_names' schema = 'data' - gene = ForeignKeyField(Gene, db_column="gene", related_name="other_names") + gene = ForeignKeyField(Gene, column_name="gene", backref="other_names") name = CharField(null=True) class Transcript(BaseModel): class Meta: - db_table = 'transcripts' + table_name = 'transcripts' schema = 'data' transcript_id = CharField(max_length=15) - gene = ForeignKeyField(Gene, db_column="gene", related_name="transcripts") + gene = ForeignKeyField(Gene, column_name="gene", backref="transcripts") mim_gene_accession = IntegerField() mim_annotation = CharField() chrom = CharField(max_length=10) - start = IntegerField(db_column="start_pos") - stop = IntegerField(db_column="stop_pos") + start = IntegerField(column_name="start_pos") + stop = 
IntegerField(column_name="stop_pos") strand = EnumField(choices = ['+', '-']) class Feature(BaseModel): class Meta: - db_table = 'features' + table_name = 'features' schema = 'data' - gene = ForeignKeyField(Gene, db_column="gene", related_name='exons') - transcript = ForeignKeyField(Transcript, db_column="transcript", related_name='transcripts') + gene = ForeignKeyField(Gene, column_name="gene", backref='exons') + transcript = ForeignKeyField(Transcript, column_name="transcript", backref='transcripts') chrom = CharField(max_length=10) - start = IntegerField(db_column="start_pos") - stop = IntegerField(db_column="stop_pos") + start = IntegerField(column_name="start_pos") + stop = IntegerField(column_name="stop_pos") strand = EnumField(choices = ['+', '-']) feature_type = CharField() @@ -131,10 +131,10 @@ class Collection(BaseModel): A collection is a source of data which can be sampled into a SampleSet. """ class Meta: - db_table = 'collections' + table_name = 'collections' schema = 'data' - name = CharField(db_column="study_name", null = True) + name = CharField(column_name="study_name", null = True) ethnicity = CharField(null = True) @@ -144,7 +144,7 @@ class Study(BaseModel): one or more datasets. """ class Meta: - db_table = 'studies' + table_name = 'studies' schema = 'data' pi_name = CharField() @@ -152,7 +152,7 @@ class Meta: contact_name = CharField() contact_email = CharField() title = CharField() - description = TextField(db_column="study_description", null=True) + description = TextField(column_name="study_description", null=True) publication_date = DateTimeField() ref_doi = CharField(null=True) @@ -164,15 +164,15 @@ class Dataset(BaseModel): Most studies only have a single dataset, but multiple are allowed. 
""" class Meta: - db_table = 'datasets' + table_name = 'datasets' schema = 'data' - study = ForeignKeyField(Study, db_column="study", related_name='datasets') + study = ForeignKeyField(Study, column_name="study", backref='datasets') short_name = CharField() full_name = CharField() browser_uri = CharField(null=True) beacon_uri = CharField(null=True) - description = TextField(db_column="beacon_description", null=True) + description = TextField(column_name="beacon_description", null=True) avg_seq_depth = FloatField(null=True) seq_type = CharField(null=True) seq_tech = CharField(null=True) @@ -189,24 +189,24 @@ def has_image(self): class SampleSet(BaseModel): class Meta: - db_table = 'sample_sets' + table_name = 'sample_sets' schema = 'data' - dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='sample_sets') - collection = ForeignKeyField(Collection, db_column="collection", related_name='sample_sets') + dataset = ForeignKeyField(Dataset, column_name="dataset", backref='sample_sets') + collection = ForeignKeyField(Collection, column_name="collection", backref='sample_sets') sample_size = IntegerField() phenotype = CharField(null=True) class DatasetVersion(BaseModel): class Meta: - db_table = 'dataset_versions' + table_name = 'dataset_versions' schema = 'data' - dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='versions') - reference_set = ForeignKeyField(ReferenceSet, db_column="reference_set", related_name='dataset_versions') - version = CharField(db_column="dataset_version") - description = TextField(db_column="dataset_description") + dataset = ForeignKeyField(Dataset, column_name="dataset", backref='versions') + reference_set = ForeignKeyField(ReferenceSet, column_name="reference_set", backref='dataset_versions') + version = CharField(column_name="dataset_version") + description = TextField(column_name="dataset_description") terms = TextField() available_from = DateTimeField() ref_doi = CharField(null=True) @@ -221,23 +221,23 @@ 
class Meta: class DatasetFile(BaseModel): class Meta: - db_table = 'dataset_files' + table_name = 'dataset_files' schema = 'data' - dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version", related_name='files') - name = CharField(db_column="basename") + dataset_version = ForeignKeyField(DatasetVersion, column_name="dataset_version", backref='files') + name = CharField(column_name="basename") uri = CharField() file_size = IntegerField() class DatasetLogo(BaseModel): class Meta: - db_table = 'dataset_logos' + table_name = 'dataset_logos' schema = 'data' - dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='logo') + dataset = ForeignKeyField(Dataset, column_name="dataset", backref='logo') mimetype = CharField() - data = BlobField(db_column="bytes") + data = BlobField(column_name="bytes") ### @@ -246,10 +246,10 @@ class Meta: class Variant(BaseModel): class Meta: - db_table = "variants" + table_name = "variants" schema = 'data' - dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version", related_name="variants") + dataset_version = ForeignKeyField(DatasetVersion, column_name="dataset_version", backref="variants") rsid = IntegerField() chrom = CharField(max_length=10) pos = IntegerField() @@ -267,22 +267,42 @@ class Meta: vep_annotations = BinaryJSONField() +class VariantMate(BaseModel): + class Meta: + table_name = "mate" + schema = 'data' + + dataset_version = ForeignKeyField(DatasetVersion, column_name="dataset_version", backref="mate") + chrom = CharField(max_length=10) + pos = IntegerField() + ref = CharField() + alt = CharField() + chrom_id = CharField() + mate_chrom = CharField() + mate_start = IntegerField() + mate_id = CharField() + allele_freq = FloatField() + variant_id = CharField() + allele_count = IntegerField() + allele_num = IntegerField() + + class VariantGenes(BaseModel): class Meta: - db_table = 'variant_genes' + table_name = 'variant_genes' schema = 'data' - variant = 
ForeignKeyField(Variant, db_column="variant", related_name="genes") - gene = ForeignKeyField(Gene, db_column="gene", related_name="variants") + variant = ForeignKeyField(Variant, column_name="variant", backref="genes") + gene = ForeignKeyField(Gene, column_name="gene", backref="variants") class VariantTranscripts(BaseModel): class Meta: - db_table = 'variant_transcripts' + table_name = 'variant_transcripts' schema = 'data' - variant = ForeignKeyField(Variant, db_column="variant", related_name="transcripts") - transcript = ForeignKeyField(Transcript, db_column="transcript", related_name="variants") + variant = ForeignKeyField(Variant, column_name="variant", backref="transcripts") + transcript = ForeignKeyField(Transcript, column_name="transcript", backref="variants") class Coverage(BaseModel): @@ -297,10 +317,10 @@ class Coverage(BaseModel): coverage of at least 20 in this position. """ class Meta: - db_table = "coverage" + table_name = "coverage" schema = 'data' - dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version") + dataset_version = ForeignKeyField(DatasetVersion, column_name="dataset_version") chrom = CharField(max_length=10) pos = IntegerField() mean = FloatField() @@ -310,10 +330,10 @@ class Meta: class Metrics(BaseModel): class Meta: - db_table = "metrics" + table_name = "metrics" schema = 'data' - dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version") + dataset_version = ForeignKeyField(DatasetVersion, column_name="dataset_version") metric = CharField() mids = ArrayField(IntegerField) hist = ArrayField(IntegerField) @@ -321,13 +341,13 @@ class Meta: class User(BaseModel): class Meta: - db_table = "users" + table_name = "users" schema = 'users' - name = CharField(db_column="username", null=True) + name = CharField(column_name="username", null=True) email = CharField(unique=True) identity = CharField(unique=True) - identity_type = EnumField(null=False, choices=['google', 'elixir']) + identity_type = 
EnumField(null=False, choices=['google', 'elixir'], default='elixir') affiliation = CharField(null=True) country = CharField(null=True) @@ -372,10 +392,10 @@ def has_requested_access(self, dataset): class SFTPUser(BaseModel): class Meta: - db_table = "sftp_users" + table_name = "sftp_users" schema = 'users' - user = ForeignKeyField(User, related_name='sftp_user') + user = ForeignKeyField(User, backref='sftp_user') user_uid = IntegerField(unique=True) user_name = CharField(null=False) password_hash = CharField(null=False) @@ -384,60 +404,60 @@ class Meta: class UserAccessLog(BaseModel): class Meta: - db_table = "user_access_log" + table_name = "user_access_log" schema = 'users' - user = ForeignKeyField(User, related_name='access_logs') - dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access_logs') + user = ForeignKeyField(User, backref='access_logs') + dataset = ForeignKeyField(Dataset, column_name='dataset', backref='access_logs') action = EnumField(null=True, choices=['access_granted','access_revoked','access_requested','private_link']) ts = DateTimeField() class UserConsentLog(BaseModel): class Meta: - db_table = "user_consent_log" + table_name = "user_consent_log" schema = 'users' - user = ForeignKeyField(User, related_name='consent_logs') - dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version', related_name='consent_logs') + user = ForeignKeyField(User, backref='consent_logs') + dataset_version = ForeignKeyField(DatasetVersion, column_name='dataset_version', backref='consent_logs') ts = DateTimeField() class UserDownloadLog(BaseModel): class Meta: - db_table = "user_download_log" + table_name = "user_download_log" schema = 'users' - user = ForeignKeyField(User, related_name='download_logs') - dataset_file = ForeignKeyField(DatasetFile, db_column='dataset_file', related_name='download_logs') + user = ForeignKeyField(User, backref='download_logs') + dataset_file = ForeignKeyField(DatasetFile, 
column_name='dataset_file', backref='download_logs') ts = DateTimeField() class DatasetAccess(BaseModel): class Meta: - db_table = "dataset_access" + table_name = "dataset_access" schema = 'users' - dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access') - user = ForeignKeyField(User, related_name='dataset_access') + dataset = ForeignKeyField(Dataset, column_name='dataset', backref='access') + user = ForeignKeyField(User, backref='dataset_access') wants_newsletter = BooleanField(null=True) is_admin = BooleanField(null=True) class Linkhash(BaseModel): class Meta: - db_table = "linkhash" + table_name = "linkhash" schema = 'users' - dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version', related_name='link_hashes') - user = ForeignKeyField(User, related_name='link_hashes') + dataset_version = ForeignKeyField(DatasetVersion, column_name='dataset_version', backref='link_hashes') + user = ForeignKeyField(User, backref='link_hashes') hash = CharField() expires_on = DateTimeField() class BeaconCounts(BaseModel): class Meta: - db_table = "beacon_dataset_counts_table" + table_name = "beacon_dataset_counts_table" schema = 'beacon' datasetid = CharField(primary_key=True) @@ -451,31 +471,31 @@ class Meta: class DatasetVersionCurrent(DatasetVersion): class Meta: - db_table = 'dataset_version_current' + table_name = 'dataset_version_current' schema = 'data' - dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='current_version') - reference_set = ForeignKeyField(ReferenceSet, db_column="reference_set", related_name='current_version') + dataset = ForeignKeyField(Dataset, column_name="dataset", backref='current_version') + reference_set = ForeignKeyField(ReferenceSet, column_name="reference_set", backref='current_version') class DatasetAccessCurrent(DatasetAccess): class Meta: - db_table = 'dataset_access_current' + table_name = 'dataset_access_current' schema = 'users' - dataset = ForeignKeyField(Dataset, 
db_column='dataset', related_name='access_current') - user = ForeignKeyField(User, related_name='access_current') + dataset = ForeignKeyField(Dataset, column_name='dataset', backref='access_current') + user = ForeignKeyField(User, backref='access_current') has_access = IntegerField() access_requested = DateTimeField() class DatasetAccessPending(DatasetAccess): class Meta: - db_table = 'dataset_access_pending' + table_name = 'dataset_access_pending' schema = 'users' - dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access_pending') - user = ForeignKeyField(User, related_name='access_pending') + dataset = ForeignKeyField(Dataset, column_name='dataset', backref='access_pending') + user = ForeignKeyField(User, backref='access_pending') has_access = IntegerField() access_requested = DateTimeField() @@ -567,7 +587,7 @@ def get_dataset_version(dataset:str, version:str=None): def build_dict_from_row(row): d = {} - for field, value in row.__dict__['_data'].items(): + for field, value in row.__dict__['__data__'].items(): if field == "id": continue d[field] = value diff --git a/backend/handlers.py b/backend/handlers.py index 3d838eb72..5c171eb0f 100644 --- a/backend/handlers.py +++ b/backend/handlers.py @@ -35,7 +35,6 @@ def get_current_user(self): email = self.get_secure_cookie('email') name = self.get_secure_cookie('user') identity = self.get_secure_cookie('identity') - identity_type = self.get_secure_cookie('identity_type') # Fix ridiculous bug with quotation marks showing on the web if name and (name[0] == '"') and (name[-1] == '"'): @@ -49,8 +48,7 @@ def get_current_user(self): try: return db.User(email = email.decode('utf-8'), name = name.decode('utf-8'), - identity = identity.decode('utf-8'), - identity_type = identity_type.decode('utf-8')) + identity = identity.decode('utf-8')) except peewee.OperationalError as e: logging.error("Can't create new user: {}".format(e)) else: diff --git a/backend/modules/browser/browser_handlers.py 
b/backend/modules/browser/browser_handlers.py index 6abc27649..26da598b4 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -5,6 +5,7 @@ import db import handlers +from . import error from . import lookups from . import utils @@ -24,7 +25,7 @@ def get(self, dataset:str, query:str, ds_version:str=None): dataset, ds_version = utils.parse_dataset(dataset, ds_version) ret = {} - results = lookups.get_autocomplete(dataset, query, ds_version) + results = lookups.autocomplete(dataset, query, ds_version) ret = {'values': sorted(list(set(results)))[:20]} self.finish(ret) @@ -87,12 +88,13 @@ def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): ds_version (str): dataset version """ dataset, ds_version = utils.parse_dataset(dataset, ds_version) - ret = utils.get_coverage(dataset, datatype, item, ds_version) - if 'bad_region' in ret: - self.send_error(status_code=400, reason="Unable to parse the region") + try: + ret = utils.get_coverage(dataset, datatype, item, ds_version) + except error.NotFoundError as err: + self.send_error(status_code=404, reason=str(err)) return - if 'region_too_large' in ret: - self.send_error(status_code=400, reason="The region is too large") + except (error.ParsingError, error.MalformedRequest) as err: + self.send_error(status_code=400, reason=str(err)) return self.finish(ret) @@ -114,7 +116,7 @@ def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): try: ret = utils.get_coverage_pos(dataset, datatype, item, ds_version) except ValueError: - logging.error('GetCoveragePos: unable to parse region ({})'.format(region)) + logging.error('GetCoveragePos: unable to parse region ({})'.format(item)) self.send_error(status_code=400, reason='Unable to parse region') return @@ -139,7 +141,15 @@ def get(self, dataset:str, gene:str, ds_version:str=None): ret = {'gene':{'gene_id': gene_id}} # Gene - gene = lookups.get_gene(dataset, gene_id, ds_version) + try: + gene = 
lookups.get_gene(dataset, gene_id, ds_version) + except error.NotFoundError as err: + self.send_error(status_code=404, reason=str(err)) + return + except (error.ParsingError, error.MalformedRequest) as err: + self.send_error(status_code=400, reason=str(err)) + return + if not gene: self.send_error(status_code=404, reason='Gene not found') return @@ -181,9 +191,9 @@ def get(self, dataset:str, region:str, ds_version:str=None): try: chrom, start, stop = utils.parse_region(region) - except ValueError: - logging.error('GetRegion: unable to parse region ({})'.format(region)) - self.send_error(status_code=400, reason='Unable to parse region') + except error.ParsingError as err: + self.send_error(status_code=400, reason=str(err)) + logging.warning('GetRegion: unable to parse region ({})'.format(region)) return ret = {'region':{'chrom': chrom, @@ -193,7 +203,7 @@ def get(self, dataset:str, region:str, ds_version:str=None): } if utils.is_region_too_large(start, stop): - self.send_error(status_code=400, reason="The region is too large") + self.send_error(status_code=400, reason='Region too large') return genes_in_region = lookups.get_genes_in_region(dataset, chrom, start, stop, ds_version) @@ -229,10 +239,12 @@ def get(self, dataset:str, transcript:str, ds_version:str=None): } # Add transcript information - transcript = lookups.get_transcript(dataset, transcript_id, ds_version) - if not transcript: - self.send_error(status_code=404, reason='Transcript not found') + try: + transcript = lookups.get_transcript(dataset, transcript_id, ds_version) + except error.NotFoundError as err: + self.send_error(status_code=404, reason=str(err)) return + ret['transcript']['id'] = transcript['transcript_id'] ret['transcript']['number_of_CDS'] = len([t for t in transcript['exons'] if t['feature_type'] == 'CDS']) @@ -270,18 +282,21 @@ def get(self, dataset:str, variant:str, ds_version:str=None): ret = {'variant':{}} # Variant v = variant.split('-') + if len(v) != 4: + logging.error('GetVariant: 
unable to parse variant ({})'.format(variant)) + self.send_error(status_code=400, reason=f'Unable to parse variant {variant}') try: v[1] = int(v[1]) except ValueError: - logging.error('GetVariant: unable to parse variant ({})'.format(variant)) - self.send_error(status_code=400, reason="Unable to parse variant") + logging.error('GetVariant: position not an integer ({})'.format(variant)) + self.send_error(status_code=400, reason=f'Position is not an integer in variant {variant}') return orig_variant = variant - variant = lookups.get_variant(dataset, v[1], v[0], v[2], v[3], ds_version) - - if not variant: + try: + variant = lookups.get_variant(dataset, v[1], v[0], v[2], v[3], ds_version) + except error.NotFoundError as err: logging.error('Variant not found ({})'.format(orig_variant)) - self.send_error(status_code=404, reason='Variant not found') + self.send_error(status_code=404, reason=str(err)) return # Just get the information we need @@ -320,7 +335,6 @@ def get(self, dataset:str, variant:str, ds_version:str=None): 'canonical': annotation['CANONICAL'], 'modification': annotation['HGVSp'].split(":")[1] if ':' in annotation['HGVSp'] else None}] - # Dataset frequencies. 
# This is reported per variable in the database data, with dataset # information inside the variables, so here we reorder to make the @@ -332,9 +346,11 @@ def get(self, dataset:str, variant:str, ds_version:str=None): dsvs = [dsv for dsv in dsvs if dsv.reference_set == curr_dsv.reference_set] dsv_groups = [(curr_dsv, variant)] for dsv in dsvs: - hit = lookups.get_variant(dsv.dataset.short_name, v[1], v[0], v[2], v[3], dsv.version) - if hit: - dsv_groups.append((dsv, hit)) + try: + hit = lookups.get_variant(dsv.dataset.short_name, v[1], v[0], v[2], v[3], dsv.version) + except error.NotFoundError: + continue + dsv_groups.append((dsv, hit)) frequencies = {'headers':[['Dataset','pop'], ['Allele Count','acs'], @@ -379,12 +395,13 @@ def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): item (str): item to query """ dataset, ds_version = utils.parse_dataset(dataset, ds_version) - ret = utils.get_variant_list(dataset, datatype, item, ds_version) - if not ret: - self.send_error(status_code=500, reason='Unable to retrieve variants') + try: + ret = utils.get_variant_list(dataset, datatype, item, ds_version) + except error.NotFoundError as err: + self.send_error(status_code=404, reason=str(err)) return - if 'region_too_large' in ret: - self.send_error(status_code=400, reason="The region is too large") + except (error.ParsingError, error.MalformedRequest) as err: + self.send_error(status_code=400, reason=str(err)) return # inconvenient way of doing humpBack-conversion diff --git a/backend/modules/browser/error.py b/backend/modules/browser/error.py new file mode 100644 index 000000000..606bb5c2e --- /dev/null +++ b/backend/modules/browser/error.py @@ -0,0 +1,11 @@ +class NotFoundError(Exception): + """The query returned nothing from the database.""" + pass + +class ParsingError(Exception): + """Failed to parse the request.""" + pass + +class MalformedRequest(Exception): + """Bad request (e.g. 
too large region).""" + pass diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 652e7f359..3a518cb7f 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -1,17 +1,18 @@ """Lookup functions for the variant browser.""" - import logging import re import db +from . import error + SEARCH_LIMIT = 10000 REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') -def get_autocomplete(dataset:str, query:str, ds_version:str=None): +def autocomplete(dataset:str, query:str, ds_version:str=None): """ Provide autocomplete suggestions based on the query. @@ -27,7 +28,7 @@ def get_autocomplete(dataset:str, query:str, ds_version:str=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return None + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') query = (db.Gene.select(db.Gene.name) .where(((db.Gene.name.startswith(query)) & (db.Gene.reference_set == ref_set)))) @@ -40,6 +41,7 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): Parse the search input. Datatype is one of: + * `gene` * `transcript` * `variant` @@ -47,11 +49,13 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): * `region` Identifier is one of: + * ensembl ID for gene * variant ID string for variant (eg. 1-1000-A-T) * region ID string for region (eg. 
1-1000-2000) Follow these steps: + * if query is an ensembl ID, return it * if a gene symbol, return that gene's ensembl ID * if an RSID, return that variant's string @@ -68,32 +72,46 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): query = query.strip() # Parse Variant types - variant = get_variants_by_rsid(dataset, query.lower(), ds_version=ds_version) - if variant: + try: + variant = get_variants_by_rsid(dataset, query.lower(), ds_version=ds_version) + except (error.NotFoundError, error.ParsingError): + pass + else: if len(variant) == 1: - retval = ('variant', variant[0]['variant_id']) - else: - retval = ('dbsnp_variant_set', variant[0]['rsid']) - return retval + return ('variant', variant[0]['variant_id']) + return ('dbsnp_variant_set', variant[0]['rsid']) - gene = get_gene_by_name(dataset, query) - # From here out, all should be uppercase (gene, tx, region, variant_id) - query = query.upper() - if not gene: + # Gene + try: gene = get_gene_by_name(dataset, query) - if gene: + except error.NotFoundError: + pass + else: return 'gene', gene['gene_id'] + # Capital letters for all other queries + query = query.upper() + try: + gene = get_gene_by_name(dataset, query) + except error.NotFoundError: + pass + else: + return 'gene', gene['gene_id'] # Ensembl formatted queries if query.startswith('ENS'): # Gene - gene = get_gene(dataset, query) - if gene: + try: + gene = get_gene(dataset, query) + except error.NotFoundError: + pass + else: return 'gene', gene['gene_id'] - # Transcript - transcript = get_transcript(dataset, query) - if transcript: + try: + transcript = get_transcript(dataset, query) + except error.NotFoundError: + pass + else: return 'transcript', transcript['transcript_id'] # Region and variant queries @@ -105,8 +123,13 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): target_type = 'region' if match.group(2) == ":": target = target.replace(":","-") + if match.group(5) and 
set(match.group(4)).issubset(set("ACGT")): target_type = 'variant' + try: + get_raw_variant(dataset, match.group(3), match.group(1), match.group(4), match.group(5), ds_version) + except error.NotFoundError as err: + target_type = 'not_found' return target_type, target @@ -130,17 +153,19 @@ def get_coverage_for_bases(dataset:str, chrom:str, start_pos:int, end_pos:int=No """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return [] + raise error.NotFoundError(f'Unable to find the dataset version in the database') if end_pos is None: end_pos = start_pos - return [values for values in (db.Coverage - .select() - .where((db.Coverage.pos >= start_pos) & - (db.Coverage.pos <= end_pos) & - (db.Coverage.chrom == chrom) & - (db.Coverage.dataset_version == dataset_version.id)) - .dicts())] + coverage = [row for row in (db.Coverage.select() + .where((db.Coverage.pos >= start_pos) & + (db.Coverage.pos <= end_pos) & + (db.Coverage.chrom == chrom) & + (db.Coverage.dataset_version == dataset_version.id)) + .dicts())] + if not coverage: + raise error.NotFoundError('No coverage found for the region') + return coverage def get_coverage_for_transcript(dataset:str, chrom:str, start_pos:int, end_pos:int=None, ds_version:str=None): @@ -186,7 +211,8 @@ def get_exons_in_transcript(dataset:str, transcript_id:str, ds_version=None): ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: logging.info('get_exons_in_transcript({}, {}): unable to find dataset dbid'.format(dataset, transcript_id)) - return None + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') + try: transcript = (db.Transcript .select() @@ -196,11 +222,14 @@ def get_exons_in_transcript(dataset:str, transcript_id:str, ds_version=None): .get()) except db.Transcript.DoesNotExist: logging.info('get_exons_in_transcript({}, {}): unable to retrieve transcript'.format(dataset, transcript_id)) - return None + raise 
error.NotFoundError(f'Transcript {transcript_id} not found in reference data.') wanted_types = ('CDS', 'UTR', 'exon') - return sorted(list(db.Feature.select().where((db.Feature.transcript == transcript) & - (db.Feature.feature_type in wanted_types)).dicts()), - key=lambda k: k['start']) + features = sorted(list(db.Feature.select().where((db.Feature.transcript == transcript) & + (db.Feature.feature_type in wanted_types)).dicts()), + key=lambda k: k['start']) + if not features: + raise error.NotFoundError(f'No features found for transcript {transcript_id} in reference data.') + return features def get_gene(dataset:str, gene_id:str, ds_version:str=None): @@ -219,12 +248,13 @@ def get_gene(dataset:str, gene_id:str, ds_version:str=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return None + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') + try: return db.Gene.select().where((db.Gene.gene_id == gene_id) & (db.Gene.reference_set == ref_set)).dicts().get() except db.Gene.DoesNotExist: - return None + raise error.NotFoundError(f'Gene {gene_id} not found in reference data.') def get_gene_by_dbid(gene_dbid:str): @@ -262,7 +292,8 @@ def get_gene_by_name(dataset:str, gene_name:str, ds_version=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return {} + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') + try: return (db.Gene.select() .where((db.Gene.reference_set == ref_set) & @@ -278,8 +309,8 @@ def get_gene_by_name(dataset:str, gene_name:str, ds_version=None): .dicts() .get()) except db.GeneOtherNames.DoesNotExist: - logging.error('get_gene_by_name({}, {}): unable to retrieve gene'.format(dataset, gene_name)) - return {} + logging.info('get_gene_by_name({}, {}): unable to retrieve gene'.format(dataset, gene_name)) + raise error.NotFoundError(f'Gene {gene_name} not found in reference data') def 
get_genes_in_region(dataset:str, chrom:str, start_pos:int, stop_pos:int, ds_version:str=None): @@ -300,13 +331,13 @@ def get_genes_in_region(dataset:str, chrom:str, start_pos:int, stop_pos:int, ds_ try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return {} + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') - gene_query = db.Gene.select().where((db.Gene.reference_set == ref_set) & - (db.Gene.start <= stop_pos) & - (db.Gene.stop >= start_pos) & - (db.Gene.chrom == chrom)).dicts() - return [gene for gene in gene_query] + genes = db.Gene.select().where((db.Gene.reference_set == ref_set) & + (db.Gene.start <= stop_pos) & + (db.Gene.stop >= start_pos) & + (db.Gene.chrom == chrom)).dicts() + return genes def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_version:str=None): @@ -327,7 +358,7 @@ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_versio """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + raise error.NotFoundError(f'Unable to find the dataset version in the database') try: variant = (db.Variant @@ -351,9 +382,9 @@ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_versio .dicts()] return variant except db.Variant.DoesNotExist: - logging.error('get_raw_variant({}, {}, {}, {}, {}, {}): unable to retrieve variant' - .format(dataset, pos, chrom, ref, alt, dataset_version.id)) - return None + logging.info('get_raw_variant({}, {}, {}, {}, {}, {}): unable to retrieve variant' + .format(dataset, pos, chrom, ref, alt, dataset_version.id)) + raise error.NotFoundError(f'Variant {chrom}-{pos}-{ref}-{alt} not found') def get_transcript(dataset:str, transcript_id:str, ds_version:str=None): @@ -374,7 +405,7 @@ def get_transcript(dataset:str, transcript_id:str, ds_version:str=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - 
return None + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') try: transcript = (db.Transcript .select(db.Transcript, db.Gene.gene_id) @@ -386,7 +417,8 @@ def get_transcript(dataset:str, transcript_id:str, ds_version:str=None): transcript['exons'] = get_exons_in_transcript(dataset, transcript_id) return transcript except db.Transcript.DoesNotExist: - return None + logging.info('get_transcript({}, {}): unable to retrieve transcript'.format(dataset, transcript_id)) + raise error.NotFoundError(f'Transcript {transcript_id} not found in reference data') def get_transcripts_in_gene(dataset:str, gene_id:str, ds_version:str=None): @@ -405,14 +437,15 @@ def get_transcripts_in_gene(dataset:str, gene_id:str, ds_version:str=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - logging.error('get_transcripts_in_gene({}, {}): unable to get referenceset dbid'.format(dataset, gene_id)) - return [] + logging.warning('get_transcripts_in_gene({}, {}): unable to get referenceset dbid'.format(dataset, gene_id)) + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') + try: gene = db.Gene.select().where((db.Gene.reference_set == ref_set) & (db.Gene.gene_id == gene_id)).dicts().get() except db.Gene.DoesNotExist: - logging.error('get_transcripts_in_gene({}, {}): unable to retrieve gene'.format(dataset, gene_id)) - return [] + logging.info('get_transcripts_in_gene({}, {}): unable to retrieve gene'.format(dataset, gene_id)) + raise error.NotFoundError(f'Gene {gene_id} not found in reference data') return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene['id']).dicts()] @@ -469,24 +502,25 @@ def get_variants_by_rsid(dataset:str, rsid:str, ds_version:str=None): """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + raise error.NotFoundError(f'Unable to find the dataset version in the database') if not 
rsid.startswith('rs'): logging.error('get_variants_by_rsid({}, {}): rsid not starting with rs'.format(dataset, rsid)) - return None + raise error.ParsingError('rsid not starting with rs') try: rsid = int(rsid.lstrip('rs')) except ValueError: logging.error('get_variants_by_rsid({}, {}): not an integer after rs'.format(dataset, rsid)) - return None - query = (db.Variant - .select() - .where((db.Variant.rsid == rsid) & - (db.Variant.dataset_version == dataset_version)) - .dicts()) + raise error.ParsingError('Not an integer after rs') + variants = (db.Variant + .select() + .where((db.Variant.rsid == rsid) & + (db.Variant.dataset_version == dataset_version)) + .dicts()) - variants = [variant for variant in query] + if not variants: + raise error.NotFoundError(f'No variants found for rsid {rsid}') return variants @@ -505,10 +539,10 @@ def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + raise error.NotFoundError(f'Unable to find the dataset version in the database') gene = get_gene(dataset, gene_id, ds_version) if not gene: - return None + raise error.NotFoundError(f'Gene {gene_id} not found in reference data') variants = [variant for variant in db.Variant.select() .join(db.VariantGenes) @@ -544,7 +578,7 @@ def get_variants_in_region(dataset:str, chrom:str, start_pos:int, end_pos:int, d """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + raise error.NotFoundError(f'Unable to find the dataset version in the database') query = (db.Variant .select() .where((db.Variant.pos >= start_pos) & @@ -582,10 +616,12 @@ def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=No """ dataset_version = db.get_dataset_version(dataset, ds_version) + if not dataset_version: + raise error.NotFoundError(f'Unable to find the dataset version in the database') transcript = get_transcript(dataset, 
transcript_id, ds_version) if not transcript: - return None + raise error.NotFoundError(f'Transcript {transcript_id} not found in reference data') variants = [variant for variant in db.Variant.select() .join(db.VariantTranscripts) diff --git a/backend/modules/browser/tests/test_browser_handlers.py b/backend/modules/browser/tests/test_browser_handlers.py index 19d174884..b69797afb 100644 --- a/backend/modules/browser/tests/test_browser_handlers.py +++ b/backend/modules/browser/tests/test_browser_handlers.py @@ -1,7 +1,6 @@ """ Test the browser handlers """ - import requests import json @@ -63,7 +62,7 @@ def test_get_coverage(): assert response.status_code == 400 data_item = '1-1-5' response = requests.get('{}/api/dataset/{}/browser/coverage/{}/{}'.format(BASE_URL, dataset, data_type, data_item)) - assert response.status_code == 200 + assert response.status_code == 404 def test_get_coverage_pos(): @@ -244,9 +243,15 @@ def test_search(): assert data['type'] == 'dbsnp' assert data['value'] == 142856307 - query = '21-9411281-T-C' + query = '22-1234321-A-T' + response = requests.get('{}/api/dataset/{}/browser/search/{}'.format(BASE_URL, dataset, query)) + data = json.loads(response.text) + assert data['type'] == 'not_found' + assert data['value'] == '22-1234321-A-T' + + query = '21-29461622-G-A' version = '20161223' response = requests.get('{}/api/dataset/{}/version/{}/browser/search/{}'.format(BASE_URL, dataset, version, query)) data = json.loads(response.text) assert data['type'] == 'variant' - assert data['value'] == '21-9411281-T-C' + assert data['value'] == '21-29461622-G-A' diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index 21a13f5c9..6de69d606 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -2,19 +2,22 @@ Tests for the functions available in lookups.py """ +import pytest + +from .. import error from .. 
import lookups -def test_get_autocomplete(): +def test_autocomplete(): """ Test get_autocomplete() """ - res = lookups.get_autocomplete('SweGen', 'PA') + res = lookups.autocomplete('SweGen', 'PA') expected = set(["PABPC1P9", "PACSIN2", "PANX2", "PARP4P3", "PARVB", "PARVG", "PATZ1", "PAXBP1", "PAXBP1-AS1"]) assert set(res) == expected - res = lookups.get_autocomplete('Bad_dataset', 'PA') - assert not res + with pytest.raises(error.NotFoundError): + res = lookups.autocomplete('Bad_dataset', 'PA') def test_get_awesomebar_result(): @@ -33,6 +36,8 @@ def test_get_awesomebar_result(): assert result == ('transcript', 'ENST00000457709') result = lookups.get_awesomebar_result('SweGen', '22-46615715-46615880') assert result == ('region', '22-46615715-46615880') + result = lookups.get_awesomebar_result('SweGen', '22-1234321-A-A') + assert result == ('not_found', '22-1234321-A-A') result = lookups.get_awesomebar_result('SweGen', 'CHR22:46615715-46615880') assert result == ('region', '22-46615715-46615880') result = lookups.get_awesomebar_result('SweGen', 'CHR22-29461622-G-A') @@ -64,11 +69,12 @@ def test_get_coverage_for_bases(): assert len(lookups.get_coverage_for_bases('SweGen', '22', 46615715, 46615880)) == 17 # no hits - coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500285) - assert not coverage + with pytest.raises(error.NotFoundError): + lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500285) # incorrect dataset - assert not lookups.get_coverage_for_bases('BAD_DATASET', '1', 55500283, 55500320) + with pytest.raises(error.NotFoundError): + lookups.get_coverage_for_bases('BAD_DATASET', '1', 55500283, 55500320) def test_get_coverage_for_transcript(): @@ -93,11 +99,12 @@ def test_get_coverage_for_transcript(): assert len(lookups.get_coverage_for_transcript('SweGen', '22', 46615715, 46615880)) == 17 # no hits - coverage = lookups.get_coverage_for_transcript('SweGen', '1', 55500283, 55500285) - assert not coverage + with 
pytest.raises(error.NotFoundError): + coverage = lookups.get_coverage_for_transcript('SweGen', '1', 55500283, 55500285) # incorrect dataset - assert not lookups.get_coverage_for_transcript('BAD_DATASET', '1', 55500283, 55500320) + with pytest.raises(error.NotFoundError): + assert not lookups.get_coverage_for_transcript('BAD_DATASET', '1', 55500283, 55500320) def test_get_exons_in_transcript(): @@ -108,12 +115,12 @@ def test_get_exons_in_transcript(): assert len(result) == 14 # bad dataset - result = lookups.get_exons_in_transcript('NO_DATASET', 'ENST00000215855') - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_exons_in_transcript('NO_DATASET', 'ENST00000215855') # bad transcript - result = lookups.get_exons_in_transcript('SweGen', 'BAD_TRANSCRIPT') - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_exons_in_transcript('SweGen', 'BAD_TRANSCRIPT') def test_get_gene(): @@ -135,12 +142,12 @@ def test_get_gene(): assert result[val] == expected[val] # non-existing gene - result = lookups.get_gene('SweGen', 'NOT_A_GENE') - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_gene('SweGen', 'NOT_A_GENE') # non-existing dataset - result = lookups.get_gene('NoDataset', 'ENSG00000223972') - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_gene('NoDataset', 'ENSG00000223972') def test_get_gene_by_dbid(): @@ -186,17 +193,15 @@ def test_get_gene_by_name(caplog): assert result[val] == expected[val] # non-existing gene - result = lookups.get_gene_by_name('SweGen', 'NOT_A_GENE') - assert not result - assert caplog.messages[0] == 'get_gene_by_name(SweGen, NOT_A_GENE): unable to retrieve gene' + with pytest.raises(error.NotFoundError): + lookups.get_gene_by_name('SweGen', 'NOT_A_GENE') # non-existing dataset - result = lookups.get_gene_by_name('NoDataset', 'ENSG00000223972') - assert not result + with pytest.raises(error.NotFoundError): + 
lookups.get_gene_by_name('NoDataset', 'ENSG00000223972') # name in other_names result = lookups.get_gene_by_name('SweGen', 'BCL8C') - print(result) assert result['gene_id'] == 'ENSG00000223875' @@ -214,10 +219,10 @@ def test_get_genes_in_region(): expected_ids = ['ENSG00000231565'] assert [gene['gene_id'] for gene in res] == expected_ids # bad dataset - res = lookups.get_genes_in_region('bad_dataset', '22', 25595800, 25615800) + with pytest.raises(error.NotFoundError): + lookups.get_genes_in_region('bad_dataset', '22', 25595800, 25615800) # nothing found - res = lookups.get_genes_in_region('SweGen', '22', 25595800, 25595801) - assert not res + assert not lookups.get_genes_in_region('SweGen', '22', 25595800, 25595801) def test_get_transcript(): @@ -237,7 +242,8 @@ def test_get_transcript(): assert len(result['exons']) == 1 # non-existing - assert not lookups.get_transcript('SweGen', 'INCORRECT') + with pytest.raises(error.NotFoundError): + lookups.get_transcript('SweGen', 'INCORRECT') def test_get_transcripts_in_gene(): @@ -247,8 +253,10 @@ def test_get_transcripts_in_gene(): res = lookups.get_transcripts_in_gene('SweGen', 'ENSG00000228314') assert len(res) == 3 - assert not lookups.get_transcripts_in_gene('bad_dataset', 'ENSG00000241670') - assert not lookups.get_transcripts_in_gene('SweGen', 'ENSGASDFG') + with pytest.raises(error.NotFoundError): + lookups.get_transcripts_in_gene('bad_dataset', 'ENSG00000241670') + with pytest.raises(error.NotFoundError): + lookups.get_transcripts_in_gene('SweGen', 'ENSGASDFG') def test_get_raw_variant(): @@ -260,8 +268,10 @@ def test_get_raw_variant(): assert len(result['genes']) == len(['ENSG00000229286', 'ENSG00000235265']) assert set(result['transcripts']) == set(['ENST00000448070', 'ENST00000413156']) assert len(result['transcripts']) == len(['ENST00000448070', 'ENST00000413156']) - assert not lookups.get_raw_variant('SweGen', 55500281, '1', 'A', 'T') - assert not lookups.get_raw_variant('bad_dataset', 55500283, '1', 'A', 
'T') + with pytest.raises(error.NotFoundError): + assert not lookups.get_raw_variant('SweGen', 55500281, '1', 'A', 'T') + with pytest.raises(error.NotFoundError): + assert not lookups.get_raw_variant('bad_dataset', 55500283, '1', 'A', 'T') def test_get_transcripts_in_gene_by_dbid(): @@ -285,15 +295,20 @@ def test_get_variant(): assert len(result['genes']) == len(['ENSG00000229286', 'ENSG00000235265']) assert set(result['transcripts']) == set(['ENST00000448070', 'ENST00000413156']) assert len(result['transcripts']) == len(['ENST00000448070', 'ENST00000413156']) - result = lookups.get_variant('SweGen', 9411609, '21', 'G', 'T') - assert not result + + # not found + with pytest.raises(error.NotFoundError): + result = lookups.get_variant('SweGen', 12321, '21', 'G', 'G') + with pytest.raises(error.NotFoundError): + result = lookups.get_variant('SweGen', 9411609, '21', 'G', 'T') # incorrect position - assert not lookups.get_variant('SweGen', -1, '1', 'A', 'T') + with pytest.raises(error.NotFoundError): + assert not lookups.get_variant('SweGen', -1, '1', 'A', 'T') # with version - result = lookups.get_variant('SweGen', 16057464, '22', 'G', 'A', "20161223") - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_variant('SweGen', 16057464, '22', 'G', 'A', "20161223") result = lookups.get_variant('SweGen', 9411609, '21', 'G', 'T', "20161223") assert result['variant_id'] == '21-9411609-G-T' @@ -306,19 +321,25 @@ def test_get_variants_by_rsid(): result = lookups.get_variants_by_rsid('SweGen', 'rs142856307') assert result[0]['pos'] == 16285954 assert len(result) == 5 - assert not lookups.get_variants_by_rsid('SweGen', 'rs76676778') + with pytest.raises(error.NotFoundError): + assert not lookups.get_variants_by_rsid('SweGen', 'rs76676778') # with version - assert not lookups.get_variants_by_rsid('SweGen', 'rs185758992', '20161223') + with pytest.raises(error.NotFoundError): + lookups.get_variants_by_rsid('SweGen', 'rs185758992', '20161223') result = 
lookups.get_variants_by_rsid('SweGen', 'rs76676778', '20161223') assert result[0]['variant_id'] == '21-9411609-G-T' # errors - assert lookups.get_variants_by_rsid('incorrect_name', 'rs373706802') is None - assert lookups.get_variants_by_rsid('SweGen', '373706802') is None - assert lookups.get_variants_by_rsid('SweGen', 'rs3737o68o2') is None + with pytest.raises(error.NotFoundError): + lookups.get_variants_by_rsid('incorrect_name', 'rs373706802') + with pytest.raises(error.ParsingError): + lookups.get_variants_by_rsid('SweGen', '373706802') + with pytest.raises(error.ParsingError): + lookups.get_variants_by_rsid('SweGen', 'rs3737o68o2') # no variants with rsid available - assert not lookups.get_variants_by_rsid('SweGen', 'rs1') + with pytest.raises(error.NotFoundError): + lookups.get_variants_by_rsid('SweGen', 'rs1') def test_get_variants_in_gene(): @@ -327,9 +348,17 @@ def test_get_variants_in_gene(): """ res = lookups.get_variants_in_gene('SweGen', 'ENSG00000198062') assert len(res) == 512 - assert not lookups.get_variants_in_gene('bad_dataset', 'ENSG00000198062') - assert not lookups.get_variants_in_gene('bad_dataset', 'ENSGASDFG') - assert not lookups.get_variants_in_gene('SweGen', 'ENSG00000198062', "BAD_VERSION") + + # existing gene without variants + assert not lookups.get_variants_in_gene('SweGen', 'ENSG00000128298') + + # bad requests + with pytest.raises(error.NotFoundError): + lookups.get_variants_in_gene('bad_dataset', 'ENSG00000198062') + with pytest.raises(error.NotFoundError): + lookups.get_variants_in_gene('bad_dataset', 'ENSGASDFG') + with pytest.raises(error.NotFoundError): + lookups.get_variants_in_gene('SweGen', 'ENSG00000198062', "BAD_VERSION") def test_get_variants_in_region(): @@ -342,12 +371,14 @@ def test_get_variants_in_region(): assert [res['pos'] for res in result] == expected_pos # no positions covered - result = lookups.get_variants_in_region('SweGen', '22', 16079200, 16079000) - assert not result + assert not 
lookups.get_variants_in_region('SweGen', '22', 16079200, 16079000) + + # no variants found + assert not lookups.get_variants_in_region('SweGen', '22', 106079000, 106079200) # incorrect dataset - result = lookups.get_variants_in_region('Incorrect_dataset', '22', 16079200, 16079400) - assert not result + with pytest.raises(error.NotFoundError): + lookups.get_variants_in_region('Incorrect_dataset', '22', 16079200, 16079400) def test_get_variants_in_transcript(): @@ -356,5 +387,9 @@ def test_get_variants_in_transcript(): """ res = lookups.get_variants_in_transcript('SweGen', 'ENST00000452800') assert len(res) == 508 - assert not lookups.get_variants_in_transcript('BAD_DATASET', 'ENST00000452800') - assert not lookups.get_variants_in_transcript('SweGen', 'ENST123') + + # bad requests + with pytest.raises(error.NotFoundError): + assert not lookups.get_variants_in_transcript('BAD_DATASET', 'ENST00000452800') + with pytest.raises(error.NotFoundError): + assert not lookups.get_variants_in_transcript('SweGen', 'ENST123') diff --git a/backend/modules/browser/tests/test_utils.py b/backend/modules/browser/tests/test_utils.py index 74cbbd7f5..83361475e 100644 --- a/backend/modules/browser/tests/test_utils.py +++ b/backend/modules/browser/tests/test_utils.py @@ -2,6 +2,9 @@ Tests for utils.py """ +import pytest + +from .. import error from .. import lookups from .. 
import utils @@ -52,7 +55,7 @@ def test_add_consequence_to_variant(): assert variant['major_consequence'] == '' # bad variant - variant = lookups.get_variant('SweGen', 38481311, '444', 'C', 'T') + variant = {} utils.add_consequence_to_variant(variant) assert not variant @@ -74,18 +77,22 @@ def test_get_coverage(): assert len(res['coverage']) == 144 res = utils.get_coverage('SweGen', 'region', '22-46615715-46615880') assert len(res['coverage']) == 17 - res = utils.get_coverage('SweGen', 'region', '22:46615715-46615880') - assert not res['coverage'] - res = utils.get_coverage('SweGen', 'region', '22-46615715asd-46615880') - assert not res['coverage'] - assert res['bad_region'] res = utils.get_coverage('SweGen', 'transcript', 'ENST00000438441') assert len(res['coverage']) == 144 - assert not utils.get_coverage('BAD_SET', 'transcript', 'ENST00000438441')['coverage'] + # bad regions + with pytest.raises(error.ParsingError): + res = utils.get_coverage('SweGen', 'region', '22-46615715asd-46615880') + # is seen as 22:46615715-46615880-46615880 + with pytest.raises(error.NotFoundError): + utils.get_coverage('SweGen', 'region', '22:46615715-46615880') + + # no coverage found + with pytest.raises(error.NotFoundError): + utils.get_coverage('BAD_SET', 'transcript', 'ENST00000438441')['coverage'] - res = utils.get_coverage('SweGen', 'region', '22-1-1000000') - assert res['region_too_large'] + with pytest.raises(error.MalformedRequest): + res = utils.get_coverage('SweGen', 'region', '22-1-1000000') def test_get_coverage_pos(): @@ -105,9 +112,20 @@ def test_get_coverage_pos(): assert res['start'] == 16364817 assert res['stop'] == 16366254 - res = utils.get_coverage_pos('BAD_SET', 'transcript', 'ENST00000438441') - for value in res.values(): - assert not value + # bad requests + with pytest.raises(error.NotFoundError): + utils.get_coverage_pos('BAD_SET', 'transcript', 'ENST00000438441') + with pytest.raises(error.NotFoundError): + utils.get_coverage_pos('SweGen', 'transcript', 
'ENST1234321') + with pytest.raises(error.NotFoundError): + utils.get_coverage_pos('SweGen', 'gene', 'ENSG1234321') + with pytest.raises(error.ParsingError): + utils.get_coverage_pos('BAD_SET', 'region', '1:1:1:1') + + # too large request + with pytest.raises(error.MalformedRequest): + utils.get_coverage_pos('SweGen', 'region', '1-1-10000000') + def test_data_structures(): @@ -191,15 +209,25 @@ def test_get_variant_list(): assert len(res['variants']) == 13 res = utils.get_variant_list('SweGen', 'transcript', 'ENST00000438441') assert len(res['variants']) == 178 - res = utils.get_variant_list('SweGen', 'transcript', 'ENSTWEIRD') - assert not res - res = utils.get_variant_list('SweGen', 'region', '22-1-1000000') - assert res['region_too_large'] - res = utils.get_variant_list('SweGen', 'region', '22-16272587') assert len(res['variants']) == 4 + # bad requests + with pytest.raises(error.NotFoundError): + utils.get_variant_list('SweGen', 'transcript', 'ENSTWEIRD') + with pytest.raises(error.NotFoundError): + utils.get_variant_list('Bad_dataset', 'transcript', 'ENSTWEIRD') + with pytest.raises(error.NotFoundError): + utils.get_variant_list('SweGen', 'gene', 'ENSG1234321') + with pytest.raises(error.ParsingError): + utils.get_variant_list('SweGen', 'region', '1-1-1-1-1') + + # too large region + with pytest.raises(error.MalformedRequest): + utils.get_variant_list('SweGen', 'region', '22-1-1000000') + + def test_order_vep_by_csq(): """ Test order_vep_by_csq() @@ -224,6 +252,24 @@ def test_parse_dataset(): assert utils.parse_dataset('hg19:SweGen:180101') == ('SweGen', '180101') +def test_parse_region(): + assert utils.parse_region('1-2-3') == ('1', 2, 3) + assert utils.parse_region('X-15-30') == ('X', 15, 30) + assert utils.parse_region('1-2') == ('1', 2, 2) + + # bad regions + with pytest.raises(error.ParsingError): + print(utils.parse_region('1:2:2')) + with pytest.raises(error.ParsingError): + utils.parse_region('1-2-2-2') + with pytest.raises(error.ParsingError): + 
utils.parse_region('asdfgh') + with pytest.raises(error.ParsingError): + utils.parse_region('X-15-z') + with pytest.raises(error.ParsingError): + utils.parse_region('X-y-15') + + def test_remove_extraneous_vep_annotations(): """ Test remove_extraneous_vep_annotations() diff --git a/backend/modules/browser/utils.py b/backend/modules/browser/utils.py index 589e2fe72..d3c15aaf8 100644 --- a/backend/modules/browser/utils.py +++ b/backend/modules/browser/utils.py @@ -2,6 +2,7 @@ import logging +from . import error from . import lookups # for coverage @@ -176,12 +177,10 @@ def get_coverage(dataset:str, datatype:str, item:str, ds_version:str=None): ret['coverage'] = lookups.get_coverage_for_transcript(dataset, transcript['chrom'], start, stop, ds_version) elif datatype == 'region': - try: - chrom, start, stop = parse_region(item) - except ValueError: - return {'coverage': [], 'bad_region':True} + chrom, start, stop = parse_region(item) + if is_region_too_large(start, stop): - return {'coverage': [], 'region_too_large': True} + raise error.MalformedRequest('Region too large') ret['coverage'] = lookups.get_coverage_for_bases(dataset, chrom, start, stop, ds_version) elif datatype == 'transcript': @@ -211,15 +210,15 @@ def get_coverage_pos(dataset:str, datatype:str, item:str, ds_version:str=None): if datatype == 'region': chrom, start, stop = parse_region(item) + if is_region_too_large(start, stop): + raise error.MalformedRequest('Region too large') ret['start'] = start ret['stop'] = stop ret['chrom'] = chrom else: if datatype == 'gene': gene = lookups.get_gene(dataset, item) - if gene: - transcript = lookups.get_transcript(dataset, gene['canonical_transcript'], ds_version) - else: transcript = None + transcript = lookups.get_transcript(dataset, gene['canonical_transcript'], ds_version) elif datatype == 'transcript': transcript = lookups.get_transcript(dataset, item, ds_version) if transcript: @@ -343,15 +342,10 @@ def get_variant_list(dataset:str, datatype:str, item:str, 
ds_version:str=None): variants = lookups.get_variants_in_gene(dataset, item, ds_version) elif datatype == 'region': - try: - chrom, start, stop = parse_region(item) - start = int(start) - stop = int(stop) - except ValueError: - return None + chrom, start, stop = parse_region(item) if is_region_too_large(start, stop): - return {'variants': [], 'headers': [], 'region_too_large': True} + raise error.MalformedRequest('Region too large') variants = lookups.get_variants_in_region(dataset, chrom, start, stop, ds_version) elif datatype == 'transcript': @@ -453,7 +447,7 @@ def parse_region(region:str): Parse a region with either one or two positions Args: - region (str): region, e.g. `3:1000000` or `3:100100` + region (str): region, e.g. `3-100-200` or `3-100` Returns: tuple: (chrom, start, pos) @@ -465,11 +459,14 @@ def parse_region(region:str): elif len(parts) == 3: chrom, start, stop = parts else: - raise ValueError - - start = int(start) - stop = int(stop) + raise error.ParsingError(f'Unable to parse region {region}.') + try: + start = int(start) + stop = int(stop) + except ValueError: + raise error.ParsingError(f'Unable to parse region {region} (positions not integers).') + return chrom, start, stop diff --git a/backend/requirements.txt b/backend/requirements.txt index 247babe5a..298f31d4d 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,14 +1,14 @@ -Jinja2==2.10.1 -MarkupSafe==1.1.1 appdirs==1.4.3 -certifi==2019.3.9 +certifi==2019.6.16 chardet==3.0.4 idna==2.8 +Jinja2==2.10.1 +MarkupSafe==1.1.1 packaging==19.0 -peewee==2.9.2 +peewee==3.9.6 +psycopg2-binary==2.8.3 pyparsing==2.4.0 -requests==2.21.0 +requests==2.22.0 six==1.12.0 tornado==6.0.2 -urllib3==1.25.0 -psycopg2-binary==2.8.2 +urllib3==1.25.3 diff --git a/backend/route.py b/backend/route.py index ce8d53817..bf75d164f 100644 --- a/backend/route.py +++ b/backend/route.py @@ -19,16 +19,11 @@ tornado_settings = {"debug": False, "cookie_secret": swefreq_settings.cookie_secret, "login_url": 
"/login", - "google_oauth": { - "key": swefreq_settings.google_key, - "secret": swefreq_settings.google_secret - }, "elixir_oauth": { "id": swefreq_settings.elixir["id"], "secret": swefreq_settings.elixir["secret"], "redirect_uri": swefreq_settings.elixir["redirectUri"], }, - "redirect_uri": swefreq_settings.redirect_uri, "xsrf_cookies": True, "template_path": "templates/", } @@ -49,10 +44,7 @@ def __init__(self, settings): (r"/logout", auth.ElixirLogoutHandler), (r"/elixir/login", auth.ElixirLoginHandler), (r"/elixir/logout", auth.ElixirLogoutHandler), - (r"/google/login", auth.GoogleLoginHandler), - (r"/google/logout", auth.GoogleLogoutHandler), ## API Methods - (r"/api/users/elixir_transfer", auth.UpdateUserHandler), (r"/api/countries", application.CountryList), (r"/api/users/me", application.GetUser), (r"/api/users/datasets", application.UserDatasetAccess), @@ -93,9 +85,6 @@ def __init__(self, settings): self.declared_handlers.insert(-1, ("/developer/login", auth.DeveloperLoginHandler)) self.declared_handlers.insert(-1, ("/developer/quit", application.QuitHandler)) - # google oauth key - self.oauth_key = tornado_settings["google_oauth"]["key"] - # Setup the Tornado Application tornado.web.Application.__init__(self, self.declared_handlers, **settings) diff --git a/backend/settings.py b/backend/settings.py index ff925bb3a..c6543e4b5 100644 --- a/backend/settings.py +++ b/backend/settings.py @@ -1,4 +1,3 @@ - import os import sys import json @@ -23,10 +22,6 @@ json_settings = json.load(json_settings_fh) json_settings_fh.close() -google_key = json_settings["googleKey"] -google_secret = json_settings["googleSecret"] -redirect_uri = json_settings["redirectUri"] - elixir = json_settings["elixir"] ## Generated with base64.b64encode(uuid.uuid4().bytes + uuid.uuid4().bytes) diff --git a/frontend/assets/img/SLL_logo.png b/frontend/assets/img/SLL_logo.png index 5225326ff..71effa006 100644 Binary files a/frontend/assets/img/SLL_logo.png and 
b/frontend/assets/img/SLL_logo.png differ diff --git a/frontend/src/js/app.routes.js b/frontend/src/js/app.routes.js index 7b76d01d4..98962acc5 100644 --- a/frontend/src/js/app.routes.js +++ b/frontend/src/js/app.routes.js @@ -17,7 +17,6 @@ .config(["$routeProvider", "$locationProvider", "$httpProvider", function($routeProvider, $locationProvider, $httpProvider) { $routeProvider .when("/", { templateUrl: "static/templates/ng-templates/home.html" }) - .when("/login", { templateUrl: "static/templates/ng-templates/login.html" }) .when("/profile", { templateUrl: "static/templates/ng-templates/profile.html" }) .when("/error", { templateUrl: "static/templates/ng-templates/error.html" }) .when("/security_warning", { templateUrl: "static/templates/ng-templates/security-warning.html" }) diff --git a/frontend/templates/index.html b/frontend/templates/index.html index c4aa36439..0f092aa31 100644 --- a/frontend/templates/index.html +++ b/frontend/templates/index.html @@ -19,19 +19,6 @@ -[% if develop %] -[% else %] - - - -[% endif %] @@ -57,11 +44,7 @@ @@ -101,7 +84,7 @@
- +
diff --git a/frontend/templates/ng-templates/dataset-browser.html b/frontend/templates/ng-templates/dataset-browser.html index c4e2a7a5a..aa6a1dcc0 100644 --- a/frontend/templates/ng-templates/dataset-browser.html +++ b/frontend/templates/ng-templates/dataset-browser.html @@ -26,12 +26,11 @@

Examples - Gene: - PCSK9, Transcript: - ENST00000407236, Variant: - 22-46615880-T-C, Multi-allelic - variant: + PCSK9, Transcript: + ENST00000407236, Variant: + 22-46615880-T-C, Reference SNP ID: rs1800234, Region: - 22:46615715-46615880 + 22:46615715-46615880

diff --git a/frontend/templates/ng-templates/login.html b/frontend/templates/ng-templates/login.html deleted file mode 100644 index 77dbe210b..000000000 --- a/frontend/templates/ng-templates/login.html +++ /dev/null @@ -1,65 +0,0 @@ - -
- -
-
-
-
-

Your account has been updated!

- You may now use the site as you used to, using your Elixir account. -
-
-
-
-
-

Login to swefreq is changing

-
-

- To transfer your login credentials from a google account to an elixir account, - just use the buttons below to log in to both google and elixir, we will then - give you the option to update your account with the elixir information. -

-
- - - - - - - - - -
- -
-
-
-
-
-
-
Transfer Account Credentials
-
-

- We can now transfer your credentials from your google account to your Elixir account! -

-
- Note that this will make you unable to log in with your google account in the future, - as your user data will be permanently updated with the elixir information. -
-
-
- -
-
-
-
-
-
-
-
diff --git a/scripts/backup.sh b/scripts/backup.sh index fec0c3be5..38e826308 100755 --- a/scripts/backup.sh +++ b/scripts/backup.sh @@ -14,7 +14,7 @@ data_home="/data/SweFreq" userdb_base="$data_home/userdb-backup" userdb_dir="$userdb_base/$( date '+%Y-%m' )" -userdb_file="$userdb_dir/tornado-userdb.$( date '+%Y%m%d-%H%M%S' ).dump" +userdb_file="$userdb_dir/users.$( date '+%Y%m%d-%H%M%S' ).dump" release_backups="$data_home/data-backup/release" container_dir="ubuntu@swefreq-proxy:/opt/release" @@ -30,11 +30,9 @@ fi trap 'rm -f "$tmpbackup" "$tmpbackup.gz"' EXIT tmpbackup="$( mktemp -p "$userdb_base" )" -# Dump database, and remove the "Dump completed" comment at the end to -# be able to compare with previous dump. -lxc exec swefreq-web -- \ -mysqldump --complete-insert --user=swefreq --host=swefreq-db swefreq | -sed '/^-- Dump completed on/d' >"$tmpbackup" +# Dump database (only the "users" schema). +lxc exec swefreq-db -- \ +pg_dump --host=localhost --user=swefreq --schema=users --no-password swefreq >"$tmpbackup" gzip --best "$tmpbackup" diff --git a/scripts/importer/data_importer/data_importer.py b/scripts/importer/data_importer/data_importer.py index 2b4f044ab..4d61dfbaf 100644 --- a/scripts/importer/data_importer/data_importer.py +++ b/scripts/importer/data_importer/data_importer.py @@ -87,10 +87,12 @@ def _download_and_open(self, base_url, version=None): filename = self._download(base_url, version) return self._open(filename) - def _open(self, filename): + def _open(self, filename, binary=True): + mode = 'rb' if binary else 'rt' + encoding = None if binary else 'utf8' try: logging.debug("Opening file {}".format(filename)) - return gzip.open(filename, 'rb') if filename.endswith(".gz") else open(filename) + return gzip.open(filename, mode, encoding=encoding) if filename.endswith(".gz") else open(filename) except IOError as error: logging.error("IOERROR: {}".format(error)) diff --git a/scripts/importer/data_importer/raw_data_importer.py 
b/scripts/importer/data_importer/raw_data_importer.py index 8879011f0..b1594b04a 100644 --- a/scripts/importer/data_importer/raw_data_importer.py +++ b/scripts/importer/data_importer/raw_data_importer.py @@ -128,8 +128,8 @@ def _insert_coverage(self): counter = 0 with db.database.atomic(): for filename in self.settings.coverage_file: - for line in self._open(filename): - line = bytes(line).decode('utf8').strip() + for line in self._open(filename, binary=False): + line = line.strip() if line.startswith("#"): continue @@ -168,8 +168,93 @@ def _insert_coverage(self): db.Coverage.insert_many(batch).execute() if self.counter['coverage'] is not None: last_progress = self._update_progress_bar(counter, self.counter['coverage'], last_progress, finished=True) - if not self.settings.dry_run: - logging.info("Inserted {} coverage records in {}".format(counter, self._time_since(start))) + self.log_insertion(counter, "coverage", start) + + def _parse_manta(self): + header = [("chrom", str), ("pos", int), ("chrom_id", str), ("ref", str), ("alt", str)] + + batch = [] + samples = 0 + counter = 0 + start = time.time() + for filename in self.settings.variant_file: + for line in self._open(filename): + line = line.strip() + if line.startswith("#"): + if line.startswith('#CHROM'): + samples = len(line.split('\t')[9:]) + continue + + base = {} + for i, item in enumerate(line.split("\t")): + if i == 0: + base['dataset_version'] = self.dataset_version + if i < 5: + base[header[i][0]] = header[i][1](item) + elif i == 7: + # only parse column 7 (maybe also for non-beacon-import?) + info = dict([(x.split('=', 1)) if '=' in x else (x, x) for x in re.split(';(?=\w)', item)]) + + if info.get('SVTYPE') != 'BND': + continue + + if base["chrom"].startswith('GL') or base["chrom"].startswith('MT'): + # A BND from GL or MT. GL is an unplaced scaffold, MT is mitochondria. 
+ continue + + if 'NSAMPLES' in info: + # save this unless we already know the sample size + samples = int(info['NSAMPLES']) + + alt_alleles = base['alt'].split(",") + for i, alt in enumerate(alt_alleles): + data = dict(base) + data['allele_freq'] = float(info.get('FRQ')) + data['alt'], data['mate_chrom'], data['mate_start'] = re.search('(.+)[[\]](.*?):(\d+)[[\]]', alt).groups() + if data['mate_chrom'].startswith('GL') or data['mate_chrom'].startswith('MT'): + # A BND from a chromosome to GL or MT. + # TODO ask a bioinformatician if these cases should be included or not + continue + data['mate_id'] = info.get('MATEID', '') + data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], alt) + data['allele_count'] = data.get('allele_count', 0) + data['allele_num'] = data.get('allele_num', 0) + batch += [data] + if self.settings.add_reversed_mates: + # If the vcf only contains one line per breakend, add the reversed version to the database here. + reversed = dict(data) + # Note: in general, ref and alt cannot be assumed to be the same in the reversed direction, + # but our data (so far) only contains N, so we just keep them as is for now. 
+ reversed.update({'mate_chrom': data['chrom'], 'chrom': data['mate_chrom'], + 'mate_start': data['pos'], 'pos': data['mate_start'], + 'chrom_id': data['mate_id'], 'mate_id': data['chrom_id']}) + reversed['variant_id'] = '{}-{}-{}-{}'.format(reversed['chrom'], reversed['pos'], reversed['ref'], alt) + counter += 1 # increase the counter; reversed BNDs are usually kept at their own vcf row + batch += [reversed] + + counter += 1 # count variants (one per vcf row) + + if len(batch) >= self.settings.batch_size: + if not self.settings.dry_run: + db.VariantMate.insert_many(batch).execute() + + batch = [] + # Update progress + if not self.counter['variants']: + last_progress = self._update_progress_bar(counter, self.counter['variants'], last_progress) + + if batch and not self.settings.dry_run: + db.VariantMate.insert_many(batch).execute() + + if self.settings.set_vcf_sampleset_size and samples: + self.sampleset.sample_size = samples + self.sampleset.save() + + self.dataset_version.num_variants = counter + self.dataset_version.save() + if not self.counter['variants']: + last_progress = self._update_progress_bar(counter, self.counter['variants'], last_progress, finished=True) + self.log_insertion(counter, "breakend", start) def _insert_variants(self): """ @@ -202,8 +287,8 @@ def _insert_variants(self): db.Transcript.transcript_id) .join(db.Gene) .where(db.Gene.reference_set == ref_set))} - for line in self._open(filename): - line = bytes(line).decode('utf8').strip() + for line in self._open(filename, binary=False): + line = line.strip() if line.startswith("#"): # Check for some information that we need @@ -240,9 +325,8 @@ def _insert_variants(self): annotations = [dict(zip(vep_field_names, x.split('|'))) for x in consequence_array if len(vep_field_names) == len(x.split('|'))] alt_alleles = base['alt'].split(",") - if base['rsid'].startswith('rs'): - rsids = [int(rsid.strip('rs')) for rsid in base['rsid'].split(';')] - else: + rsids = [int(rsid.strip('rs')) for rsid in 
base['rsid'].split(';') if rsid.startswith('rs')] + if not rsids: rsids = [None] try: @@ -329,26 +413,25 @@ def _insert_variants(self): last_progress = self._update_progress_bar(counter, self.counter['variants'], last_progress) if batch and not self.settings.dry_run: - if not self.settings.dry_run: - if not self.settings.beacon_only: - try: - curr_id = db.Variant.select(db.Variant.id).order_by(db.Variant.id.desc()).limit(1).get().id - except db.Variant.DoesNotExist: - # assumes next id will be 1 if table is empty - curr_id = 0 + if not self.settings.beacon_only: + try: + curr_id = db.Variant.select(db.Variant.id).order_by(db.Variant.id.desc()).limit(1).get().id + except db.Variant.DoesNotExist: + # assumes next id will be 1 if table is empty + curr_id = 0 - db.Variant.insert_many(batch).execute() + db.Variant.insert_many(batch).execute() - if not self.settings.beacon_only: - last_id = db.Variant.select(db.Variant.id).order_by(db.Variant.id.desc()).limit(1).get().id - if last_id-curr_id == len(batch): - indexes = list(range(curr_id+1, last_id+1)) - else: - indexes = [] - for entry in batch: - indexes.append(db.Variant.select(db.Variant.id).where(db.Variant.variant_id == entry['variant_id']).get().id) - self.add_variant_genes(indexes, genes, ref_genes) - self.add_variant_transcripts(indexes, transcripts, ref_transcripts) + if not self.settings.beacon_only: + last_id = db.Variant.select(db.Variant.id).order_by(db.Variant.id.desc()).limit(1).get().id + if last_id-curr_id == len(batch): + indexes = list(range(curr_id+1, last_id+1)) + else: + indexes = [] + for entry in batch: + indexes.append(db.Variant.select(db.Variant.id).where(db.Variant.variant_id == entry['variant_id']).get().id) + self.add_variant_genes(indexes, genes, ref_genes) + self.add_variant_transcripts(indexes, transcripts, ref_transcripts) if self.settings.set_vcf_sampleset_size and samples: self.sampleset.sample_size = samples @@ -358,8 +441,8 @@ def _insert_variants(self): self.dataset_version.save() 
if self.counter['variants'] != None: last_progress = self._update_progress_bar(counter, self.counter['variants'], last_progress, finished=True) - if not self.settings.dry_run: - logging.info("Inserted {} variant records in {}".format(counter, self._time_since(start))) + + self.log_insertion(counter, "variant", start) def get_callcount(self, data): """Increment the call count by the calls found at this position.""" @@ -387,8 +470,8 @@ def count_entries(self): self.counter['coverage'] = 0 logging.info("Counting coverage lines") for filename in self.settings.coverage_file: - for line in self._open(filename): - line = bytes(line).decode('utf8').strip() + for line in self._open(filename, binary=False): + line = line.strip() if line.startswith("#"): continue self.counter['coverage'] += 1 @@ -398,8 +481,8 @@ def count_entries(self): self.counter['variants'] = 0 logging.info("Counting variant lines") for filename in self.settings.variant_file: - for line in self._open(filename): - line = bytes(line).decode('utf8').strip() + for line in self._open(filename, binary=False): + line = line.strip() if line.startswith("#"): continue self.counter['variants'] += 1 @@ -412,7 +495,11 @@ def prepare_data(self): def start_import(self): self._set_dataset_info() - if self.settings.variant_file: + if self.settings.add_mates: + self._parse_manta() + if self.settings.count_calls: + logging.warning('Do not know how to count calls in the manta file. 
Skipping this...') + elif self.settings.variant_file: self._insert_variants() if self.settings.count_calls: self._create_beacon_counts() @@ -435,3 +522,7 @@ def add_variant_transcripts(self, variant_indexes:list, transcripts_to_add:list, batch += connected_transcripts if not self.settings.dry_run: db.VariantTranscripts.insert_many(batch).execute() + + def log_insertion(self, counter, type, start): + action = "Inserted" if not self.settings.dry_run else "Dry-ran insertion of" + logging.info("{} {} {} records in {}".format(action, counter, type, self._time_since(start))) diff --git a/scripts/importer/importer.py b/scripts/importer/importer.py index 1bf2e4303..e489efae6 100755 --- a/scripts/importer/importer.py +++ b/scripts/importer/importer.py @@ -98,6 +98,10 @@ PARSER.add_argument("--beacon-only", action="store_true", help=("Variants are intended only for Beacon, loosening" " the requirements")) + PARSER.add_argument("--add_mates", action="store_true", + help=("Parse MANTA file and add the breakends to the db")) + PARSER.add_argument("--add_reversed_mates", action="store_true", + help=("Assume input data only contain one line per BND, covering both directions")) ARGS = PARSER.parse_args() diff --git a/settings_sample.json b/settings_sample.json index 50f3168ac..a530f12f0 100644 --- a/settings_sample.json +++ b/settings_sample.json @@ -1,10 +1,6 @@ { "cookieSecret" : "Something random for tornado to sign cookies with", - "googleKey" : "a key from google", - "googleSecret" : "a secret from google", - "redirectUri" : "https://google oauth redirect uri", - "mysqlHost" : "127.0.0.1", "mysqlPasswd" : "password", "mysqlSchema" : "swefreq", diff --git a/sql/beacon_schema.sql b/sql/beacon_schema.sql index e8b0f0e8f..4b3ed0a05 100644 --- a/sql/beacon_schema.sql +++ b/sql/beacon_schema.sql @@ -26,9 +26,13 @@ CREATE TABLE IF NOT EXISTS beacon.beacon_dataset_counts_table ( ); +-------------------------------------------------------------------------------- +-- Beacon views. 
+-- + CREATE OR REPLACE VIEW beacon.available_datasets AS SELECT * FROM data.dataset_versions - WHERE available_from < now() AND beacon_access != 'None'; + WHERE available_from < now() AND beacon_access != 'PRIVATE'; CREATE OR REPLACE VIEW beacon.beacon_dataset_table AS -- original type @@ -81,10 +85,33 @@ CREATE OR REPLACE VIEW beacon.beacon_data_table AS ON av.reference_set = r.id ; +CREATE OR REPLACE VIEW beacon.beacon_mate_table AS + SELECT dm.id AS index, + concat_ws(':', r.reference_build, + d.short_name, + av.dataset_version) AS datasetId, + substr(dm.chrom, 1, 2) AS chromosome, + dm.pos - 1 AS chromosomeStart, + dm.chrom_id as chromosomePos, + dm.mate_chrom as mate, + dm.mate_start - 1 as mateStart, + dm.mate_id as matePos, + dm.ref as reference, + dm.alt as alternate, + dm.allele_count as alleleCount, + dm.allele_num as callCount, + dm.allele_freq as frequency, + dm.mate_start - 1 as "end", + 'BND' as variantType + FROM data.mate AS dm + JOIN beacon.available_datasets as av + ON dm.dataset_version = av.id + JOIN data.datasets as d + ON av.dataset = d.id + JOIN data.reference_sets AS r + ON av.reference_set = r.id +; --------------------------------------------------------------------------------- --- Beacon views. 
--- CREATE OR REPLACE VIEW beacon.dataset_metadata(name, datasetId, description, assemblyId, createDateTime, updateDateTime, version, callCount, variantCount, sampleCount, externalUrl, accessType) diff --git a/sql/data_schema.sql b/sql/data_schema.sql index 89bede22a..b7b2e9f0d 100644 --- a/sql/data_schema.sql +++ b/sql/data_schema.sql @@ -172,6 +172,24 @@ CREATE TABLE IF NOT EXISTS data.variants ( vep_annotations jsonb ); +-- For storing breakends +CREATE TABLE IF NOT EXISTS data.mate ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + dataset_version integer REFERENCES data.dataset_versions, + chrom_id varchar(128), -- column 3 in vcf + pos integer, + ref varchar, + alt varchar, + chrom varchar(10), + mate_chrom varchar(10), + mate_start integer, + mate_id varchar(128), + allele_freq real, + variant_id varchar, + allele_count integer, + allele_num integer +); + CREATE TABLE IF NOT EXISTS data.variant_genes ( id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, variant integer REFERENCES data.variants, diff --git a/test/data/browser_test_data.sql b/test/data/browser_test_data.sql index 8f62af24f..c5966b1f6 100644 --- a/test/data/browser_test_data.sql +++ b/test/data/browser_test_data.sql @@ -14,10 +14,12 @@ COPY data.collections (id, study_name, ethnicity) FROM stdin; COPY data.studies (id, pi_name, pi_email, contact_name, contact_email, title, study_description, publication_date, ref_doi) FROM stdin; 1 name email name email SweGen \N 2001-01-01 00:00:00 doi +2 name2 email2 name2 email2 SweGen2 \N 2001-01-02 00:00:00 doi \. COPY data.datasets (id, study, short_name, full_name, browser_uri, beacon_uri, beacon_description, avg_seq_depth, seq_type, seq_tech, seq_center, dataset_size) FROM stdin; 1 1 SweGen SweGen url \N \N 0 type method place 0 +2 1 SweGen2 SweGen2 url \N \N 0 type method place 0 \. 
COPY data.reference_sets (id, reference_build, reference_name, ensembl_version, gencode_version, dbnsfp_version) FROM stdin; @@ -25,10 +27,11 @@ COPY data.reference_sets (id, reference_build, reference_name, ensembl_version, \. COPY data.dataset_versions (id, dataset, reference_set, dataset_version, dataset_description, terms, available_from, ref_doi, data_contact_name, data_contact_link, num_variants, coverage_levels, portal_avail, file_access, beacon_access) FROM stdin; -2 1 1 20170823 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC -3 1 1 20171025 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC 1 1 1 20161223 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC -4 1 1 20180409 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC +2 1 1 20170823 desc terms 2001-01-02 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC +3 1 1 20171025 desc terms 2001-01-03 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC +4 1 1 20180409 desc terms 2001-01-04 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC +5 2 1 20190409 desc terms 2001-01-05 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC \. 
COPY data.coverage (id, dataset_version, chrom, pos, mean, median, coverage) FROM stdin; @@ -2647,6 +2650,10 @@ COPY data.variants (id, dataset_version, variant_type, rsid, chrom, pos, ref, al 1665 4 \N 75186185 22 16371114 A G 191977 {22-16371114-A-G} 0 0.30399999 VQSRTrancheSNP99.90to100.00 22-16371114-A-G 608 2000 {"DP": "84373", "FS": "2.211", "MQ": "31.17", "QD": "3.14", "VQSLOD": "-50.73", "MQRankSum": "-1.184", "BaseQRankSum": "-4.11", "ReadPosRankSum": "1.07", "ClippingRankSum": "0.012"} [{"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "", "Gene": "ENSG00000231565", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "G", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "1", "SYMBOL": "NEK2P2", "TREMBL": "", "AFR_MAF": "", "AMR_MAF": "", "BIOTYPE": "processed_pseudogene", "DOMAINS": "", "EAS_MAF": "", "EUR_MAF": "", "Feature": "ENST00000438441", "HGNC_ID": "37816", "SAS_MAF": "", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "4910", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "downstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs4068944", "MOTIF_SCORE_CHANGE": ""}, {"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "", "Gene": "ENSG00000230471", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "G", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": 
"", "PUBMED": "", "STRAND": "1", "SYMBOL": "LA16c-2F2.8", "TREMBL": "", "AFR_MAF": "", "AMR_MAF": "", "BIOTYPE": "lincRNA", "DOMAINS": "", "EAS_MAF": "", "EUR_MAF": "", "Feature": "ENST00000428118", "HGNC_ID": "", "SAS_MAF": "", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "1967", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "upstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "Clone_based_vega_gene", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs4068944", "MOTIF_SCORE_CHANGE": ""}] 1668 4 \N 783 22 29461622 G A 715011 {22-29461622-G-A} 772 0.62349999 PASS 22-29461622-G-A 1247 2000 {"DP": "36991", "FS": "0", "MQ": "60", "QD": "22.28", "VQSLOD": "22.38", "MQRankSum": "0.023", "BaseQRankSum": "2.44", "ReadPosRankSum": "0.313", "ClippingRankSum": "-0.031"} [{"LoF": "", "TSL": "", "CCDS": "CCDS13848.1", "ENSP": "ENSP00000216071", "EXON": "", "GMAF": "G:0.4289", "Gene": "ENSG00000100249", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "-1", "SYMBOL": "C22orf31", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "protein_coding", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENST00000216071", "HGNC_ID": "26931", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "UPI0000073FE0", "CLIN_SIG": "", "DISTANCE": "3790", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": 
"", "SWISSPROT": "CV031_HUMAN", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "upstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}, {"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "G:0.4289", "Gene": "", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "", "SYMBOL": "", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "promoter_flanking_region", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENSR00001731804", "HGNC_ID": "", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "regulatory_region_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "RegulatoryFeature", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}] 1669 4 \N \N 22 29465622 G A 288.69 {22-29465622-G-A} \N 0.00166667 PASS 22-25275494-G-A 15 1000 {"DP": "10377", "FS": "0", "MQ": "60", "QD": "9.31", "VQSLOD": "0.894", "MQRankSum": 
"0.58", "BaseQRankSum": "1.34", "ReadPosRankSum": "-0.54", "ClippingRankSum": "-0.821", "InbreedingCoeff": "-0.0017"} [{"AF": "", "LoF": "HC", "TSL": "", "CCDS": "CCDS46675.1", "ENSP": "ENSP00000383211", "EXON": "", "Gene": "ENSG00000167037", "SIFT": "", "AA_AF": "", "EA_AF": "", "FLAGS": "", "HGVSc": "ENST00000400358.4:c.1495+1G>A", "HGVSp": "", "PHENO": "", "miRNA": "", "AFR_AF": "", "AMR_AF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EAS_AF": "", "EUR_AF": "", "IMPACT": "HIGH", "INTRON": "14/24", "MAX_AF": "", "PUBMED": "", "SAS_AF": "", "SOURCE": "Ensembl", "STRAND": "1", "SYMBOL": "SGSM1", "TREMBL": "", "BIOTYPE": "protein_coding", "DOMAINS": "", "Feature": "ENST00000400358", "HGNC_ID": "29410", "SOMATIC": "", "UNIPARC": "UPI0001533DB1", "BAM_EDIT": "", "CLIN_SIG": "", "DISTANCE": "", "LoF_info": "BRANCHPOINT_DISTANCE:NA&DONOR_ESE:17&DONOR_ISS:9&EXON_END:25275493&DONOR_ISE:4&EXON_START:25275429&DONOR_ESS:9&MUTANT_DONOR_MES:-1.73390323294901&INTRON_START:25275494&DONOR_GERP_DIFF:0&DONOR_DISRUPTION_PROB:0.995351102026242&INTRON_END:25280019&DONOR_MES_DIFF:8.18202723619546&DONOR_DISRUPTING&RESCUE_DONOR_MES:-1.73390323294901&RESCUE_DONOR_POS:0&CRYPTIC_DONOR_MES:-6.95778366793159&CRYPTIC_DONOR_POS:-2&INTRON_SIZE:4526", "PolyPhen": "", "USED_REF": "G", "CANONICAL": "", "GIVEN_REF": "G", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "Q2NKQ1", "gnomAD_AF": "0", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "splice_donor_variant", "HGVS_OFFSET": "", "MAX_AF_POPS": "gnomAD_AFR&gnomAD_AMR&gnomAD_ASJ&gnomAD_EAS&gnomAD_FIN&gnomAD_NFE&gnomAD_OTH&gnomAD_SAS", "CDS_position": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "REFSEQ_MATCH": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "gnomAD_AFR_AF": "0", "gnomAD_AMR_AF": "0", "gnomAD_ASJ_AF": "0", "gnomAD_EAS_AF": "0", "gnomAD_FIN_AF": "0", "gnomAD_NFE_AF": "0", "gnomAD_OTH_AF": "0", "gnomAD_SAS_AF": "0", "Protein_position": 
"", "Existing_variation": "rs1299387256", "MOTIF_SCORE_CHANGE": ""}] +1670 1 \N \N 21 29461622 G A 715011 {22-29461622-G-A} 772 0.62349999 PASS 22-29461622-G-A 1247 2000 {"DP": "36991", "FS": "0", "MQ": "60", "QD": "22.28", "VQSLOD": "22.38", "MQRankSum": "0.023", "BaseQRankSum": "2.44", "ReadPosRankSum": "0.313", "ClippingRankSum": "-0.031"} [{"LoF": "", "TSL": "", "CCDS": "CCDS13848.1", "ENSP": "ENSP00000216071", "EXON": "", "GMAF": "G:0.4289", "Gene": "ENSG00000100249", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "-1", "SYMBOL": "C22orf31", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "protein_coding", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENST00000216071", "HGNC_ID": "26931", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "UPI0000073FE0", "CLIN_SIG": "", "DISTANCE": "3790", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "CV031_HUMAN", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "upstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}, {"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "G:0.4289", "Gene": "", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "", "SYMBOL": "", "TREMBL": "", "AFR_MAF": 
"A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "promoter_flanking_region", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENSR00001731804", "HGNC_ID": "", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "regulatory_region_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "RegulatoryFeature", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}] +1671 5 \N 783 22 29461622 G A 715011 {22-29461622-G-A} 772 0.62349999 PASS 22-29461622-G-A 1247 2000 {"DP": "36991", "FS": "0", "MQ": "60", "QD": "22.28", "VQSLOD": "22.38", "MQRankSum": "0.023", "BaseQRankSum": "2.44", "ReadPosRankSum": "0.313", "ClippingRankSum": "-0.031"} [{"LoF": "", "TSL": "", "CCDS": "CCDS13848.1", "ENSP": "ENSP00000216071", "EXON": "", "GMAF": "G:0.4289", "Gene": "ENSG00000100249", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "-1", "SYMBOL": "C22orf31", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "protein_coding", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENST00000216071", "HGNC_ID": "26931", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "UPI0000073FE0", "CLIN_SIG": "", "DISTANCE": "3790", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "CV031_HUMAN", "ALLELE_NUM": 
"1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "upstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}, {"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "G:0.4289", "Gene": "", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "", "SYMBOL": "", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "promoter_flanking_region", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENSR00001731804", "HGNC_ID": "", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "regulatory_region_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "RegulatoryFeature", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}] +1672 5 \N \N 22 29465622 G A 288.69 {22-29465622-G-A} \N 0.00166667 PASS 22-25275494-G-A 15 1000 {"DP": "10377", "FS": "0", "MQ": "60", "QD": "9.31", "VQSLOD": "0.894", "MQRankSum": "0.58", "BaseQRankSum": "1.34", 
"ReadPosRankSum": "-0.54", "ClippingRankSum": "-0.821", "InbreedingCoeff": "-0.0017"} [{"AF": "", "LoF": "HC", "TSL": "", "CCDS": "CCDS46675.1", "ENSP": "ENSP00000383211", "EXON": "", "Gene": "ENSG00000167037", "SIFT": "", "AA_AF": "", "EA_AF": "", "FLAGS": "", "HGVSc": "ENST00000400358.4:c.1495+1G>A", "HGVSp": "", "PHENO": "", "miRNA": "", "AFR_AF": "", "AMR_AF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EAS_AF": "", "EUR_AF": "", "IMPACT": "HIGH", "INTRON": "14/24", "MAX_AF": "", "PUBMED": "", "SAS_AF": "", "SOURCE": "Ensembl", "STRAND": "1", "SYMBOL": "SGSM1", "TREMBL": "", "BIOTYPE": "protein_coding", "DOMAINS": "", "Feature": "ENST00000400358", "HGNC_ID": "29410", "SOMATIC": "", "UNIPARC": "UPI0001533DB1", "BAM_EDIT": "", "CLIN_SIG": "", "DISTANCE": "", "LoF_info": "BRANCHPOINT_DISTANCE:NA&DONOR_ESE:17&DONOR_ISS:9&EXON_END:25275493&DONOR_ISE:4&EXON_START:25275429&DONOR_ESS:9&MUTANT_DONOR_MES:-1.73390323294901&INTRON_START:25275494&DONOR_GERP_DIFF:0&DONOR_DISRUPTION_PROB:0.995351102026242&INTRON_END:25280019&DONOR_MES_DIFF:8.18202723619546&DONOR_DISRUPTING&RESCUE_DONOR_MES:-1.73390323294901&RESCUE_DONOR_POS:0&CRYPTIC_DONOR_MES:-6.95778366793159&CRYPTIC_DONOR_POS:-2&INTRON_SIZE:4526", "PolyPhen": "", "USED_REF": "G", "CANONICAL": "", "GIVEN_REF": "G", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "Q2NKQ1", "gnomAD_AF": "0", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "splice_donor_variant", "HGVS_OFFSET": "", "MAX_AF_POPS": "gnomAD_AFR&gnomAD_AMR&gnomAD_ASJ&gnomAD_EAS&gnomAD_FIN&gnomAD_NFE&gnomAD_OTH&gnomAD_SAS", "CDS_position": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "REFSEQ_MATCH": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "gnomAD_AFR_AF": "0", "gnomAD_AMR_AF": "0", "gnomAD_ASJ_AF": "0", "gnomAD_EAS_AF": "0", "gnomAD_FIN_AF": "0", "gnomAD_NFE_AF": "0", "gnomAD_OTH_AF": "0", "gnomAD_SAS_AF": "0", "Protein_position": "", "Existing_variation": 
"rs1299387256", "MOTIF_SCORE_CHANGE": ""}] +1673 5 \N \N 21 29461622 G A 715011 {22-29461622-G-A} 772 0.62349999 PASS 22-29461622-G-A 1247 2000 {"DP": "36991", "FS": "0", "MQ": "60", "QD": "22.28", "VQSLOD": "22.38", "MQRankSum": "0.023", "BaseQRankSum": "2.44", "ReadPosRankSum": "0.313", "ClippingRankSum": "-0.031"} [{"LoF": "", "TSL": "", "CCDS": "CCDS13848.1", "ENSP": "ENSP00000216071", "EXON": "", "GMAF": "G:0.4289", "Gene": "ENSG00000100249", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "-1", "SYMBOL": "C22orf31", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "protein_coding", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENST00000216071", "HGNC_ID": "26931", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "UPI0000073FE0", "CLIN_SIG": "", "DISTANCE": "3790", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "CV031_HUMAN", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "upstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}, {"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "G:0.4289", "Gene": "", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "", "SYMBOL": "", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": 
"A:0.4654", "BIOTYPE": "promoter_flanking_region", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENSR00001731804", "HGNC_ID": "", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "regulatory_region_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "RegulatoryFeature", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}] \. COPY data.variant_genes (id, variant, gene) FROM stdin; diff --git a/test/data/load_dummy_data.sql b/test/data/load_dummy_data.sql index 09ff5a751..fbbde7500 100644 --- a/test/data/load_dummy_data.sql +++ b/test/data/load_dummy_data.sql @@ -39,21 +39,21 @@ INSERT INTO data.dataset_files(id, dataset_version, basename, uri, file_size) INSERT INTO users.users(id, username, email, affiliation, country, identity, identity_type) VALUES (1000100, 'Not req yet', 'email0', 'i', '', 'email0', 'elixir'), - (1000101, 'Requested access', 'email1', 'w1', '', 'email1', 'google'), + (1000101, 'Requested access', 'email1', 'w1', '', 'email1', 'elixir'), (1000102, 'Approved access', 'email2', 'c1', '', 'email2', 'elixir'), - (1000103, 'Denied access', 'email3', 'i', '', 'email3', 'google'), + (1000103, 'Denied access', 'email3', 'i', '', 'email3', 'elixir'), (1000104, 'Approved then denied', 'email4', 'i', '', 'email4', 'elixir'), - (1000105, 'R->A->D->R', 'email5', 'w1', '', 'email5', 'google'), + (1000105, 'R->A->D->R', 'email5', 'w1', '', 'email5', 'elixir'), (1000106, 'R->A->D->R->A', 'email6', 
'c1', '', 'email6', 'elixir'), - (1000107, 'R->A->D->R->D', 'email7', 'i', '', 'email7', 'google'), + (1000107, 'R->A->D->R->D', 'email7', 'i', '', 'email7', 'elixir'), (1000108, 'Combo1 w1 w2', 'email8', 'w1 w2', '', 'email8', 'elixir'), - (1000109, 'Combo2 w1 c2', 'email9', 'w1 c2', '', 'email9', 'google'), + (1000109, 'Combo2 w1 c2', 'email9', 'w1 c2', '', 'email9', 'elixir'), (1000110, 'Combo3 c1 w2', 'email10', 'c1 w2', '', 'email10', 'elixir'), - (1000111, 'Combo4 c1 c2', 'email11', 'c1 c2', '', 'email11', 'google'), + (1000111, 'Combo4 c1 c2', 'email11', 'c1 c2', '', 'email11', 'elixir'), (1000112, 'Combo5 c1 i2', 'email12', 'c1 i2', '', 'email12', 'elixir'), - (1000113, 'Admin1', 'admin1', 'Rootspace', '', 'admin1', 'google'), + (1000113, 'Admin1', 'admin1', 'Rootspace', '', 'admin1', 'elixir'), (1000114, 'Admin2', 'admin2', 'Rootspace', '', 'admin2', 'elixir'), - (1000115, 'Admin12', 'admin12', 'Rootspace', '', 'admin12', 'google'); + (1000115, 'Admin12', 'admin12', 'Rootspace', '', 'admin12', 'elixir'); INSERT INTO users.dataset_access(user_id, dataset) VALUES (1000100, 1000001), (1000101, 1000001), (1000102, 1000001), (1000103, 1000001), (1000104, 1000001), (1000105, 1000001), diff --git a/test/travis_before_install.sh b/test/travis_before_install.sh index e59af8f6b..2abb68b09 100755 --- a/test/travis_before_install.sh +++ b/test/travis_before_install.sh @@ -5,4 +5,4 @@ PSQL_PORT="5433" docker pull "postgres:$PSQL_VERSION" -docker run --rm -d -p "$PSQL_PORT:5432" "postgres:$PSQL_VERSION" +docker run --rm -e "POSTGRES_DB=swefreq" -d -p "$PSQL_PORT:5432" "postgres:$PSQL_VERSION" diff --git a/test/travis_script.sh b/test/travis_script.sh index 01551b0cc..eae53190d 100755 --- a/test/travis_script.sh +++ b/test/travis_script.sh @@ -1,10 +1,13 @@ #!/bin/sh -ex +DBNAME=swefreq + ## SETUP SETTINGS cp settings_sample.json settings.json + sed -i.tmp 's/"postgresHost" : "postgres host"/"postgresHost" : "127.0.0.1"/' settings.json sed -i.tmp 's/"postgresPort" : 
5432/"postgresPort" : 5433/' settings.json -sed -i.tmp 's/"postgresName" : "swefreq"/"postgresName" : ""/' settings.json +sed -i.tmp "s/\"postgresName\" : \"swefreq\"/\"postgresName\" : \"$DBNAME\"/" settings.json echo 'SETTINGS' cat settings.json @@ -15,20 +18,20 @@ echo '>>> Test 1. The SQL Patch' LATEST_RELEASE=$(git tag | grep '^v' | sort -V | tail -n 1) git show "$LATEST_RELEASE:sql/*_schema.sql" > master-schema.sql -psql -U postgres -h 127.0.0.1 -p 5433 -f master-schema.sql -psql -U postgres -h 127.0.0.1 -p 5433 -f sql/patch-master-db.sql +psql -U postgres -h 127.0.0.1 -p 5433 -f master-schema.sql "$DBNAME" +psql -U postgres -h 127.0.0.1 -p 5433 -f sql/patch-master-db.sql "$DBNAME" # Empty the database -psql -U postgres -h 127.0.0.1 -p 5433 <<__END__ +psql -U postgres -h 127.0.0.1 -p 5433 "$DBNAME" <<__END__ DROP SCHEMA data; DROP SCHEMA users; __END__ echo '>>> Test 2. Load the swefreq schema' -psql -U postgres -h 127.0.0.1 -p 5433 -f sql/data_schema.sql -psql -U postgres -h 127.0.0.1 -p 5433 -f sql/user_schema.sql -psql -U postgres -h 127.0.0.1 -p 5433 -f test/data/load_dummy_data.sql -psql -U postgres -h 127.0.0.1 -p 5433 -f test/data/browser_test_data.sql +psql -U postgres -h 127.0.0.1 -p 5433 -f sql/data_schema.sql "$DBNAME" +psql -U postgres -h 127.0.0.1 -p 5433 -f sql/user_schema.sql "$DBNAME" +psql -U postgres -h 127.0.0.1 -p 5433 -f test/data/load_dummy_data.sql "$DBNAME" +psql -U postgres -h 127.0.0.1 -p 5433 -f test/data/browser_test_data.sql "$DBNAME" echo '>>> Test 3. Check that the backend starts'