diff --git a/.travis.yml b/.travis.yml index 7dc29446c..df42263ba 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,10 @@ language: python -python: - - "3.6" +matrix: + include: + - python: 3.6 + dist: bionic + - python: 3.7 + dist: bionic services: - docker before_install: diff --git a/backend/application.py b/backend/application.py index 22069299d..5641c31a4 100644 --- a/backend/application.py +++ b/backend/application.py @@ -63,12 +63,14 @@ class GetSchema(handlers.UnsafeHandler): def get(self): dataset = None version = None + beacon = None try: url = self.get_argument('url') match = re.match(".*/dataset/([^/]+)(/version/([^/]+))?", url) if match: dataset = match.group(1) version = match.group(3) + beacon = re.match(".*/dataset/.*/beacon", url) except tornado.web.MissingArgumentError: pass @@ -126,6 +128,21 @@ def get(self): except db.DatasetVersionCurrent.DoesNotExist as e: logging.error("Dataset does not exist: {}".format(e)) + if beacon: + base = {"@context": "http://schema.org", + "@id": "https://swefreq.nbis.se/api/beacon-elixir/", # or maybe "se.nbis.swefreq" as in the beacon api? 
+ "@type": "Beacon", + "dataset": [dataset_schema], + "dct:conformsTo": "https://bioschemas.org/specifications/drafts/Beacon/", + "name": "Swefreq Beacon", + "provider": base["provider"], + "supportedRefs": ["GRCh37"], + "description": "Beacon API Web Server based on the GA4GH Beacon API", + "version": "1.1.0", # beacon api version + "aggregator": False, + "url": "https://swefreq.nbis.se/api/beacon-elixir/" + } + self.finish(base) @@ -307,7 +324,6 @@ def get(self): 'email': user.email, 'affiliation': user.affiliation, 'country': user.country, - 'login_type': self.get_secure_cookie('identity_type').decode('utf-8'), } self.finish(ret) @@ -522,7 +538,7 @@ def get(self, dataset): )) query = peewee.prefetch(users, access) - self.finish({'data': _build_json_response(query, lambda u: u.access_pending_prefetch)}) + self.finish({'data': _build_json_response(query, lambda u: u.access_pending)}) class DatasetUsersCurrent(handlers.AdminHandler): @@ -537,7 +553,7 @@ def get(self, dataset): )) query = peewee.prefetch(users, access) self.finish({'data': _build_json_response( - query, lambda u: u.access_current_prefetch)}) + query, lambda u: u.access_current)}) class UserDatasetAccess(handlers.SafeHandler): diff --git a/backend/auth.py b/backend/auth.py index c9514d025..7f1586b1e 100644 --- a/backend/auth.py +++ b/backend/auth.py @@ -18,7 +18,6 @@ def get(self): self.set_secure_cookie('user', self.get_argument("user")) self.set_secure_cookie('email', self.get_argument("email")) self.set_secure_cookie('identity', self.get_argument("email")) - self.set_secure_cookie('identity_type', 'google') self.finish() @@ -56,27 +55,10 @@ async def get(self): user_token = await self.get_user_token(self.get_argument('code')) user = await self.get_user(user_token["access_token"]) - extra_login = None - try: # check if the user is already logged in - extra_login = self.get_secure_cookie('identity_type').decode('utf-8') - - # Store other login in separate cookies (elixir is main login) - # This is 
hardcoded for google right now, as that is the only option - if extra_login == 'google': - google_identity = self.get_secure_cookie('identity').decode('utf-8') - self.set_secure_cookie('google_identity', google_identity) - - except AttributeError: # if the user isn't logged in - pass - self.set_secure_cookie('access_token', user_token["access_token"]) self.set_secure_cookie('user', user["name"]) self.set_secure_cookie('email', user["email"]) self.set_secure_cookie('identity', user["sub"]) - self.set_secure_cookie('identity_type', 'elixir') - - if extra_login: - self.set_secure_cookie('identity_type', 'elixir_%s' % extra_login) redirect = self.get_secure_cookie("login_redirect") self.clear_cookie("login_redirect") @@ -162,164 +144,3 @@ def get(self): self.redirect(redirect) -class GoogleLoginHandler(BaseHandler, tornado.auth.GoogleOAuth2Mixin): - """ - See http://www.tornadoweb.org/en/stable/auth.html#google for documentation - on this. Here I have copied the example more or less verbatim. - """ - @tornado.gen.coroutine - def get(self): - if self.get_argument("code", False): - logging.debug("Requesting user token") - user_token = yield self.get_authenticated_user( - redirect_uri=self.application.settings['redirect_uri'], - code=self.get_argument('code'), - callback = lambda *_, **__: None) - - logging.debug("Requesting user info") - user = yield self.oauth2_request( - "https://www.googleapis.com/plus/v1/people/me", - access_token=user_token["access_token"], - callback = lambda *_, **__: None) - - try: - # Check if there is the user is already in the database. 
- # This will generate an exception if the user does not exist, preventing login - db.User.select().where(db.User.identity == self._get_google_email(user)).get() - - extra_login = None - try: # check if the user is already logged in - extra_login = self.get_secure_cookie('identity_type').decode('utf-8') - - # Store this login in separate cookies (elixir is main login) - # This is hardcoded for elixir right now, as that is the only option - if extra_login == 'elixir': - google_identity = self._get_google_email(user) - self.set_secure_cookie('google_identity', google_identity) - - self.set_secure_cookie('identity_type', '%s_google' % extra_login) - - except AttributeError: # if the user isn't logged in - self.set_secure_cookie('user', user["displayName"]) - self.set_secure_cookie('access_token', user_token["access_token"]) - self.set_secure_cookie('email', self._get_google_email(user)) - self.set_secure_cookie('identity', self._get_google_email(user)) - self.set_secure_cookie('identity_type', 'google') - - except db.User.DoesNotExist: - msg = "You have no user information logged in our database, so you may directly log in using elixir without updating." - self.set_user_msg(msg, "success") - - url = self.get_secure_cookie("login_redirect") - self.clear_cookie("login_redirect") - if url is None: - url = '/' - self.redirect(url) - - else: - logging.debug("Redirecting to google for login") - self.set_secure_cookie('login_redirect', self.get_argument("next", '/'), 1) - self.authorize_redirect( - redirect_uri=self.application.settings['redirect_uri'], - client_id=self.application.oauth_key, - scope=['profile', 'email'], - response_type='code', - extra_params={'approval_prompt': 'auto'}) - - def _get_google_email(self, user): #pylint: disable=no-self-use - email = '' - # There can be several emails registered for a user. 
- for email in user["emails"]: - if email.get('type', '') == 'account': - return email['value'] - - return user['emails'][0]['value'] - - -class GoogleLogoutHandler(BaseHandler, tornado.auth.GoogleOAuth2Mixin): - def get(self): - def handle_request(response): - if response.error: - logging.info("Error, failed in logout") - logging.info(response.error) - else: - logging.info("User logged out") - - sAccessToken = self.get_secure_cookie("access_token") - sLogoutUrl = "https://accounts.google.com/o/oauth2/revoke?token=" + str(sAccessToken) - http_client = tornado.httpclient.AsyncHTTPClient() - http_client.fetch(sLogoutUrl, handle_request) - - self.clear_all_cookies() - - redirect = self.get_argument("next", '/') - self.redirect(redirect) - - -class UpdateUserHandler(handlers.SafeHandler): - def post(self): - """ - If a user is logged in to elixir, and also has google login cookies, the - google users information in the database will be updated with the elixir - users information. - """ - # set redirect - try: - redirect = self.get_argument("next") - except tornado.web.MissingArgumentError: - redirect = self.get_cookie("login_redirect", '/') - self.clear_cookie("login_redirect") - - try: - # Double check so that the elixir user isn't already have any credentials - # in the database. - - elixir_identity = self.get_secure_cookie('user') - - (db.User.select() - .join(db.DatasetAccess) - .where( - db.User.user == db.DatasetAccess.user, - db.User.identity == elixir_identity) - .get()) - msg = "This elixir account already has its own credentials. Sadly, you will have to contact us directly to merge your accounts." 
- self.set_user_msg(msg, "error") - self.finish({'redirect':'/login'}) - return - except db.User.DoesNotExist: - # This is what we want - pass - - try: - # Check if we have a google login, will throw an AttributeError - # if the cookie isn't available - google_identity = self.get_secure_cookie('google_identity').decode('utf-8') - - # Try to update the google user in the database with the elixir information - # This throws a peewee.IntegrityError if the elixir account is already in - # the database - db.User.update( name = self.get_secure_cookie('user').decode('utf-8'), - email = self.get_secure_cookie('email').decode('utf-8'), - identity = self.get_secure_cookie('identity').decode('utf-8'), - identity_type = 'elixir' - ).where( db.User.identity == google_identity ).execute() - - self.set_secure_cookie('identity_type', 'updated') - except AttributeError: - # This will happen when we don't have a google cookie - msg = "You need to log in to a google account to be able to transfer credentials" - self.set_user_msg(msg, "info") - - self.finish({'redirect':'/login'}) - return - except peewee.IntegrityError: - # This will happen if the elixir account is already in the database - msg = "This elixir account is already in our database, so it can't be used to update another google account." - self.set_user_msg(msg, "error") - self.finish({'redirect':'/login'}) - return - - msg = "Your account has been updated! You may now use the site as you used to, using your Elixir account." - self.set_user_msg(msg, "success") - - self.finish({'redirect':redirect}) diff --git a/backend/db.py b/backend/db.py index f9b429b0f..08cc6a998 100644 --- a/backend/db.py +++ b/backend/db.py @@ -61,10 +61,10 @@ class ReferenceSet(BaseModel): shared between reference sets, so it uses a foreign key instead. 
""" class Meta: - db_table = 'reference_sets' + table_name = 'reference_sets' schema = 'data' - name = CharField(db_column="reference_name", null=True) + name = CharField(column_name="reference_name", null=True) ensembl_version = CharField() gencode_version = CharField() dbnsfp_version = CharField() @@ -73,52 +73,52 @@ class Meta: class Gene(BaseModel): class Meta: - db_table = 'genes' + table_name = 'genes' schema = 'data' - reference_set = ForeignKeyField(ReferenceSet, db_column="reference_set", related_name="genes") + reference_set = ForeignKeyField(ReferenceSet, column_name="reference_set", backref="genes") gene_id = CharField(unique=True, max_length=15) - name = CharField(db_column="gene_name", null=True) + name = CharField(column_name="gene_name", null=True) full_name = CharField(null=True) canonical_transcript = CharField(null=True, max_length=15) chrom = CharField(max_length=10) - start = IntegerField(db_column="start_pos") - stop = IntegerField(db_column="end_pos") + start = IntegerField(column_name="start_pos") + stop = IntegerField(column_name="end_pos") strand = EnumField(choices=['+','-']) class GeneOtherNames(BaseModel): class Meta: - db_table = 'gene_other_names' + table_name = 'gene_other_names' schema = 'data' - gene = ForeignKeyField(Gene, db_column="gene", related_name="other_names") + gene = ForeignKeyField(Gene, column_name="gene", backref="other_names") name = CharField(null=True) class Transcript(BaseModel): class Meta: - db_table = 'transcripts' + table_name = 'transcripts' schema = 'data' transcript_id = CharField(max_length=15) - gene = ForeignKeyField(Gene, db_column="gene", related_name="transcripts") + gene = ForeignKeyField(Gene, column_name="gene", backref="transcripts") mim_gene_accession = IntegerField() mim_annotation = CharField() chrom = CharField(max_length=10) - start = IntegerField(db_column="start_pos") - stop = IntegerField(db_column="stop_pos") + start = IntegerField(column_name="start_pos") + stop = 
IntegerField(column_name="stop_pos") strand = EnumField(choices = ['+', '-']) class Feature(BaseModel): class Meta: - db_table = 'features' + table_name = 'features' schema = 'data' - gene = ForeignKeyField(Gene, db_column="gene", related_name='exons') - transcript = ForeignKeyField(Transcript, db_column="transcript", related_name='transcripts') + gene = ForeignKeyField(Gene, column_name="gene", backref='exons') + transcript = ForeignKeyField(Transcript, column_name="transcript", backref='transcripts') chrom = CharField(max_length=10) - start = IntegerField(db_column="start_pos") - stop = IntegerField(db_column="stop_pos") + start = IntegerField(column_name="start_pos") + stop = IntegerField(column_name="stop_pos") strand = EnumField(choices = ['+', '-']) feature_type = CharField() @@ -131,10 +131,10 @@ class Collection(BaseModel): A collection is a source of data which can be sampled into a SampleSet. """ class Meta: - db_table = 'collections' + table_name = 'collections' schema = 'data' - name = CharField(db_column="study_name", null = True) + name = CharField(column_name="study_name", null = True) ethnicity = CharField(null = True) @@ -144,7 +144,7 @@ class Study(BaseModel): one or more datasets. """ class Meta: - db_table = 'studies' + table_name = 'studies' schema = 'data' pi_name = CharField() @@ -152,7 +152,7 @@ class Meta: contact_name = CharField() contact_email = CharField() title = CharField() - description = TextField(db_column="study_description", null=True) + description = TextField(column_name="study_description", null=True) publication_date = DateTimeField() ref_doi = CharField(null=True) @@ -164,15 +164,15 @@ class Dataset(BaseModel): Most studies only have a single dataset, but multiple are allowed. 
""" class Meta: - db_table = 'datasets' + table_name = 'datasets' schema = 'data' - study = ForeignKeyField(Study, db_column="study", related_name='datasets') + study = ForeignKeyField(Study, column_name="study", backref='datasets') short_name = CharField() full_name = CharField() browser_uri = CharField(null=True) beacon_uri = CharField(null=True) - description = TextField(db_column="beacon_description", null=True) + description = TextField(column_name="beacon_description", null=True) avg_seq_depth = FloatField(null=True) seq_type = CharField(null=True) seq_tech = CharField(null=True) @@ -189,24 +189,24 @@ def has_image(self): class SampleSet(BaseModel): class Meta: - db_table = 'sample_sets' + table_name = 'sample_sets' schema = 'data' - dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='sample_sets') - collection = ForeignKeyField(Collection, db_column="collection", related_name='sample_sets') + dataset = ForeignKeyField(Dataset, column_name="dataset", backref='sample_sets') + collection = ForeignKeyField(Collection, column_name="collection", backref='sample_sets') sample_size = IntegerField() phenotype = CharField(null=True) class DatasetVersion(BaseModel): class Meta: - db_table = 'dataset_versions' + table_name = 'dataset_versions' schema = 'data' - dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='versions') - reference_set = ForeignKeyField(ReferenceSet, db_column="reference_set", related_name='dataset_versions') - version = CharField(db_column="dataset_version") - description = TextField(db_column="dataset_description") + dataset = ForeignKeyField(Dataset, column_name="dataset", backref='versions') + reference_set = ForeignKeyField(ReferenceSet, column_name="reference_set", backref='dataset_versions') + version = CharField(column_name="dataset_version") + description = TextField(column_name="dataset_description") terms = TextField() available_from = DateTimeField() ref_doi = CharField(null=True) @@ -221,23 +221,23 @@ 
class Meta: class DatasetFile(BaseModel): class Meta: - db_table = 'dataset_files' + table_name = 'dataset_files' schema = 'data' - dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version", related_name='files') - name = CharField(db_column="basename") + dataset_version = ForeignKeyField(DatasetVersion, column_name="dataset_version", backref='files') + name = CharField(column_name="basename") uri = CharField() file_size = IntegerField() class DatasetLogo(BaseModel): class Meta: - db_table = 'dataset_logos' + table_name = 'dataset_logos' schema = 'data' - dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='logo') + dataset = ForeignKeyField(Dataset, column_name="dataset", backref='logo') mimetype = CharField() - data = BlobField(db_column="bytes") + data = BlobField(column_name="bytes") ### @@ -246,10 +246,10 @@ class Meta: class Variant(BaseModel): class Meta: - db_table = "variants" + table_name = "variants" schema = 'data' - dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version", related_name="variants") + dataset_version = ForeignKeyField(DatasetVersion, column_name="dataset_version", backref="variants") rsid = IntegerField() chrom = CharField(max_length=10) pos = IntegerField() @@ -267,22 +267,42 @@ class Meta: vep_annotations = BinaryJSONField() +class VariantMate(BaseModel): + class Meta: + table_name = "mate" + schema = 'data' + + dataset_version = ForeignKeyField(DatasetVersion, column_name="dataset_version", backref="mate") + chrom = CharField(max_length=10) + pos = IntegerField() + ref = CharField() + alt = CharField() + chrom_id = CharField() + mate_chrom = CharField() + mate_start = IntegerField() + mate_id = CharField() + allele_freq = FloatField() + variant_id = CharField() + allele_count = IntegerField() + allele_num = IntegerField() + + class VariantGenes(BaseModel): class Meta: - db_table = 'variant_genes' + table_name = 'variant_genes' schema = 'data' - variant = 
ForeignKeyField(Variant, db_column="variant", related_name="genes") - gene = ForeignKeyField(Gene, db_column="gene", related_name="variants") + variant = ForeignKeyField(Variant, column_name="variant", backref="genes") + gene = ForeignKeyField(Gene, column_name="gene", backref="variants") class VariantTranscripts(BaseModel): class Meta: - db_table = 'variant_transcripts' + table_name = 'variant_transcripts' schema = 'data' - variant = ForeignKeyField(Variant, db_column="variant", related_name="transcripts") - transcript = ForeignKeyField(Transcript, db_column="transcript", related_name="variants") + variant = ForeignKeyField(Variant, column_name="variant", backref="transcripts") + transcript = ForeignKeyField(Transcript, column_name="transcript", backref="variants") class Coverage(BaseModel): @@ -297,10 +317,10 @@ class Coverage(BaseModel): coverage of at least 20 in this position. """ class Meta: - db_table = "coverage" + table_name = "coverage" schema = 'data' - dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version") + dataset_version = ForeignKeyField(DatasetVersion, column_name="dataset_version") chrom = CharField(max_length=10) pos = IntegerField() mean = FloatField() @@ -310,10 +330,10 @@ class Meta: class Metrics(BaseModel): class Meta: - db_table = "metrics" + table_name = "metrics" schema = 'data' - dataset_version = ForeignKeyField(DatasetVersion, db_column="dataset_version") + dataset_version = ForeignKeyField(DatasetVersion, column_name="dataset_version") metric = CharField() mids = ArrayField(IntegerField) hist = ArrayField(IntegerField) @@ -321,13 +341,13 @@ class Meta: class User(BaseModel): class Meta: - db_table = "users" + table_name = "users" schema = 'users' - name = CharField(db_column="username", null=True) + name = CharField(column_name="username", null=True) email = CharField(unique=True) identity = CharField(unique=True) - identity_type = EnumField(null=False, choices=['google', 'elixir']) + identity_type = 
EnumField(null=False, choices=['google', 'elixir'], default='elixir') affiliation = CharField(null=True) country = CharField(null=True) @@ -372,10 +392,10 @@ def has_requested_access(self, dataset): class SFTPUser(BaseModel): class Meta: - db_table = "sftp_users" + table_name = "sftp_users" schema = 'users' - user = ForeignKeyField(User, related_name='sftp_user') + user = ForeignKeyField(User, backref='sftp_user') user_uid = IntegerField(unique=True) user_name = CharField(null=False) password_hash = CharField(null=False) @@ -384,60 +404,60 @@ class Meta: class UserAccessLog(BaseModel): class Meta: - db_table = "user_access_log" + table_name = "user_access_log" schema = 'users' - user = ForeignKeyField(User, related_name='access_logs') - dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access_logs') + user = ForeignKeyField(User, backref='access_logs') + dataset = ForeignKeyField(Dataset, column_name='dataset', backref='access_logs') action = EnumField(null=True, choices=['access_granted','access_revoked','access_requested','private_link']) ts = DateTimeField() class UserConsentLog(BaseModel): class Meta: - db_table = "user_consent_log" + table_name = "user_consent_log" schema = 'users' - user = ForeignKeyField(User, related_name='consent_logs') - dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version', related_name='consent_logs') + user = ForeignKeyField(User, backref='consent_logs') + dataset_version = ForeignKeyField(DatasetVersion, column_name='dataset_version', backref='consent_logs') ts = DateTimeField() class UserDownloadLog(BaseModel): class Meta: - db_table = "user_download_log" + table_name = "user_download_log" schema = 'users' - user = ForeignKeyField(User, related_name='download_logs') - dataset_file = ForeignKeyField(DatasetFile, db_column='dataset_file', related_name='download_logs') + user = ForeignKeyField(User, backref='download_logs') + dataset_file = ForeignKeyField(DatasetFile, 
column_name='dataset_file', backref='download_logs') ts = DateTimeField() class DatasetAccess(BaseModel): class Meta: - db_table = "dataset_access" + table_name = "dataset_access" schema = 'users' - dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access') - user = ForeignKeyField(User, related_name='dataset_access') + dataset = ForeignKeyField(Dataset, column_name='dataset', backref='access') + user = ForeignKeyField(User, backref='dataset_access') wants_newsletter = BooleanField(null=True) is_admin = BooleanField(null=True) class Linkhash(BaseModel): class Meta: - db_table = "linkhash" + table_name = "linkhash" schema = 'users' - dataset_version = ForeignKeyField(DatasetVersion, db_column='dataset_version', related_name='link_hashes') - user = ForeignKeyField(User, related_name='link_hashes') + dataset_version = ForeignKeyField(DatasetVersion, column_name='dataset_version', backref='link_hashes') + user = ForeignKeyField(User, backref='link_hashes') hash = CharField() expires_on = DateTimeField() class BeaconCounts(BaseModel): class Meta: - db_table = "beacon_dataset_counts_table" + table_name = "beacon_dataset_counts_table" schema = 'beacon' datasetid = CharField(primary_key=True) @@ -451,31 +471,31 @@ class Meta: class DatasetVersionCurrent(DatasetVersion): class Meta: - db_table = 'dataset_version_current' + table_name = 'dataset_version_current' schema = 'data' - dataset = ForeignKeyField(Dataset, db_column="dataset", related_name='current_version') - reference_set = ForeignKeyField(ReferenceSet, db_column="reference_set", related_name='current_version') + dataset = ForeignKeyField(Dataset, column_name="dataset", backref='current_version') + reference_set = ForeignKeyField(ReferenceSet, column_name="reference_set", backref='current_version') class DatasetAccessCurrent(DatasetAccess): class Meta: - db_table = 'dataset_access_current' + table_name = 'dataset_access_current' schema = 'users' - dataset = ForeignKeyField(Dataset, 
db_column='dataset', related_name='access_current') - user = ForeignKeyField(User, related_name='access_current') + dataset = ForeignKeyField(Dataset, column_name='dataset', backref='access_current') + user = ForeignKeyField(User, backref='access_current') has_access = IntegerField() access_requested = DateTimeField() class DatasetAccessPending(DatasetAccess): class Meta: - db_table = 'dataset_access_pending' + table_name = 'dataset_access_pending' schema = 'users' - dataset = ForeignKeyField(Dataset, db_column='dataset', related_name='access_pending') - user = ForeignKeyField(User, related_name='access_pending') + dataset = ForeignKeyField(Dataset, column_name='dataset', backref='access_pending') + user = ForeignKeyField(User, backref='access_pending') has_access = IntegerField() access_requested = DateTimeField() @@ -567,7 +587,7 @@ def get_dataset_version(dataset:str, version:str=None): def build_dict_from_row(row): d = {} - for field, value in row.__dict__['_data'].items(): + for field, value in row.__dict__['__data__'].items(): if field == "id": continue d[field] = value diff --git a/backend/handlers.py b/backend/handlers.py index 3d838eb72..5c171eb0f 100644 --- a/backend/handlers.py +++ b/backend/handlers.py @@ -35,7 +35,6 @@ def get_current_user(self): email = self.get_secure_cookie('email') name = self.get_secure_cookie('user') identity = self.get_secure_cookie('identity') - identity_type = self.get_secure_cookie('identity_type') # Fix ridiculous bug with quotation marks showing on the web if name and (name[0] == '"') and (name[-1] == '"'): @@ -49,8 +48,7 @@ def get_current_user(self): try: return db.User(email = email.decode('utf-8'), name = name.decode('utf-8'), - identity = identity.decode('utf-8'), - identity_type = identity_type.decode('utf-8')) + identity = identity.decode('utf-8')) except peewee.OperationalError as e: logging.error("Can't create new user: {}".format(e)) else: diff --git a/backend/modules/browser/browser_handlers.py 
b/backend/modules/browser/browser_handlers.py index 6abc27649..26da598b4 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -5,6 +5,7 @@ import db import handlers +from . import error from . import lookups from . import utils @@ -24,7 +25,7 @@ def get(self, dataset:str, query:str, ds_version:str=None): dataset, ds_version = utils.parse_dataset(dataset, ds_version) ret = {} - results = lookups.get_autocomplete(dataset, query, ds_version) + results = lookups.autocomplete(dataset, query, ds_version) ret = {'values': sorted(list(set(results)))[:20]} self.finish(ret) @@ -87,12 +88,13 @@ def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): ds_version (str): dataset version """ dataset, ds_version = utils.parse_dataset(dataset, ds_version) - ret = utils.get_coverage(dataset, datatype, item, ds_version) - if 'bad_region' in ret: - self.send_error(status_code=400, reason="Unable to parse the region") + try: + ret = utils.get_coverage(dataset, datatype, item, ds_version) + except error.NotFoundError as err: + self.send_error(status_code=404, reason=str(err)) return - if 'region_too_large' in ret: - self.send_error(status_code=400, reason="The region is too large") + except (error.ParsingError, error.MalformedRequest) as err: + self.send_error(status_code=400, reason=str(err)) return self.finish(ret) @@ -114,7 +116,7 @@ def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): try: ret = utils.get_coverage_pos(dataset, datatype, item, ds_version) except ValueError: - logging.error('GetCoveragePos: unable to parse region ({})'.format(region)) + logging.error('GetCoveragePos: unable to parse region ({})'.format(item)) self.send_error(status_code=400, reason='Unable to parse region') return @@ -139,7 +141,15 @@ def get(self, dataset:str, gene:str, ds_version:str=None): ret = {'gene':{'gene_id': gene_id}} # Gene - gene = lookups.get_gene(dataset, gene_id, ds_version) + try: + gene = 
lookups.get_gene(dataset, gene_id, ds_version) + except error.NotFoundError as err: + self.send_error(status_code=404, reason=str(err)) + return + except (error.ParsingError, error.MalformedRequest) as err: + self.send_error(status_code=400, reason=str(err)) + return + if not gene: self.send_error(status_code=404, reason='Gene not found') return @@ -181,9 +191,9 @@ def get(self, dataset:str, region:str, ds_version:str=None): try: chrom, start, stop = utils.parse_region(region) - except ValueError: - logging.error('GetRegion: unable to parse region ({})'.format(region)) - self.send_error(status_code=400, reason='Unable to parse region') + except error.ParsingError as err: + self.send_error(status_code=400, reason=str(err)) + logging.warning('GetRegion: unable to parse region ({})'.format(region)) return ret = {'region':{'chrom': chrom, @@ -193,7 +203,7 @@ def get(self, dataset:str, region:str, ds_version:str=None): } if utils.is_region_too_large(start, stop): - self.send_error(status_code=400, reason="The region is too large") + self.send_error(status_code=400, reason='Region too large') return genes_in_region = lookups.get_genes_in_region(dataset, chrom, start, stop, ds_version) @@ -229,10 +239,12 @@ def get(self, dataset:str, transcript:str, ds_version:str=None): } # Add transcript information - transcript = lookups.get_transcript(dataset, transcript_id, ds_version) - if not transcript: - self.send_error(status_code=404, reason='Transcript not found') + try: + transcript = lookups.get_transcript(dataset, transcript_id, ds_version) + except error.NotFoundError as err: + self.send_error(status_code=404, reason=str(err)) return + ret['transcript']['id'] = transcript['transcript_id'] ret['transcript']['number_of_CDS'] = len([t for t in transcript['exons'] if t['feature_type'] == 'CDS']) @@ -270,18 +282,21 @@ def get(self, dataset:str, variant:str, ds_version:str=None): ret = {'variant':{}} # Variant v = variant.split('-') + if len(v) != 4: + logging.error('GetVariant: 
unable to parse variant ({})'.format(variant)) + self.send_error(status_code=400, reason=f'Unable to parse variant {variant}') try: v[1] = int(v[1]) except ValueError: - logging.error('GetVariant: unable to parse variant ({})'.format(variant)) - self.send_error(status_code=400, reason="Unable to parse variant") + logging.error('GetVariant: position not an integer ({})'.format(variant)) + self.send_error(status_code=400, reason=f'Position is not an integer in variant {variant}') return orig_variant = variant - variant = lookups.get_variant(dataset, v[1], v[0], v[2], v[3], ds_version) - - if not variant: + try: + variant = lookups.get_variant(dataset, v[1], v[0], v[2], v[3], ds_version) + except error.NotFoundError as err: logging.error('Variant not found ({})'.format(orig_variant)) - self.send_error(status_code=404, reason='Variant not found') + self.send_error(status_code=404, reason=str(err)) return # Just get the information we need @@ -320,7 +335,6 @@ def get(self, dataset:str, variant:str, ds_version:str=None): 'canonical': annotation['CANONICAL'], 'modification': annotation['HGVSp'].split(":")[1] if ':' in annotation['HGVSp'] else None}] - # Dataset frequencies. 
# This is reported per variable in the database data, with dataset # information inside the variables, so here we reorder to make the @@ -332,9 +346,11 @@ def get(self, dataset:str, variant:str, ds_version:str=None): dsvs = [dsv for dsv in dsvs if dsv.reference_set == curr_dsv.reference_set] dsv_groups = [(curr_dsv, variant)] for dsv in dsvs: - hit = lookups.get_variant(dsv.dataset.short_name, v[1], v[0], v[2], v[3], dsv.version) - if hit: - dsv_groups.append((dsv, hit)) + try: + hit = lookups.get_variant(dsv.dataset.short_name, v[1], v[0], v[2], v[3], dsv.version) + except error.NotFoundError: + continue + dsv_groups.append((dsv, hit)) frequencies = {'headers':[['Dataset','pop'], ['Allele Count','acs'], @@ -379,12 +395,13 @@ def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): item (str): item to query """ dataset, ds_version = utils.parse_dataset(dataset, ds_version) - ret = utils.get_variant_list(dataset, datatype, item, ds_version) - if not ret: - self.send_error(status_code=500, reason='Unable to retrieve variants') + try: + ret = utils.get_variant_list(dataset, datatype, item, ds_version) + except error.NotFoundError as err: + self.send_error(status_code=404, reason=str(err)) return - if 'region_too_large' in ret: - self.send_error(status_code=400, reason="The region is too large") + except (error.ParsingError, error.MalformedRequest) as err: + self.send_error(status_code=400, reason=str(err)) return # inconvenient way of doing humpBack-conversion diff --git a/backend/modules/browser/error.py b/backend/modules/browser/error.py new file mode 100644 index 000000000..606bb5c2e --- /dev/null +++ b/backend/modules/browser/error.py @@ -0,0 +1,11 @@ +class NotFoundError(Exception): + """The query returned nothing from the database.""" + pass + +class ParsingError(Exception): + """Failed to parse the request.""" + pass + +class MalformedRequest(Exception): + """Bad request (e.g. 
too large region).""" + pass diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 652e7f359..3a518cb7f 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -1,17 +1,18 @@ """Lookup functions for the variant browser.""" - import logging import re import db +from . import error + SEARCH_LIMIT = 10000 REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') -def get_autocomplete(dataset:str, query:str, ds_version:str=None): +def autocomplete(dataset:str, query:str, ds_version:str=None): """ Provide autocomplete suggestions based on the query. @@ -27,7 +28,7 @@ def get_autocomplete(dataset:str, query:str, ds_version:str=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return None + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') query = (db.Gene.select(db.Gene.name) .where(((db.Gene.name.startswith(query)) & (db.Gene.reference_set == ref_set)))) @@ -40,6 +41,7 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): Parse the search input. Datatype is one of: + * `gene` * `transcript` * `variant` @@ -47,11 +49,13 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): * `region` Identifier is one of: + * ensembl ID for gene * variant ID string for variant (eg. 1-1000-A-T) * region ID string for region (eg. 
1-1000-2000) Follow these steps: + * if query is an ensembl ID, return it * if a gene symbol, return that gene's ensembl ID * if an RSID, return that variant's string @@ -68,32 +72,46 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): query = query.strip() # Parse Variant types - variant = get_variants_by_rsid(dataset, query.lower(), ds_version=ds_version) - if variant: + try: + variant = get_variants_by_rsid(dataset, query.lower(), ds_version=ds_version) + except (error.NotFoundError, error.ParsingError): + pass + else: if len(variant) == 1: - retval = ('variant', variant[0]['variant_id']) - else: - retval = ('dbsnp_variant_set', variant[0]['rsid']) - return retval + return ('variant', variant[0]['variant_id']) + return ('dbsnp_variant_set', variant[0]['rsid']) - gene = get_gene_by_name(dataset, query) - # From here out, all should be uppercase (gene, tx, region, variant_id) - query = query.upper() - if not gene: + # Gene + try: gene = get_gene_by_name(dataset, query) - if gene: + except error.NotFoundError: + pass + else: return 'gene', gene['gene_id'] + # Capital letters for all other queries + query = query.upper() + try: + gene = get_gene_by_name(dataset, query) + except error.NotFoundError: + pass + else: + return 'gene', gene['gene_id'] # Ensembl formatted queries if query.startswith('ENS'): # Gene - gene = get_gene(dataset, query) - if gene: + try: + gene = get_gene(dataset, query) + except error.NotFoundError: + pass + else: return 'gene', gene['gene_id'] - # Transcript - transcript = get_transcript(dataset, query) - if transcript: + try: + transcript = get_transcript(dataset, query) + except error.NotFoundError: + pass + else: return 'transcript', transcript['transcript_id'] # Region and variant queries @@ -105,8 +123,13 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): target_type = 'region' if match.group(2) == ":": target = target.replace(":","-") + if match.group(5) and 
set(match.group(4)).issubset(set("ACGT")): target_type = 'variant' + try: + get_raw_variant(dataset, match.group(3), match.group(1), match.group(4), match.group(5), ds_version) + except error.NotFoundError as err: + target_type = 'not_found' return target_type, target @@ -130,17 +153,19 @@ def get_coverage_for_bases(dataset:str, chrom:str, start_pos:int, end_pos:int=No """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return [] + raise error.NotFoundError(f'Unable to find the dataset version in the database') if end_pos is None: end_pos = start_pos - return [values for values in (db.Coverage - .select() - .where((db.Coverage.pos >= start_pos) & - (db.Coverage.pos <= end_pos) & - (db.Coverage.chrom == chrom) & - (db.Coverage.dataset_version == dataset_version.id)) - .dicts())] + coverage = [row for row in (db.Coverage.select() + .where((db.Coverage.pos >= start_pos) & + (db.Coverage.pos <= end_pos) & + (db.Coverage.chrom == chrom) & + (db.Coverage.dataset_version == dataset_version.id)) + .dicts())] + if not coverage: + raise error.NotFoundError('No coverage found for the region') + return coverage def get_coverage_for_transcript(dataset:str, chrom:str, start_pos:int, end_pos:int=None, ds_version:str=None): @@ -186,7 +211,8 @@ def get_exons_in_transcript(dataset:str, transcript_id:str, ds_version=None): ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: logging.info('get_exons_in_transcript({}, {}): unable to find dataset dbid'.format(dataset, transcript_id)) - return None + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') + try: transcript = (db.Transcript .select() @@ -196,11 +222,14 @@ def get_exons_in_transcript(dataset:str, transcript_id:str, ds_version=None): .get()) except db.Transcript.DoesNotExist: logging.info('get_exons_in_transcript({}, {}): unable to retrieve transcript'.format(dataset, transcript_id)) - return None + raise 
error.NotFoundError(f'Transcript {transcript_id} not found in reference data.') wanted_types = ('CDS', 'UTR', 'exon') - return sorted(list(db.Feature.select().where((db.Feature.transcript == transcript) & - (db.Feature.feature_type in wanted_types)).dicts()), - key=lambda k: k['start']) + features = sorted(list(db.Feature.select().where((db.Feature.transcript == transcript) & + (db.Feature.feature_type in wanted_types)).dicts()), + key=lambda k: k['start']) + if not features: + raise error.NotFoundError(f'No features found for transcript {transcript_id} in reference data.') + return features def get_gene(dataset:str, gene_id:str, ds_version:str=None): @@ -219,12 +248,13 @@ def get_gene(dataset:str, gene_id:str, ds_version:str=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return None + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') + try: return db.Gene.select().where((db.Gene.gene_id == gene_id) & (db.Gene.reference_set == ref_set)).dicts().get() except db.Gene.DoesNotExist: - return None + raise error.NotFoundError(f'Gene {gene_id} not found in reference data.') def get_gene_by_dbid(gene_dbid:str): @@ -262,7 +292,8 @@ def get_gene_by_name(dataset:str, gene_name:str, ds_version=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return {} + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') + try: return (db.Gene.select() .where((db.Gene.reference_set == ref_set) & @@ -278,8 +309,8 @@ def get_gene_by_name(dataset:str, gene_name:str, ds_version=None): .dicts() .get()) except db.GeneOtherNames.DoesNotExist: - logging.error('get_gene_by_name({}, {}): unable to retrieve gene'.format(dataset, gene_name)) - return {} + logging.info('get_gene_by_name({}, {}): unable to retrieve gene'.format(dataset, gene_name)) + raise error.NotFoundError(f'Gene {gene_name} not found in reference data') def 
get_genes_in_region(dataset:str, chrom:str, start_pos:int, stop_pos:int, ds_version:str=None): @@ -300,13 +331,13 @@ def get_genes_in_region(dataset:str, chrom:str, start_pos:int, stop_pos:int, ds_ try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return {} + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') - gene_query = db.Gene.select().where((db.Gene.reference_set == ref_set) & - (db.Gene.start <= stop_pos) & - (db.Gene.stop >= start_pos) & - (db.Gene.chrom == chrom)).dicts() - return [gene for gene in gene_query] + genes = db.Gene.select().where((db.Gene.reference_set == ref_set) & + (db.Gene.start <= stop_pos) & + (db.Gene.stop >= start_pos) & + (db.Gene.chrom == chrom)).dicts() + return genes def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_version:str=None): @@ -327,7 +358,7 @@ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_versio """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + raise error.NotFoundError(f'Unable to find the dataset version in the database') try: variant = (db.Variant @@ -351,9 +382,9 @@ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_versio .dicts()] return variant except db.Variant.DoesNotExist: - logging.error('get_raw_variant({}, {}, {}, {}, {}, {}): unable to retrieve variant' - .format(dataset, pos, chrom, ref, alt, dataset_version.id)) - return None + logging.info('get_raw_variant({}, {}, {}, {}, {}, {}): unable to retrieve variant' + .format(dataset, pos, chrom, ref, alt, dataset_version.id)) + raise error.NotFoundError(f'Variant {chrom}-{pos}-{ref}-{alt} not found') def get_transcript(dataset:str, transcript_id:str, ds_version:str=None): @@ -374,7 +405,7 @@ def get_transcript(dataset:str, transcript_id:str, ds_version:str=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - 
return None + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') try: transcript = (db.Transcript .select(db.Transcript, db.Gene.gene_id) @@ -386,7 +417,8 @@ def get_transcript(dataset:str, transcript_id:str, ds_version:str=None): transcript['exons'] = get_exons_in_transcript(dataset, transcript_id) return transcript except db.Transcript.DoesNotExist: - return None + logging.info('get_transcript({}, {}): unable to retrieve transcript'.format(dataset, transcript_id)) + raise error.NotFoundError(f'Transcript {transcript_id} not found in reference data') def get_transcripts_in_gene(dataset:str, gene_id:str, ds_version:str=None): @@ -405,14 +437,15 @@ def get_transcripts_in_gene(dataset:str, gene_id:str, ds_version:str=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - logging.error('get_transcripts_in_gene({}, {}): unable to get referenceset dbid'.format(dataset, gene_id)) - return [] + logging.warning('get_transcripts_in_gene({}, {}): unable to get referenceset dbid'.format(dataset, gene_id)) + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') + try: gene = db.Gene.select().where((db.Gene.reference_set == ref_set) & (db.Gene.gene_id == gene_id)).dicts().get() except db.Gene.DoesNotExist: - logging.error('get_transcripts_in_gene({}, {}): unable to retrieve gene'.format(dataset, gene_id)) - return [] + logging.info('get_transcripts_in_gene({}, {}): unable to retrieve gene'.format(dataset, gene_id)) + raise error.NotFoundError(f'Gene {gene_id} not found in reference data') return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene['id']).dicts()] @@ -469,24 +502,25 @@ def get_variants_by_rsid(dataset:str, rsid:str, ds_version:str=None): """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + raise error.NotFoundError(f'Unable to find the dataset version in the database') if not 
rsid.startswith('rs'): logging.error('get_variants_by_rsid({}, {}): rsid not starting with rs'.format(dataset, rsid)) - return None + raise error.ParsingError('rsid not starting with rs') try: rsid = int(rsid.lstrip('rs')) except ValueError: logging.error('get_variants_by_rsid({}, {}): not an integer after rs'.format(dataset, rsid)) - return None - query = (db.Variant - .select() - .where((db.Variant.rsid == rsid) & - (db.Variant.dataset_version == dataset_version)) - .dicts()) + raise error.ParsingError('Not an integer after rs') + variants = (db.Variant + .select() + .where((db.Variant.rsid == rsid) & + (db.Variant.dataset_version == dataset_version)) + .dicts()) - variants = [variant for variant in query] + if not variants: + raise error.NotFoundError(f'No variants found for rsid {rsid}') return variants @@ -505,10 +539,10 @@ def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + raise error.NotFoundError(f'Unable to find the dataset version in the database') gene = get_gene(dataset, gene_id, ds_version) if not gene: - return None + raise error.NotFoundError(f'Gene {gene_id} not found in reference data') variants = [variant for variant in db.Variant.select() .join(db.VariantGenes) @@ -544,7 +578,7 @@ def get_variants_in_region(dataset:str, chrom:str, start_pos:int, end_pos:int, d """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + raise error.NotFoundError(f'Unable to find the dataset version in the database') query = (db.Variant .select() .where((db.Variant.pos >= start_pos) & @@ -582,10 +616,12 @@ def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=No """ dataset_version = db.get_dataset_version(dataset, ds_version) + if not dataset_version: + raise error.NotFoundError(f'Unable to find the dataset version in the database') transcript = get_transcript(dataset,
transcript_id, ds_version) if not transcript: - return None + raise error.NotFoundError(f'Transcript {transcript_id} not found in reference data') variants = [variant for variant in db.Variant.select() .join(db.VariantTranscripts) diff --git a/backend/modules/browser/tests/test_browser_handlers.py b/backend/modules/browser/tests/test_browser_handlers.py index 19d174884..b69797afb 100644 --- a/backend/modules/browser/tests/test_browser_handlers.py +++ b/backend/modules/browser/tests/test_browser_handlers.py @@ -1,7 +1,6 @@ """ Test the browser handlers """ - import requests import json @@ -63,7 +62,7 @@ def test_get_coverage(): assert response.status_code == 400 data_item = '1-1-5' response = requests.get('{}/api/dataset/{}/browser/coverage/{}/{}'.format(BASE_URL, dataset, data_type, data_item)) - assert response.status_code == 200 + assert response.status_code == 404 def test_get_coverage_pos(): @@ -244,9 +243,15 @@ def test_search(): assert data['type'] == 'dbsnp' assert data['value'] == 142856307 - query = '21-9411281-T-C' + query = '22-1234321-A-T' + response = requests.get('{}/api/dataset/{}/browser/search/{}'.format(BASE_URL, dataset, query)) + data = json.loads(response.text) + assert data['type'] == 'not_found' + assert data['value'] == '22-1234321-A-T' + + query = '21-29461622-G-A' version = '20161223' response = requests.get('{}/api/dataset/{}/version/{}/browser/search/{}'.format(BASE_URL, dataset, version, query)) data = json.loads(response.text) assert data['type'] == 'variant' - assert data['value'] == '21-9411281-T-C' + assert data['value'] == '21-29461622-G-A' diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index 21a13f5c9..6de69d606 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -2,19 +2,22 @@ Tests for the functions available in lookups.py """ +import pytest + +from .. import error from .. 
import lookups -def test_get_autocomplete(): +def test_autocomplete(): """ Test get_autocomplete() """ - res = lookups.get_autocomplete('SweGen', 'PA') + res = lookups.autocomplete('SweGen', 'PA') expected = set(["PABPC1P9", "PACSIN2", "PANX2", "PARP4P3", "PARVB", "PARVG", "PATZ1", "PAXBP1", "PAXBP1-AS1"]) assert set(res) == expected - res = lookups.get_autocomplete('Bad_dataset', 'PA') - assert not res + with pytest.raises(error.NotFoundError): + res = lookups.autocomplete('Bad_dataset', 'PA') def test_get_awesomebar_result(): @@ -33,6 +36,8 @@ def test_get_awesomebar_result(): assert result == ('transcript', 'ENST00000457709') result = lookups.get_awesomebar_result('SweGen', '22-46615715-46615880') assert result == ('region', '22-46615715-46615880') + result = lookups.get_awesomebar_result('SweGen', '22-1234321-A-A') + assert result == ('not_found', '22-1234321-A-A') result = lookups.get_awesomebar_result('SweGen', 'CHR22:46615715-46615880') assert result == ('region', '22-46615715-46615880') result = lookups.get_awesomebar_result('SweGen', 'CHR22-29461622-G-A') @@ -64,11 +69,12 @@ def test_get_coverage_for_bases(): assert len(lookups.get_coverage_for_bases('SweGen', '22', 46615715, 46615880)) == 17 # no hits - coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500285) - assert not coverage + with pytest.raises(error.NotFoundError): + lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500285) # incorrect dataset - assert not lookups.get_coverage_for_bases('BAD_DATASET', '1', 55500283, 55500320) + with pytest.raises(error.NotFoundError): + lookups.get_coverage_for_bases('BAD_DATASET', '1', 55500283, 55500320) def test_get_coverage_for_transcript(): @@ -93,11 +99,12 @@ def test_get_coverage_for_transcript(): assert len(lookups.get_coverage_for_transcript('SweGen', '22', 46615715, 46615880)) == 17 # no hits - coverage = lookups.get_coverage_for_transcript('SweGen', '1', 55500283, 55500285) - assert not coverage + with 
pytest.raises(error.NotFoundError): + coverage = lookups.get_coverage_for_transcript('SweGen', '1', 55500283, 55500285) # incorrect dataset - assert not lookups.get_coverage_for_transcript('BAD_DATASET', '1', 55500283, 55500320) + with pytest.raises(error.NotFoundError): + assert not lookups.get_coverage_for_transcript('BAD_DATASET', '1', 55500283, 55500320) def test_get_exons_in_transcript(): @@ -108,12 +115,12 @@ def test_get_exons_in_transcript(): assert len(result) == 14 # bad dataset - result = lookups.get_exons_in_transcript('NO_DATASET', 'ENST00000215855') - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_exons_in_transcript('NO_DATASET', 'ENST00000215855') # bad transcript - result = lookups.get_exons_in_transcript('SweGen', 'BAD_TRANSCRIPT') - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_exons_in_transcript('SweGen', 'BAD_TRANSCRIPT') def test_get_gene(): @@ -135,12 +142,12 @@ def test_get_gene(): assert result[val] == expected[val] # non-existing gene - result = lookups.get_gene('SweGen', 'NOT_A_GENE') - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_gene('SweGen', 'NOT_A_GENE') # non-existing dataset - result = lookups.get_gene('NoDataset', 'ENSG00000223972') - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_gene('NoDataset', 'ENSG00000223972') def test_get_gene_by_dbid(): @@ -186,17 +193,15 @@ def test_get_gene_by_name(caplog): assert result[val] == expected[val] # non-existing gene - result = lookups.get_gene_by_name('SweGen', 'NOT_A_GENE') - assert not result - assert caplog.messages[0] == 'get_gene_by_name(SweGen, NOT_A_GENE): unable to retrieve gene' + with pytest.raises(error.NotFoundError): + lookups.get_gene_by_name('SweGen', 'NOT_A_GENE') # non-existing dataset - result = lookups.get_gene_by_name('NoDataset', 'ENSG00000223972') - assert not result + with pytest.raises(error.NotFoundError): + 
lookups.get_gene_by_name('NoDataset', 'ENSG00000223972') # name in other_names result = lookups.get_gene_by_name('SweGen', 'BCL8C') - print(result) assert result['gene_id'] == 'ENSG00000223875' @@ -214,10 +219,10 @@ def test_get_genes_in_region(): expected_ids = ['ENSG00000231565'] assert [gene['gene_id'] for gene in res] == expected_ids # bad dataset - res = lookups.get_genes_in_region('bad_dataset', '22', 25595800, 25615800) + with pytest.raises(error.NotFoundError): + lookups.get_genes_in_region('bad_dataset', '22', 25595800, 25615800) # nothing found - res = lookups.get_genes_in_region('SweGen', '22', 25595800, 25595801) - assert not res + assert not lookups.get_genes_in_region('SweGen', '22', 25595800, 25595801) def test_get_transcript(): @@ -237,7 +242,8 @@ def test_get_transcript(): assert len(result['exons']) == 1 # non-existing - assert not lookups.get_transcript('SweGen', 'INCORRECT') + with pytest.raises(error.NotFoundError): + lookups.get_transcript('SweGen', 'INCORRECT') def test_get_transcripts_in_gene(): @@ -247,8 +253,10 @@ def test_get_transcripts_in_gene(): res = lookups.get_transcripts_in_gene('SweGen', 'ENSG00000228314') assert len(res) == 3 - assert not lookups.get_transcripts_in_gene('bad_dataset', 'ENSG00000241670') - assert not lookups.get_transcripts_in_gene('SweGen', 'ENSGASDFG') + with pytest.raises(error.NotFoundError): + lookups.get_transcripts_in_gene('bad_dataset', 'ENSG00000241670') + with pytest.raises(error.NotFoundError): + lookups.get_transcripts_in_gene('SweGen', 'ENSGASDFG') def test_get_raw_variant(): @@ -260,8 +268,10 @@ def test_get_raw_variant(): assert len(result['genes']) == len(['ENSG00000229286', 'ENSG00000235265']) assert set(result['transcripts']) == set(['ENST00000448070', 'ENST00000413156']) assert len(result['transcripts']) == len(['ENST00000448070', 'ENST00000413156']) - assert not lookups.get_raw_variant('SweGen', 55500281, '1', 'A', 'T') - assert not lookups.get_raw_variant('bad_dataset', 55500283, '1', 'A', 
'T') + with pytest.raises(error.NotFoundError): + assert not lookups.get_raw_variant('SweGen', 55500281, '1', 'A', 'T') + with pytest.raises(error.NotFoundError): + assert not lookups.get_raw_variant('bad_dataset', 55500283, '1', 'A', 'T') def test_get_transcripts_in_gene_by_dbid(): @@ -285,15 +295,20 @@ def test_get_variant(): assert len(result['genes']) == len(['ENSG00000229286', 'ENSG00000235265']) assert set(result['transcripts']) == set(['ENST00000448070', 'ENST00000413156']) assert len(result['transcripts']) == len(['ENST00000448070', 'ENST00000413156']) - result = lookups.get_variant('SweGen', 9411609, '21', 'G', 'T') - assert not result + + # not found + with pytest.raises(error.NotFoundError): + result = lookups.get_variant('SweGen', 12321, '21', 'G', 'G') + with pytest.raises(error.NotFoundError): + result = lookups.get_variant('SweGen', 9411609, '21', 'G', 'T') # incorrect position - assert not lookups.get_variant('SweGen', -1, '1', 'A', 'T') + with pytest.raises(error.NotFoundError): + assert not lookups.get_variant('SweGen', -1, '1', 'A', 'T') # with version - result = lookups.get_variant('SweGen', 16057464, '22', 'G', 'A', "20161223") - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_variant('SweGen', 16057464, '22', 'G', 'A', "20161223") result = lookups.get_variant('SweGen', 9411609, '21', 'G', 'T', "20161223") assert result['variant_id'] == '21-9411609-G-T' @@ -306,19 +321,25 @@ def test_get_variants_by_rsid(): result = lookups.get_variants_by_rsid('SweGen', 'rs142856307') assert result[0]['pos'] == 16285954 assert len(result) == 5 - assert not lookups.get_variants_by_rsid('SweGen', 'rs76676778') + with pytest.raises(error.NotFoundError): + assert not lookups.get_variants_by_rsid('SweGen', 'rs76676778') # with version - assert not lookups.get_variants_by_rsid('SweGen', 'rs185758992', '20161223') + with pytest.raises(error.NotFoundError): + lookups.get_variants_by_rsid('SweGen', 'rs185758992', '20161223') result = 
lookups.get_variants_by_rsid('SweGen', 'rs76676778', '20161223') assert result[0]['variant_id'] == '21-9411609-G-T' # errors - assert lookups.get_variants_by_rsid('incorrect_name', 'rs373706802') is None - assert lookups.get_variants_by_rsid('SweGen', '373706802') is None - assert lookups.get_variants_by_rsid('SweGen', 'rs3737o68o2') is None + with pytest.raises(error.NotFoundError): + lookups.get_variants_by_rsid('incorrect_name', 'rs373706802') + with pytest.raises(error.ParsingError): + lookups.get_variants_by_rsid('SweGen', '373706802') + with pytest.raises(error.ParsingError): + lookups.get_variants_by_rsid('SweGen', 'rs3737o68o2') # no variants with rsid available - assert not lookups.get_variants_by_rsid('SweGen', 'rs1') + with pytest.raises(error.NotFoundError): + lookups.get_variants_by_rsid('SweGen', 'rs1') def test_get_variants_in_gene(): @@ -327,9 +348,17 @@ def test_get_variants_in_gene(): """ res = lookups.get_variants_in_gene('SweGen', 'ENSG00000198062') assert len(res) == 512 - assert not lookups.get_variants_in_gene('bad_dataset', 'ENSG00000198062') - assert not lookups.get_variants_in_gene('bad_dataset', 'ENSGASDFG') - assert not lookups.get_variants_in_gene('SweGen', 'ENSG00000198062', "BAD_VERSION") + + # existing gene without variants + assert not lookups.get_variants_in_gene('SweGen', 'ENSG00000128298') + + # bad requests + with pytest.raises(error.NotFoundError): + lookups.get_variants_in_gene('bad_dataset', 'ENSG00000198062') + with pytest.raises(error.NotFoundError): + lookups.get_variants_in_gene('bad_dataset', 'ENSGASDFG') + with pytest.raises(error.NotFoundError): + lookups.get_variants_in_gene('SweGen', 'ENSG00000198062', "BAD_VERSION") def test_get_variants_in_region(): @@ -342,12 +371,14 @@ def test_get_variants_in_region(): assert [res['pos'] for res in result] == expected_pos # no positions covered - result = lookups.get_variants_in_region('SweGen', '22', 16079200, 16079000) - assert not result + assert not 
lookups.get_variants_in_region('SweGen', '22', 16079200, 16079000) + + # no variants found + assert not lookups.get_variants_in_region('SweGen', '22', 106079000, 106079200) # incorrect dataset - result = lookups.get_variants_in_region('Incorrect_dataset', '22', 16079200, 16079400) - assert not result + with pytest.raises(error.NotFoundError): + lookups.get_variants_in_region('Incorrect_dataset', '22', 16079200, 16079400) def test_get_variants_in_transcript(): @@ -356,5 +387,9 @@ def test_get_variants_in_transcript(): """ res = lookups.get_variants_in_transcript('SweGen', 'ENST00000452800') assert len(res) == 508 - assert not lookups.get_variants_in_transcript('BAD_DATASET', 'ENST00000452800') - assert not lookups.get_variants_in_transcript('SweGen', 'ENST123') + + # bad requests + with pytest.raises(error.NotFoundError): + assert not lookups.get_variants_in_transcript('BAD_DATASET', 'ENST00000452800') + with pytest.raises(error.NotFoundError): + assert not lookups.get_variants_in_transcript('SweGen', 'ENST123') diff --git a/backend/modules/browser/tests/test_utils.py b/backend/modules/browser/tests/test_utils.py index 74cbbd7f5..83361475e 100644 --- a/backend/modules/browser/tests/test_utils.py +++ b/backend/modules/browser/tests/test_utils.py @@ -2,6 +2,9 @@ Tests for utils.py """ +import pytest + +from .. import error from .. import lookups from .. 
import utils @@ -52,7 +55,7 @@ def test_add_consequence_to_variant(): assert variant['major_consequence'] == '' # bad variant - variant = lookups.get_variant('SweGen', 38481311, '444', 'C', 'T') + variant = {} utils.add_consequence_to_variant(variant) assert not variant @@ -74,18 +77,22 @@ def test_get_coverage(): assert len(res['coverage']) == 144 res = utils.get_coverage('SweGen', 'region', '22-46615715-46615880') assert len(res['coverage']) == 17 - res = utils.get_coverage('SweGen', 'region', '22:46615715-46615880') - assert not res['coverage'] - res = utils.get_coverage('SweGen', 'region', '22-46615715asd-46615880') - assert not res['coverage'] - assert res['bad_region'] res = utils.get_coverage('SweGen', 'transcript', 'ENST00000438441') assert len(res['coverage']) == 144 - assert not utils.get_coverage('BAD_SET', 'transcript', 'ENST00000438441')['coverage'] + # bad regions + with pytest.raises(error.ParsingError): + res = utils.get_coverage('SweGen', 'region', '22-46615715asd-46615880') + # is seen as 22:46615715-46615880-46615880 + with pytest.raises(error.NotFoundError): + utils.get_coverage('SweGen', 'region', '22:46615715-46615880') + + # no coverage found + with pytest.raises(error.NotFoundError): + utils.get_coverage('BAD_SET', 'transcript', 'ENST00000438441')['coverage'] - res = utils.get_coverage('SweGen', 'region', '22-1-1000000') - assert res['region_too_large'] + with pytest.raises(error.MalformedRequest): + res = utils.get_coverage('SweGen', 'region', '22-1-1000000') def test_get_coverage_pos(): @@ -105,9 +112,20 @@ def test_get_coverage_pos(): assert res['start'] == 16364817 assert res['stop'] == 16366254 - res = utils.get_coverage_pos('BAD_SET', 'transcript', 'ENST00000438441') - for value in res.values(): - assert not value + # bad requests + with pytest.raises(error.NotFoundError): + utils.get_coverage_pos('BAD_SET', 'transcript', 'ENST00000438441') + with pytest.raises(error.NotFoundError): + utils.get_coverage_pos('SweGen', 'transcript', 
'ENST1234321') + with pytest.raises(error.NotFoundError): + utils.get_coverage_pos('SweGen', 'gene', 'ENSG1234321') + with pytest.raises(error.ParsingError): + utils.get_coverage_pos('BAD_SET', 'region', '1:1:1:1') + + # too large request + with pytest.raises(error.MalformedRequest): + utils.get_coverage_pos('SweGen', 'region', '1-1-10000000') + def test_data_structures(): @@ -191,15 +209,25 @@ def test_get_variant_list(): assert len(res['variants']) == 13 res = utils.get_variant_list('SweGen', 'transcript', 'ENST00000438441') assert len(res['variants']) == 178 - res = utils.get_variant_list('SweGen', 'transcript', 'ENSTWEIRD') - assert not res - res = utils.get_variant_list('SweGen', 'region', '22-1-1000000') - assert res['region_too_large'] - res = utils.get_variant_list('SweGen', 'region', '22-16272587') assert len(res['variants']) == 4 + # bad requests + with pytest.raises(error.NotFoundError): + utils.get_variant_list('SweGen', 'transcript', 'ENSTWEIRD') + with pytest.raises(error.NotFoundError): + utils.get_variant_list('Bad_dataset', 'transcript', 'ENSTWEIRD') + with pytest.raises(error.NotFoundError): + utils.get_variant_list('SweGen', 'gene', 'ENSG1234321') + with pytest.raises(error.ParsingError): + utils.get_variant_list('SweGen', 'region', '1-1-1-1-1') + + # too large region + with pytest.raises(error.MalformedRequest): + utils.get_variant_list('SweGen', 'region', '22-1-1000000') + + def test_order_vep_by_csq(): """ Test order_vep_by_csq() @@ -224,6 +252,24 @@ def test_parse_dataset(): assert utils.parse_dataset('hg19:SweGen:180101') == ('SweGen', '180101') +def test_parse_region(): + assert utils.parse_region('1-2-3') == ('1', 2, 3) + assert utils.parse_region('X-15-30') == ('X', 15, 30) + assert utils.parse_region('1-2') == ('1', 2, 2) + + # bad regions + with pytest.raises(error.ParsingError): + print(utils.parse_region('1:2:2')) + with pytest.raises(error.ParsingError): + utils.parse_region('1-2-2-2') + with pytest.raises(error.ParsingError): + 
utils.parse_region('asdfgh') + with pytest.raises(error.ParsingError): + utils.parse_region('X-15-z') + with pytest.raises(error.ParsingError): + utils.parse_region('X-y-15') + + def test_remove_extraneous_vep_annotations(): """ Test remove_extraneous_vep_annotations() diff --git a/backend/modules/browser/utils.py b/backend/modules/browser/utils.py index 589e2fe72..d3c15aaf8 100644 --- a/backend/modules/browser/utils.py +++ b/backend/modules/browser/utils.py @@ -2,6 +2,7 @@ import logging +from . import error from . import lookups # for coverage @@ -176,12 +177,10 @@ def get_coverage(dataset:str, datatype:str, item:str, ds_version:str=None): ret['coverage'] = lookups.get_coverage_for_transcript(dataset, transcript['chrom'], start, stop, ds_version) elif datatype == 'region': - try: - chrom, start, stop = parse_region(item) - except ValueError: - return {'coverage': [], 'bad_region':True} + chrom, start, stop = parse_region(item) + if is_region_too_large(start, stop): - return {'coverage': [], 'region_too_large': True} + raise error.MalformedRequest('Region too large') ret['coverage'] = lookups.get_coverage_for_bases(dataset, chrom, start, stop, ds_version) elif datatype == 'transcript': @@ -211,15 +210,15 @@ def get_coverage_pos(dataset:str, datatype:str, item:str, ds_version:str=None): if datatype == 'region': chrom, start, stop = parse_region(item) + if is_region_too_large(start, stop): + raise error.MalformedRequest('Region too large') ret['start'] = start ret['stop'] = stop ret['chrom'] = chrom else: if datatype == 'gene': gene = lookups.get_gene(dataset, item) - if gene: - transcript = lookups.get_transcript(dataset, gene['canonical_transcript'], ds_version) - else: transcript = None + transcript = lookups.get_transcript(dataset, gene['canonical_transcript'], ds_version) elif datatype == 'transcript': transcript = lookups.get_transcript(dataset, item, ds_version) if transcript: @@ -343,15 +342,10 @@ def get_variant_list(dataset:str, datatype:str, item:str, 
ds_version:str=None): variants = lookups.get_variants_in_gene(dataset, item, ds_version) elif datatype == 'region': - try: - chrom, start, stop = parse_region(item) - start = int(start) - stop = int(stop) - except ValueError: - return None + chrom, start, stop = parse_region(item) if is_region_too_large(start, stop): - return {'variants': [], 'headers': [], 'region_too_large': True} + raise error.MalformedRequest('Region too large') variants = lookups.get_variants_in_region(dataset, chrom, start, stop, ds_version) elif datatype == 'transcript': @@ -453,7 +447,7 @@ def parse_region(region:str): Parse a region with either one or two positions Args: - region (str): region, e.g. `3:1000000` or `3:100100` + region (str): region, e.g. `3-100-200` or `3-100` Returns: tuple: (chrom, start, pos) @@ -465,11 +459,14 @@ def parse_region(region:str): elif len(parts) == 3: chrom, start, stop = parts else: - raise ValueError - - start = int(start) - stop = int(stop) + raise error.ParsingError(f'Unable to parse region {region}.') + try: + start = int(start) + stop = int(stop) + except ValueError: + raise error.ParsingError(f'Unable to parse region {region} (positions not integers).') + return chrom, start, stop diff --git a/backend/requirements.txt b/backend/requirements.txt index 247babe5a..298f31d4d 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,14 +1,14 @@ -Jinja2==2.10.1 -MarkupSafe==1.1.1 appdirs==1.4.3 -certifi==2019.3.9 +certifi==2019.6.16 chardet==3.0.4 idna==2.8 +Jinja2==2.10.1 +MarkupSafe==1.1.1 packaging==19.0 -peewee==2.9.2 +peewee==3.9.6 +psycopg2-binary==2.8.3 pyparsing==2.4.0 -requests==2.21.0 +requests==2.22.0 six==1.12.0 tornado==6.0.2 -urllib3==1.25.0 -psycopg2-binary==2.8.2 +urllib3==1.25.3 diff --git a/backend/route.py b/backend/route.py index ce8d53817..bf75d164f 100644 --- a/backend/route.py +++ b/backend/route.py @@ -19,16 +19,11 @@ tornado_settings = {"debug": False, "cookie_secret": swefreq_settings.cookie_secret, "login_url": 
"/login", - "google_oauth": { - "key": swefreq_settings.google_key, - "secret": swefreq_settings.google_secret - }, "elixir_oauth": { "id": swefreq_settings.elixir["id"], "secret": swefreq_settings.elixir["secret"], "redirect_uri": swefreq_settings.elixir["redirectUri"], }, - "redirect_uri": swefreq_settings.redirect_uri, "xsrf_cookies": True, "template_path": "templates/", } @@ -49,10 +44,7 @@ def __init__(self, settings): (r"/logout", auth.ElixirLogoutHandler), (r"/elixir/login", auth.ElixirLoginHandler), (r"/elixir/logout", auth.ElixirLogoutHandler), - (r"/google/login", auth.GoogleLoginHandler), - (r"/google/logout", auth.GoogleLogoutHandler), ## API Methods - (r"/api/users/elixir_transfer", auth.UpdateUserHandler), (r"/api/countries", application.CountryList), (r"/api/users/me", application.GetUser), (r"/api/users/datasets", application.UserDatasetAccess), @@ -93,9 +85,6 @@ def __init__(self, settings): self.declared_handlers.insert(-1, ("/developer/login", auth.DeveloperLoginHandler)) self.declared_handlers.insert(-1, ("/developer/quit", application.QuitHandler)) - # google oauth key - self.oauth_key = tornado_settings["google_oauth"]["key"] - # Setup the Tornado Application tornado.web.Application.__init__(self, self.declared_handlers, **settings) diff --git a/backend/settings.py b/backend/settings.py index ff925bb3a..c6543e4b5 100644 --- a/backend/settings.py +++ b/backend/settings.py @@ -1,4 +1,3 @@ - import os import sys import json @@ -23,10 +22,6 @@ json_settings = json.load(json_settings_fh) json_settings_fh.close() -google_key = json_settings["googleKey"] -google_secret = json_settings["googleSecret"] -redirect_uri = json_settings["redirectUri"] - elixir = json_settings["elixir"] ## Generated with base64.b64encode(uuid.uuid4().bytes + uuid.uuid4().bytes) diff --git a/frontend/assets/img/SLL_logo.png b/frontend/assets/img/SLL_logo.png index 5225326ff..71effa006 100644 Binary files a/frontend/assets/img/SLL_logo.png and 
b/frontend/assets/img/SLL_logo.png differ diff --git a/frontend/src/js/app.routes.js b/frontend/src/js/app.routes.js index 7b76d01d4..98962acc5 100644 --- a/frontend/src/js/app.routes.js +++ b/frontend/src/js/app.routes.js @@ -17,7 +17,6 @@ .config(["$routeProvider", "$locationProvider", "$httpProvider", function($routeProvider, $locationProvider, $httpProvider) { $routeProvider .when("/", { templateUrl: "static/templates/ng-templates/home.html" }) - .when("/login", { templateUrl: "static/templates/ng-templates/login.html" }) .when("/profile", { templateUrl: "static/templates/ng-templates/profile.html" }) .when("/error", { templateUrl: "static/templates/ng-templates/error.html" }) .when("/security_warning", { templateUrl: "static/templates/ng-templates/security-warning.html" }) diff --git a/frontend/templates/index.html b/frontend/templates/index.html index c4aa36439..0f092aa31 100644 --- a/frontend/templates/index.html +++ b/frontend/templates/index.html @@ -19,19 +19,6 @@ -[% if develop %] -[% else %] - - - -[% endif %]
@@ -57,11 +44,7 @@Examples - Gene: - PCSK9, Transcript: - ENST00000407236, Variant: - 22-46615880-T-C, Multi-allelic - variant: + PCSK9, Transcript: + ENST00000407236, Variant: + 22-46615880-T-C, Reference SNP ID: rs1800234, Region: - 22:46615715-46615880 + 22:46615715-46615880
- We can now transfer your credentials from your google account to your Elixir account! -
-