-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Trm 30823 UniParc proteome redundant fasta #249
base: main
Are you sure you want to change the base?
Changes from all commits
5ffb871
805dd62
d4fb112
5d93997
65f50d6
5f11138
ac82972
01e806c
7a6b247
2cb4648
cad6c55
d3c669a
fc381eb
149a8cf
01539d9
335954d
97d68a9
a13cefc
606d6ef
47003db
6e1c554
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,92 +6,99 @@ | |
import org.uniprot.core.util.EnumDisplay; | ||
|
||
public enum UniParcDatabase implements Database, EnumDisplay { | ||
EG_BACTERIA(900, "EnsemblBacteria", true, "https://www.ensemblgenomes.org/id/%id"), | ||
EG_FUNGI(1000, "EnsemblFungi", true, "https://www.ensemblgenomes.org/id/%id"), | ||
EG_METAZOA(1100, "EnsemblMetazoa", true, "https://www.ensemblgenomes.org/id/%id"), | ||
EG_PLANTS(1200, "EnsemblPlants", true, "https://www.ensemblgenomes.org/id/%id"), | ||
EG_PROTISTS(1300, "EnsemblProtists", true, "https://www.ensemblgenomes.org/id/%id"), | ||
|
||
EMBL(300, "EMBL", true, "https://www.ebi.ac.uk/ena/browser/view/%id"), | ||
EMBL_CON(400, "EMBL_CON", true, "https://www.ebi.ac.uk/ena/browser/view/%id"), | ||
EMBL_TPA(500, "EMBL_TPA", false, "https://www.ebi.ac.uk/ena/browser/view/%id"), | ||
EMBL_TSA(600, "EMBL_TSA", true, "https://www.ebi.ac.uk/ena/browser/view/%id"), | ||
EMBLWGS(700, "EMBLWGS", true, "https://www.ebi.ac.uk/ena/browser/view/%id"), | ||
|
||
ENSEMBL_VERTEBRATE(800, "Ensembl", true, "https://www.ensembl.org/id/%id"), | ||
ENSEMBL_RAPID(1350, "EnsemblRapid", true, "https://rapid.ensembl.org/id/%id"), | ||
|
||
EPO(1400, "EPO", true, "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=epo_prt&id=%id"), | ||
FLYBASE(1500, "FlyBase", true, "https://flybase.org/reports/%id.html"), | ||
EG_BACTERIA(900, "EnsemblBacteria", true, true, "https://www.ensemblgenomes.org/id/%id"), | ||
EG_FUNGI(1000, "EnsemblFungi", true, true, "https://www.ensemblgenomes.org/id/%id"), | ||
EG_METAZOA(1100, "EnsemblMetazoa", true, true, "https://www.ensemblgenomes.org/id/%id"), | ||
EG_PLANTS(1200, "EnsemblPlants", true, true, "https://www.ensemblgenomes.org/id/%id"), | ||
EG_PROTISTS(1300, "EnsemblProtists", true, true, "https://www.ensemblgenomes.org/id/%id"), | ||
|
||
EMBL(300, "EMBL", true, true, "https://www.ebi.ac.uk/ena/browser/view/%id"), | ||
EMBL_CON(400, "EMBL_CON", true, true, "https://www.ebi.ac.uk/ena/browser/view/%id"), | ||
EMBL_TPA(500, "EMBL_TPA", false, true, "https://www.ebi.ac.uk/ena/browser/view/%id"), | ||
EMBL_TSA(600, "EMBL_TSA", true, true, "https://www.ebi.ac.uk/ena/browser/view/%id"), | ||
EMBLWGS(700, "EMBLWGS", true, true, "https://www.ebi.ac.uk/ena/browser/view/%id"), | ||
|
||
ENSEMBL_VERTEBRATE(800, "Ensembl", true, true, "https://www.ensembl.org/id/%id"), | ||
ENSEMBL_RAPID(1350, "EnsemblRapid", true, true, "https://rapid.ensembl.org/id/%id"), | ||
|
||
EPO(1400, "EPO", true, false, "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=epo_prt&id=%id"), | ||
FLYBASE(1500, "FlyBase", true,false, "https://flybase.org/reports/%id.html"), | ||
FUSION_GDB( | ||
1550, | ||
"FusionGDB", | ||
true, | ||
false, | ||
"https://compbio.uth.edu/FusionGDB2/gene_search_result.cgi?type=quick_search&quick_search=%id"), | ||
|
||
H_INV(1600, "H-InvDB", false), | ||
IPI(1700, "IPI", false), | ||
H_INV(1600, "H-InvDB",false, false), | ||
IPI(1700, "IPI",false, false), | ||
|
||
JPO(1800, "JPO", true, "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=jpo_prt&id=%id"), | ||
KIPO(1900, "KIPO", true, "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=kipo_prt&id=%id"), | ||
PATRIC(2000, "PATRIC", true, "https://www.patricbrc.org/view/Feature/%id"), | ||
JPO(1800, "JPO", true,false, "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=jpo_prt&id=%id"), | ||
KIPO(1900, "KIPO", true,false, "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=kipo_prt&id=%id"), | ||
PATRIC(2000, "PATRIC", true,false, "https://www.patricbrc.org/view/Feature/%id"), | ||
PDB( | ||
2100, | ||
"PDB", | ||
true, | ||
false, | ||
"https://www.ebi.ac.uk/pdbe/entry/pdb/%id"), // need to remove the chain, eg "4q8n_A", | ||
// just use "4q8n" as id | ||
PIR(2200, "PIR", false), | ||
PIR(2200, "PIR",false, false), | ||
|
||
PIRARC(2300, "PIRARC", false), | ||
PRF(2400, "PRF", false, "http://www.prf.or.jp/cgi-bin/seqget.pl?id=%id"), | ||
REFSEQ(2500, "RefSeq", true, "https://www.ncbi.nlm.nih.gov/protein/%id"), | ||
REMTREMBL(2600, "REMTREMBL", false), | ||
PIRARC(2300, "PIRARC",false, false), | ||
PRF(2400, "PRF",false, false, "http://www.prf.or.jp/cgi-bin/seqget.pl?id=%id"), | ||
REFSEQ(2500, "RefSeq", true, true,"https://www.ncbi.nlm.nih.gov/protein/%id"), | ||
REMTREMBL(2600, "REMTREMBL",false, false), | ||
SEED( | ||
2700, | ||
"SEED", | ||
true, | ||
false, | ||
"https://pubseed.theseed.org/seedviewer.cgi?page=Annotation&feature=%id"), | ||
|
||
SGD(2800, "SGD", true, "https://www.yeastgenome.org/locus/%id"), | ||
SWISSPROT(100, "UniProtKB/Swiss-Prot", true, "https://www.uniprot.org/uniprot/%id"), | ||
SGD(2800, "SGD", true, false,"https://www.yeastgenome.org/locus/%id"), | ||
SWISSPROT(100, "UniProtKB/Swiss-Prot", true, false,"https://www.uniprot.org/uniprot/%id"), | ||
SWISSPROT_VARSPLIC( | ||
200, | ||
"UniProtKB/Swiss-Prot protein isoforms", | ||
true, | ||
false, | ||
"https://www.uniprot.org/uniprot/%id"), // swissprot isoform | ||
TAIR_ARABIDOPSIS( | ||
2900, | ||
"TAIR", | ||
true, | ||
false, | ||
"https://www.arabidopsis.org/servlets/TairObject?type=aa_sequence&name=%id"), | ||
TREMBL(100, "UniProtKB/TrEMBL", true, "https://www.uniprot.org/uniprot/%id"), | ||
TREMBL(100, "UniProtKB/TrEMBL", true, false, "https://www.uniprot.org/uniprot/%id"), | ||
|
||
TREMBLNEW(3000, "TREMBLNEW", false), | ||
TREMBL_VARSPLIC(3100, "TREMBL_VARSPLIC", false), | ||
TROME(3200, "TROME", true), // no link | ||
UNIMES(3300, "UNIMES", false), | ||
USPTO(3400, "USPTO", true, "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=uspto_prt&id=%id"), | ||
TREMBLNEW(3000, "TREMBLNEW", false, false), | ||
TREMBL_VARSPLIC(3100, "TREMBL_VARSPLIC", false, false), | ||
TROME(3200, "TROME", true, false), // no link | ||
UNIMES(3300, "UNIMES", false, false), | ||
USPTO(3400, "USPTO", true, false, "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=uspto_prt&id=%id"), | ||
|
||
VECTORBASE(3500, "VectorBase", false), | ||
VEGA(3600, "VEGA", true, "https://vega.sanger.ac.uk/id/%id"), | ||
WORMBASE_PARASITE(3700, "WBParaSite", true, "https://parasite.wormbase.org/id/%id"), | ||
WORMBASE(3800, "WormBase", true, "https://wormbase.org/db/seq/protein?name=%id;class=CDS"); | ||
VECTORBASE(3500, "VectorBase", false, false), | ||
VEGA(3600, "VEGA", true, false, "https://vega.sanger.ac.uk/id/%id"), | ||
WORMBASE_PARASITE(3700, "WBParaSite", true, true, "https://parasite.wormbase.org/id/%id"), | ||
WORMBASE(3800, "WormBase", true, true,"https://wormbase.org/db/seq/protein?name=%id;class=CDS"); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How do we know whether a database is a source? Could you please add the source of this information to this class? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Jie provided the list of databases that are sources in UniParc. |
||
private final String displayName; | ||
private final boolean alive; | ||
private final String url; | ||
private final int index; | ||
private final boolean source; | ||
|
||
UniParcDatabase(int index, String displayName, boolean alive) { | ||
this(index, displayName, alive, ""); | ||
UniParcDatabase(int index, String displayName, boolean alive, boolean source) { | ||
this(index, displayName, alive, source, ""); | ||
} | ||
|
||
UniParcDatabase(int index, String displayName, boolean alive, String url) { | ||
UniParcDatabase(int index, String displayName, boolean alive, boolean source,String url) { | ||
this.index = index; | ||
this.displayName = displayName; | ||
this.alive = alive; | ||
this.url = url; | ||
this.source = source; | ||
} | ||
|
||
public int getIndex() { | ||
|
@@ -110,6 +117,10 @@ public String getUrl() { | |
return url; | ||
} | ||
|
||
public boolean isSource() { | ||
return source; | ||
} | ||
|
||
public static @Nonnull UniParcDatabase typeOf(@Nonnull String displayName) { | ||
return EnumDisplay.typeOf(displayName, UniParcDatabase.class); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package org.uniprot.core.parser.fasta; | ||
|
||
/**
 * Helpers for laying out sequence text for FASTA output.
 */
public class FastaUtils {

    /** Number of sequence characters written per line by {@link #parseSequence(String)}. */
    public static final int DEFAULT_LINE_WIDTH = 60;

    /**
     * Wraps a sequence into lines of {@value #DEFAULT_LINE_WIDTH} characters.
     *
     * <p>A line feed is inserted between consecutive lines only; no trailing line feed is
     * added, so a sequence of exactly 60 characters is returned unchanged.
     *
     * @param sequence the raw sequence text; must not be {@code null}
     * @return the sequence with {@code "\n"} inserted after every 60th character that is
     *     followed by at least one more character
     * @throws NullPointerException if {@code sequence} is {@code null}
     */
    public static String parseSequence(String sequence) {
        return parseSequence(sequence, DEFAULT_LINE_WIDTH);
    }

    /**
     * Wraps a sequence into lines of at most {@code lineWidth} characters.
     *
     * <p>A line feed is inserted between consecutive lines only; no trailing line feed is
     * added.
     *
     * @param sequence the raw sequence text; must not be {@code null}
     * @param lineWidth maximum number of characters per line; must be positive
     * @return the wrapped sequence, or an empty string when {@code sequence} is empty
     * @throws NullPointerException if {@code sequence} is {@code null}
     * @throws IllegalArgumentException if {@code lineWidth} is zero or negative
     */
    public static String parseSequence(String sequence, int lineWidth) {
        if (lineWidth <= 0) {
            throw new IllegalArgumentException("lineWidth must be positive: " + lineWidth);
        }
        int length = sequence.length();
        // Room for every character plus one separator per full line.
        StringBuilder sb = new StringBuilder(length + length / lineWidth);
        int start = 0;
        while (start < length) {
            if (start > 0) {
                sb.append('\n');
            }
            // Computed from the remaining length so the sum cannot overflow.
            int end = start + Math.min(lineWidth, length - start);
            sb.append(sequence, start, end);
            start = end;
        }
        return sb.toString();
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should add the formula and an example of how we construct the source: `<id>:<proteome ID>:<component>`, e.g. ABC01415:UP000005640:Chromosome 1