forked from zelandiya/RAKE-tutorial
-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
return [(k[1], k[0]) for k in sorted_keywords]
- Loading branch information
1 parent
fa6c24e
commit a2aa1dd
Showing
3 changed files
with
38 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,18 @@ | ||
.DS_STORE | ||
# Mac/OSX | ||
.DS_Store | ||
|
||
# Windows | ||
Thumbs.db | ||
|
||
# MS VS code | ||
.vscode/settings.json | ||
|
||
# Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# Production | ||
**/staticfiles | ||
*.pyc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from rake_nlp import Rake | ||
|
||
# | ||
# Stop word list from SMART (Salton, 1971), available at ftp://ftp.cs.cornell.edu/pub/smart/english.stop | ||
# | ||
stoplist = ["a", "a's", "able", "about", "above", "according", "accordingly", "across", "actually", "after", "afterwards", "again", "against", "ain't", "all", "allow", "allows", "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "an", "and", "another", "any", "anybody", "anyhow", "anyone", "anything", "anyway", "anyways", "anywhere", "apart", "appear", "appreciate", "appropriate", "are", "aren't", "around", "as", "aside", "ask", "asking", "associated", "at", "available", "away", "awfully", "b", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "both", "brief", "but", "by", "c", "c'mon", "c's", "came", "can", "can't", "cannot", "cant", "cause", "causes", "certain", "certainly", "changes", "clearly", "co", "com", "come", "comes", "concerning", "consequently", "consider", "considering", "contain", "containing", "contains", "corresponding", "could", "couldn't", "course", "currently", "d", "definitely", "described", "despite", "did", "didn't", "different", "do", "does", "doesn't", "doing", "don't", "done", "down", "downwards", "during", "e", "each", "edu", "eg", "eight", "either", "else", "elsewhere", "enough", "entirely", "especially", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "ex", "exactly", "example", "except", "f", "far", "few", "fifth", "first", "five", "followed", "following", "follows", "for", "former", "formerly", "forth", "four", "from", "further", "furthermore", "g", "get", "gets", "getting", "given", "gives", "go", "goes", "going", "gone", "got", "gotten", "greetings", "h", "had", "hadn't", "happens", "hardly", "has", "hasn't", "have", "haven't", "having", "he", "he's", "hello", "help", "hence", "her", "here", "here's", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "hi", "him", "himself", "his", "hither", 
"hopefully", "how", "howbeit", "however", "i", "i'd", "i'll", "i'm", "i've", "ie", "if", "ignored", "immediate", "in", "inasmuch", "inc", "indeed", "indicate", "indicated", "indicates", "inner", "insofar", "instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll", "it's", "its", "itself", "j", "just", "k", "keep", "keeps", "kept", "know", "knows", "known", "l", "last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "let's", "like", "liked", "likely", "little", "look", "looking", "looks", "ltd", "m", "mainly", "many", "may", "maybe", "me", "mean", "meanwhile", "merely", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "n", "name", "namely", "nd", "near", "nearly", "necessary", "need", "needs", "neither", "never", "nevertheless", "new", "next", "nine", "no", "nobody", "non", "none", "noone", "nor", "normally", "not", "nothing", "novel", "now", "nowhere", "o", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "overall", "own", "p", "particular", "particularly", "per", "perhaps", "placed", "please", "plus", "possible", "presumably", "probably", "provides", "q", "que", "quite", "qv", "r", "rather", "rd", "re", "really", "reasonably", "regarding", "regardless", "regards", "relatively", "respectively", "right", "s", "said", "same", "saw", "say", "saying", "says", "second", "secondly", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self", "selves", "sensible", "sent", "serious", "seriously", "seven", "several", "shall", "she", "should", "shouldn't", "since", "six", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry", "specified", "specify", "specifying", "still", "sub", "such", "sup", "sure", "t", "t's", "take", "taken", "tell", "tends", "th", "than", "thank", 
"thanks", "thanx", "that", "that's", "thats", "the", "their", "theirs", "them", "themselves", "then", "thence", "there", "there's", "thereafter", "thereby", "therefore", "therein", "theres", "thereupon", "these", "they", "they'd", "they'll", "they're", "they've", "think", "third", "this", "thorough", "thoroughly", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "took", "toward", "towards", "tried", "tries", "truly", "try", "trying", "twice", "two", "u", "un", "under", "unfortunately", "unless", "unlikely", "until", "unto", "up", "upon", "us", "use", "used", "useful", "uses", "using", "usually", "uucp", "v", "value", "various", "very", "via", "viz", "vs", "w", "want", "wants", "was", "wasn't", "way", "we", "we'd", "we'll", "we're", "we've", "welcome", "well", "went", "were", "weren't", "what", "what's", "whatever", "when", "whence", "whenever", "where", "where's", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "who's", "whoever", "whole", "whom", "whose", "why", "will", "willing", "wish", "with", "within", "without", "won't", "wonder", "would", "would", "wouldn't", "x", "y", "yes", "yet", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves", "z", "zero"] | ||
|
||
text = "Background and aims. The ATP-binding cassette (ABC) transporter family transports various molecules across the enterocytes in the gut protecting the intestine against potentially harmful substances. Moreover, ABC transporters are involved in mucosal immune defence through interaction with cytokines. The study aimed to assess whether polymorphisms in ABCB1, ABCC2 and ABCG2 were associated with risk of colorectal cancer (CRC) and to investigate gene-environment (dietary factors, smoking and use of non-steroidal anti-inflammatory drugs) and gene-gene interactions between previously studied polymorphisms in IL1B and IL10 and ABC transporter genes in relation to CRC risk. Materials and methods. We used a Danish prospective case-cohort study of 1010 CRC cases and 1829 randomly selected participants from the Danish Diet, Cancer and Health cohort. Incidence rate ratios were calculated based on Cox proportional hazards model. Results. None of the polymorphisms were associated with CRC, but ABCB1 and ABCG2 haplotypes were associated with risk of CRC. ABCB1/rs1045642 interacted with intake of cereals and fiber (p-Value for interaction (Pint) = 0.001 and 0.01, respectively). In a three-way analysis, both ABCB1/rs1045642 and ABCG2/rs2231137 in combination with IL10/rs3024505 interacted with fiber intake in relation to risk of CRC (Pint = 0.0007 and 0.009). Conclusions. Our results suggest that the ABC transporters P-glycoprotein/multidrug resistance 1 and BRCP, in cooperation with IL-10, are involved in the biological mechanism underlying the protective effect of fiber intake in relation to CRC. These results should be replicated in other cohorts to rule out chance findings. © 2015 Informa Healthcare. Background: The etiology of the inflammatory bowel diseases, including ulcerative colitis (UC), remains incompletely explained. We hypothesized that an analysis of the UC colon proteome could reveal novel insights into the disease etiology. 
Methods: Mucosal colon biopsies were taken by endoscopy from noninflamed tissue of 10 patients with UC and 10 controls. The biopsies were either snap-frozen for protein analysis or prepared for histology. The protein content of the biopsies was characterized by high-throughput gel-free quantitative proteomics, and biopsy histology was analyzed by light microscopy and confocal microscopy. Results: We identified and quantified 5711 different proteins with proteomics. The abundance of the proteins calprotectin and lactotransferrin in the tissue correlated with the degree of tissue inflammation as determined by histology. However, fecal calprotectin did not correlate. Forty-six proteins were measured with a statistically significant differences in abundances between the UC colon tissue and controls. Eleven of the proteins with increased abundances in the UC biopsies were associated with neutrophils and neutrophil extracellular traps. The findings were validated by microscopy, where an increased abundance of neutrophils and the presence of neutrophil extracellular traps by extracellular DNA present in the UC colon tissue were confirmed. Conclusions: Neutrophils, induced neutrophil extracellular traps, and several proteins that play a part in innate immunity are all increased in abundance in the morphologically normal colon mucosa from patients with UC." | ||
|
||
r = Rake(stoplist, max_words_length=2) | ||
|
||
keywords = r.run(text) | ||
|
||
# Print the extracted keywords | ||
print("Keywords:", keywords) |