-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from ilias-ant/feature/ia/v2
v2 dataset
- Loading branch information
Showing
13 changed files
with
57,342 additions
and
15,304 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
class TakeLast:
    """Output processor that picks the last meaningful value.

    Scans the received values from the end towards the start and returns
    the first one that is neither ``None`` nor the empty string, so it is
    typically used as an output processor for single-valued fields.

    The class defines no ``__init__`` arguments and ``__call__`` takes only
    the collected values; it does not accept a Loader context.

    Example:
        >>> proc = TakeLast()
        >>> proc(['one', 'two', 'three', ''])
        'three'
    """

    def __call__(self, values):
        """Return the last non-``None``, non-empty-string item of *values*.

        ``values`` must support slicing (e.g. a list or tuple). Falsy values
        other than ``None`` and ``""`` (such as ``0``) count as valid.
        Returns ``None`` when no item qualifies or *values* is empty.
        """
        # Walk a reversed copy so the right-most qualifying value wins.
        for value in values[::-1]:
            if value is not None and value != "":
                return value
        # Nothing qualified: make the fall-through result explicit.
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,251 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "721fb2a0", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 23, | ||
"id": "a8080c04", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"articles = pd.read_csv(\"../dataset/kaggle/articles.csv\", dtype={\"publication_year\": str})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 24, | ||
"id": "39526889", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>url</th>\n", | ||
" <th>type</th>\n", | ||
" <th>publication</th>\n", | ||
" <th>title</th>\n", | ||
" <th>location</th>\n", | ||
" <th>body</th>\n", | ||
" <th>climb_year</th>\n", | ||
" <th>link_to_pdf</th>\n", | ||
" <th>author</th>\n", | ||
" <th>publication_year</th>\n", | ||
" <th>scraped_at</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>0</th>\n", | ||
" <td>https://publications.americanalpineclub.org/ar...</td>\n", | ||
" <td>Accident Reports</td>\n", | ||
" <td>ANAM</td>\n", | ||
" <td>Large Group Caught in Slide</td>\n", | ||
" <td>Washington, Silver Basic (Crystal Mountain)</td>\n", | ||
" <td>On the morning of Saturday, December 11, the f...</td>\n", | ||
" <td>2021</td>\n", | ||
" <td>https://publications.americanalpineclub.org/ar...</td>\n", | ||
" <td>Northwest Avalanche Cente</td>\n", | ||
" <td>202</td>\n", | ||
" <td>2022-11-05T20:12:06.963097</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>https://publications.americanalpineclub.org/ar...</td>\n", | ||
" <td>Accident Reports</td>\n", | ||
" <td>ANAM</td>\n", | ||
" <td>Deeply Buried Weak Layer — Familiarity Heuristic</td>\n", | ||
" <td>Colorado, Park Range, North Fork of Fish Creek</td>\n", | ||
" <td>On March 19, 2022, two backcountry skiers plan...</td>\n", | ||
" <td>2022</td>\n", | ||
" <td>https://publications.americanalpineclub.org/ar...</td>\n", | ||
" <td>Colorado Avalanche Information Cente</td>\n", | ||
" <td>202</td>\n", | ||
" <td>2022-11-05T20:12:23.675110</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
" <td>https://publications.americanalpineclub.org/ar...</td>\n", | ||
" <td>Accident Reports</td>\n", | ||
" <td>ANAM</td>\n", | ||
" <td>Long Fall on Rock — Ledge Collapsed</td>\n", | ||
" <td>Montana, Gallatin Canyon, The Watchtower</td>\n", | ||
" <td>On July 16, Greg Sievers (63) and Rob Meshew (...</td>\n", | ||
" <td>2021</td>\n", | ||
" <td>https://publications.americanalpineclub.org/ar...</td>\n", | ||
" <td>Greg Sievers and Mountain Projec</td>\n", | ||
" <td>202</td>\n", | ||
" <td>2022-11-05T20:14:12.658879</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>3</th>\n", | ||
" <td>https://publications.americanalpineclub.org/ar...</td>\n", | ||
" <td>Accident Reports</td>\n", | ||
" <td>ANAM</td>\n", | ||
" <td>Rappel Anchor Failure</td>\n", | ||
" <td>Minnesota, Taylors Falls</td>\n", | ||
" <td>On March 25, Climber 1 (31) and Climber 2 (23)...</td>\n", | ||
" <td>2021</td>\n", | ||
" <td>https://publications.americanalpineclub.org/ar...</td>\n", | ||
" <td>Climber 1 and The Editor</td>\n", | ||
" <td>202</td>\n", | ||
" <td>2022-11-05T20:14:01.360572</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>4</th>\n", | ||
" <td>https://publications.americanalpineclub.org/ar...</td>\n", | ||
" <td>Accident Reports</td>\n", | ||
" <td>ANAM</td>\n", | ||
" <td>Triggered Avalanche Catches Two Parties</td>\n", | ||
" <td>New Hampshire, Mt. Washington, Tuckerman Ravine</td>\n", | ||
" <td>On December 5, 2021, winds were light in Tucke...</td>\n", | ||
" <td>2021</td>\n", | ||
" <td>https://publications.americanalpineclub.org/ar...</td>\n", | ||
" <td>Mt. Washington Avalanche Cente</td>\n", | ||
" <td>202</td>\n", | ||
" <td>2022-11-05T20:12:26.421852</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" url type \\\n", | ||
"0 https://publications.americanalpineclub.org/ar... Accident Reports \n", | ||
"1 https://publications.americanalpineclub.org/ar... Accident Reports \n", | ||
"2 https://publications.americanalpineclub.org/ar... Accident Reports \n", | ||
"3 https://publications.americanalpineclub.org/ar... Accident Reports \n", | ||
"4 https://publications.americanalpineclub.org/ar... Accident Reports \n", | ||
"\n", | ||
" publication title \\\n", | ||
"0 ANAM Large Group Caught in Slide \n", | ||
"1 ANAM Deeply Buried Weak Layer — Familiarity Heuristic \n", | ||
"2 ANAM Long Fall on Rock — Ledge Collapsed \n", | ||
"3 ANAM Rappel Anchor Failure \n", | ||
"4 ANAM Triggered Avalanche Catches Two Parties \n", | ||
"\n", | ||
" location \\\n", | ||
"0 Washington, Silver Basic (Crystal Mountain) \n", | ||
"1 Colorado, Park Range, North Fork of Fish Creek \n", | ||
"2 Montana, Gallatin Canyon, The Watchtower \n", | ||
"3 Minnesota, Taylors Falls \n", | ||
"4 New Hampshire, Mt. Washington, Tuckerman Ravine \n", | ||
"\n", | ||
" body climb_year \\\n", | ||
"0 On the morning of Saturday, December 11, the f... 2021 \n", | ||
"1 On March 19, 2022, two backcountry skiers plan... 2022 \n", | ||
"2 On July 16, Greg Sievers (63) and Rob Meshew (... 2021 \n", | ||
"3 On March 25, Climber 1 (31) and Climber 2 (23)... 2021 \n", | ||
"4 On December 5, 2021, winds were light in Tucke... 2021 \n", | ||
"\n", | ||
" link_to_pdf \\\n", | ||
"0 https://publications.americanalpineclub.org/ar... \n", | ||
"1 https://publications.americanalpineclub.org/ar... \n", | ||
"2 https://publications.americanalpineclub.org/ar... \n", | ||
"3 https://publications.americanalpineclub.org/ar... \n", | ||
"4 https://publications.americanalpineclub.org/ar... \n", | ||
"\n", | ||
" author publication_year \\\n", | ||
"0 Northwest Avalanche Cente 202 \n", | ||
"1 Colorado Avalanche Information Cente 202 \n", | ||
"2 Greg Sievers and Mountain Projec 202 \n", | ||
"3 Climber 1 and The Editor 202 \n", | ||
"4 Mt. Washington Avalanche Cente 202 \n", | ||
"\n", | ||
" scraped_at \n", | ||
"0 2022-11-05T20:12:06.963097 \n", | ||
"1 2022-11-05T20:12:23.675110 \n", | ||
"2 2022-11-05T20:14:12.658879 \n", | ||
"3 2022-11-05T20:14:01.360572 \n", | ||
"4 2022-11-05T20:12:26.421852 " | ||
] | ||
}, | ||
"execution_count": 24, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"articles.head()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 25, | ||
"id": "eed296f5", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"['url',\n", | ||
" 'type',\n", | ||
" 'publication',\n", | ||
" 'title',\n", | ||
" 'location',\n", | ||
" 'body',\n", | ||
" 'climb_year',\n", | ||
" 'link_to_pdf',\n", | ||
" 'author',\n", | ||
" 'publication_year',\n", | ||
" 'scraped_at']" | ||
] | ||
}, | ||
"execution_count": 25, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"list(articles.columns)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.10" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.