Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ParseRSSFeed Task #856

Merged
merged 10 commits into from
Mar 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ These changes are available in the [master branch](https://github.com/PrefectHQ/

### Task Library

- Add `ParseRSSFeed` for parsing a remote RSS feed - [#856](https://github.com/PrefectHQ/prefect/pull/856)

### Fixes

- Only checkpoint tasks if running in cloud - [#839](https://github.com/PrefectHQ/prefect/pull/839), [#854](https://github.com/PrefectHQ/prefect/pull/854)
Expand Down
2 changes: 2 additions & 0 deletions docs/.vuepress/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ module.exports = {
'tasks/kubernetes',
'tasks/notifications',
'tasks/operators',
'tasks/rss',
'tasks/shell',
'tasks/sqlite',
'tasks/strings'
Expand Down Expand Up @@ -159,6 +160,7 @@ module.exports = {
'task_library/kubernetes',
'task_library/notifications',
'task_library/operators',
'task_library/rss',
'task_library/shell',
'task_library/sqlite',
'task_library/strings',
Expand Down
15 changes: 15 additions & 0 deletions docs/guide/task_library/rss.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
title: RSS
---

# RSS

Tasks for interacting with RSS feeds.

## Parsing

### ParseRSSFeed <Badge text="task"/>

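Task for parsing a remote RSS feed. Provide the feed's URL via `feed_url`, either when constructing the task or when calling `run`; the parsed feed is returned as a `FeedParserDict`.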

[API Reference](/api/tasks/rss.html#prefect-tasks-rss-parserssfeed)
5 changes: 5 additions & 0 deletions docs/outline.toml
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,11 @@ title = "Notification Tasks"
module = "prefect.tasks.notifications"
classes = ["EmailTask"]

[pages.tasks.rss]
title = "RSS Tasks"
module = "prefect.tasks.rss"
classes = ["ParseRSSFeed"]

[pages.tasks.sqlite]
title = "SQLite Tasks"
module = "prefect.tasks.database"
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"google-cloud-storage >= 1.13, < 2.0",
],
"kubernetes": ["dask-kubernetes == 0.7.0", "kubernetes >= 8.0.1, < 9.0"],
"rss": ["feedparser >= 5.0.1, < 6.0"],
"templates": ["jinja2 >= 2.0, < 3.0"],
"viz": ["graphviz >= 0.8.3"],
"twitter": ["tweepy >= 3.5, < 4.0"],
Expand Down
9 changes: 9 additions & 0 deletions src/prefect/tasks/rss/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""
Tasks for interacting with RSS feeds.
"""
try:
    from prefect.tasks.rss.feed import ParseRSSFeed
except ImportError as exc:
    # Chain explicitly so the traceback reports the underlying import failure
    # as the direct cause of this more actionable message.
    raise ImportError(
        'Using `prefect.tasks.rss` requires Prefect to be installed with the "rss" extra.'
    ) from exc
42 changes: 42 additions & 0 deletions src/prefect/tasks/rss/feed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from typing import Any

import feedparser

from prefect import Task
from prefect.utilities.tasks import defaults_from_attrs


class ParseRSSFeed(Task):
    """
    Task that parses the RSS feed found at a given URL.

    Args:
        - feed_url (str, optional): A remote URL pointing to an RSS feed; can also be
            provided when the task is run
        - **kwargs (dict, optional): additional keyword arguments to pass to the Task
            constructor
    """

    def __init__(self, feed_url: str = None, **kwargs: Any):
        # Kept on the instance so `run` can use it as the default `feed_url`.
        self.feed_url = feed_url
        super().__init__(**kwargs)

    @defaults_from_attrs("feed_url")
    def run(self, feed_url: str = None) -> "feedparser.FeedParserDict":
        """
        Task run method.

        Args:
            - feed_url (str, optional): A remote URL pointing to an RSS feed

        Returns:
            - FeedParserDict: A dictionary representing the information from the
                parsed feed. The object is accessible through indexing and attributes.

        Raises:
            - ValueError: if no `feed_url` was provided (the value is `None` or empty)
        """
        if feed_url:
            return feedparser.parse(feed_url)

        raise ValueError("The feed_url must be provided.")
joshmeek marked this conversation as resolved.
Show resolved Hide resolved
3 changes: 3 additions & 0 deletions tests/tasks/rss/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import pytest

# Skip when the optional `feedparser` dependency cannot be imported.
pytest.importorskip("feedparser")
52 changes: 52 additions & 0 deletions tests/tasks/rss/test_feed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from unittest.mock import MagicMock

import pytest

from prefect.tasks.rss import ParseRSSFeed


class TestParseRSSFeedTask:
    """Tests for constructing and running `ParseRSSFeed`."""

    @staticmethod
    def _stub_parse(monkeypatch):
        """Replace `feedparser.parse` with a mock and return that mock."""
        stub = MagicMock()
        monkeypatch.setattr("prefect.tasks.rss.feed.feedparser.parse", stub)
        return stub

    def test_empty_initialization(self):
        # No URL given at construction leaves the attribute unset.
        rss_task = ParseRSSFeed()
        assert not rss_task.feed_url

    def test_filled_initialization(self):
        # A URL given at construction is stored on the task.
        rss_task = ParseRSSFeed(feed_url="test")
        assert rss_task.feed_url == "test"

    def test_empty_feed_url_raises_error(self):
        # Running without any URL must fail.
        rss_task = ParseRSSFeed()
        with pytest.raises(ValueError):
            rss_task.run()

    def test_invalid_feed_url_raises_error(self):
        # An explicit `None` at run time must fail as well.
        rss_task = ParseRSSFeed()
        with pytest.raises(ValueError):
            rss_task.run(feed_url=None)

    def test_feed_url_init_value_is_used(self, monkeypatch):
        # The constructor value is what reaches the parser when `run` gets none.
        rss_task = ParseRSSFeed(feed_url="test")
        stub = self._stub_parse(monkeypatch)

        rss_task.run()
        positional, _ = stub.call_args
        assert positional[0] == "test"

    def test_feed_url_run_value_is_used(self, monkeypatch):
        # The run-time value is what reaches the parser when none was set at init.
        rss_task = ParseRSSFeed()
        stub = self._stub_parse(monkeypatch)

        rss_task.run(feed_url="test")
        positional, _ = stub.call_args
        assert positional[0] == "test"

    def test_feed_url_value_is_replaced(self, monkeypatch):
        # A run-time value takes precedence over the constructor value.
        rss_task = ParseRSSFeed(feed_url="test")
        stub = self._stub_parse(monkeypatch)

        rss_task.run(feed_url="b_test")
        positional, _ = stub.call_args
        assert positional[0] == "b_test"