Skip to content

Commit

Permalink
Test feed as a suite
Browse files Browse the repository at this point in the history
  • Loading branch information
nahuelhds committed May 25, 2020
1 parent f514e67 commit 9489515
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 102 deletions.
2 changes: 1 addition & 1 deletion diffengine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def setup_db():

if isinstance(database_handler, SqliteDatabase):
try:
migrator = SqliteMigrator(database)
migrator = SqliteMigrator(database_handler)
migrate(migrator.add_index("entryversion", ("url",), False))
except OperationalError as e:
logging.debug(e)
Expand Down
198 changes: 97 additions & 101 deletions test_diffengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
UA,
TwitterHandler,
SendgridHandler,
_fingerprint,
)
from diffengine.config import load_config
from diffengine.text_builder import build_text
Expand All @@ -45,130 +46,125 @@
if os.path.isdir(test_home):
shutil.rmtree(test_home)

generate_config(test_home, {"db": "sqlite:///:memory:"})
# set things up but disable prompting for initial feed
init(test_home, prompt=False)

# the sequence of these tests is significant


def test_version():
    """Check that ``setup.version`` occurs inside the ``UA`` string."""
    package_version = setup.version
    assert package_version in UA


def test_feed():
    """Create the "Test" feed, pull its latest entries, and check the result.

    The feed is expected to be flagged as created and to hold exactly ten
    entries after ``get_latest`` has run.
    """
    feed = Feed.create(name="Test", url="https://inkdroid.org/feed.xml")
    feed.get_latest()
    assert feed.created
    entry_count = len(feed.entries)
    assert entry_count == 10


def test_entry():
    """Fetch the latest version of the first entry of the test feed.

    The feed is looked up by URL rather than created here, so it must
    already be present in the database when this test runs.
    """
    f = Feed.get(Feed.url == "https://inkdroid.org/feed.xml")
    e = f.entries[0]
    v = e.get_latest()
    # isinstance is the idiomatic type check (and tolerates subclasses)
    assert isinstance(v, EntryVersion)
    assert len(e.versions) == 1


def test_diff():
    """Alter a stored version's summary and check that a diff is produced.

    The first stored version of the first entry is truncated and saved; the
    test then expects ``get_latest`` to return a new ``EntryVersion`` that
    carries a diff, an archive URL, and generated diff artefacts on disk.
    """
    f = Feed.get(Feed.url == "https://inkdroid.org/feed.xml")
    e = f.entries[0]
    v1 = e.versions[0]

    # remove some characters from the version
    v1.summary = v1.summary[0:-20]
    v1.save()

    v2 = e.get_latest()
    assert isinstance(v2, EntryVersion)
    assert v2.diff
    assert v2.archive_url is not None
    assert (
        re.match(r"^https://web.archive.org/web/[0-9]+/.+$", v2.archive_url)
        is not None
    )

    diff = v2.diff
    assert diff.old == v1
    assert diff.new == v2
    assert os.path.isfile(diff.html_path)
    assert os.path.isfile(diff.screenshot_path)
    assert os.path.isfile(diff.thumbnail_path)

    # check that the url for the internet archive diff is working
    # (raw string: "\d" in a plain string literal is an invalid escape sequence)
    assert re.match(r"^https://web.archive.org/web/diff/\d+/\d+/https.+$", diff.url)


def test_html_diff():
f = Feed.get(Feed.url == "https://inkdroid.org/feed.xml")
e = f.entries[0]
def test_fingerprint():
    """Check ``_fingerprint`` against a table of input/expected pairs."""
    expectations = [
        ("foo bar", "foobar"),
        ("foo bar\nbaz", "foobarbaz"),
        ("foo<br>bar", "foobar"),
        ("foo'bar", "foobar"),
        ("foo’bar", "foobar"),
    ]
    for raw_text, expected in expectations:
        assert _fingerprint(raw_text) == expected

# add a change to the summary that htmldiff ignores
v1 = e.versions[-1]
parts = v1.summary.split()
parts.insert(2, "<br> \n")
v1.summary = " ".join(parts)
v1.save()

v2 = e.get_latest()
assert v2 is None
class FeedTest(TestCase):
feed = None
entry = None
version = None

def setUp(self) -> None:
    """Prepare config, database, feed, first entry and its latest version.

    Stores the results on ``self.feed``, ``self.entry`` and ``self.version``
    for the individual test methods to use.
    """
    generate_config(test_home, {"db": "sqlite:///:memory:"})
    # set things up but disable prompting for initial feed
    init(test_home, prompt=False)

    self.feed = feed = Feed.create(
        name="Test", url="https://inkdroid.org/feed.xml"
    )
    feed.get_latest()

    self.entry = first_entry = feed.entries[0]
    self.version = first_entry.get_latest()

def test_feed(self):
assert self.feed.created
assert len(self.feed.entries) == 10

def test_entry(self):
    """The version stored on ``self.version`` is the entry's only version."""
    # isinstance is the idiomatic type check (and tolerates subclasses)
    assert isinstance(self.version, EntryVersion)
    assert len(self.entry.versions) == 1

def test_diff(self):
e = self.entry
v1 = e.versions[0]

# remove some characters from the version
v1.summary = v1.summary[0:-20]
v1.save()

v2 = e.get_latest()
assert type(v2) == EntryVersion
assert v2.diff
assert v2.archive_url is not None
assert (
re.match("^https://web.archive.org/web/[0-9]+/.+$", v2.archive_url)
is not None
)

def test_many_to_many():
diff = v2.diff
assert diff.old == v1
assert diff.new == v2
assert os.path.isfile(diff.html_path)
assert os.path.isfile(diff.screenshot_path)
assert os.path.isfile(diff.thumbnail_path)

# these two feeds share this entry, we want diffengine to support
# multiple feeds for the same content, which is fairly common at
# large media organizations with multiple topical feeds
url = "https://www.washingtonpost.com/classic-apps/how-a-week-of-tweets-by-trump-stoked-anxiety-moved-markets-and-altered-plans/2017/01/07/38be8e64-d436-11e6-9cb0-54ab630851e8_story.html"
# check that the url for the internet archive diff is working
assert re.match(
"^https://web.archive.org/web/diff/\\d+/\\d+/https.+$", diff.url
)

f1 = Feed.create(
name="feed1",
url="https://raw.githubusercontent.com/DocNow/diffengine/master/test-data/feed1.xml",
)
f1.get_latest()
def test_html_diff(self):
e = self.entry

f2 = Feed.create(
name="feed2",
url="https://raw.githubusercontent.com/DocNow/diffengine/master/test-data/feed2.xml",
)
f2.get_latest()
# add a change to the summary that htmldiff ignores
v1 = e.versions[-1]
parts = v1.summary.split()
parts.insert(2, "<br> \n")
v1.summary = " ".join(parts)
v1.save()

assert f1.entries.where(Entry.url == url).count() == 1
assert f2.entries.where(Entry.url == url).count() == 1
v2 = e.get_latest()
assert v2 is None

e = Entry.get(Entry.url == url)
assert FeedEntry.select().where(FeedEntry.entry == e).count() == 2
def test_many_to_many(self):

# these two feeds share this entry, we want diffengine to support
# multiple feeds for the same content, which is fairly common at
# large media organizations with multiple topical feeds
url = "https://www.washingtonpost.com/classic-apps/how-a-week-of-tweets-by-trump-stoked-anxiety-moved-markets-and-altered-plans/2017/01/07/38be8e64-d436-11e6-9cb0-54ab630851e8_story.html"

def test_bad_feed_url():
    """Fetching a feed with a bad URL must not raise.

    The test passes simply by reaching the end without an exception.
    """
    # bad feed url shouldn't cause a fatal exception
    bad_feed = Feed.create(name="feed1", url="http://example.org/feedfeed.xml")
    bad_feed.get_latest()
    assert True
f1 = Feed.create(
name="feed1",
url="https://raw.githubusercontent.com/DocNow/diffengine/master/test-data/feed1.xml",
)
f1.get_latest()

f2 = Feed.create(
name="feed2",
url="https://raw.githubusercontent.com/DocNow/diffengine/master/test-data/feed2.xml",
)
f2.get_latest()

def test_whitespace():
f = Feed.get(url="https://inkdroid.org/feed.xml")
e = f.entries[0]
v1 = e.versions[-1]
assert f1.entries.where(Entry.url == url).count() == 1
assert f2.entries.where(Entry.url == url).count() == 1

# add some whitespace
v1.summary = v1.summary + "\n\n "
v1.save()
e = Entry.get(Entry.url == url)
assert FeedEntry.select().where(FeedEntry.entry == e).count() == 2

# whitespace should not count when diffing
v2 = e.get_latest()
assert v2 == None
def test_bad_feed_url(self):
    """Fetching a feed with a bad URL must not raise.

    The test passes simply by reaching the end without an exception.
    """
    # bad feed url shouldn't cause a fatal exception
    bad_feed = Feed.create(name="feed1", url="http://example.org/feedfeed.xml")
    bad_feed.get_latest()
    assert True

def test_whitespace(self):
e = self.feed.entries[0]
v1 = e.versions[-1]

def test_fingerprint():
from diffengine import _fingerprint
# add some whitespace
v1.summary = v1.summary + "\n\n "
v1.save()

assert _fingerprint("foo bar") == "foobar"
assert _fingerprint("foo bar\nbaz") == "foobarbaz"
assert _fingerprint("foo<br>bar") == "foobar"
assert _fingerprint("foo'bar") == "foobar"
assert _fingerprint("foo’bar") == "foobar"
# whitespace should not count when diffing
v2 = e.get_latest()
assert v2 == None


class EnvVarsTest(TestCase):
Expand Down

0 comments on commit 9489515

Please sign in to comment.