Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the finder return only published documents #158

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
12 changes: 12 additions & 0 deletions .github/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,18 @@ keep adding things here, for the newcomers.
different behavior, **we use the same classes for both posts
and events**.

* All content pages from prose are published, but not all are
linked. This is a small nuance from the way we build the site.
Events and posts can be published or unpublished, and their
markdown files in prose have a `published` field in the
frontmatter. They are linked automatically if they have a
`published: true`. Summaries and info pages are always
published, so their markdown files in prose don't have a
`published` field in the frontmatter. They have to be linked
manually, for example, in the main menu. All content is
published because we add the unpublished posts/events and
unlinked info pages to the page used for scraping.

* Summaries are a particular case of
`DocumentWithFrontmatter`. For example, they are always
published so
Expand Down
14 changes: 10 additions & 4 deletions app.rb
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@
)
end
@page = Page::Homepage.new(
posts: posts_finder.find_all,
events: events_finder.find_all,
posts: posts_finder.find_published,
events: events_finder.find_published,
featured_people: featured_people,
quote: quote_finder.find_single
)
Expand All @@ -101,7 +101,7 @@

get '/blog/' do
finder = Document::Finder.new(pattern: posts_pattern, baseurl: '/blog/')
@page = Page::Posts.new(posts: finder.find_all, title: 'Blog')
@page = Page::Posts.new(posts: finder.find_published, title: 'Blog')
erb :posts
end

Expand All @@ -119,7 +119,7 @@

get '/events/' do
finder = Document::Finder.new(pattern: events_pattern, baseurl: '/events/')
@page = Page::Posts.new(posts: finder.find_all, title: 'Events')
@page = Page::Posts.new(posts: finder.find_published, title: 'Events')
erb :posts
end

Expand Down Expand Up @@ -295,5 +295,11 @@
get '/scraper-start-page.html' do
@people = all_people
@places = mapit.places_of_type('FED') + mapit.places_of_type('SEN') + mapit.places_of_type('STA')
finder = Document::Finder.new(pattern: posts_pattern, baseurl: '/blog/')
@posts = finder.find_unpublished
finder = Document::Finder.new(pattern: events_pattern, baseurl: '/events/')
@events = finder.find_unpublished
finder = Document::Finder.new(pattern: "#{info_dir}/*.md", baseurl: '/info/')
@infopages = finder.find_published
erb :scraper_start_page, layout: false
end
18 changes: 13 additions & 5 deletions lib/document/finder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,23 @@ def initialize(pattern:, baseurl:)
def find_single
raise_error_if_multiple_files_found
raise_error_if_no_files_found
find_all.first
find_published.first
end

def find_or_empty
none? ? create_empty_document : find_all.first
none? ? create_empty_document : find_published.first
end

def find_all
filenames.map { |filename| create_document(filename) }
def find_published
create_documents.select(&:published?)
end

def find_unpublished
create_documents.reject(&:published?)
end

def find_featured
find_all.select(&:featured?)
create_documents.select(&:featured?)
end

def multiple?
Expand All @@ -49,6 +53,10 @@ def raise_error_if_no_files_found
raise Document::NoFilesFoundError, message if none?
end

def create_documents
@create_documents ||= filenames.map { |filename| create_document(filename) }
end

def filenames
@filenames ||= Dir.glob(pattern).sort
end
Expand Down
45 changes: 36 additions & 9 deletions tests/document/finder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,44 @@
let(:finder) { Document::Finder.new(pattern: "#{filename}.md", baseurl: '/path/') }

it 'finds a single document' do
Dir.stub :glob, [filename] do
Dir.stub :glob, [new_tempfile('')] do
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't have to change, does it? If you want to change to using new_tempfile for all of these consistently, I'd make that a separate preparatory commit, and also change the remaining use of filename.

Copy link
Contributor Author

@octopusinvitro octopusinvitro Apr 19, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, this is because now things have changed: when you call find_single you are also calling find_all, so with the introduction of select(&:published?), the method find_all now has to look at the contents of the file, which before it didn't have to, so this test fails. But I wouldn't create a temp file for the other tests if it is not needed... 🤔 What do you think?

finder.find_single.class.must_equal(Document::MarkdownWithFrontmatter)
end
end

it 'finds several documents' do
Dir.stub :glob, [new_tempfile(''), new_tempfile('')] do
finder.find_published.count.must_equal(2)
end
end

it 'returns only published documents' do
published = '---
published: true
---'
nofield = '---
---'
unpublished = '---
published: false
---'
filenames = [new_tempfile(published), new_tempfile(nofield), new_tempfile(unpublished)]
Dir.stub :glob, filenames do
finder.find_published.count.must_equal(2)
end
end

it 'can find unpublished documents' do
published = '---
published: true
---'
unpublished = '---
published: false
---'
Dir.stub :glob, [new_tempfile(published), new_tempfile(unpublished), new_tempfile(unpublished)] do
finder.find_unpublished.count.must_equal(2)
end
end

it 'creates a document with the right url' do
contents = '---
slug: a-slug
Expand All @@ -22,15 +55,9 @@
end
end

it 'finds several documents' do
Dir.stub :glob, [filename, 'another-file'] do
finder.find_all.count.must_equal(2)
end
end

it 'sorts the found filenames alphabetically' do
Dir.stub :glob, %w(zed be) do
finder.find_all.first.send(:basename).must_equal('be')
Dir.stub :glob, [new_tempfile('', 'zed'), new_tempfile('', 'be')] do
finder.find_published.first.send(:basename).start_with?('be').must_equal(true)
end
end

Expand Down
15 changes: 15 additions & 0 deletions tests/web/unlinked_pages.rb
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,21 @@
subject.xpath('//a[contains(@href, "/people/")]').count.must_equal(fed + sen + sta)
subject.xpath('//a[contains(@href, "/places/")]').count.must_equal(fed + sen + sta)
end

it 'scrapes unpublished post and event pages since they are unlinked' do
unpublished = '---
published: false
---'
Dir.stub :glob, [new_tempfile(unpublished)] do
get '/scraper-start-page.html'
refute_empty(subject.xpath('//a[contains(text(), "Unlinked post")]'))
refute_empty(subject.xpath('//a[contains(text(), "Unlinked event")]'))
end
end

it 'scrapes all info pages, including those unlinked' do
refute_empty(subject.xpath('//a[contains(text(), "Unlinked infopage")]'))
end
end

describe 'The Jinja2 Template' do
Expand Down
12 changes: 12 additions & 0 deletions views/scraper_start_page.erb
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,15 @@
<p><a href="<%= place.url %>people/">Old people page route, must trigger redirect to <%= place.url %></a></p>
<p><a href="<%= place.url %>places/">Old places page route, must trigger redirect to <%= place.url %></a></p>
<% end %>

<% @posts.each do |post| %>
<p><a href="<%= post.url %>">Unlinked post - <%= post.title %></a></p>
<% end %>

<% @events.each do |event| %>
<p><a href="<%= event.url %>">Unlinked event - <%= event.title %></a></p>
<% end %>

<% @infopages.each do |infopage| %>
<p><a href="<%= infopage.url %>">Unlinked infopage - <%= infopage.title %></a></p>
<% end %>