Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add seen_by_guid property to feeds for dynamic link feeds #131

Merged
merged 10 commits into from
Nov 16, 2022
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,15 @@ feeds:
validate_cert: false
```

Seen torrents can also be tracked by each item's GUID instead of its link. This is
useful when the link URLs change between fetches, as with Prowlarr's proxy links. The default is false:

```yaml
feeds:
- url: http://example.com/feed1
seen_by_guid: true
```

### All available options

The following configuration file example contains every existing option
Expand Down Expand Up @@ -150,6 +159,7 @@ feeds:
download_path: /home/user/match2
- url: http://example.com/feed9
validate_cert: false
seen_by_guid: true

update_interval: 600

Expand Down
11 changes: 7 additions & 4 deletions lib/transmission-rss/aggregator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,15 @@ def process_link(feed, item)

# Link is not a String directly.
link = link.href if link.class != String


# Determine whether to use guid or link as seen hash
seen_value = feed.seen_by_guid ? (item.guid.content rescue item.guid || link).to_s : link
StormPooper marked this conversation as resolved.
Show resolved Hide resolved

# The link is not in +@seen+ Array.
unless @seen.include?(link)
unless @seen.include?(seen_value)
# Skip if filter defined and not matching.
unless feed.matches_regexp?(item.title)
@seen.add(link)
@seen.add(seen_value)
return
end

Expand All @@ -129,7 +132,7 @@ def process_link(feed, item)
rescue Client::Unauthorized, Errno::ECONNREFUSED, Timeout::Error
@log.debug('not added to seen file ' + link)
else
@seen.add(link)
@seen.add(seen_value)
end
end

Expand Down
6 changes: 4 additions & 2 deletions lib/transmission-rss/feed.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module TransmissionRSS
class Feed
attr_reader :url, :regexp, :config, :validate_cert
attr_reader :url, :regexp, :config, :validate_cert, :seen_by_guid

def initialize(config = {})
@download_paths = {}
Expand All @@ -12,7 +12,6 @@ def initialize(config = {})
@url = URI.escape(URI.unescape(config['url'] || config.keys.first))

@download_path = config['download_path']
@validate_cert = config['validate_cert'].nil? || config['validate_cert']

matchers = Array(config['regexp']).map do |e|
e.is_a?(String) ? e : e['matcher']
Expand All @@ -25,6 +24,9 @@ def initialize(config = {})
@config = {}
@url = config.to_s
end

@validate_cert = @config['validate_cert'].nil? || !!@config['validate_cert']
StormPooper marked this conversation as resolved.
Show resolved Hide resolved
@seen_by_guid = !!@config['seen_by_guid']
end

def download_path(title = nil)
Expand Down
244 changes: 244 additions & 0 deletions spec/aggregator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,248 @@
end
end
end

# Examples for Aggregator#process_link: which URL is returned for an item,
# which value (enclosure URL, link or GUID) is recorded in the seen list, and
# what is passed to the on_new_item callback.
describe '#process_link' do
  # Feed configured to track seen items by GUID instead of by link.
  let(:guid_feed) do
    Feed.new('url' => FEEDS.first.url, 'seen_by_guid' => true)
  end

  # Load the first item of the recorded feed and start with an empty seen list.
  before do
    VCR.use_cassette('feed_fetch', MATCH_REQUESTS_ON) do
      @item = subject.send(:parse, subject.send(:fetch, FEEDS.first)).first
      subject.seen.clear!
    end
  end

  it 'returns enclosure url and adds url to seen' do
    content = subject.send(:process_link, FEEDS.first, @item)

    url = URI.parse(content)
    expect(url.scheme).to eq('https')
    expect(url.host).to eq('www.archlinux.org')
    expect(File.basename(url.path)).to match(/\.iso\.torrent$/)

    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.enclosure.url)).to be true
  end

  it 'returns link and adds link to seen if no enclosure url' do
    @item.enclosure = nil

    content = subject.send(:process_link, FEEDS.first, @item)

    url = URI.parse(content)
    expect(url.scheme).to eq('https')
    expect(url.host).to eq('www.archlinux.org')
    expect(File.basename(url.path)).to match(/2020\.01\.01$/)

    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.link)).to be true
  end

  it 'returns nil if no link or enclosure url' do
    @item.enclosure = nil
    @item.link = nil

    content = subject.send(:process_link, FEEDS.first, @item)

    expect(content).to be_nil
    expect(subject.seen.size).to eq(0)
  end

  it 'returns nil but adds url to seen if unseen but no regexp match' do
    feed = Feed.new('url' => FEEDS.first.url, 'regexp' => 'WILL_NOT_MATCH$')

    content = subject.send(:process_link, feed, @item)

    expect(content).to be_nil
    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.enclosure.url)).to be true
  end

  it 'returns enclosure url and adds guid to seen if seen_by_guid' do
    content = subject.send(:process_link, guid_feed, @item)

    url = URI.parse(content)
    expect(url.scheme).to eq('https')
    expect(url.host).to eq('www.archlinux.org')
    expect(File.basename(url.path)).to match(/\.iso\.torrent$/)

    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.guid.content.to_s)).to be true
  end

  it 'returns link and adds guid to seen if seen_by_guid but no enclosure url' do
    @item.enclosure = nil

    content = subject.send(:process_link, guid_feed, @item)

    url = URI.parse(content)
    expect(url.scheme).to eq('https')
    expect(url.host).to eq('www.archlinux.org')
    expect(File.basename(url.path)).to match(/2020\.01\.01$/)

    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.guid.content.to_s)).to be true
  end

  it 'returns enclosure url and adds url to seen if seen_by_guid but no guid' do
    @item.guid = nil

    content = subject.send(:process_link, guid_feed, @item)

    expect(content).not_to be_empty
    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.enclosure.url)).to be true
  end

  it 'returns link and adds link to seen if seen_by_guid but no guid' do
    @item.enclosure = nil
    @item.guid = nil

    content = subject.send(:process_link, guid_feed, @item)

    expect(content).not_to be_empty
    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.link)).to be true
  end

  it 'returns enclosure url and adds guid to seen if seen_by_guid but guid has no attributes' do
    # Replace the guid object by its bare content to mimic a guid without attributes.
    @item.guid = @item.guid.content

    content = subject.send(:process_link, guid_feed, @item)

    expect(content).not_to be_empty
    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.guid)).to be true
  end

  it 'returns link and adds guid to seen if seen_by_guid but no enclosure link and guid has no attributes' do
    @item.enclosure = nil
    # Replace the guid object by its bare content to mimic a guid without attributes.
    @item.guid = @item.guid.content

    content = subject.send(:process_link, guid_feed, @item)

    expect(content).not_to be_empty
    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.guid)).to be true
  end

  it 'returns nil but adds to seen if seen_by_guid and unseen but no regexp match' do
    feed = Feed.new(
      'url' => FEEDS.first.url,
      'regexp' => 'WILL_NOT_MATCH$',
      'seen_by_guid' => true
    )

    content = subject.send(:process_link, feed, @item)

    expect(content).to be_nil
    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.guid.content)).to be true
  end

  it 'calls on_new_item when returning link and adding to seen' do
    on_new_item_args = nil
    subject.on_new_item do |arg1, arg2, arg3|
      on_new_item_args = { arg1: arg1, arg2: arg2, arg3: arg3 }
    end

    subject.send(:process_link, FEEDS.first, @item)

    expect(on_new_item_args).not_to be_nil
    expect(on_new_item_args[:arg1]).to eq(@item.enclosure.url)
    expect(on_new_item_args[:arg2]).to be(FEEDS.first)
    expect(on_new_item_args[:arg3]).to be_nil

    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.enclosure.url)).to be true
  end

  it 'calls on_new_item with download_path when download_path set on feed' do
    on_new_item_args = nil
    subject.on_new_item do |arg1, arg2, arg3|
      on_new_item_args = { arg1: arg1, arg2: arg2, arg3: arg3 }
    end
    feed = Feed.new('url' => FEEDS.first.url, 'download_path' => '/tmp')

    subject.send(:process_link, feed, @item)

    expect(on_new_item_args).not_to be_nil
    expect(on_new_item_args[:arg1]).to eq(@item.enclosure.url)
    expect(on_new_item_args[:arg2]).to be(feed)
    expect(on_new_item_args[:arg3]).to eq('/tmp')

    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.enclosure.url)).to be true
  end

  it 'calls on_new_item with download_path from regexp when matching' do
    on_new_item_args = nil
    subject.on_new_item do |arg1, arg2, arg3|
      on_new_item_args = { arg1: arg1, arg2: arg2, arg3: arg3 }
    end
    feed = Feed.new(
      'url' => FEEDS.first.url,
      'regexp' => [{ 'matcher' => '.+', 'download_path' => '/tmp/foo' }]
    )

    subject.send(:process_link, feed, @item)

    expect(on_new_item_args).not_to be_nil
    expect(on_new_item_args[:arg1]).to eq(@item.enclosure.url)
    expect(on_new_item_args[:arg2]).to be(feed)
    expect(on_new_item_args[:arg3]).to eq('/tmp/foo')

    expect(subject.seen.size).to eq(1)
    expect(subject.seen.include?(@item.enclosure.url)).to be true
  end

  # One example per error class that process_link rescues around the callback.
  [Client::Unauthorized, Errno::ECONNREFUSED, Timeout::Error].each do |err|
    it "does not add to seen when on_new_item throws #{err}" do
      subject.on_new_item do
        raise err, "Test #{err}"
      end

      subject.send(:process_link, FEEDS.first, @item)

      expect(subject.seen.size).to eq(0)
    end
  end
end
end
14 changes: 11 additions & 3 deletions spec/feed_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
expect(feed.config).not_to be_nil
expect(feed.download_path).to be_nil
expect(feed.regexp).to be_nil
expect(feed.validate_cert).to eq(true)
expect(feed.seen_by_guid).to eq(false)
end

it 'should be able to parse encoded url' do
Expand All @@ -27,13 +29,17 @@
expect(feed.url).to eq(@url)
expect(feed.download_path).to be_nil
expect(feed.regexp).to be_nil
expect(feed.validate_cert).to eq(true)
expect(feed.seen_by_guid).to eq(false)
end

it 'should be able to parse old style with both options' do
feed = Feed.new({@url => nil, 'download_path' => @download_path, 'regexp' => @matcher})
it 'should be able to parse old style with all options' do
feed = Feed.new({@url => nil, 'download_path' => @download_path, 'regexp' => @matcher, 'validate_cert' => true, 'seen_by_guid' => false})
expect(feed.url).to eq(@url)
expect(feed.download_path).to eq(@download_path)
expect(feed.regexp).to eq(@regexp)
expect(feed.validate_cert).to eq(true)
expect(feed.seen_by_guid).to eq(false)
end

it 'should be able to use new style config with no options' do
Expand All @@ -44,10 +50,12 @@
end

it 'should be able to use new style config with all options' do
feed = Feed.new({'url' => @url, 'download_path' => @download_path, 'regexp' => @matcher})
feed = Feed.new({'url' => @url, 'download_path' => @download_path, 'regexp' => @matcher, 'validate_cert' => false, 'seen_by_guid' => true})
expect(feed.url).to eq(@url)
expect(feed.download_path).to eq(@download_path)
expect(feed.regexp).to eq(@regexp)
expect(feed.validate_cert).to eq(false)
expect(feed.seen_by_guid).to eq(true)
end

it 'should have a functioning matcher' do
Expand Down
5 changes: 5 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,8 @@ def tmp_path(file)
end

MATCH_REQUESTS_ON = { match_requests_on: [:method, :uri, :headers, :body] }

# Focus filtering: examples tagged with `focus: true` are selected for the run;
# when no example carries that tag, the whole suite runs instead.
RSpec.configure do |config|
  config.filter_run focus: true
  config.run_all_when_everything_filtered = true
end
1 change: 1 addition & 0 deletions transmission-rss.conf.example
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ feeds:
download_path: /home/user/match2
- url: http://example.com/feed8
validate_cert: false
seen_by_guid: true

# Feed probing interval in seconds. Default is 600.

Expand Down