Skip to content

Commit

Permalink
Allowed direct links to dash videos hosted on v.redd.it through.
Browse files Browse the repository at this point in the history
  • Loading branch information
Hackatrons committed Feb 1, 2022
1 parent af70844 commit c5850eb
Show file tree
Hide file tree
Showing 13 changed files with 172 additions and 49 deletions.
32 changes: 32 additions & 0 deletions DiscordBot.Test/DomainBlacklistFilterTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
using System.Linq;
using System.Threading.Tasks;
using DiscordBot.Filters;
using DiscordBot.Models;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace DiscordBot.Test;

/// <summary>
/// Tests for <see cref="DomainBlacklistFilter"/>.
/// </summary>
[TestClass]
public class DomainBlacklistFilterTest
{
    /// <summary>
    /// A direct link to a dash file on v.redd.it must pass the filter,
    /// while a link to the v.redd.it post itself must be removed.
    /// </summary>
    [TestMethod]
    public async Task EnsureDirectDashLinksAreAllowed()
    {
        var logger = new UnitTestLogger<DomainBlacklistFilter>();
        var filter = new DomainBlacklistFilter(logger);

        // runs a single url through the filter and reports how many results came out the other side
        async Task<int> CountSurvivors(string url)
        {
            var input = new[] { new SearchResult { Url = url } }.ToAsyncEnumerable();
            var output = await filter.Filter(input).ToListAsync();
            return output.Count;
        }

        // direct dash file: expected to be kept
        Assert.AreEqual(1, await CountSurvivors("https://v.redd.it/123/DASH_720.mp4"));

        // bare post link: expected to be dropped
        Assert.AreEqual(0, await CountSurvivors("https://v.redd.it/123/"));
    }
}
5 changes: 2 additions & 3 deletions DiscordBot.Test/PushshiftUrlTest.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using DiscordBot.Pushshift;
using DiscordBot.Pushshift.Models;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace DiscordBot.Test;
Expand Down Expand Up @@ -210,10 +209,10 @@ public void ScoreLessThanOrEqualToReturnsCorrectUrl()
[TestMethod]
public void QueryWithFieldsReturnsCorrectUrl()
{
Assert.AreEqual("https://api.pushshift.io/reddit/search/submission/?q=asdf&fields=url,post_hint,is_self,selftext",
Assert.AreEqual("https://api.pushshift.io/reddit/search/submission/?q=asdf&fields=a,b",
new PushshiftQuery()
.Search("asdf")
.Fields<PushshiftResult>()
.Fields(new[] { "a", "b" })
.ToString());
}
}
39 changes: 19 additions & 20 deletions DiscordBot.Test/ResultExpanderTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,29 @@
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System.Linq;

namespace DiscordBot.Test
namespace DiscordBot.Test;

[TestClass]
public class ResultExpanderTest
{
[TestClass]
public class ResultExpanderTest
[TestMethod]
public void RegexCorrectlyExtractsUrls()
{
[TestMethod]
public void RegexCorrectlyExtractsUrls()
{
const string url1 = "https://asdf.com/lol.mp4?q=123";
const string url2 = "https://asdf.com/lol.gif";
const string url3 = "http://www.asdftube.com/video/abc123";
const string url1 = "https://asdf.com/lol.mp4?q=123";
const string url2 = "https://asdf.com/lol.gif";
const string url3 = "http://www.asdftube.com/video/abc123";

var result = new PushshiftResult
{
Selftext = $@"
var result = new PushshiftResult
{
Selftext = $@"
Hi there check out this link {url1} and this one '{url2}' and finally this one:{url3}"
};
};

var urls = result.ExtractUrls().ToList();
Assert.AreEqual(3, urls.Count);
var urls = result.ExtractUrls().ToList();
Assert.AreEqual(3, urls.Count);

Assert.IsTrue(urls.Any(x => x.Equals(url1)));
Assert.IsTrue(urls.Any(x => x.Equals(url2)));
Assert.IsTrue(urls.Any(x => x.Equals(url3)));
}
Assert.IsTrue(urls.Any(x => x.Url.Equals(url1)));
Assert.IsTrue(urls.Any(x => x.Url.Equals(url2)));
Assert.IsTrue(urls.Any(x => x.Url.Equals(url3)));
}
}
}
27 changes: 27 additions & 0 deletions DiscordBot.Test/UnitTestLogger.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using Microsoft.Extensions.Logging;
using System;

namespace DiscordBot.Test;

/// <summary>
/// Minimal <see cref="ILogger{T}"/> for unit tests that writes every formatted message to the console.
/// </summary>
/// <typeparam name="T">The category type of the logger.</typeparam>
internal class UnitTestLogger<T> : ILogger<T>
{
    /// <summary>
    /// Formats the entry with the supplied formatter and writes it to standard output.
    /// The log level and event id are not included in the output.
    /// </summary>
    public void Log<TState>(
        LogLevel logLevel,
        EventId eventId,
        TState state,
        Exception? exception,
        Func<TState, Exception?, string> formatter)
    {
        Console.WriteLine(formatter(state, exception));
    }

    /// <summary>
    /// Every level is enabled so that nothing is hidden while running tests.
    /// </summary>
    public bool IsEnabled(LogLevel logLevel)
    {
        return true;
    }

    /// <summary>
    /// Scopes are not tracked by this logger; a shared no-op scope is returned
    /// so that code under test which opens a logging scope does not throw.
    /// </summary>
    public IDisposable BeginScope<TState>(TState state)
    {
        return NoopScope.Instance;
    }

    /// <summary>
    /// Disposable that does nothing, handed out by <see cref="BeginScope{TState}"/>.
    /// </summary>
    private sealed class NoopScope : IDisposable
    {
        public static readonly NoopScope Instance = new();

        public void Dispose()
        {
        }
    }
}
9 changes: 5 additions & 4 deletions DiscordBot/Commands/BaseSearchCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,13 @@ async Task<IEnumerable<SearchResult>> GetResults(string query)
// merge the result sets
var combined = mostRecent
.UnionBy(highestScore, x => x.Url)
.ToList();
// filter out reddit self posts
.Where(x => !x.IsSelf.GetValueOrDefault());

// extract any additional results from the reddit post
var additionalResults = combined
// extract any additional results from reddit posts
var additionalResults = mostRecent
.SelectMany(x => x.ExtractUrls())
.Select(x => new SearchResult { Url = x })
.UnionBy(highestScore.SelectMany(x => x.ExtractUrls()), x => x.Url)
.ToList();

// merge the additional results in
Expand Down
36 changes: 28 additions & 8 deletions DiscordBot/Filters/DomainBlacklistFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@ namespace DiscordBot.Filters;
/// </summary>
public class DomainBlacklistFilter : IResultFilter
{
const string RedditVideoHost = "v.redd.it";

// TODO: move to config
static readonly string[] BlacklistDomains =
{
// can't embed links from v.reddit.it
// TODO: is there a service out there we can leverage that converts v.reddit links to embeddable links?
"v.redd.it",
// not media
"reddit.com"
};
Expand All @@ -27,17 +26,38 @@ public DomainBlacklistFilter(ILogger<DomainBlacklistFilter> logger)
_logger = logger.ThrowIfNull();
}

static bool IsBlacklisted(string url) =>
BlacklistDomains.Any(url.ContainsIgnoreCase);
/// <summary>
/// Determines whether a result url is allowed through the blacklist.
/// </summary>
/// <param name="url">The url of the search result.</param>
/// <returns>
/// <c>false</c> when the url cannot be parsed, contains a blacklisted domain,
/// or points at v.redd.it without being a direct link to a file underneath a post;
/// otherwise <c>true</c>.
/// </returns>
static bool IsAllowed(string url)
{
    if (!Uri.TryCreate(url, UriKind.RelativeOrAbsolute, out var uri))
        return false;

    if (!url.ContainsIgnoreCase(RedditVideoHost))
        return !BlacklistDomains.Any(url.ContainsIgnoreCase);

    // the path of a relative uri cannot be inspected (Uri.AbsolutePath/PathAndQuery throw for relative uris)
    // and without a scheme and host it can't be a direct link to a hosted file anyway
    if (!uri.IsAbsoluteUri)
        return false;

    // can't embed v.redd.it links
    // however if we've been provided with a direct link to a dash video or audio file
    // then allow it through
    // e.g.:
    // allow https://v.redd.it/123/DASH_720.mp4
    // allow https://v.redd.it/123/DASH_1_2_M
    // don't allow https://v.redd.it/123

    // this is a bit dodgy, could do with a better method
    // basically /123/DASH_720.mp4 but trimmed to 123/DASH_720.mp4 = 1 slash
    // /123 but trimmed to 123 = 0 slashes
    // so we want 1 or more slashes
    // only the path is inspected; a slash inside the query string (e.g. /123?u=a/b) must not count
    var numberOfSlashes = uri.AbsolutePath.Trim('/').Count(c => c == '/');
    return numberOfSlashes >= 1;
}

public IAsyncEnumerable<SearchResult> Filter(IAsyncEnumerable<SearchResult> input) => input
.ThrowIfNull()
.Where(x =>
{
var blacklisted = IsBlacklisted(x.Url);
if (blacklisted)
var allowed = IsAllowed(x.Url);
if (!allowed)
_logger.LogDebug("Excluding result {url} as the domain has been blacklisted.", x.Url);
return !blacklisted;
return allowed;
});
}
3 changes: 2 additions & 1 deletion DiscordBot/Filters/EmbeddableMediaFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ public class EmbeddableMediaFilter : IResultFilter
"instagram.com",
"streamable.com",
"youtube.com",
"youtu.be"
"youtu.be",
"v.redd.it"
};

static readonly string[] EmbeddableMimeTypes =
Expand Down
4 changes: 1 addition & 3 deletions DiscordBot/Pushshift/Models/PostHint.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
// ReSharper disable UnusedMember.Global

namespace DiscordBot.Pushshift.Models;
namespace DiscordBot.Pushshift.Models;

/// <summary>
/// A pushshift post hint.
Expand Down
13 changes: 13 additions & 0 deletions DiscordBot/Pushshift/Models/Preview.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// ReSharper disable UnusedMember.Global

using System.Text.Json.Serialization;

namespace DiscordBot.Pushshift.Models;

/// <summary>
/// Model for the "preview" object of a pushshift result.
/// </summary>
public class Preview
{
    /// <summary>
    /// Value of the "enabled" json field, or <c>null</c> when it is absent.
    /// </summary>
    [JsonPropertyName("enabled")]
    public bool? Enabled { get; set; }

    /// <summary>
    /// Value of the "reddit_video_preview" json field, or <c>null</c> when it is absent.
    /// </summary>
    [JsonPropertyName("reddit_video_preview")]
    public RedditVideoPreview? RedditVideoPreview { get; set; }
}
6 changes: 3 additions & 3 deletions DiscordBot/Pushshift/Models/PushshiftResult.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
// ReSharper disable UnusedMember.Global

using System.Text.Json.Serialization;
using System.Text.Json.Serialization;

namespace DiscordBot.Pushshift.Models;

Expand Down Expand Up @@ -32,4 +30,6 @@ public record PushshiftResult
/// </summary>
[JsonPropertyName("selftext")]
public string? Selftext { get; set; }
[JsonPropertyName("preview")]
public Preview? Preview { get; set; }
}
9 changes: 9 additions & 0 deletions DiscordBot/Pushshift/Models/RedditVideoPreview.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using System.Text.Json.Serialization;

namespace DiscordBot.Pushshift.Models;

/// <summary>
/// Model for the "reddit_video_preview" object nested inside a pushshift preview.
/// </summary>
public class RedditVideoPreview
{
    /// <summary>
    /// Value of the "fallback_url" json field, or <c>null</c> when it is absent.
    /// </summary>
    [JsonPropertyName("fallback_url")]
    public string? FallbackUrl { get; set; }
}
19 changes: 16 additions & 3 deletions DiscordBot/Pushshift/PushshiftQuery.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,21 @@ public PushshiftQuery Subreddits(params string[] subreddits)
/// <remarks>
/// Not really required, just reduces a bit of bandwidth and possibly provides a slight performance improvement.
/// </remarks>
public PushshiftQuery Fields(IEnumerable<string> fields)
{
    // reject a null sequence up front, then append to the fields already requested
    var requested = fields.ThrowIfNull();
    _fields.AddRange(requested);

    // return the query itself so calls can be chained
    return this;
}

/// <summary>
/// Specifies which fields to return in the json payload based on the type.
/// </summary>
/// <remarks>
/// Not really required, just reduces a bit of bandwidth and possibly provides a slight performance improvement.
/// Also only works for top level fields as it doesn't look like pushshift allows us to specify sub-fields e.g. "preview.enabled".
/// By specifying "preview" however, all sub-fields are still returned from the API.
/// </remarks>
public PushshiftQuery Fields<T>()
{
var fields = typeof(T)
Expand All @@ -53,9 +68,7 @@ public PushshiftQuery Fields<T>()
})
.Select(x => x.jsonAttribute?.Name ?? x.property.Name);

_fields.AddRange(fields.ThrowIfNull());

return this;
return Fields(fields);
}

/// <summary>
Expand Down
19 changes: 15 additions & 4 deletions DiscordBot/Pushshift/PushshiftResultExpander.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using DiscordBot.Pushshift.Models;
using DiscordBot.Models;
using DiscordBot.Pushshift.Models;
using DiscordBot.Text;

namespace DiscordBot.Pushshift;
Expand All @@ -9,14 +10,24 @@ namespace DiscordBot.Pushshift;
internal static class PushshiftResultExpander
{
/// <summary>
/// Extracts any additional urls from pushshift results from metadata and the reddit submission text.
/// Extracts any additional urls from pushshift results from metadata fields and the reddit submission text.
/// </summary>
public static IEnumerable<string> ExtractUrls(this PushshiftResult result)
public static IEnumerable<SearchResult> ExtractUrls(this PushshiftResult result)
{
// note we can't use the image previews (result.preview.images), as reddit blocks them unless the referrer comes from reddit I think
var fromSelfText = !string.IsNullOrWhiteSpace(result.Selftext)
? UrlRegex.Match(result.Selftext)
: Enumerable.Empty<string>();

return fromSelfText;
// the dash format uses two separate files; one for video and one for audio
// there is no combined version with both audio and video unfortunately
// so best we can do is provide the video
var videoPreview = result.Preview?.RedditVideoPreview?.FallbackUrl;

foreach (var url in fromSelfText)
yield return new SearchResult { Url = url };

if (videoPreview != null)
yield return new SearchResult { Url = videoPreview, MediaHint = MediaType.Video };
}
}

0 comments on commit c5850eb

Please sign in to comment.