Skip to content

Commit

Permalink
#61 (#70)
Browse files Browse the repository at this point in the history
* Typo in LoadHtmlDocumentAsync

* Improved logging on webscraper

* Further improved error reporting

* Added exceptions for webscraper

* Refactored webscraper

* Updated project version
  • Loading branch information
Anequit authored Jan 23, 2022
1 parent 8f0b983 commit 4fa923b
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,18 @@
<TargetFramework>net5.0</TargetFramework>
<Nullable>enable</Nullable>
<AssemblyName>Cyberdrop Downloader</AssemblyName>
<Copyright>Copyright © 2021</Copyright>
<Copyright>Copyright © 2022</Copyright>
<Product>Cyberdrop Downloader</Product>
<AssemblyVersion>2.2.3.0</AssemblyVersion>
<FileVersion>2.2.2.0</FileVersion>
<AssemblyVersion>2.2.4.0</AssemblyVersion>
<FileVersion>2.2.4.0</FileVersion>
<NeutralLanguage>en</NeutralLanguage>
<Company />
<Authors />
<PackageIconUrl />
<ApplicationIcon>Assets\duck.ico</ApplicationIcon>
<Win32Resource />
<Version>2.2.3</Version>
<Version>2.2.4</Version>
<PackageProjectUrl>https://github.com/izqalan/cy-client</PackageProjectUrl>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
<DebugType>none</DebugType>
Expand Down
26 changes: 24 additions & 2 deletions src/CyberdropDownloader.Avalonia/ViewModels/MainWindowViewModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using System.Reflection;
using System.Threading;
using System.Threading.Tasks;
using CyberdropDownloader.Core.Exceptions;

namespace CyberdropDownloader.Avalonia.ViewModels
{
Expand Down Expand Up @@ -115,8 +116,29 @@ await Task.Run(async () =>
_cancellationTokenSource = new CancellationTokenSource();
}

// Load the album
await _webScraper.LoadAlbumAsync(url);
try
{
await _webScraper.LoadAlbumAsync(url);
}
catch(Exception exception)
{
switch(exception)
{
case NullAlbumTitleException:
Log("Failed to fetch album title.");
break;

case NullAlbumSizeException:
Log("Failed to fetch album size.");
break;

case NullAlbumFilesException:
Log("Failed to fetch album files.");
break;
}

continue;
}

// If the album url is invalid, then log and skip over it
if(!_webScraper.Successful)
Expand Down
24 changes: 24 additions & 0 deletions src/CyberdropDownloader.Core/Exceptions/NullAlbumFilesException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using System;
using System.Runtime.Serialization;

namespace CyberdropDownloader.Core.Exceptions
{
    /// <summary>
    /// Raised when the list of files belonging to an album could not be fetched.
    /// </summary>
    /// <remarks>
    /// Marked <see cref="SerializableAttribute"/> so that the serialization
    /// constructor declared below is actually usable; without the attribute the
    /// standard exception serialization pattern is only half implemented.
    /// </remarks>
    [Serializable]
    public class NullAlbumFilesException : Exception
    {
        /// <summary>Creates the exception with the default message.</summary>
        public NullAlbumFilesException()
        {
        }

        /// <summary>Creates the exception with a custom message.</summary>
        /// <param name="message">Text describing the failure.</param>
        public NullAlbumFilesException(string message) : base(message)
        {
        }

        /// <summary>Creates the exception with a custom message and the underlying cause.</summary>
        /// <param name="message">Text describing the failure.</param>
        /// <param name="innerException">The exception that caused this one.</param>
        public NullAlbumFilesException(string message, Exception innerException) : base(message, innerException)
        {
        }

        /// <summary>Recreates the exception from serialized data.</summary>
        /// <param name="info">Holds the serialized object data.</param>
        /// <param name="context">Describes the source or destination of the serialized stream.</param>
        protected NullAlbumFilesException(SerializationInfo info, StreamingContext context) : base(info, context)
        {
        }
    }
}
24 changes: 24 additions & 0 deletions src/CyberdropDownloader.Core/Exceptions/NullAlbumSizeException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using System;
using System.Runtime.Serialization;

namespace CyberdropDownloader.Core.Exceptions
{
    /// <summary>
    /// Raised when the size of an album could not be fetched.
    /// </summary>
    /// <remarks>
    /// Marked <see cref="SerializableAttribute"/> so that the serialization
    /// constructor declared below is actually usable; without the attribute the
    /// standard exception serialization pattern is only half implemented.
    /// </remarks>
    [Serializable]
    public class NullAlbumSizeException : Exception
    {
        /// <summary>Creates the exception with the default message.</summary>
        public NullAlbumSizeException()
        {
        }

        /// <summary>Creates the exception with a custom message.</summary>
        /// <param name="message">Text describing the failure.</param>
        public NullAlbumSizeException(string message) : base(message)
        {
        }

        /// <summary>Creates the exception with a custom message and the underlying cause.</summary>
        /// <param name="message">Text describing the failure.</param>
        /// <param name="innerException">The exception that caused this one.</param>
        public NullAlbumSizeException(string message, Exception innerException) : base(message, innerException)
        {
        }

        /// <summary>Recreates the exception from serialized data.</summary>
        /// <param name="info">Holds the serialized object data.</param>
        /// <param name="context">Describes the source or destination of the serialized stream.</param>
        protected NullAlbumSizeException(SerializationInfo info, StreamingContext context) : base(info, context)
        {
        }
    }
}
24 changes: 24 additions & 0 deletions src/CyberdropDownloader.Core/Exceptions/NullAlbumTitleException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using System;
using System.Runtime.Serialization;

namespace CyberdropDownloader.Core.Exceptions
{
    /// <summary>
    /// Raised when the title of an album could not be fetched.
    /// </summary>
    /// <remarks>
    /// Marked <see cref="SerializableAttribute"/> so that the serialization
    /// constructor declared below is actually usable; without the attribute the
    /// standard exception serialization pattern is only half implemented.
    /// </remarks>
    [Serializable]
    public class NullAlbumTitleException : Exception
    {
        /// <summary>Creates the exception with the default message.</summary>
        public NullAlbumTitleException()
        {
        }

        /// <summary>Creates the exception with a custom message.</summary>
        /// <param name="message">Text describing the failure.</param>
        public NullAlbumTitleException(string message) : base(message)
        {
        }

        /// <summary>Creates the exception with a custom message and the underlying cause.</summary>
        /// <param name="message">Text describing the failure.</param>
        /// <param name="innerException">The exception that caused this one.</param>
        public NullAlbumTitleException(string message, Exception innerException) : base(message, innerException)
        {
        }

        /// <summary>Recreates the exception from serialized data.</summary>
        /// <param name="info">Holds the serialized object data.</param>
        /// <param name="context">Describes the source or destination of the serialized stream.</param>
        protected NullAlbumTitleException(SerializationInfo info, StreamingContext context) : base(info, context)
        {
        }
    }
}
125 changes: 65 additions & 60 deletions src/CyberdropDownloader.Core/WebScraper.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using CyberdropDownloader.Core.DataModels;
using CyberdropDownloader.Core.Exceptions;
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
Expand All @@ -7,76 +8,80 @@

namespace CyberdropDownloader.Core
{
    /// <summary>
    /// Downloads an album web page and parses it into an <see cref="DataModels.Album"/>
    /// (title, size and the queue of files it links to).
    /// </summary>
    public class WebScraper
    {
        private Album _album;
        private bool _successful;

        /// <summary>The album produced by the most recent successful <see cref="LoadAlbumAsync"/> call.</summary>
        public Album Album => _album;

        /// <summary>Whether the most recent <see cref="LoadAlbumAsync"/> call produced an album.</summary>
        public bool Successful => _successful;

        /// <summary>
        /// Loads the page at <paramref name="url"/> and parses it into <see cref="Album"/>.
        /// </summary>
        /// <param name="url">Address of the album page to scrape.</param>
        /// <returns>A task that completes once the page has been loaded and parsed.</returns>
        /// <exception cref="NullAlbumTitleException">The album title could not be found on the page.</exception>
        /// <exception cref="NullAlbumSizeException">The album size could not be found on the page.</exception>
        /// <exception cref="NullAlbumFilesException">No album files could be found on the page.</exception>
        /// <remarks>
        /// Exceptions raised while downloading the page are not handled here and reach the caller.
        /// Any other unexpected parsing failure leaves <see cref="Successful"/> set to <c>false</c>.
        /// </remarks>
        public async Task LoadAlbumAsync(string url)
        {
            // Reset first so a failed load can never report the previous call's success.
            _successful = false;

            await Task.Run(async () =>
            {
                // Load webpage
                HtmlDocument htmlDocument = await new HtmlWeb().LoadFromWebAsync(url);

                if(htmlDocument == null)
                {
                    return;
                }

                try
                {
                    (string title, string size, Queue<AlbumFile> files) = FetchAlbumData(htmlDocument);

                    _album = new Album(title, size, files);
                    _successful = true;
                }
                catch(Exception exception) when (!IsAlbumDataException(exception))
                {
                    // The filter lets the Null* album exceptions propagate so callers can
                    // report exactly what was missing; anything else is only flagged.
                    _successful = false;
                }
            });
        }

        #region Load Album
        /// <summary>Tells whether <paramref name="exception"/> is one of the album-specific failures callers handle.</summary>
        private static bool IsAlbumDataException(Exception exception)
        {
            return exception is NullAlbumTitleException
                || exception is NullAlbumSizeException
                || exception is NullAlbumFilesException;
        }

        /// <summary>Collects the title, size and files of the album described by <paramref name="htmlDocument"/>.</summary>
        private (string title, string size, Queue<AlbumFile> files) FetchAlbumData(HtmlDocument htmlDocument)
        {
            return (FetchAlbumTitle(htmlDocument), FetchAlbumSize(htmlDocument), FetchAlbumFiles(htmlDocument));
        }

        /// <summary>Reads the <c>title</c> attribute of the first <c>h1#title</c> heading.</summary>
        /// <exception cref="NullAlbumTitleException">The heading or its attribute is missing.</exception>
        private string FetchAlbumTitle(HtmlDocument htmlDocument)
        {
            // SelectSingleNode returns the first match, or null when nothing matches;
            // the null-conditional chain turns every "missing" case into the same exception.
            string title = htmlDocument.DocumentNode
                .SelectSingleNode("//div/h1[@id='title']")
                ?.Attributes["title"]
                ?.Value;

            return title ?? throw new NullAlbumTitleException();
        }

        /// <summary>Reads the inner HTML of the second <c>p.title</c> paragraph, which holds the album size.</summary>
        /// <exception cref="NullAlbumSizeException">Fewer than two matching paragraphs exist.</exception>
        private string FetchAlbumSize(HtmlDocument htmlDocument)
        {
            HtmlNodeCollection paragraphs = htmlDocument.DocumentNode.SelectNodes("//div/p[@class='title']");

            // SelectNodes yields null when there is no match, and the size lives at index 1.
            if(paragraphs == null || paragraphs.Count < 2)
            {
                throw new NullAlbumSizeException();
            }

            return paragraphs[1].InnerHtml ?? throw new NullAlbumSizeException();
        }

        /// <summary>Builds the queue of files from every <c>a.image</c> link that carries an <c>href</c>.</summary>
        /// <exception cref="NullAlbumFilesException">No matching links exist on the page.</exception>
        private Queue<AlbumFile> FetchAlbumFiles(HtmlDocument htmlDocument)
        {
            Queue<AlbumFile> urls = new Queue<AlbumFile>();

            HtmlNodeCollection files = htmlDocument.DocumentNode.SelectNodes("//a[@class='image'][@href]");

            if(files == null)
            {
                throw new NullAlbumFilesException();
            }

            foreach(HtmlNode link in files)
            {
                urls.Enqueue(new AlbumFile()
                {
                    Name = link.Attributes["title"].Value,
                    Url = link.Attributes["href"].Value
                });
            }

            return urls;
        }
        #endregion
    }
}

0 comments on commit 4fa923b

Please sign in to comment.