Skip to content

Commit

Permalink
- field Uid in favor of lumina37/aiotieba#77 @ ForumModeratorRevision.cs
Browse files Browse the repository at this point in the history

* now inserts new moderator revisions into the DB @ `ForumModeratorRevisionCrawlWorker.DoWork()`
@ crawler
  • Loading branch information
n0099 committed Jan 9, 2023
1 parent f593d48 commit 1a2ad1a
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 13 deletions.
4 changes: 2 additions & 2 deletions crawler/src/Db/Revision/ForumModeratorRevision.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ namespace tbm.Crawler.Db.Revision
{
/// <summary>
/// Row type for one forum-moderator revision; the DbContext hunk in this commit maps it to
/// the table "tbmc_revision_forumModerator".
/// </summary>
// NOTE(review): this listing comes from a diff view whose +/- markers were lost, so it
// presumably interleaves removed and added members (the commit title mentions dropping the
// Uid field, and the hunk reports 2 additions & 2 deletions) — confirm against the real file
// before relying on the member list below.
public class ForumModeratorRevision
{
// Marked with [Key]. NOTE(review): the mapping line without HasKey(Fid, Uid, Time) appears
// to be the new one, which would make this the only key — confirm.
[Key] public uint Id { get; set; }
// The crawl worker in this commit fills this with DateTimeOffset.Now.ToUnixTimeSeconds(),
// i.e. Unix time in seconds, using one shared value for all rows written for a forum.
public uint Time { get; set; }
// The crawl worker sets this from the Fid of the forum being crawled.
public uint Fid { get; set; }
// NOTE(review): per the commit title this field is being removed; the worker shown in this
// commit never assigns it — confirm it is a deleted line.
public long Uid { get; set; }
// Portrait string of the moderator as extracted from the crawled page; the worker uses it
// (together with ModeratorType) to compare fresh rows against the latest stored ones.
// Defaults to the empty string.
public string Portrait { get; set; } = "";
// NOTE(review): not assigned anywhere in the visible worker code; presumably the old name
// of ModeratorType and a deleted line — confirm.
public string? Type { get; set; }
// Moderator type as extracted from the crawled page. The worker writes null here for a
// portrait that exists in the stored latest revisions but is absent from the fresh crawl,
// which its inline comment describes as "no longer a moderator".
public string? ModeratorType { get; set; }
}
}
2 changes: 1 addition & 1 deletion crawler/src/Db/TbmDbContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ protected override void OnModelCreating(ModelBuilder b)
b.Entity<SubReplyRevision>().ToTable("tbmc_revision_subReply").HasKey(e => new {e.Spid, e.Time});
b.Entity<UserRevision>().ToTable("tbmc_revision_user").HasKey(e => new {e.Uid, e.Time});
b.Entity<AuthorExpGradeRevision>().ToTable("tbmc_revision_authorExpGrade").HasKey(e => new {e.Fid, e.Uid, e.Time});
b.Entity<ForumModeratorRevision>().ToTable("tbmc_revision_forumModerator").HasKey(e => new {e.Fid, e.Uid, e.Time});
b.Entity<ForumModeratorRevision>().ToTable("tbmc_revision_forumModerator");
b.Entity<Forum>().ToTable("tbm_forum");
}

Expand Down
1 change: 1 addition & 0 deletions crawler/src/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ private static async Task Main()
{
service.AddHostedService<MainCrawlWorker>();
service.AddHostedService<RetryCrawlWorker>();
service.AddHostedService<ForumModeratorRevisionCrawlWorker>();
var httpConfig = context.Configuration.GetSection("ClientRequester");
service.AddHttpClient("tbClient", client =>
{
Expand Down
2 changes: 1 addition & 1 deletion crawler/src/Tieba/Crawl/Facade/ThreadArchiveCrawlFacade.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ public class ThreadArchiveCrawlFacade : ThreadCrawlFacade
{
public new delegate ThreadArchiveCrawlFacade New(Fid fid, string forumName);

public ThreadArchiveCrawlFacade(ILogger<ThreadCrawlFacade> logger, TbmDbContext.New dbContextFactory,
public ThreadArchiveCrawlFacade(ILogger<ThreadArchiveCrawlFacade> logger, TbmDbContext.New dbContextFactory,
ThreadArchiveCrawler.New crawler, ThreadParser parser, ThreadSaver.New saver, UserParserAndSaver users,
ClientRequesterTcs requesterTcs, IIndex<string, CrawlerLocks> locks, Fid fid, string forumName
) : base(logger, dbContextFactory, crawler.Invoke, parser, saver, users, requesterTcs, locks, fid, forumName) { }
Expand Down
52 changes: 43 additions & 9 deletions crawler/src/Worker/ForumModeratorRevisionCrawlWorker.cs
Original file line number Diff line number Diff line change
@@ -1,26 +1,23 @@
using AngleSharp;
using LinqToDB;
using LinqToDB.EntityFrameworkCore;
using IConfiguration = Microsoft.Extensions.Configuration.IConfiguration;

namespace tbm.Crawler.Worker
{
public class ForumModeratorRevisionCrawlWorker : CyclicCrawlWorker
{
private readonly ILogger<MainCrawlWorker> _logger;
private readonly ILifetimeScope _scope0;

public ForumModeratorRevisionCrawlWorker(ILogger<MainCrawlWorker> logger,
IConfiguration config, ILifetimeScope scope0) : base(logger, config)
{
_logger = logger;
_scope0 = scope0;
}
public ForumModeratorRevisionCrawlWorker(ILogger<ForumModeratorRevisionCrawlWorker> logger,
IConfiguration config, ILifetimeScope scope0) : base(logger, config, false) => _scope0 = scope0;

protected override async Task DoWork(CancellationToken stoppingToken)
{
await using var scope1 = _scope0.BeginLifetimeScope();
var db = scope1.Resolve<TbmDbContext.New>()(0);
var db0 = scope1.Resolve<TbmDbContext.New>()(0);
var browsing = BrowsingContext.New(Configuration.Default.WithDefaultLoader());
foreach (var forum in from f in db.Forum where f.IsCrawling select new {f.Fid, f.Name})
foreach (var forum in from f in db0.Forum.AsNoTracking() where f.IsCrawling select new {f.Fid, f.Name})
{
if (stoppingToken.IsCancellationRequested) return;
var doc = await browsing.OpenAsync($"https://tieba.baidu.com/bawu2/platform/listBawuTeamInfo?ie=utf-8&word={forum.Name}", stoppingToken);
Expand All @@ -38,6 +35,43 @@ protected override async Task DoWork(CancellationToken stoppingToken)
.OfType<string>();
return memberPortraits.Select(portrait => (type, portrait));
});

var fid = forum.Fid;
var now = (Time)DateTimeOffset.Now.ToUnixTimeSeconds();
await using var db1 = scope1.Resolve<TbmDbContext.New>()(0);
await using var transaction = await db1.Database.BeginTransactionAsync(IsolationLevel.ReadCommitted, stoppingToken);
var revisions = moderators.SelectMany(i => i).Select(tuple => new ForumModeratorRevision
{
Time = now,
Fid = fid,
Portrait = tuple.portrait,
ModeratorType = tuple.type
}).ToList();
var existingLatestRevisions = (from r in db1.ForumModeratorRevisions.AsNoTracking()
where r.Fid == fid
select new
{
r.Time,
r.Portrait,
r.ModeratorType,
Rank = Sql.Ext.Rank().Over().PartitionBy(r.Portrait).OrderByDesc(r.Time).ToValue()
}).Where(e => e.Rank == 1).ToLinqToDB().ToList();

db1.ForumModeratorRevisions.AddRange(revisions.ExceptBy(
existingLatestRevisions.Select(e => (e.Portrait, e.ModeratorType)),
r => (r.Portrait, r.ModeratorType)));
db1.ForumModeratorRevisions.AddRange(existingLatestRevisions.ExceptBy(
revisions.Select(r => (r.Portrait, r.ModeratorType)),
e => (e.Portrait, e.ModeratorType))
.Select(e => new ForumModeratorRevision
{
Time = now,
Fid = fid,
Portrait = e.Portrait,
ModeratorType = null // moderator only exists in db means he is no longer a moderator
}));
_ = await db1.SaveChangesAsync(stoppingToken);
await transaction.CommitAsync(stoppingToken);
}
}
}
Expand Down

0 comments on commit 1a2ad1a

Please sign in to comment.