diff --git a/crawler/src/Db/Revision/ForumModeratorRevision.cs b/crawler/src/Db/Revision/ForumModeratorRevision.cs index 2349ca2c..7d49430c 100644 --- a/crawler/src/Db/Revision/ForumModeratorRevision.cs +++ b/crawler/src/Db/Revision/ForumModeratorRevision.cs @@ -4,10 +4,10 @@ namespace tbm.Crawler.Db.Revision { public class ForumModeratorRevision { + [Key] public uint Id { get; set; } public uint Time { get; set; } public uint Fid { get; set; } - public long Uid { get; set; } public string Portrait { get; set; } = ""; - public string? Type { get; set; } + public string? ModeratorType { get; set; } } } diff --git a/crawler/src/Db/TbmDbContext.cs b/crawler/src/Db/TbmDbContext.cs index a1c3cdc5..6f2939d8 100644 --- a/crawler/src/Db/TbmDbContext.cs +++ b/crawler/src/Db/TbmDbContext.cs @@ -42,7 +42,7 @@ protected override void OnModelCreating(ModelBuilder b) b.Entity().ToTable("tbmc_revision_subReply").HasKey(e => new {e.Spid, e.Time}); b.Entity().ToTable("tbmc_revision_user").HasKey(e => new {e.Uid, e.Time}); b.Entity().ToTable("tbmc_revision_authorExpGrade").HasKey(e => new {e.Fid, e.Uid, e.Time}); - b.Entity().ToTable("tbmc_revision_forumModerator").HasKey(e => new {e.Fid, e.Uid, e.Time}); + b.Entity().ToTable("tbmc_revision_forumModerator"); b.Entity().ToTable("tbm_forum"); } diff --git a/crawler/src/Program.cs b/crawler/src/Program.cs index 93728be3..05cbf9a1 100644 --- a/crawler/src/Program.cs +++ b/crawler/src/Program.cs @@ -28,6 +28,7 @@ private static async Task Main() { service.AddHostedService(); service.AddHostedService(); + service.AddHostedService(); var httpConfig = context.Configuration.GetSection("ClientRequester"); service.AddHttpClient("tbClient", client => { diff --git a/crawler/src/Tieba/Crawl/Facade/ThreadArchiveCrawlFacade.cs b/crawler/src/Tieba/Crawl/Facade/ThreadArchiveCrawlFacade.cs index fa6ce5ab..3e5671db 100644 --- a/crawler/src/Tieba/Crawl/Facade/ThreadArchiveCrawlFacade.cs +++ b/crawler/src/Tieba/Crawl/Facade/ThreadArchiveCrawlFacade.cs @@ -4,7 +4,7 @@ public class ThreadArchiveCrawlFacade : ThreadCrawlFacade { public new delegate ThreadArchiveCrawlFacade New(Fid fid, string forumName); - public ThreadArchiveCrawlFacade(ILogger logger, TbmDbContext.New dbContextFactory, + public ThreadArchiveCrawlFacade(ILogger logger, TbmDbContext.New dbContextFactory, ThreadArchiveCrawler.New crawler, ThreadParser parser, ThreadSaver.New saver, UserParserAndSaver users, ClientRequesterTcs requesterTcs, IIndex locks, Fid fid, string forumName ) : base(logger, dbContextFactory, crawler.Invoke, parser, saver, users, requesterTcs, locks, fid, forumName) { } diff --git a/crawler/src/Worker/ForumModeratorRevisionCrawlWorker.cs b/crawler/src/Worker/ForumModeratorRevisionCrawlWorker.cs index c4a8a510..5ec6e9db 100644 --- a/crawler/src/Worker/ForumModeratorRevisionCrawlWorker.cs +++ b/crawler/src/Worker/ForumModeratorRevisionCrawlWorker.cs @@ -1,26 +1,23 @@ using AngleSharp; +using LinqToDB; +using LinqToDB.EntityFrameworkCore; using IConfiguration = Microsoft.Extensions.Configuration.IConfiguration; namespace tbm.Crawler.Worker { public class ForumModeratorRevisionCrawlWorker : CyclicCrawlWorker { - private readonly ILogger _logger; private readonly ILifetimeScope _scope0; - public ForumModeratorRevisionCrawlWorker(ILogger logger, - IConfiguration config, ILifetimeScope scope0) : base(logger, config) - { - _logger = logger; - _scope0 = scope0; - } + public ForumModeratorRevisionCrawlWorker(ILogger logger, + IConfiguration config, ILifetimeScope scope0) : base(logger, config, false) => _scope0 = scope0; protected override async Task DoWork(CancellationToken stoppingToken) { await using var scope1 = _scope0.BeginLifetimeScope(); - var db = scope1.Resolve()(0); + var db0 = scope1.Resolve()(0); var browsing = BrowsingContext.New(Configuration.Default.WithDefaultLoader()); - foreach (var forum in from f in db.Forum where f.IsCrawling select new {f.Fid, f.Name}) + foreach (var forum in from f in db0.Forum.AsNoTracking() where f.IsCrawling select new {f.Fid, f.Name}) { if (stoppingToken.IsCancellationRequested) return; var doc = await browsing.OpenAsync($"https://tieba.baidu.com/bawu2/platform/listBawuTeamInfo?ie=utf-8&word={forum.Name}", stoppingToken); @@ -38,6 +35,43 @@ protected override async Task DoWork(CancellationToken stoppingToken) .OfType(); return memberPortraits.Select(portrait => (type, portrait)); }); + + var fid = forum.Fid; + var now = (Time)DateTimeOffset.Now.ToUnixTimeSeconds(); + await using var db1 = scope1.Resolve()(0); + await using var transaction = await db1.Database.BeginTransactionAsync(IsolationLevel.ReadCommitted, stoppingToken); + var revisions = moderators.SelectMany(i => i).Select(tuple => new ForumModeratorRevision + { + Time = now, + Fid = fid, + Portrait = tuple.portrait, + ModeratorType = tuple.type + }).ToList(); + var existingLatestRevisions = (from r in db1.ForumModeratorRevisions.AsNoTracking() + where r.Fid == fid + select new + { + r.Time, + r.Portrait, + r.ModeratorType, + Rank = Sql.Ext.Rank().Over().PartitionBy(r.Portrait).OrderByDesc(r.Time).ToValue() + }).Where(e => e.Rank == 1).ToLinqToDB().ToList(); + + db1.ForumModeratorRevisions.AddRange(revisions.ExceptBy( + existingLatestRevisions.Select(e => (e.Portrait, e.ModeratorType)), + r => (r.Portrait, r.ModeratorType))); + db1.ForumModeratorRevisions.AddRange(existingLatestRevisions.ExceptBy( + revisions.Select(r => (r.Portrait, r.ModeratorType)), + e => (e.Portrait, e.ModeratorType)) + .Select(e => new ForumModeratorRevision + { + Time = now, + Fid = fid, + Portrait = e.Portrait, + ModeratorType = null // moderator only exists in db means he is no longer a moderator + })); + _ = await db1.SaveChangesAsync(stoppingToken); + await transaction.CommitAsync(stoppingToken); } } }