From b28fe967c97b212acfd9acb4b3ae71e019da29c2 Mon Sep 17 00:00:00 2001 From: n0099 Date: Thu, 2 May 2024 08:35:38 +0800 Subject: [PATCH] * fix `FOR UPDATE is not allowed with window functions` @ `AuthorRevisionSaver.SaveAuthorRevisions()` * fix `IEnumerable<>.Contains(IEnumerable<>, IEqualityComparer<>)` cannot be translated @ `ReplySaver.SaveReplySignatures()` * fix quoting string literal & identifiers for Postgres flavor * replace MySQL-specific `IFNULL()` with standard `COALESCE()` - setting timeout variables for MySQL @ `PushAllPostContentsIntoSonicWorker.DoWork()` @ crawler * suppress Roslyn analyzer rule `AV1568` in `ExifGpsTagValuesParser.ParseGpsCoordinateOrNull()` & `ExifDateTimeTagValuesParser.ParseExifDateTimeOrNull()` @ MetadataConsumer.cs @ imagePipeline + virtual methods `OnBuildingNpgsqlDataSource()`, `GetNpgsqlDataSource()` & method `GetNpgsqlDataSourceFactory()` for mapping enum types in Postgres * rename virtual method `OnConfiguringMysql()` to `OnConfiguringNpgsql()` * rename static field `SelectForUpdateCommandInterceptorInstance` to `SelectForUpdateCommandInterceptorSingleton` @ TbmDbContext.cs + class `NpgsqlCamelCaseNameTranslator` for converting enum value names @ shared @ c# --- c#/crawler/src/Db/CrawlerDbContext.cs | 10 ++++++++ .../Tieba/Crawl/Saver/AuthorRevisionSaver.cs | 8 ++++--- .../src/Tieba/Crawl/Saver/Post/ReplySaver.cs | 4 +++- .../PushAllPostContentsIntoSonicWorker.cs | 11 +++------ .../src/Consumer/MetadataConsumer.cs | 4 ++++ .../src/Db/ImagePipelineDbContext.cs | 3 ++- .../src/NpgsqlCamelCaseNameTranslator.cs | 14 +++++++++++ c#/shared/src/TbmDbContext.cs | 23 +++++++++++++++---- 8 files changed, 59 insertions(+), 18 deletions(-) create mode 100644 c#/shared/src/NpgsqlCamelCaseNameTranslator.cs diff --git a/c#/crawler/src/Db/CrawlerDbContext.cs b/c#/crawler/src/Db/CrawlerDbContext.cs index 0608fe1c..39dbbb07 100644 --- a/c#/crawler/src/Db/CrawlerDbContext.cs +++ b/c#/crawler/src/Db/CrawlerDbContext.cs @@ -1,4 +1,5 @@ using 
Microsoft.EntityFrameworkCore.Infrastructure; +using Npgsql; using static tbm.Crawler.Db.Revision.ReplyRevision; using static tbm.Crawler.Db.Revision.SubReplyRevision; using static tbm.Crawler.Db.Revision.ThreadRevision; @@ -8,6 +9,8 @@ namespace tbm.Crawler.Db; public class CrawlerDbContext(Fid fid) : TbmDbContext { + private static Lazy? _dataSourceSingleton; + public CrawlerDbContext() : this(fid: 0) { } public delegate CrawlerDbContext NewDefault(); public delegate CrawlerDbContext New(Fid fid); @@ -102,6 +105,13 @@ protected override void OnModelCreating(ModelBuilder b) b.Entity().ToTable("tbm_forum"); } + protected override void OnBuildingNpgsqlDataSource(NpgsqlDataSourceBuilder builder) => + builder.MapEnum("tbmcr_triggeredBy", new NpgsqlCamelCaseNameTranslator()); + + [SuppressMessage("Critical Code Smell", "S2696:Instance members should not write to \"static\" fields")] + protected override Lazy GetNpgsqlDataSource(string? connectionString) => + _dataSourceSingleton ??= GetNpgsqlDataSourceFactory(connectionString); + public class ModelCacheKeyFactory : IModelCacheKeyFactory { // https://stackoverflow.com/questions/51864015/entity-framework-map-model-class-to-table-at-run-time/51899590#51899590 // https://docs.microsoft.com/en-us/ef/core/modeling/dynamic-model diff --git a/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs index ab3b56b4..7ca7a461 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs @@ -1,4 +1,4 @@ -using LinqToDB.DataProvider.MySql; +using LinqToDB.DataProvider.PostgreSQL; namespace tbm.Crawler.Tieba.Crawl.Saver; @@ -51,10 +51,12 @@ private void SaveAuthorRevisions( { Helper.GetNowTimestamp(out var now); var existingRevisionOfExistingUsers = dbSet.AsNoTracking() - .Where(e => e.Fid == db.Fid && posts.Select(p => p.AuthorUid).Distinct().Contains(e.Uid)) + .Where(e => e.Fid == db.Fid + && posts.Select(p 
=> p.AuthorUid).Distinct().Contains(e.Uid)) .Select(latestRevisionProjectionFactory) + .AsCte() // https://stackoverflow.com/questions/49854322/usage-of-for-update-in-window-function-postgres#comment86726589_49854322 .Where(e => e.Rank == 1) - .AsMySql().ForUpdateHint() + .AsPostgreSQL().ForUpdateHint() .ToLinqToDB().AsEnumerable() .Join(posts, e => e.Uid, p => p.AuthorUid, (e, p) => ( diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs index f63415e1..03ed851c 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs @@ -142,7 +142,9 @@ private Action SaveReplySignatures(CrawlerDbContext db, IEnumerable r var existingSignatures = ( from s in db.ReplySignatures.AsTracking().ForUpdate() where uniqueSignatures.Select(us => us.Id).Contains(s.SignatureId) - && uniqueSignatures.Select(us => us.XxHash3).Contains(s.XxHash3, new ByteArrayEqualityComparer()) + + // server side eval doesn't need ByteArrayEqualityComparer + && uniqueSignatures.Select(us => us.XxHash3).Contains(s.XxHash3) select s ).ToList(); (from existing in existingSignatures diff --git a/c#/crawler/src/Worker/PushAllPostContentsIntoSonicWorker.cs b/c#/crawler/src/Worker/PushAllPostContentsIntoSonicWorker.cs index 09afa153..57e92107 100644 --- a/c#/crawler/src/Worker/PushAllPostContentsIntoSonicWorker.cs +++ b/c#/crawler/src/Worker/PushAllPostContentsIntoSonicWorker.cs @@ -22,9 +22,9 @@ protected override async Task DoWork(CancellationToken stoppingToken) #pragma warning restore IDISP004 // Don't ignore created IDisposable .Query<(Fid Fid, int ReplyCount, int SubReplyCount)>( string.Join(" UNION ALL ", (from f in db.Forums select f.Fid).AsEnumerable().Select(fid => - $"SELECT {fid} AS Fid," - + $"IFNULL((SELECT id FROM tbmc_f{fid}_reply ORDER BY id DESC LIMIT 1), 0) AS ReplyCount," - + $"IFNULL((SELECT id FROM tbmc_f{fid}_subReply ORDER BY id DESC LIMIT 1), 0) AS 
SubReplyCount"))) + $"SELECT '{fid}'," + + $"COALESCE((SELECT id FROM \"tbmc_f{fid}_reply\" ORDER BY id DESC LIMIT 1), 0)," + + $"COALESCE((SELECT id FROM \"tbmc_f{fid}_subReply\" ORDER BY id DESC LIMIT 1), 0)"))) .ToList(); var forumCount = forumPostCountsTuples.Count * 2; // reply and sub reply var totalPostCount = forumPostCountsTuples.Sum(t => t.ReplyCount) @@ -36,11 +36,6 @@ protected override async Task DoWork(CancellationToken stoppingToken) await using var dbFactory = dbContextFactory(); var dbWithFid = dbFactory.Value(fid); - // enlarge the default mysql connection read/write timeout to prevent it close connection while pushing - // since pushing post contents into sonic is slower than fetching records from mysql, aka back-pressure - _ = await dbWithFid.Database.ExecuteSqlRawAsync( - "SET SESSION net_read_timeout = 3600; SET SESSION net_write_timeout = 3600;", stoppingToken); - _ = await pusher.Ingest.FlushBucketAsync($"{pusher.CollectionPrefix}replies_content", $"f{fid}"); pushedPostCount += PushPostContentsWithTiming(fid, forumIndex - 1, forumCount, "replies", replyCount, totalPostCount, pushedPostCount, dbWithFid.ReplyContents.AsNoTracking(), diff --git a/c#/imagePipeline/src/Consumer/MetadataConsumer.cs b/c#/imagePipeline/src/Consumer/MetadataConsumer.cs index afa5d9a6..b97b3a0b 100644 --- a/c#/imagePipeline/src/Consumer/MetadataConsumer.cs +++ b/c#/imagePipeline/src/Consumer/MetadataConsumer.cs @@ -201,7 +201,9 @@ private static class ExifGpsTagValuesParser public static Point? ParseGpsCoordinateOrNull( IEnumerable allTagValues, IEnumerable? latitude, +#pragma warning disable AV1568 // Parameter value should not be overwritten in method body string? latitudeRef, +#pragma warning restore AV1568 // Parameter value should not be overwritten in method body IEnumerable? longitude, string? 
longitudeRef) { @@ -253,7 +255,9 @@ private static double ConvertDmsToDd(IReadOnlyList dms) private static partial class ExifDateTimeTagValuesParser { +#pragma warning disable AV1568 // Parameter value should not be overwritten in method body public static DateTimeAndOffset? ParseExifDateTimeOrNull(string? exifDateTime, string? exifFractionalSeconds) +#pragma warning restore AV1568 // Parameter value should not be overwritten in method body { // https://gist.github.com/thanatos/eee17100476a336a711e // tested inputs with valid results: // "2019:02:07 21:238" => "2019/2/7 21:23:08" , "" diff --git a/c#/imagePipeline/src/Db/ImagePipelineDbContext.cs b/c#/imagePipeline/src/Db/ImagePipelineDbContext.cs index e6c7abbb..c5449ae9 100644 --- a/c#/imagePipeline/src/Db/ImagePipelineDbContext.cs +++ b/c#/imagePipeline/src/Db/ImagePipelineDbContext.cs @@ -1,4 +1,5 @@ using Microsoft.EntityFrameworkCore.Infrastructure; +using Npgsql.EntityFrameworkCore.PostgreSQL.Infrastructure; using static tbm.ImagePipeline.Db.ImageMetadata; namespace tbm.ImagePipeline.Db; @@ -20,7 +21,7 @@ public ImagePipelineDbContext() : this(fid: 0, script: "") { } private Fid Fid { get; } = fid; private string Script { get; } = script; - protected override void OnConfiguringMysql(MySqlDbContextOptionsBuilder builder) => builder.UseNetTopologySuite(); + protected override void OnConfiguringNpgsql(NpgsqlDbContextOptionsBuilder builder) => builder.UseNetTopologySuite(); [SuppressMessage("Style", "IDE0058:Expression value is never used")] protected override void OnModelCreating(ModelBuilder b) diff --git a/c#/shared/src/NpgsqlCamelCaseNameTranslator.cs b/c#/shared/src/NpgsqlCamelCaseNameTranslator.cs new file mode 100644 index 00000000..b0993e91 --- /dev/null +++ b/c#/shared/src/NpgsqlCamelCaseNameTranslator.cs @@ -0,0 +1,14 @@ +using System.Globalization; +using Npgsql; + +namespace tbm.Shared; + +public class NpgsqlCamelCaseNameTranslator : INpgsqlNameTranslator +{ + public string TranslateTypeName(string 
clrName) => TranslateMemberName(clrName); + + /// https://github.com/efcore/EFCore.NamingConventions/blob/0d19b13a8e62ec20779c3cca03c27f200b5b7458/EFCore.NamingConventions/Internal/CamelCaseNameRewriter.cs#L13 + /// https://github.com/npgsql/npgsql/pull/1690 + public string TranslateMemberName(string clrName) => + char.ToLower(clrName[0], CultureInfo.InvariantCulture) + clrName[1..]; +} diff --git a/c#/shared/src/TbmDbContext.cs b/c#/shared/src/TbmDbContext.cs index b5f7bc4b..9b7cedf2 100644 --- a/c#/shared/src/TbmDbContext.cs +++ b/c#/shared/src/TbmDbContext.cs @@ -7,13 +7,15 @@ using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Logging; using NLog.Extensions.Logging; +using Npgsql; +using Npgsql.EntityFrameworkCore.PostgreSQL.Infrastructure; using LogLevel = Microsoft.Extensions.Logging.LogLevel; namespace tbm.Shared; public abstract class TbmDbContext : DbContext { - protected static readonly SelectForUpdateCommandInterceptor SelectForUpdateCommandInterceptorInstance = new(); + protected static readonly SelectForUpdateCommandInterceptor SelectForUpdateCommandInterceptorSingleton = new(); [SuppressMessage("Style", "CC0072:Remove Async termination when method is not asynchronous.", Justification = "https://github.com/code-cracker/code-cracker/issues/1086")] protected sealed class SelectForUpdateCommandInterceptor : DbCommandInterceptor @@ -72,10 +74,9 @@ public class TbmDbContext : TbmDbContext [SuppressMessage("Style", "IDE0058:Expression value is never used")] protected override void OnConfiguring(DbContextOptionsBuilder options) { - var connectionString = Config.GetConnectionString("Main"); - options.UseMySql(connectionString!, ServerVersion.AutoDetect(connectionString), OnConfiguringMysql) + options.UseNpgsql(GetNpgsqlDataSource(Config.GetConnectionString("Main")).Value, OnConfiguringNpgsql) .ReplaceService() - .AddInterceptors(SelectForUpdateCommandInterceptorInstance) + .AddInterceptors(SelectForUpdateCommandInterceptorSingleton) 
.UseCamelCaseNamingConvention(); var dbSettings = Config.GetSection("DbSettings"); @@ -102,5 +103,17 @@ protected override void OnModelCreating(ModelBuilder b) protected void OnModelCreatingWithFid(ModelBuilder b, uint fid) => b.Entity().ToTable($"tbmc_f{fid}_reply_content_image"); - protected virtual void OnConfiguringMysql(MySqlDbContextOptionsBuilder builder) { } + protected virtual void OnConfiguringNpgsql(NpgsqlDbContextOptionsBuilder builder) { } + + protected virtual void OnBuildingNpgsqlDataSource(NpgsqlDataSourceBuilder builder) { } + + protected virtual Lazy GetNpgsqlDataSource(string? connectionString) => + throw new NotSupportedException(); + + protected Lazy GetNpgsqlDataSourceFactory(string? connectionString) => new(() => + { + var dataSourceBuilder = new NpgsqlDataSourceBuilder(connectionString); + OnBuildingNpgsqlDataSource(dataSourceBuilder); + return dataSourceBuilder.Build(); + }); }