Skip to content

Commit

Permalink
* fix FOR UPDATE is not allowed with window functions @ `AuthorRevi…
Browse files Browse the repository at this point in the history
…sionSaver.SaveAuthorRevisions()`

* fix `IEnumerable<>.Contains(IEnumerable<>, IEqualityComparer<>)` cannot be translated @ `ReplySaver.SaveReplySignatures()`

* fix quoting string literal & identifiers for Postgres flavor
* replace MySQL-specific `IFNULL()` with standard `COALESCE()`
- setting timeout variables for MySQL
@ `PushAllPostContentsIntoSonicWorker.DoWork()`
@ crawler

* suppress Roslyn analyzer rule `AV1568` in `ExifGpsTagValuesParser.ParseGpsCoordinateOrNull()` & `ExifDateTimeTagValuesParser.ParseExifDateTimeOrNull()` @ MetadataConsumer.cs
@ imagePipeline

+ virtual methods `OnBuildingNpgsqlDataSource()`, `GetNpgsqlDataSource()` & method `GetNpgsqlDataSourceFactory()` for mapping enum types in Postgres
* rename virtual method `OnConfiguringMysql()` to `OnConfiguringNpgsql()`
* rename static field `SelectForUpdateCommandInterceptorInstance` to `SelectForUpdateCommandInterceptorSingleton`
@ TbmDbContext.cs

+ class `NpgsqlCamelCaseNameTranslator` for converting enum value names
@ shared
@ c#
  • Loading branch information
n0099 committed May 2, 2024
1 parent 14fed98 commit b28fe96
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 18 deletions.
10 changes: 10 additions & 0 deletions c#/crawler/src/Db/CrawlerDbContext.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Microsoft.EntityFrameworkCore.Infrastructure;
using Npgsql;
using static tbm.Crawler.Db.Revision.ReplyRevision;
using static tbm.Crawler.Db.Revision.SubReplyRevision;
using static tbm.Crawler.Db.Revision.ThreadRevision;
Expand All @@ -8,6 +9,8 @@ namespace tbm.Crawler.Db;

public class CrawlerDbContext(Fid fid) : TbmDbContext<CrawlerDbContext.ModelCacheKeyFactory>
{
private static Lazy<NpgsqlDataSource>? _dataSourceSingleton;

public CrawlerDbContext() : this(fid: 0) { }
public delegate CrawlerDbContext NewDefault();
public delegate CrawlerDbContext New(Fid fid);
Expand Down Expand Up @@ -102,6 +105,13 @@ protected override void OnModelCreating(ModelBuilder b)
b.Entity<Forum>().ToTable("tbm_forum");
}

/// <summary>
/// Registers the mapping between the <see cref="PostType"/> enum and the Postgres enum type
/// <c>tbmcr_triggeredBy</c> on the data source builder.
/// </summary>
/// <param name="builder">The data source builder that receives the enum mapping.</param>
protected override void OnBuildingNpgsqlDataSource(NpgsqlDataSourceBuilder builder)
{
    // enum member names are converted by NpgsqlCamelCaseNameTranslator
    var enumNameTranslator = new NpgsqlCamelCaseNameTranslator();
    _ = builder.MapEnum<PostType>("tbmcr_triggeredBy", enumNameTranslator);
}

/// <summary>
/// Returns the lazily built <see cref="NpgsqlDataSource"/> wrapper kept in the static
/// <c>_dataSourceSingleton</c> field, creating the wrapper on first use.
/// </summary>
/// <param name="connectionString">
/// Connection string passed to the factory when the wrapper is first created;
/// later calls return the stored wrapper and do not read it.
/// </param>
/// <returns>The single <see cref="Lazy{T}"/> instance stored in the static field.</returns>
[SuppressMessage("Critical Code Smell", "S2696:Instance members should not write to \"static\" fields")]
protected override Lazy<NpgsqlDataSource> GetNpgsqlDataSource(string? connectionString) =>

    // a plain `??=` is a non-atomic check-then-write: two racing callers could each store and return
    // their own Lazy and end up building two data sources. EnsureInitialized publishes exactly one
    // instance and returns that same instance to every caller.
    System.Threading.LazyInitializer.EnsureInitialized(
        ref _dataSourceSingleton,
        () => GetNpgsqlDataSourceFactory(connectionString));

public class ModelCacheKeyFactory : IModelCacheKeyFactory
{ // https://stackoverflow.com/questions/51864015/entity-framework-map-model-class-to-table-at-run-time/51899590#51899590
// https://docs.microsoft.com/en-us/ef/core/modeling/dynamic-model
Expand Down
8 changes: 5 additions & 3 deletions c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using LinqToDB.DataProvider.MySql;
using LinqToDB.DataProvider.PostgreSQL;

namespace tbm.Crawler.Tieba.Crawl.Saver;

Expand Down Expand Up @@ -51,10 +51,12 @@ private void SaveAuthorRevisions<TPost, TRevision, TValue>(
{
Helper.GetNowTimestamp(out var now);
var existingRevisionOfExistingUsers = dbSet.AsNoTracking()
.Where(e => e.Fid == db.Fid && posts.Select(p => p.AuthorUid).Distinct().Contains(e.Uid))
.Where(e => e.Fid == db.Fid
&& posts.Select(p => p.AuthorUid).Distinct().Contains(e.Uid))
.Select(latestRevisionProjectionFactory)
.AsCte() // https://stackoverflow.com/questions/49854322/usage-of-for-update-in-window-function-postgres#comment86726589_49854322
.Where(e => e.Rank == 1)
.AsMySql().ForUpdateHint()
.AsPostgreSQL().ForUpdateHint()
.ToLinqToDB().AsEnumerable()
.Join(posts, e => e.Uid, p => p.AuthorUid, (e, p) =>
(
Expand Down
4 changes: 3 additions & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,9 @@ private Action SaveReplySignatures(CrawlerDbContext db, IEnumerable<ReplyPost> r
var existingSignatures = (
from s in db.ReplySignatures.AsTracking().ForUpdate()
where uniqueSignatures.Select(us => us.Id).Contains(s.SignatureId)
&& uniqueSignatures.Select(us => us.XxHash3).Contains(s.XxHash3, new ByteArrayEqualityComparer())

// server side eval doesn't need ByteArrayEqualityComparer
&& uniqueSignatures.Select(us => us.XxHash3).Contains(s.XxHash3)
select s
).ToList();
(from existing in existingSignatures
Expand Down
11 changes: 3 additions & 8 deletions c#/crawler/src/Worker/PushAllPostContentsIntoSonicWorker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ protected override async Task DoWork(CancellationToken stoppingToken)
#pragma warning restore IDISP004 // Don't ignore created IDisposable
.Query<(Fid Fid, int ReplyCount, int SubReplyCount)>(
string.Join(" UNION ALL ", (from f in db.Forums select f.Fid).AsEnumerable().Select(fid =>
$"SELECT {fid} AS Fid,"
+ $"IFNULL((SELECT id FROM tbmc_f{fid}_reply ORDER BY id DESC LIMIT 1), 0) AS ReplyCount,"
+ $"IFNULL((SELECT id FROM tbmc_f{fid}_subReply ORDER BY id DESC LIMIT 1), 0) AS SubReplyCount")))
$"SELECT '{fid}',"
+ $"COALESCE((SELECT id FROM \"tbmc_f{fid}_reply\" ORDER BY id DESC LIMIT 1), 0),"
+ $"COALESCE((SELECT id FROM \"tbmc_f{fid}_subReply\" ORDER BY id DESC LIMIT 1), 0)")))
.ToList();
var forumCount = forumPostCountsTuples.Count * 2; // reply and sub reply
var totalPostCount = forumPostCountsTuples.Sum(t => t.ReplyCount)
Expand All @@ -36,11 +36,6 @@ protected override async Task DoWork(CancellationToken stoppingToken)
await using var dbFactory = dbContextFactory();
var dbWithFid = dbFactory.Value(fid);

// enlarge the default mysql connection read/write timeout to prevent it close connection while pushing
// since pushing post contents into sonic is slower than fetching records from mysql, aka back-pressure
_ = await dbWithFid.Database.ExecuteSqlRawAsync(
"SET SESSION net_read_timeout = 3600; SET SESSION net_write_timeout = 3600;", stoppingToken);

_ = await pusher.Ingest.FlushBucketAsync($"{pusher.CollectionPrefix}replies_content", $"f{fid}");
pushedPostCount += PushPostContentsWithTiming(fid, forumIndex - 1, forumCount, "replies",
replyCount, totalPostCount, pushedPostCount, dbWithFid.ReplyContents.AsNoTracking(),
Expand Down
4 changes: 4 additions & 0 deletions c#/imagePipeline/src/Consumer/MetadataConsumer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,9 @@ private static class ExifGpsTagValuesParser
public static Point? ParseGpsCoordinateOrNull(
IEnumerable<IExifValue> allTagValues,
IEnumerable<Rational>? latitude,
#pragma warning disable AV1568 // Parameter value should not be overwritten in method body
string? latitudeRef,
#pragma warning restore AV1568 // Parameter value should not be overwritten in method body
IEnumerable<Rational>? longitude,
string? longitudeRef)
{
Expand Down Expand Up @@ -253,7 +255,9 @@ private static double ConvertDmsToDd(IReadOnlyList<double> dms)

private static partial class ExifDateTimeTagValuesParser
{
#pragma warning disable AV1568 // Parameter value should not be overwritten in method body
public static DateTimeAndOffset? ParseExifDateTimeOrNull(string? exifDateTime, string? exifFractionalSeconds)
#pragma warning restore AV1568 // Parameter value should not be overwritten in method body
{ // https://gist.github.com/thanatos/eee17100476a336a711e
// tested inputs with valid results:
// "2019:02:07 21:238" => "2019/2/7 21:23:08" , ""
Expand Down
3 changes: 2 additions & 1 deletion c#/imagePipeline/src/Db/ImagePipelineDbContext.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Microsoft.EntityFrameworkCore.Infrastructure;
using Npgsql.EntityFrameworkCore.PostgreSQL.Infrastructure;
using static tbm.ImagePipeline.Db.ImageMetadata;

namespace tbm.ImagePipeline.Db;
Expand All @@ -20,7 +21,7 @@ public ImagePipelineDbContext() : this(fid: 0, script: "") { }
private Fid Fid { get; } = fid;
private string Script { get; } = script;

protected override void OnConfiguringMysql(MySqlDbContextOptionsBuilder builder) => builder.UseNetTopologySuite();
/// <summary>Turns on NetTopologySuite support on the Npgsql provider options.</summary>
/// <param name="builder">The Npgsql options builder being configured.</param>
protected override void OnConfiguringNpgsql(NpgsqlDbContextOptionsBuilder builder)
{
    _ = builder.UseNetTopologySuite();
}

[SuppressMessage("Style", "IDE0058:Expression value is never used")]
protected override void OnModelCreating(ModelBuilder b)
Expand Down
14 changes: 14 additions & 0 deletions c#/shared/src/NpgsqlCamelCaseNameTranslator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using System.Globalization;
using Npgsql;

namespace tbm.Shared;

/// <summary>
/// Name translator that lower-cases the first character of a CLR name (invariant culture)
/// and leaves every following character unchanged, e.g. <c>SubReply</c> becomes <c>subReply</c>.
/// </summary>
/// <seealso href="https://github.com/efcore/EFCore.NamingConventions/blob/0d19b13a8e62ec20779c3cca03c27f200b5b7458/EFCore.NamingConventions/Internal/CamelCaseNameRewriter.cs#L13"/>
/// <seealso href="https://github.com/npgsql/npgsql/pull/1690"/>
public class NpgsqlCamelCaseNameTranslator : INpgsqlNameTranslator
{
    /// <summary>Translates a CLR type name with the same rule as <see cref="TranslateMemberName"/>.</summary>
    /// <param name="clrName">The CLR type name to translate.</param>
    /// <returns><paramref name="clrName"/> with its first character lower-cased.</returns>
    public string TranslateTypeName(string clrName) => TranslateMemberName(clrName);

    /// <summary>Lower-cases the first character of <paramref name="clrName"/>.</summary>
    /// <param name="clrName">The CLR member name to translate.</param>
    /// <returns>
    /// <paramref name="clrName"/> with its first character lower-cased,
    /// or <paramref name="clrName"/> itself when it is empty.
    /// </returns>
    public string TranslateMemberName(string clrName) =>

        // indexing [0] on an empty string would throw IndexOutOfRangeException, so pass it through as is
        clrName.Length == 0
            ? clrName
            : char.ToLower(clrName[0], CultureInfo.InvariantCulture) + clrName[1..];
}
23 changes: 18 additions & 5 deletions c#/shared/src/TbmDbContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
using NLog.Extensions.Logging;
using Npgsql;
using Npgsql.EntityFrameworkCore.PostgreSQL.Infrastructure;
using LogLevel = Microsoft.Extensions.Logging.LogLevel;

namespace tbm.Shared;

public abstract class TbmDbContext : DbContext
{
protected static readonly SelectForUpdateCommandInterceptor SelectForUpdateCommandInterceptorInstance = new();
protected static readonly SelectForUpdateCommandInterceptor SelectForUpdateCommandInterceptorSingleton = new();

[SuppressMessage("Style", "CC0072:Remove Async termination when method is not asynchronous.", Justification = "https://github.com/code-cracker/code-cracker/issues/1086")]
protected sealed class SelectForUpdateCommandInterceptor : DbCommandInterceptor
Expand Down Expand Up @@ -72,10 +74,9 @@ public class TbmDbContext<TModelCacheKeyFactory> : TbmDbContext
[SuppressMessage("Style", "IDE0058:Expression value is never used")]
protected override void OnConfiguring(DbContextOptionsBuilder options)
{
var connectionString = Config.GetConnectionString("Main");
options.UseMySql(connectionString!, ServerVersion.AutoDetect(connectionString), OnConfiguringMysql)
options.UseNpgsql(GetNpgsqlDataSource(Config.GetConnectionString("Main")).Value, OnConfiguringNpgsql)
.ReplaceService<IModelCacheKeyFactory, TModelCacheKeyFactory>()
.AddInterceptors(SelectForUpdateCommandInterceptorInstance)
.AddInterceptors(SelectForUpdateCommandInterceptorSingleton)
.UseCamelCaseNamingConvention();

var dbSettings = Config.GetSection("DbSettings");
Expand All @@ -102,5 +103,17 @@ protected override void OnModelCreating(ModelBuilder b)
protected void OnModelCreatingWithFid(ModelBuilder b, uint fid) =>
b.Entity<ReplyContentImage>().ToTable($"tbmc_f{fid}_reply_content_image");

protected virtual void OnConfiguringMysql(MySqlDbContextOptionsBuilder builder) { }
/// <summary>
/// Extension point for derived contexts to adjust the Npgsql provider options.
/// The base implementation does nothing.
/// </summary>
/// <param name="builder">The Npgsql options builder being configured.</param>
protected virtual void OnConfiguringNpgsql(NpgsqlDbContextOptionsBuilder builder)
{
}

/// <summary>
/// Extension point for derived contexts to adjust the data source builder.
/// The base implementation does nothing.
/// </summary>
/// <param name="builder">The data source builder being configured.</param>
protected virtual void OnBuildingNpgsqlDataSource(NpgsqlDataSourceBuilder builder)
{
}

/// <summary>
/// Supplies the lazily built <see cref="NpgsqlDataSource"/> for this context.
/// The base implementation always throws, so a derived context has to override it.
/// </summary>
/// <param name="connectionString">The connection string the data source should be built from.</param>
/// <returns>Never returns in the base implementation.</returns>
/// <exception cref="NotSupportedException">Always thrown by the base implementation.</exception>
protected virtual Lazy<NpgsqlDataSource> GetNpgsqlDataSource(string? connectionString) =>

    // name the offending context type so a missing override can be diagnosed from the exception alone
    throw new NotSupportedException(
        $"{GetType().Name} must override {nameof(GetNpgsqlDataSource)} to supply a data source"
        + $" (for example one created by {nameof(GetNpgsqlDataSourceFactory)}).");

/// <summary>
/// Creates a <see cref="Lazy{T}"/> that builds an <see cref="NpgsqlDataSource"/> from
/// <paramref name="connectionString"/> when its value is first requested.
/// </summary>
/// <param name="connectionString">The connection string handed to the data source builder.</param>
/// <returns>A new, not yet evaluated lazy wrapper around the data source.</returns>
protected Lazy<NpgsqlDataSource> GetNpgsqlDataSourceFactory(string? connectionString)
{
    return new Lazy<NpgsqlDataSource>(BuildDataSource);

    NpgsqlDataSource BuildDataSource()
    {
        var builder = new NpgsqlDataSourceBuilder(connectionString);

        // give the derived context a chance to customize the builder before building
        OnBuildingNpgsqlDataSource(builder);
        return builder.Build();
    }
}
}

0 comments on commit b28fe96

Please sign in to comment.