Skip to content

Commit

Permalink
Fix encoding handling for Catherine & CFB
Browse files Browse the repository at this point in the history
  • Loading branch information
tge-was-taken committed Dec 1, 2024
1 parent 74914ae commit 72d3064
Show file tree
Hide file tree
Showing 10 changed files with 223 additions and 37 deletions.
11 changes: 9 additions & 2 deletions Source/AtlusScriptCompiler/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ private static void DisplayUsage()
Console.WriteLine(" P3 Persona 3's custom encoding");
Console.WriteLine(" P4 Persona 4's custom encoding");
Console.WriteLine(" P5 Persona 5's custom encoding");
Console.WriteLine(" CAT Catherine's custom encoding");
Console.WriteLine(" CFB Catherine: Full Body's custom encoding");
Console.WriteLine(" UT (utf-8) UTF-8 Encoding. Used by Persona 3 Reload");
Console.WriteLine(" Unicode (utf-16) UTF-16 Encoding.");
Console.WriteLine(" Unicode Big Endian (utf-16-be) Big Endian UTF-16 Encoding.");
Expand Down Expand Up @@ -385,6 +387,12 @@ private static bool TryParseArguments(string[] args)
case "utf-16-be":
MessageScriptEncoding = Encoding.BigEndianUnicode;
break;
case "cat":
MessageScriptEncoding = CatherineEncoding.Instance;
break;
case "cfb":
MessageScriptEncoding = CatherineFullBodyEncoding.Instance;
break;
default:
try
{
Expand Down Expand Up @@ -556,7 +564,6 @@ private static bool TryParseArguments(string[] args)
}

if (!UEWrapped) Logger.Info($"Output file path is set to {OutputFilePath}");

return true;
}

Expand Down Expand Up @@ -1099,6 +1106,6 @@ public enum OutputFileFormat
V3,
V3BE,
V4,
V4BE
V4BE,
}
}
2 changes: 1 addition & 1 deletion Source/AtlusScriptCompiler/Properties/launchSettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"profiles": {
"AtlusScriptCompiler": {
"commandName": "Project",
"commandLineArgs": "-Decompile -In \"C:\\Users\\cweer\\Downloads\\Xrd777+Astrea-Extracted\\Xrd777+Astrea-Extracted\\Game\\Xrd777\\Field\\Data\\DataAsset\\Bf\\Dungeon\\Hit\\patch.flow.bf\" -Library P3RE"
"commandLineArgs": "-Decompile -In \"F:\\Software\\Games\\PC\\SteamLibrary\\steamapps\\common\\CatherineClassic\\data\\event\\e003\\010\\m000.bmd\" -Encoding cat"
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
using System.Collections.Generic;

namespace AtlusScriptLibrary.Common.Text.Encodings;

/// <summary>
/// Common base of the Catherine encodings. Supplies the custom code table to
/// <see cref="CustomUnicodeEncoding"/> and resolves the instance for a given byte order.
/// </summary>
public abstract class CatherineEncodingBase : CustomUnicodeEncoding
{
    // Custom code table: the 16-bit code 0xFFE3 is mapped to the space character.
    protected static readonly Dictionary<ushort, char> _codeToChar = new Dictionary<ushort, char>
    {
        [0xFFE3] = ' ',
    };

    /// <summary>Creates the encoding for the requested byte order using the shared code table.</summary>
    /// <param name="isBigEndian">True for the big-endian variant, false for little-endian.</param>
    protected CatherineEncodingBase(bool isBigEndian)
        : base(isBigEndian, _codeToChar)
    {
    }

    /// <summary>Returns the singleton Catherine encoding that matches the requested byte order.</summary>
    /// <param name="isBigEndian">True to get the big-endian singleton, false for the little-endian one.</param>
    public override CustomUnicodeEncoding GetEncodingForEndianness(bool isBigEndian)
    {
        if (isBigEndian)
        {
            return CatherineBigEndianEncoding.Instance;
        }

        return CatherineEncoding.Instance;
    }
}

/// <summary>
/// Big-endian variant of the Catherine encoding, exposed as a singleton.
/// </summary>
public class CatherineBigEndianEncoding : CatherineEncodingBase
{
    /// <summary>The single shared instance.</summary>
    public static CatherineBigEndianEncoding Instance { get; } = new CatherineBigEndianEncoding();

    // Private so that the only instance is the one held by Instance.
    private CatherineBigEndianEncoding()
        : base(true)
    {
    }
}

/// <summary>
/// Little-endian variant of the Catherine encoding, exposed as a singleton.
/// </summary>
public class CatherineEncoding : CatherineEncodingBase
{
    /// <summary>The single shared instance.</summary>
    public static CatherineEncoding Instance { get; } = new CatherineEncoding();

    // Private so that the only instance is the one held by Instance.
    private CatherineEncoding()
        : base(false)
    {
    }
}

/// <summary>
/// Common base of the Catherine: Full Body encodings. Supplies an (currently empty) custom code
/// table to <see cref="CustomUnicodeEncoding"/> and resolves the instance for a given byte order.
/// </summary>
public abstract class CatherineFullBodyEncodingBase : CustomUnicodeEncoding
{
    // No custom codes are defined; the table is intentionally left empty here.
    protected static readonly Dictionary<ushort, char> _codeToChar = new Dictionary<ushort, char>();

    /// <summary>Creates the encoding for the requested byte order using the shared code table.</summary>
    /// <param name="isBigEndian">True for the big-endian variant, false for little-endian.</param>
    protected CatherineFullBodyEncodingBase(bool isBigEndian)
        : base(isBigEndian, _codeToChar)
    {
    }

    /// <summary>Returns the singleton Full Body encoding that matches the requested byte order.</summary>
    /// <param name="isBigEndian">True to get the big-endian singleton, false for the little-endian one.</param>
    public override CustomUnicodeEncoding GetEncodingForEndianness(bool isBigEndian)
    {
        if (isBigEndian)
        {
            return CatherineFullBodyBigEndianEncoding.Instance;
        }

        return CatherineFullBodyEncoding.Instance;
    }
}

/// <summary>
/// Little-endian variant of the Catherine: Full Body encoding, exposed as a singleton.
/// </summary>
public class CatherineFullBodyEncoding : CatherineFullBodyEncodingBase
{
    /// <summary>The single shared instance.</summary>
    public static CatherineFullBodyEncoding Instance { get; } = new CatherineFullBodyEncoding();

    // Private so that the only instance is the one held by Instance.
    private CatherineFullBodyEncoding()
        : base(false)
    {
    }
}

/// <summary>
/// Big-endian variant of the Catherine: Full Body encoding, exposed as a singleton.
/// </summary>
public class CatherineFullBodyBigEndianEncoding : CatherineFullBodyEncodingBase
{
    /// <summary>The single shared instance.</summary>
    public static CatherineFullBodyBigEndianEncoding Instance { get; } = new CatherineFullBodyBigEndianEncoding();

    // Private so that the only instance is the one held by Instance.
    private CatherineFullBodyBigEndianEncoding()
        : base(true)
    {
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
using System;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace AtlusScriptLibrary.Common.Text.Encodings;

/// <summary>
/// A UTF-16 based encoding in which selected 16-bit codes are remapped to other characters.
/// Every character occupies exactly one 16-bit code unit: characters/codes found in the custom
/// table are translated, everything else is copied through unchanged as a raw UTF-16 code unit.
/// </summary>
public abstract class CustomUnicodeEncoding : Encoding
{
    private readonly Dictionary<char, ushort> _charToCode;
    private readonly Dictionary<ushort, char> _codeToChar;
    private readonly Encoding _baseEncoding;
    private readonly bool _isBigEndian;

    /// <summary>
    /// Initializes the encoding with a byte order and a custom code table.
    /// </summary>
    /// <param name="isBigEndian">True to read/write 16-bit codes in big-endian order, false for little-endian.</param>
    /// <param name="codeToCharMap">Maps a custom 16-bit code to the character it decodes to.</param>
    /// <exception cref="ArgumentNullException"><paramref name="codeToCharMap"/> is null.</exception>
    protected CustomUnicodeEncoding(bool isBigEndian, Dictionary<ushort, char> codeToCharMap)
    {
        if (codeToCharMap == null) throw new ArgumentNullException(nameof(codeToCharMap));

        _baseEncoding = isBigEndian ? BigEndianUnicode : Unicode;
        _isBigEndian = isBigEndian;

        // Snapshot the table so the decode and encode directions cannot drift apart if the
        // caller's dictionary is modified after construction.
        _codeToChar = new Dictionary<ushort, char>(codeToCharMap);

        // Build the reverse table. When several codes decode to the same character the first
        // one enumerated is used for encoding instead of failing construction.
        _charToCode = new Dictionary<char, ushort>(_codeToChar.Count);
        foreach (KeyValuePair<ushort, char> entry in _codeToChar)
            _charToCode.TryAdd(entry.Value, entry.Key);
    }

    /// <summary>Character to custom code table used when encoding.</summary>
    public IReadOnlyDictionary<char, ushort> CharToCustomCode => _charToCode;

    /// <summary>Custom code to character table used when decoding.</summary>
    public IReadOnlyDictionary<ushort, char> CustomCodeToChar => _codeToChar;

    /// <summary>Returns the variant of this encoding for the requested byte order.</summary>
    /// <param name="isBigEndian">True for the big-endian variant, false for little-endian.</param>
    public abstract CustomUnicodeEncoding GetEncodingForEndianness(bool isBigEndian);

    /// <inheritdoc/>
    public override int GetByteCount(char[] chars, int index, int count)
        => _baseEncoding.GetByteCount(chars, index, count);

    /// <summary>
    /// Encodes characters to 16-bit codes. Mapped characters are written as their custom code;
    /// all other characters are written as their raw UTF-16 code unit.
    /// </summary>
    /// <returns>The number of bytes written (two per input character).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="chars"/> or <paramref name="bytes"/> is null.</exception>
    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
    {
        if (chars == null) throw new ArgumentNullException(nameof(chars));
        if (bytes == null) throw new ArgumentNullException(nameof(bytes));

        int bytesWritten = 0;
        for (int i = 0; i < charCount; i++)
        {
            char currentChar = chars[charIndex + i];

            // Unmapped characters are copied as-is. Encoding them one at a time through the base
            // encoding would replace each half of a surrogate pair with U+FFFD.
            ushort code = _charToCode.TryGetValue(currentChar, out ushort customCode)
                ? customCode
                : currentChar;

            WriteCode(bytes.AsSpan(byteIndex + bytesWritten), code);
            bytesWritten += 2;
        }
        return bytesWritten;
    }

    /// <inheritdoc/>
    public override int GetCharCount(byte[] bytes, int index, int count)
        => _baseEncoding.GetCharCount(bytes, index, count);

    /// <summary>
    /// Decodes 16-bit codes to characters. Mapped codes are translated through the custom table;
    /// all other codes are copied as raw UTF-16 code units. A dangling final byte (odd
    /// <paramref name="byteCount"/>) is handed to the base UTF-16 decoder instead of throwing.
    /// </summary>
    /// <returns>The number of characters written.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        if (bytes == null) throw new ArgumentNullException(nameof(bytes));
        if (chars == null) throw new ArgumentNullException(nameof(chars));

        int charsWritten = 0;
        int offset = 0;
        for (; offset + 1 < byteCount; offset += 2)
        {
            ushort code = ReadCode(bytes.AsSpan(byteIndex + offset));

            // Unmapped codes are copied as-is so surrogate pairs survive and no per-code
            // temporary array is needed.
            chars[charIndex + charsWritten++] = _codeToChar.TryGetValue(code, out char decodedChar)
                ? decodedChar
                : (char)code;
        }

        if (offset < byteCount)
        {
            // Incomplete trailing code unit: let the base decoder apply its own handling so the
            // result stays in line with what GetCharCount reported for the same input.
            charsWritten += _baseEncoding.GetChars(bytes, byteIndex + offset, 1, chars, charIndex + charsWritten);
        }

        return charsWritten;
    }

    /// <inheritdoc/>
    public override int GetMaxByteCount(int charCount)
        => _baseEncoding.GetMaxByteCount(charCount);

    /// <inheritdoc/>
    public override int GetMaxCharCount(int byteCount)
        => _baseEncoding.GetMaxCharCount(byteCount);

    // Reads one 16-bit code from the start of the span in this encoding's byte order.
    private ushort ReadCode(ReadOnlySpan<byte> source)
        => _isBigEndian
            ? BinaryPrimitives.ReadUInt16BigEndian(source)
            : BinaryPrimitives.ReadUInt16LittleEndian(source);

    // Writes one 16-bit code to the start of the span in this encoding's byte order.
    private void WriteCode(Span<byte> destination, ushort code)
    {
        if (_isBigEndian)
            BinaryPrimitives.WriteUInt16BigEndian(destination, code);
        else
            BinaryPrimitives.WriteUInt16LittleEndian(destination, code);
    }
}
22 changes: 22 additions & 0 deletions Source/AtlusScriptLibrary/Common/Text/Encodings/EncodingHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
using AtlusScriptLibrary.Common.IO;
using System.Text;

namespace AtlusScriptLibrary.Common.Text.Encodings;

/// <summary>
/// Helpers for selecting the byte-order variant of a text encoding.
/// </summary>
public class EncodingHelper
{
    // Code page reported by big-endian UTF-16 encodings (little-endian UTF-16 reports 1200).
    private const int Utf16BigEndianCodePage = 1201;

    /// <summary>
    /// Returns the variant of <paramref name="encoding"/> that matches the requested byte order.
    /// </summary>
    /// <param name="encoding">The encoding to adapt. May be null, in which case null is returned.</param>
    /// <param name="isBigEndian">True if big-endian output is required, false for little-endian.</param>
    /// <returns>
    /// For a <see cref="CustomUnicodeEncoding"/>, whatever its own
    /// <see cref="CustomUnicodeEncoding.GetEncodingForEndianness"/> returns. For a UTF-16
    /// <see cref="UnicodeEncoding"/> with the wrong byte order, <see cref="Encoding.BigEndianUnicode"/>
    /// or <see cref="Encoding.Unicode"/>. Otherwise <paramref name="encoding"/> itself, unchanged.
    /// </returns>
    public static Encoding GetEncodingForEndianness(Encoding encoding, bool isBigEndian)
    {
        if (encoding is CustomUnicodeEncoding custom)
            return custom.GetEncodingForEndianness(isBigEndian);

        // Convert UTF-16 in both directions, and for any UnicodeEncoding instance rather than
        // only the Encoding.Unicode singleton, so the text byte order always follows the format.
        if (encoding is UnicodeEncoding)
        {
            bool encodingIsBigEndian = encoding.CodePage == Utf16BigEndianCodePage;
            if (encodingIsBigEndian != isBigEndian)
                return isBigEndian ? Encoding.BigEndianUnicode : Encoding.Unicode;
        }

        // Not byte-order sensitive (or already correct): hand back the same instance.
        return encoding;
    }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using AtlusScriptLibrary.Common.Libraries;
using AtlusScriptLibrary.Common.Logging;
using AtlusScriptLibrary.Common.Text.Encodings;
using AtlusScriptLibrary.FlowScriptLanguage.Compiler.Parser;
using AtlusScriptLibrary.FlowScriptLanguage.Compiler.Processing;
using AtlusScriptLibrary.FlowScriptLanguage.Decompiler;
Expand Down Expand Up @@ -59,13 +60,18 @@ public class FlowScriptCompiler

private int mStackValueCount; // for debugging
private IntrinsicSupport mInstrinsic;
private Encoding encoding;

private ScopeContext Scope => mScopeStack.Peek();

/// <summary>
/// Gets or sets the encoding to use for any imported MessageScripts.
/// </summary>
public Encoding Encoding { get; set; }
public Encoding Encoding
{
    get => encoding;
    // Store the byte-order variant of the assigned encoding, chosen from mFormatVersion as it is
    // at the time of assignment. The incoming 'value' must be passed here: passing the backing
    // field instead would discard every assignment and leave the property at its initial value.
    set => encoding = EncodingHelper.GetEncodingForEndianness(value, mFormatVersion.HasFlag(FormatVersion.BigEndian));
}

/// <summary>
/// Gets or sets the library registry to use for any imported MessageScripts.
Expand Down
1 change: 1 addition & 0 deletions Source/AtlusScriptLibrary/FlowScriptLanguage/Enums.cs
Original file line number Diff line number Diff line change
Expand Up @@ -199,4 +199,5 @@ public enum FormatVersion : uint
Version3BigEndian = BinaryFormatVersion.Version3BigEndian,
Version4 = BinaryFormatVersion.Version4,
Version4BigEndian = BinaryFormatVersion.Version4BigEndian,
BigEndian = BinaryFormatVersion.BigEndian,
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using AtlusScriptLibrary.Common.IO;
using AtlusScriptLibrary.Common.Text.Encodings;
using System;
using System.Buffers.Binary;
using System.Collections.Generic;
Expand Down Expand Up @@ -29,11 +30,15 @@ public MessageScriptBinaryV2Builder(BinaryFormatVersion version)
mSpeakerNames = new List<byte[]>();
mPosition = BinaryHeaderV2.SIZE+BinaryHeader2.SIZE;
mDialogs = new List<Tuple<BinaryDialogKind, object>>();
mEncoding = EncodingHelper.GetEncodingForEndianness(Encoding.Unicode, mFormatVersion.HasFlag(BinaryFormatVersion.BigEndian));
}

internal void SetEncoding(Encoding encoding)
{
mEncoding = encoding;
if (encoding == null) throw new ArgumentNullException(nameof(encoding));
if (encoding.IsSingleByte)
throw new ArgumentException($"Single byte encoding not supported", nameof(encoding));
mEncoding = EncodingHelper.GetEncodingForEndianness(encoding, mFormatVersion.HasFlag(BinaryFormatVersion.BigEndian));
}

public void AddDialog(MessageDialog message)
Expand Down Expand Up @@ -267,11 +272,7 @@ private void ProcessToken(IToken token, List<byte> bytes)
private void ProcessTextToken(StringToken token, List<byte> bytes)
{
var text = token.Value;
text = text.Replace(" ", "\uFFE3"); // hack: replace space character

var textBytes = mFormatVersion.HasFlag(BinaryFormatVersion.BigEndian) ?
Encoding.BigEndianUnicode.GetBytes(text) :
Encoding.Unicode.GetBytes(text);
var textBytes = mEncoding.GetBytes(text);

// simple add to the list of bytes
bytes.AddRange(textBytes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using Antlr4.Runtime.Tree;
using AtlusScriptLibrary.Common.Libraries;
using AtlusScriptLibrary.Common.Logging;
using AtlusScriptLibrary.Common.Text.Encodings;
using System;
using System.Collections.Generic;
using System.Diagnostics;
Expand Down Expand Up @@ -52,7 +53,7 @@ public class MessageScriptCompiler
public MessageScriptCompiler(FormatVersion version, Encoding encoding = null)
{
mVersion = version;
mEncoding = encoding;
mEncoding = EncodingHelper.GetEncodingForEndianness(encoding, version.HasFlag(FormatVersion.BigEndian));
mLogger = new Logger(nameof(MessageScriptCompiler));
mVariables = new Dictionary<string, int>();
mImports = new List<MessageScript>();
Expand Down
Loading

0 comments on commit 72d3064

Please sign in to comment.