diff --git a/Source/AtlusScriptCompiler/Program.cs b/Source/AtlusScriptCompiler/Program.cs index 69833c4..ce173ea 100644 --- a/Source/AtlusScriptCompiler/Program.cs +++ b/Source/AtlusScriptCompiler/Program.cs @@ -101,6 +101,8 @@ private static void DisplayUsage() Console.WriteLine(" P3 Persona 3's custom encoding"); Console.WriteLine(" P4 Persona 4's custom encoding"); Console.WriteLine(" P5 Persona 5's custom encoding"); + Console.WriteLine(" CAT Catherine's custom encoding"); + Console.WriteLine(" CFB Catherine: Full Body's custom encoding"); Console.WriteLine(" UT (utf-8) UTF-8 Encoding. Used by Persona 3 Reload"); Console.WriteLine(" Unicode (utf-16) UTF-16 Encoding."); Console.WriteLine(" Unicode Big Endian (utf-16-be) Big Endian UTF-16 Encoding."); @@ -385,6 +387,12 @@ private static bool TryParseArguments(string[] args) case "utf-16-be": MessageScriptEncoding = Encoding.BigEndianUnicode; break; + case "cat": + MessageScriptEncoding = CatherineEncoding.Instance; + break; + case "cfb": + MessageScriptEncoding = CatherineFullBodyEncoding.Instance; + break; default: try { @@ -556,7 +564,6 @@ private static bool TryParseArguments(string[] args) } if (!UEWrapped) Logger.Info($"Output file path is set to {OutputFilePath}"); - return true; } @@ -1099,6 +1106,6 @@ public enum OutputFileFormat V3, V3BE, V4, - V4BE + V4BE, } } \ No newline at end of file diff --git a/Source/AtlusScriptCompiler/Properties/launchSettings.json b/Source/AtlusScriptCompiler/Properties/launchSettings.json index 5a08657..a0912e0 100644 --- a/Source/AtlusScriptCompiler/Properties/launchSettings.json +++ b/Source/AtlusScriptCompiler/Properties/launchSettings.json @@ -2,7 +2,7 @@ "profiles": { "AtlusScriptCompiler": { "commandName": "Project", - "commandLineArgs": "-Decompile -In \"C:\\Users\\cweer\\Downloads\\Xrd777+Astrea-Extracted\\Xrd777+Astrea-Extracted\\Game\\Xrd777\\Field\\Data\\DataAsset\\Bf\\Dungeon\\Hit\\patch.flow.bf\" -Library P3RE" + "commandLineArgs": "-Decompile -In 
\"F:\\Software\\Games\\PC\\SteamLibrary\\steamapps\\common\\CatherineClassic\\data\\event\\e003\\010\\m000.bmd\" -Encoding cat" } } } \ No newline at end of file diff --git a/Source/AtlusScriptLibrary/Common/Text/Encodings/CatherineEncoding.cs b/Source/AtlusScriptLibrary/Common/Text/Encodings/CatherineEncoding.cs new file mode 100644 index 0000000..8096011 --- /dev/null +++ b/Source/AtlusScriptLibrary/Common/Text/Encodings/CatherineEncoding.cs @@ -0,0 +1,54 @@ +using System.Collections.Generic; + +namespace AtlusScriptLibrary.Common.Text.Encodings; + +public abstract class CatherineEncodingBase : CustomUnicodeEncoding +{ + protected readonly static Dictionary _codeToChar = new() + { + { 0xFFE3, ' ' } + }; + + protected CatherineEncodingBase(bool isBigEndian) + : base(isBigEndian, _codeToChar) { } + + public override CustomUnicodeEncoding GetEncodingForEndianness(bool isBigEndian) + => isBigEndian ? CatherineBigEndianEncoding.Instance : CatherineEncoding.Instance; +} + +public class CatherineBigEndianEncoding : CatherineEncodingBase +{ + public static CatherineBigEndianEncoding Instance { get; } = new(); + private CatherineBigEndianEncoding() : base(true) { } +} + +public class CatherineEncoding : CatherineEncodingBase +{ + public static CatherineEncoding Instance { get; } = new(); + private CatherineEncoding() : base(false) { } +} + +public abstract class CatherineFullBodyEncodingBase : CustomUnicodeEncoding +{ + protected readonly static Dictionary _codeToChar = new() + { + }; + + protected CatherineFullBodyEncodingBase(bool isBigEndian) + : base(isBigEndian, _codeToChar) { } + + public override CustomUnicodeEncoding GetEncodingForEndianness(bool isBigEndian) + => isBigEndian ? 
CatherineFullBodyBigEndianEncoding.Instance : CatherineFullBodyEncoding.Instance; +} + +public class CatherineFullBodyEncoding : CatherineFullBodyEncodingBase +{ + public static CatherineFullBodyEncoding Instance { get; } = new(); + private CatherineFullBodyEncoding() : base(false) { } +} + +public class CatherineFullBodyBigEndianEncoding : CatherineFullBodyEncodingBase +{ + public static CatherineFullBodyBigEndianEncoding Instance { get; } = new(); + private CatherineFullBodyBigEndianEncoding() : base(true) { } +} diff --git a/Source/AtlusScriptLibrary/Common/Text/Encodings/CustomUnicodeEncoding.cs b/Source/AtlusScriptLibrary/Common/Text/Encodings/CustomUnicodeEncoding.cs new file mode 100644 index 0000000..2d6e86e --- /dev/null +++ b/Source/AtlusScriptLibrary/Common/Text/Encodings/CustomUnicodeEncoding.cs @@ -0,0 +1,87 @@ +using System; +using System.Buffers.Binary; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace AtlusScriptLibrary.Common.Text.Encodings; + +public abstract class CustomUnicodeEncoding : Encoding +{ + private readonly Dictionary _charToCode; + private readonly Dictionary _codeToChar; + private readonly Encoding _baseEncoding; + private readonly bool _isBigEndian; + + protected CustomUnicodeEncoding(bool isBigEndian, Dictionary codeToCharMap) + { + _baseEncoding = isBigEndian ? 
BigEndianUnicode : Unicode; + _isBigEndian = isBigEndian; + _codeToChar = codeToCharMap; + _charToCode = _codeToChar.ToDictionary(x => x.Value, x => x.Key); + } + + public IReadOnlyDictionary CharToCustomCode => _charToCode; + public IReadOnlyDictionary CustomCodeToChar => _codeToChar; + public abstract CustomUnicodeEncoding GetEncodingForEndianness(bool isBigEndian); + + public override int GetByteCount(char[] chars, int index, int count) + => _baseEncoding.GetByteCount(chars, index, count); + + public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) + { + int bytesWritten = 0; + for (int i = 0; i < charCount; i++) + { + char currentChar = chars[charIndex + i]; + if (_charToCode.TryGetValue(currentChar, out ushort code)) + { + if (_isBigEndian) + BinaryPrimitives.WriteUInt16BigEndian(bytes.AsSpan(byteIndex + bytesWritten), code); + else + BinaryPrimitives.WriteUInt16LittleEndian(bytes.AsSpan(byteIndex + bytesWritten), code); + + bytesWritten += 2; + } + else + { + // Fallback to base encoding for unmapped characters + bytesWritten += _baseEncoding.GetBytes(chars, charIndex + i, 1, bytes, byteIndex + bytesWritten); + } + } + return bytesWritten; + } + + public override int GetCharCount(byte[] bytes, int index, int count) + => _baseEncoding.GetCharCount(bytes, index, count); + + public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) + { + int charsWritten = 0; + for (int i = 0; i < byteCount; i += 2) + { + ushort code = _isBigEndian + ? 
BinaryPrimitives.ReadUInt16BigEndian(bytes.AsSpan(byteIndex + i)) + : BinaryPrimitives.ReadUInt16LittleEndian(bytes.AsSpan(byteIndex + i)); + + if (_codeToChar.TryGetValue(code, out char decodedChar)) + { + chars[charIndex + charsWritten++] = decodedChar; + } + else + { + // Fallback to base decoding for unmapped codes + char[] fallbackChar = new char[1]; + _baseEncoding.GetChars(bytes, byteIndex + i, 2, fallbackChar, 0); + chars[charIndex + charsWritten++] = fallbackChar[0]; + } + } + return charsWritten; + } + + public override int GetMaxByteCount(int charCount) + => _baseEncoding.GetMaxByteCount(charCount); + + public override int GetMaxCharCount(int byteCount) + => _baseEncoding.GetMaxCharCount(byteCount); +} \ No newline at end of file diff --git a/Source/AtlusScriptLibrary/Common/Text/Encodings/EncodingHelper.cs b/Source/AtlusScriptLibrary/Common/Text/Encodings/EncodingHelper.cs new file mode 100644 index 0000000..2831e0e --- /dev/null +++ b/Source/AtlusScriptLibrary/Common/Text/Encodings/EncodingHelper.cs @@ -0,0 +1,22 @@ +using AtlusScriptLibrary.Common.IO; +using System.Text; + +namespace AtlusScriptLibrary.Common.Text.Encodings; + +public class EncodingHelper +{ + public static Encoding GetEncodingForEndianness(Encoding encoding, bool isBigEndian) + { + if (encoding == Encoding.Unicode) + { + if (isBigEndian) + return Encoding.BigEndianUnicode; + } + else if (encoding is CustomUnicodeEncoding cue) + { + return cue.GetEncodingForEndianness(isBigEndian); + } + + return encoding; + } +} diff --git a/Source/AtlusScriptLibrary/FlowScriptLanguage/Compiler/FlowScriptCompiler.cs b/Source/AtlusScriptLibrary/FlowScriptLanguage/Compiler/FlowScriptCompiler.cs index 7510677..758f4bb 100644 --- a/Source/AtlusScriptLibrary/FlowScriptLanguage/Compiler/FlowScriptCompiler.cs +++ b/Source/AtlusScriptLibrary/FlowScriptLanguage/Compiler/FlowScriptCompiler.cs @@ -1,5 +1,6 @@ using AtlusScriptLibrary.Common.Libraries; using AtlusScriptLibrary.Common.Logging; +using 
AtlusScriptLibrary.Common.Text.Encodings; using AtlusScriptLibrary.FlowScriptLanguage.Compiler.Parser; using AtlusScriptLibrary.FlowScriptLanguage.Compiler.Processing; using AtlusScriptLibrary.FlowScriptLanguage.Decompiler; @@ -59,13 +60,18 @@ public class FlowScriptCompiler private int mStackValueCount; // for debugging private IntrinsicSupport mInstrinsic; + private Encoding encoding; private ScopeContext Scope => mScopeStack.Peek(); /// /// Gets or sets the encoding to use for any imported MessageScripts. /// - public Encoding Encoding { get; set; } + public Encoding Encoding + { + get => encoding; + set => encoding = EncodingHelper.GetEncodingForEndianness(value, mFormatVersion.HasFlag(FormatVersion.BigEndian)); + } /// /// Gets or sets the library registry to use for any imported MessageScripts. diff --git a/Source/AtlusScriptLibrary/FlowScriptLanguage/Enums.cs b/Source/AtlusScriptLibrary/FlowScriptLanguage/Enums.cs index 6bef9d9..acaf6b3 100644 --- a/Source/AtlusScriptLibrary/FlowScriptLanguage/Enums.cs +++ b/Source/AtlusScriptLibrary/FlowScriptLanguage/Enums.cs @@ -199,4 +199,5 @@ public enum FormatVersion : uint Version3BigEndian = BinaryFormatVersion.Version3BigEndian, Version4 = BinaryFormatVersion.Version4, Version4BigEndian = BinaryFormatVersion.Version4BigEndian, + BigEndian = BinaryFormatVersion.BigEndian, } diff --git a/Source/AtlusScriptLibrary/MessageScriptLanguage/BinaryModel/V2/MessageScriptBinaryBuilderV2.cs b/Source/AtlusScriptLibrary/MessageScriptLanguage/BinaryModel/V2/MessageScriptBinaryBuilderV2.cs index aea1c4d..ea07ba7 100644 --- a/Source/AtlusScriptLibrary/MessageScriptLanguage/BinaryModel/V2/MessageScriptBinaryBuilderV2.cs +++ b/Source/AtlusScriptLibrary/MessageScriptLanguage/BinaryModel/V2/MessageScriptBinaryBuilderV2.cs @@ -1,4 +1,5 @@ using AtlusScriptLibrary.Common.IO; +using AtlusScriptLibrary.Common.Text.Encodings; using System; using System.Buffers.Binary; using System.Collections.Generic; @@ -29,11 +30,15 @@ public 
MessageScriptBinaryV2Builder(BinaryFormatVersion version) mSpeakerNames = new List(); mPosition = BinaryHeaderV2.SIZE+BinaryHeader2.SIZE; mDialogs = new List>(); + mEncoding = EncodingHelper.GetEncodingForEndianness(Encoding.Unicode, mFormatVersion.HasFlag(BinaryFormatVersion.BigEndian)); } internal void SetEncoding(Encoding encoding) { - mEncoding = encoding; + if (encoding == null) throw new ArgumentNullException(nameof(encoding)); + if (encoding.IsSingleByte) + throw new ArgumentException($"Single byte encoding not supported", nameof(encoding)); + mEncoding = EncodingHelper.GetEncodingForEndianness(encoding, mFormatVersion.HasFlag(BinaryFormatVersion.BigEndian)); } public void AddDialog(MessageDialog message) @@ -267,11 +272,7 @@ private void ProcessToken(IToken token, List bytes) private void ProcessTextToken(StringToken token, List bytes) { var text = token.Value; - text = text.Replace(" ", "\uFFE3"); // hack: replace space character - - var textBytes = mFormatVersion.HasFlag(BinaryFormatVersion.BigEndian) ? 
- Encoding.BigEndianUnicode.GetBytes(text) : - Encoding.Unicode.GetBytes(text); + var textBytes = mEncoding.GetBytes(text); // simple add to the list of bytes bytes.AddRange(textBytes); diff --git a/Source/AtlusScriptLibrary/MessageScriptLanguage/Compiler/MessageScriptCompiler.cs b/Source/AtlusScriptLibrary/MessageScriptLanguage/Compiler/MessageScriptCompiler.cs index e111344..adb9477 100644 --- a/Source/AtlusScriptLibrary/MessageScriptLanguage/Compiler/MessageScriptCompiler.cs +++ b/Source/AtlusScriptLibrary/MessageScriptLanguage/Compiler/MessageScriptCompiler.cs @@ -2,6 +2,7 @@ using Antlr4.Runtime.Tree; using AtlusScriptLibrary.Common.Libraries; using AtlusScriptLibrary.Common.Logging; +using AtlusScriptLibrary.Common.Text.Encodings; using System; using System.Collections.Generic; using System.Diagnostics; @@ -52,7 +53,7 @@ public class MessageScriptCompiler public MessageScriptCompiler(FormatVersion version, Encoding encoding = null) { mVersion = version; - mEncoding = encoding; + mEncoding = EncodingHelper.GetEncodingForEndianness(encoding, version.HasFlag(FormatVersion.BigEndian)); mLogger = new Logger(nameof(MessageScriptCompiler)); mVariables = new Dictionary(); mImports = new List(); diff --git a/Source/AtlusScriptLibrary/MessageScriptLanguage/MessageScript.cs b/Source/AtlusScriptLibrary/MessageScriptLanguage/MessageScript.cs index 0087782..3ff2c79 100644 --- a/Source/AtlusScriptLibrary/MessageScriptLanguage/MessageScript.cs +++ b/Source/AtlusScriptLibrary/MessageScriptLanguage/MessageScript.cs @@ -1,4 +1,5 @@ using AtlusScriptLibrary.Common.IO; +using AtlusScriptLibrary.Common.Text.Encodings; using AtlusScriptLibrary.MessageScriptLanguage.BinaryModel; using AtlusScriptLibrary.MessageScriptLanguage.BinaryModel.V1; using AtlusScriptLibrary.MessageScriptLanguage.BinaryModel.V2; @@ -43,8 +44,8 @@ public static MessageScript FromBinary(MessageScriptBinary binary, FormatVersion { Id = binary.Header.UserId, FormatVersion = version == FormatVersion.Detect ? 
(FormatVersion)binary.FormatVersion : version, - Encoding = encoding }; + instance.Encoding = EncodingHelper.GetEncodingForEndianness(encoding, instance.FormatVersion.HasFlag(FormatVersion.BigEndian)) ?? Encoding.ASCII; // Convert the binary messages to their counterpart var labelOccurences = new Dictionary(); @@ -86,7 +87,7 @@ public static MessageScript FromBinary(MessageScriptBinary binary, FormatVersion if (binaryMessage.SpeakerId < binary.SpeakerTableHeader.SpeakerCount) { speakerName = ParseSpeakerText(binary.SpeakerTableHeader.SpeakerNameArray - .Value[binaryMessage.SpeakerId].Value, instance.FormatVersion, encoding == null ? Encoding.ASCII : encoding); + .Value[binaryMessage.SpeakerId].Value, instance.FormatVersion, instance.Encoding); } message = new MessageDialog(name, new NamedSpeaker(speakerName)); @@ -112,7 +113,7 @@ public static MessageScript FromBinary(MessageScriptBinary binary, FormatVersion if (pageCount != 0) { // Parse the line data - ParsePages(message, pageStartAddresses, buffer, instance.FormatVersion, encoding == null ? Encoding.ASCII : encoding); + ParsePages(message, pageStartAddresses, buffer, instance.FormatVersion, instance.Encoding); } // Add it to the message list @@ -127,12 +128,16 @@ public static MessageScript FromBinary(MessageScriptBinaryV2 binary, FormatVersi if (binary == null) throw new ArgumentNullException(nameof(binary)); + // Create new script instance & set user id, format version var instance = new MessageScript { FormatVersion = version == FormatVersion.Detect ? (FormatVersion)binary.FormatVersion : version, - Encoding = encoding }; + var isBigEndian = instance.FormatVersion.HasFlag(FormatVersion.BigEndian); + instance.Encoding = + EncodingHelper.GetEncodingForEndianness(encoding, isBigEndian) + ?? 
EncodingHelper.GetEncodingForEndianness(Encoding.Unicode, isBigEndian); // Convert the binary messages to their counterpart var labelOccurences = new Dictionary(); @@ -182,7 +187,7 @@ public static MessageScript FromBinary(MessageScriptBinaryV2 binary, FormatVersi if (pageCount != 0) { // Parse the line data - ParsePages(message, pageStartAddresses, buffer, instance.FormatVersion, encoding == null ? Encoding.ASCII : encoding); + ParsePages(message, pageStartAddresses, buffer, instance.FormatVersion, instance.Encoding); } // Add it to the message list @@ -195,26 +200,26 @@ public static MessageScript FromBinary(MessageScriptBinaryV2 binary, FormatVersi /// /// Deserializes and creates a from a file. /// - public static MessageScript FromFile(string path, FormatVersion version = FormatVersion.Version1, Encoding encoding = null) + public static MessageScript FromFile(string path, FormatVersion version = FormatVersion.Detect, Encoding encoding = null) { if (path == null) throw new ArgumentNullException(nameof(path)); var binary = MessageScriptBinaryFactory.FromFile(path); - return FromBinary(binary); + return FromBinary(binary, version, encoding); } /// /// Deserializes and creates a from a stream. 
/// - public static MessageScript FromStream(Stream stream, FormatVersion version = FormatVersion.Version1, Encoding encoding = null, bool leaveOpen = false) + public static MessageScript FromStream(Stream stream, FormatVersion version = FormatVersion.Detect, Encoding encoding = null, bool leaveOpen = false) { if (stream == null) throw new ArgumentNullException(nameof(stream)); var binary = MessageScriptBinaryFactory.FromStream(stream); - return FromBinary(binary); + return FromBinary(binary, version, encoding); } private static string ResolveName(Dictionary labelOccurences, string name) @@ -317,49 +322,48 @@ static bool IsUnicodeCharacter(ushort c) { return ((ushort)(c + 0x2800)) > 0x7FF; } - static char MapToUnicodeCharacter(ushort c) + static char MapToUnicodeCharacter(ushort c, Encoding encoding) { - if (c == 0xFFE3) + if (encoding is CustomUnicodeEncoding cue) { - return ' '; - } - else - { - return (char)c; + if (cue.CustomCodeToChar.TryGetValue(c, out var ch)) + return ch; } + return (char)c; } - static bool IsSafeCharacter(ushort c) + static bool IsSafeCharacter(ushort c, Encoding encoding) { - return (c >= 21 && c <= 126) || (c == 0xFFE3); + var result = (c >= 21 && c <= 126); + if (encoding is CustomUnicodeEncoding cue) + result = result || cue.CustomCodeToChar.ContainsKey(c); + return result; } tokens = []; if (!TryReadUInt16(buffer, ref bufferIndex, version, out var c)) return false; - //if (c == 0) - // return false; if (IsUnicodeCharacter(c)) { - if (!IsSafeCharacter(c)) + if (!IsSafeCharacter(c, encoding)) { tokens.Add(new CodePointToken((byte)((c & 0xFF00) >> 8), (byte)(c & 0xFF))); } else { var stringBuilder = new StringBuilder(); - stringBuilder.Append(MapToUnicodeCharacter(c)); + stringBuilder.Append(MapToUnicodeCharacter(c, encoding)); while (true) { if (!TryReadUInt16(buffer, ref bufferIndex, version, out c)) break; - if (!(IsUnicodeCharacter(c) && IsSafeCharacter(c))) + if (!(IsUnicodeCharacter(c) && IsSafeCharacter(c, encoding))) { 
bufferIndex -= 2; break; } - stringBuilder.Append(MapToUnicodeCharacter(c)); + stringBuilder.Append(MapToUnicodeCharacter(c, encoding)); } tokens.Add(new StringToken(stringBuilder.ToString())); } @@ -677,7 +681,9 @@ private MessageScriptBinary ToBinaryV1() var builder = new MessageScriptBinaryBuilder((BinaryFormatVersion)FormatVersion); builder.SetUserId(Id); - builder.SetEncoding(Encoding); + + if (Encoding != null) + builder.SetEncoding(Encoding); foreach (var dialog in Dialogs) { @@ -702,7 +708,8 @@ private MessageScriptBinaryV2 ToBinaryV2() { var builder = new MessageScriptBinaryV2Builder((BinaryFormatVersion)FormatVersion); - builder.SetEncoding(Encoding); + if (Encoding != null) + builder.SetEncoding(Encoding); foreach (var dialog in Dialogs) {