Skip to content

Commit

Permalink
Merge pull request #11 from AnnulusGames/fix-extra-columns
Browse files Browse the repository at this point in the history
Fix: cannot properly deserialize csv with mismatched number of columns
  • Loading branch information
AnnulusGames authored Jul 1, 2024
2 parents 15f9a72 + b2df970 commit 433a692
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 57 deletions.
154 changes: 117 additions & 37 deletions src/Csv.SourceGenerator/CsvSerializerGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ void EmitReadHeader()
{
builder.AppendLine("var allowComments = reader.Options.AllowComments;");
builder.AppendLine("if (allowComments) reader.TrySkipComment();");
builder.AppendLine("if (reader.Options.HasHeader) reader.AdvanceToEndOfLine();");
builder.AppendLine("if (reader.Options.HasHeader) reader.TrySkipLine();");
}

var members = type.Members;
Expand All @@ -391,36 +391,59 @@ void EmitReadHeader()
using (builder.BeginBlockScope("while (reader.Remaining > 0)"))
{
builder.AppendLine("if (reader.TryReadEndOfLine()) continue;");
builder.AppendLine("if (allowComments && reader.TrySkipComment()) continue;");
builder.AppendLine("if (allowComments && reader.TrySkipComment(false)) continue;");

EmitDeserializeLocalVariables(type, builder);
builder.AppendLine("var ___endOfLine = false;");

for (int i = 0; i < members.Count; i++)
{
EmitReadMember(members[i], builder);

if (i == members.Count - 1)
using (builder.BeginBlockScope($"for (int __i = 0; __i <= {members.Max(x => x.Index)}; __i++)"))
{
using (builder.BeginBlockScope("switch (__i)"))
{
builder.AppendLine();
builder.AppendLine("ADD_ITEM:");
builder.Append("list.Add(");
using (builder.BeginBlockScope("new()"))
foreach (var member in members)
{
foreach (var member in type.Members)
using (builder.BeginIndentScope($"case {member.Index}:"))
{
builder.AppendLine($"{member.Symbol.Name} = __{member.Symbol.Name},");
EmitReadMember(member, builder);
builder.AppendLine("break;");
}
}
builder.AppendLine(");");
using (builder.BeginIndentScope("default:"))
{
builder.AppendLine("reader.SkipField();");
builder.AppendLine("break;");
}
}
else

using (builder.BeginBlockScope("if (reader.TryReadEndOfLine(true))"))
{
builder.AppendLine("___endOfLine = true;");
builder.AppendLine("goto ADD_ITEM;");
}

builder.AppendLine("if (!reader.TryReadSeparator(false)) goto ADD_ITEM;");
}

builder.AppendLine();
builder.AppendLine("ADD_ITEM:");
builder.Append("list.Add(");
using (builder.BeginBlockScope("new()"))
{
foreach (var member in type.Members)
{
builder.AppendLine("if (reader.TryReadEndOfLine()) goto ADD_ITEM;");
builder.AppendLine("if (!reader.TryReadSeparator()) goto ADD_ITEM;");
builder.AppendLine($"{member.Symbol.Name} = __{member.Symbol.Name},");
}
}
builder.AppendLine(");");

builder.AppendLine();
using (builder.BeginBlockScope("if (!___endOfLine)"))
{
builder.AppendLine("if (!reader.TrySkipLine()) goto RETURN;");
}
}

builder.AppendLine("RETURN:");
builder.AppendLine("return list.AsSpan().ToArray();");
}

Expand All @@ -437,32 +460,55 @@ void EmitReadHeader()
builder.AppendLine("if (allowComments && reader.TrySkipComment(false)) continue;");

EmitDeserializeLocalVariables(type, builder);
builder.AppendLine("var ___endOfLine = false;");

for (int i = 0; i < members.Count; i++)
using (builder.BeginBlockScope($"for (int __i = 0; __i <= {members.Max(x => x.Index)}; __i++)"))
{
EmitReadMember(members[i], builder);

if (i == members.Count - 1)
using (builder.BeginBlockScope("switch (__i)"))
{
builder.AppendLine();
builder.AppendLine("ADD_ITEM:");
using (builder.BeginBlockScope("destination[n++] = new()"))
foreach (var member in members)
{
foreach (var member in type.Members)
using (builder.BeginIndentScope($"case {member.Index}:"))
{
builder.AppendLine($"{member.Symbol.Name} = __{member.Symbol.Name},");
EmitReadMember(member, builder);
builder.AppendLine("break;");
}
}
builder.AppendLine(";");
using (builder.BeginIndentScope("default:"))
{
builder.AppendLine("reader.SkipField();");
builder.AppendLine("break;");
}
}
else

using (builder.BeginBlockScope("if (reader.TryReadEndOfLine(true))"))
{
builder.AppendLine("if (reader.TryReadEndOfLine(true)) goto ADD_ITEM;");
builder.AppendLine("if (!reader.TryReadSeparator(false)) goto ADD_ITEM;");
builder.AppendLine("___endOfLine = true;");
builder.AppendLine("goto ADD_ITEM;");
}

builder.AppendLine("if (!reader.TryReadSeparator(false)) goto ADD_ITEM;");
}

builder.AppendLine();
builder.AppendLine("ADD_ITEM:");
using (builder.BeginBlockScope("destination[n++] = new()"))
{
foreach (var member in type.Members)
{
builder.AppendLine($"{member.Symbol.Name} = __{member.Symbol.Name},");
}
}
builder.AppendLine(";");

builder.AppendLine();
using (builder.BeginBlockScope("if (!___endOfLine)"))
{
builder.AppendLine("if (!reader.TrySkipLine()) goto RETURN;");
}
}

builder.AppendLine("RETURN:");
builder.AppendLine("return n;");
}
}
Expand All @@ -484,14 +530,22 @@ void EmitReadHeader()
builder.AppendLine("var keyBuffer = new global::Csv.Internal.TempList<byte>();");
using (builder.BeginBlockScope("try"))
{
builder.AppendLine("var ___endOfLine = false;");

using (builder.BeginBlockScope($"for (int i = 0; i < {members.Count}; i++)"))
{
builder.AppendLine("reader.ReadUtf8(ref keyBuffer);");
builder.AppendLine("map[i] = GetColumnIndex(keyBuffer.AsSpan());");
builder.AppendLine("keyBuffer.Clear(false);");
builder.AppendLine("if (reader.TryReadEndOfLine(true)) break;");
using (builder.BeginBlockScope("if (reader.TryReadEndOfLine(true))"))
{
builder.AppendLine("___endOfLine = true;");
builder.AppendLine("break;");
}
builder.AppendLine($"if (i != {members.Count} - 1) reader.TryReadSeparator(false);");
}

builder.AppendLine("if (!___endOfLine) reader.TrySkipLine();");
}
using (builder.BeginBlockScope("finally"))
{
Expand All @@ -513,10 +567,11 @@ void EmitReadHeader()
builder.AppendLine("if (allowComments && reader.TrySkipComment(false)) continue;");

EmitDeserializeLocalVariables(type, builder);
builder.AppendLine("var ___endOfLine = false;");

using (builder.BeginBlockScope("foreach (var index in map)"))
using (builder.BeginBlockScope("foreach (var ___i in map)"))
{
using (builder.BeginBlockScope("switch (index)"))
using (builder.BeginBlockScope("switch (___i)"))
{
for (int i = 0; i < members.Count; i++)
{
Expand All @@ -532,7 +587,12 @@ void EmitReadHeader()
}
}

builder.AppendLine("if (reader.TryReadEndOfLine(true)) goto ADD_ITEM;");
using (builder.BeginBlockScope("if (reader.TryReadEndOfLine(true))"))
{
builder.AppendLine("___endOfLine = true;");
builder.AppendLine("goto ADD_ITEM;");
}

builder.AppendLine("if (!reader.TryReadSeparator(false)) goto ADD_ITEM;");
}

Expand All @@ -547,8 +607,15 @@ void EmitReadHeader()
}
}
builder.AppendLine(");");

builder.AppendLine();
using (builder.BeginBlockScope("if (!___endOfLine)"))
{
builder.AppendLine("if (!reader.TrySkipLine()) goto RETURN;");
}
}

builder.AppendLine("RETURN:");
builder.AppendLine("return list.AsSpan().ToArray();");
}

Expand All @@ -565,10 +632,11 @@ void EmitReadHeader()
builder.AppendLine("if (allowComments && reader.TrySkipComment(false)) continue;");

EmitDeserializeLocalVariables(type, builder);
builder.AppendLine("var ___endOfLine = false;");

using (builder.BeginBlockScope("foreach (var index in map)"))
using (builder.BeginBlockScope("foreach (var __i in map)"))
{
using (builder.BeginBlockScope("switch (index)"))
using (builder.BeginBlockScope("switch (__i)"))
{
for (int i = 0; i < members.Count; i++)
{
Expand All @@ -584,7 +652,12 @@ void EmitReadHeader()
}
}

builder.AppendLine("if (reader.TryReadEndOfLine(true)) goto ADD_ITEM;");
using (builder.BeginBlockScope("if (reader.TryReadEndOfLine(true))"))
{
builder.AppendLine("___endOfLine = true;");
builder.AppendLine("goto ADD_ITEM;");
}

builder.AppendLine("if (!reader.TryReadSeparator(false)) goto ADD_ITEM;");
}

Expand All @@ -598,8 +671,15 @@ void EmitReadHeader()
}
}
builder.AppendLine(";");

builder.AppendLine();
using (builder.BeginBlockScope("if (!___endOfLine)"))
{
builder.AppendLine("if (!reader.TrySkipLine()) goto RETURN;");
}
}

builder.AppendLine("RETURN:");
builder.AppendLine("return n;");
}
}
Expand Down
28 changes: 8 additions & 20 deletions src/Csv/CsvReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ public char ReadChar()
}
}

internal int SkipField()
public int SkipField()
{
if (IsNextSeparatorOrNewline())
{
Expand Down Expand Up @@ -279,38 +279,26 @@ public bool TrySkipComment(bool skipWhiteSpace = true)
{
if (skipWhiteSpace) SkipWhitespace();
var isComment = reader.IsNext((byte)'#', true);
if (isComment) AdvanceToEndOfLine();
if (isComment) TrySkipLine();

return isComment;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void AdvanceToEndOfLine()
public bool TrySkipLine()
{
if (!reader.TryAdvanceToAny([(byte)'\n', (byte)'\r'], false)) return;
reader.TryRead(out var c1);
if (c1 == '\r' && reader.TryPeek(out var c2) && c2 == '\n')
{
reader.Advance(1);
}
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void AdvanceToSeparatorOrEndOfLine()
{
SkipWhitespace();

while (true)
if (!reader.TryAdvanceToAny([(byte)'\n', (byte)'\r'], false))
{
if (!reader.TryPeek(out var c)) return;
if (c == (byte)'\n' || c == (byte)'\r' || c == separator) break;
return false;
}

reader.TryRead(out var c1);
if (c1 == '\r' && reader.TryPeek(out var c2) && c2 == '\n')
{
reader.Advance(1);
}

return true;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
Expand Down
19 changes: 19 additions & 0 deletions tests/Csv.Tests/SerializeTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,25 @@ public void Test_Deserialize_Complex()

CollectionAssert.AreEqual(expected, actual);
}

// Regression test for CSV input that carries more columns than the expected
// User values use: the header and every record include two trailing columns
// (Dummy1, Dummy2) that do not appear in the expected objects below.
[Test]
public void Test_Deserialize_ExtraColumns()
{
// UTF-8 literal: one header line followed by three records; the last field of
// each record is a quoted string ("" is an escaped quote inside the verbatim literal).
var csv =
@"Name,Age,Dummy1,Dummy2
Alex,21,1,""a""
Bob,35,25,""b""
Charles,17,23,""c"""u8;

// Deserialize from a ReadOnlySequence<byte> wrapping a copy of the literal's bytes.
User[] actual = CsvSerializer.Deserialize<User>(new ReadOnlySequence<byte>(csv.ToArray()));
// Only Name and Age are set here; the Dummy1/Dummy2 values are expected to be dropped.
User[] expected = [
new() { Name = "Alex", Age = 21 },
new() { Name = "Bob", Age = 35 },
new() { Name = "Charles", Age = 17 }
];

CollectionAssert.AreEqual(expected, actual);
}
}

[CsvObject]
Expand Down

0 comments on commit 433a692

Please sign in to comment.