Skip to content

Commit

Permalink
Write schema/header when WriteAll called.
Browse files Browse the repository at this point in the history
  • Loading branch information
jehugaleahsa committed May 2, 2021
1 parent 76a6e86 commit 946c641
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 18 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 4.14.0 (2021-05-01)
**Summary** - The behavior of `TypedWriter.WriteAll` is somewhat unintuitive when called with no records. The expectation is that the header is written when performing this bulk operation; otherwise, the caller has to check for an empty collection or always call `WriteSchema` explicitly beforehand. This slightly changes the behavior of the code, such that it might result in headers/schema being written in cases where the file was blank before. However, when `IsFirstRecordSchema` is `true`, it is extremely unlikely consumers would expect a blank file to be generated.

During my testing, I also discovered a bug where the schema was getting set, then unset when the header/schema record was the only record in the file. You should be able to try to read the first record of an empty file and get `false` back, then read the schema via `GetSchema`; however, my code would throw an `InvalidOperationException` or, worse, a `NullReferenceException`.

## 4.13.0 (2020-12-03)
**Summary** - This change allows the original text making up a record to be viewed while parsing a file. The raw record contents will be accessible via the `IRecordContext` interface, which is available within the event args.

Expand All @@ -11,7 +16,7 @@ The ADO.NET classes didn't receive the same level of love that the rest of the l
Technically this is a breaking change that might warrant a major version change; however, as the previous behavior could not possibly be desired and few people actually use the ADO.NET classes, I am going to include this in the next minor version, treating it as just a bug fix.

## 4.11.0 (2020-10-09)
**Summary* - Allow handling unrecognized rows when using schema selectors.
**Summary** - Allow handling unrecognized rows when using schema selectors.

Previously, if a record was encountered that could not be handled by any of the configured schemas, the selector would throw a generic `FlatFilesException`. Now, a `RecordProcessingException` is thrown instead, which can be ignored, causing the record to be skipped.

Expand Down
30 changes: 30 additions & 0 deletions FlatFiles.Test/AutoMapTester.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,36 @@ public void ShouldDeduceSchemaForType_ColumnNameCustomization()
AssertEqual(expected, results, 2);
}

[TestMethod]
public void ShouldWriteHeadersWhenNoRecordsProvided()
{
    // Arrange: map the four Person properties and attach a writer to an in-memory sink.
    var personMapper = SeparatedValueTypeMapper.Define<Person>(() => new Person());
    personMapper.Property(x => x.Id);
    personMapper.Property(x => x.Name);
    personMapper.Property(x => x.CreatedOn);
    personMapper.Property(x => x.IsActive);
    var sink = new StringWriter();
    var typedWriter = personMapper.GetWriter(sink);

    // Act: bulk-write an empty set twice. The second call checks that the
    // headers are not written a second time.
    var noPeople = new Person[0];
    typedWriter.WriteAll(noPeople);
    typedWriter.WriteAll(noPeople);

    // Assert: read the output back, treating the first record as the schema.
    var readerOptions = new SeparatedValueOptions { IsFirstRecordSchema = true };
    var reader = new SeparatedValueReader(new StringReader(sink.ToString()), readerOptions);
    Assert.IsFalse(reader.Read(), "No records should have been written.");

    // The schema must still be retrievable even though no data record was read.
    var headerSchema = reader.GetSchema();
    Assert.AreEqual(4, headerSchema.ColumnDefinitions.Count, "The wrong number of headers were found.");
    var expectedNames = new[] { "Id", "Name", "CreatedOn", "IsActive" };
    var actualNames = headerSchema.ColumnDefinitions.Select(c => c.ColumnName).ToArray();
    CollectionAssert.AreEqual(expectedNames, actualNames);
}

private static void AssertEqual(IList<Person> expected, IList<Person> actual, int id)
{
Assert.AreEqual(expected[id].Id, actual[id].Id, $"Wrong ID for person {id}");
Expand Down
10 changes: 5 additions & 5 deletions FlatFiles/FixedLengthReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,10 @@ private async ValueTask<bool> SkipAsyncInternal()

private string[] PartitionRecord(string record)
{
if (record == null)
{
return null;
}
var schema = GetSchema(record);
metadata.ExecutionContext.Schema = schema;
if (schema == null)
Expand Down Expand Up @@ -411,11 +415,7 @@ private string[] PartitionRecord(string record)

private FixedLengthSchema GetSchema(string record)
{
if (record == null)
{
return null;
}
if (schemaSelector == null)
if (record == null || schemaSelector == null)
{
return metadata.ExecutionContext.Schema;
}
Expand Down
12 changes: 7 additions & 5 deletions FlatFiles/FlatFiles.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,25 @@
<TargetFrameworks>netcoreapp3.0;netstandard2.0;netstandard1.6;net451</TargetFrameworks>
<NeutralLanguage>en-US</NeutralLanguage>
<Description>Reads and writes CSV, fixed-length and other flat file formats with a focus on schema definition, configuration and speed. Supports mapping directly between files and classes.</Description>
<Copyright>Copyright @ 2020</Copyright>
<Copyright>Copyright @ 2021</Copyright>
<PackageLicenseUrl></PackageLicenseUrl>
<PackageProjectUrl>https://github.com/jehugaleahsa/FlatFiles</PackageProjectUrl>
<RepositoryUrl>https://github.com/jehugaleahsa/FlatFiles.git</RepositoryUrl>
<RepositoryType>git</RepositoryType>
<PackageTags>csv;comma;tab;separated;value;delimited;flat;file;fixed;width;fixed-width;length;fixed-length;parser;parsing;parse</PackageTags>
<PackageReleaseNotes>Add the ability to view the original text making up a record while processing the records.</PackageReleaseNotes>
<PackageReleaseNotes>The behavior of TypedWriter.WriteAll is somewhat unintuitive when called with no records. The expectation is that the header/schema is written when performing this bulk operation; otherwise, the caller has to call WriteSchema explicitly beforehand. This slightly changes the behavior of the code, such that it might result in headers/schema being written in cases where the file was blank before. However, when IsFirstRecordSchema is true, it is extremely unlikely consumers would expect a blank file to be generated.

During my testing, I also discovered a bug where the schema was getting set, then unset when the header/schema record was the only record in the file. You should be able to try to read the first record of an empty file and get false back, then read the schema via GetSchema; however, my code would throw an InvalidOperationException or, worse, a NullReferenceException.</PackageReleaseNotes>
<SignAssembly>true</SignAssembly>
<AssemblyOriginatorKeyFile>FlatFiles.snk</AssemblyOriginatorKeyFile>
<Version>4.13.0</Version>
<Version>4.14.0</Version>
</PropertyGroup>

<PropertyGroup>
<LangVersion>8.0</LangVersion>
<PackageIconUrl></PackageIconUrl>
<AssemblyVersion>4.13.0.0</AssemblyVersion>
<FileVersion>4.13.0.0</FileVersion>
<AssemblyVersion>4.14.0.0</AssemblyVersion>
<FileVersion>4.14.0.0</FileVersion>
<PackageLicenseFile>UNLICENSE.txt</PackageLicenseFile>
<PackageIcon>icon.png</PackageIcon>
</PropertyGroup>
Expand Down
19 changes: 12 additions & 7 deletions FlatFiles/SeparatedValueReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,16 @@ private void HandleSchema()
return;
}
string[] columnNames = ReadNextRecord();
metadata.ExecutionContext.Schema = new SeparatedValueSchema();
if (columnNames == null)
{
return;
}
var schema = new SeparatedValueSchema();
metadata.ExecutionContext.Schema = schema;
foreach (string columnName in columnNames)
{
StringColumn column = new StringColumn(columnName);
metadata.ExecutionContext.Schema.AddColumn(column);
schema.AddColumn(column);
}
}

Expand Down Expand Up @@ -306,6 +311,10 @@ private async Task HandleSchemaAsync()
return;
}
string[] columnNames = await ReadNextRecordAsync().ConfigureAwait(false);
if (columnNames == null)
{
return;
}
metadata.ExecutionContext.Schema = new SeparatedValueSchema();
foreach (string columnName in columnNames)
{
Expand Down Expand Up @@ -356,11 +365,7 @@ private async Task<string[]> ReadWithFilterAsync()

private SeparatedValueSchema GetSchema(string[] rawValues)
{
if (rawValues == null)
{
return null;
}
if (schemaSelector == null)
if (rawValues == null || schemaSelector == null)
{
return metadata.ExecutionContext.Schema;
}
Expand Down
2 changes: 2 additions & 0 deletions FlatFiles/TypeMapping/TypedWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ public static class TypedWriterExtensions
/// <returns>The entities written by the writer.</returns>
public static void WriteAll<TEntity>(this ITypedWriter<TEntity> writer, IEnumerable<TEntity> entities)
{
writer.WriteSchema();
foreach (var entity in entities)
{
writer.Write(entity);
Expand All @@ -230,6 +231,7 @@ public static void WriteAll<TEntity>(this ITypedWriter<TEntity> writer, IEnumera
/// <returns>The entities written by the writer.</returns>
public static async Task WriteAllAsync<TEntity>(this ITypedWriter<TEntity> writer, IEnumerable<TEntity> entities)
{
await writer.WriteSchemaAsync().ConfigureAwait(false);
foreach (var entity in entities)
{
await writer.WriteAsync(entity).ConfigureAwait(false);
Expand Down

0 comments on commit 946c641

Please sign in to comment.