Skip to content

Commit

Permalink
Ignore data column for every flattened universe dataframe
Browse files Browse the repository at this point in the history
  • Loading branch information
jhonabreul committed Dec 2, 2024
1 parent 2780780 commit 1cb0534
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 54 deletions.
32 changes: 10 additions & 22 deletions Common/Data/UniverseSelection/ETFConstituentUniverse.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,17 @@ public class ETFConstituentUniverse : BaseDataCollection
/// Time of the previous ETF constituent data update
/// </summary>
public DateTime? LastUpdate { get; set; }

/// <summary>
/// The percentage of the ETF allocated to this constituent
/// </summary>
public decimal? Weight { get; set; }

/// <summary>
/// Number of shares held in the ETF
/// </summary>
public decimal? SharesHeld { get; set; }

/// <summary>
/// Market value of the current asset held in U.S. dollars
/// </summary>
Expand All @@ -67,23 +67,6 @@ public override DateTime EndTime
set { Time = value - Period; }
}

/// <summary>
/// Initializes a new instance of the <see cref="ETFConstituentUniverse"/> class.
/// Takes no arguments and sets no members itself; callers are expected to
/// populate the instance through its properties.
/// </summary>
public ETFConstituentUniverse()
{
}

/// <summary>
/// Initializes a new instance of the <see cref="ETFConstituentUniverse"/> class
/// for the given time and symbol by forwarding them to the base constructor
/// </summary>
/// <param name="time">The time of this data. It is passed as both the first and the second base constructor argument</param>
/// <param name="symbol">The symbol for this data</param>
public ETFConstituentUniverse(DateTime time, Symbol symbol)
// NOTE(review): the two `time` arguments are presumably the start and end time of the collection,
// and the two trailing nulls its optional payload -- confirm against the BaseDataCollection constructor.
: base(time, time, symbol, null, null)
{
}

/// <summary>
/// Return the URL string source of the file. This will be converted to a stream
/// </summary>
Expand Down Expand Up @@ -138,12 +121,15 @@ public override BaseData Reader(SubscriptionDataConfig config, string line, Date
? (decimal?)null
: Parse.Decimal(split[5], NumberStyles.Any);

return new ETFConstituentUniverse(date, symbol)
return new ETFConstituentUniverse
{
LastUpdate = lastUpdateDate,
Weight = weighting,
SharesHeld = sharesHeld,
MarketValue = marketValue,

Symbol = symbol,
Time = date
};
}

Expand All @@ -162,13 +148,15 @@ public override bool RequiresMapping()
/// <returns>Clone of the instance</returns>
public override BaseData Clone()
{
return new ETFConstituentUniverse(Time, Symbol)
return new ETFConstituentUniverse
{
LastUpdate = LastUpdate,
Weight = Weight,
SharesHeld = SharesHeld,
MarketValue = MarketValue,

Symbol = Symbol,
Time = Time,
Data = Data
};
}
Expand Down
7 changes: 6 additions & 1 deletion Common/Python/PandasConverter.DataFrameGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ private class DataFrameGenerator
private static readonly string[] MultiCanonicalSymbolsDataFrameNames = new[] { "canonical", "time" };
private static readonly string[] SingleBaseDataCollectionDataFrameNames = new[] { "time" };

/// <summary>
/// Member names that are always left out when a <see cref="BaseDataCollection"/> item is added
/// to a flattened data frame. Currently only <see cref="BaseDataCollection.Data"/>.
/// </summary>
private static readonly string[] _forcedBaseDataCollectionExcludedMembers = new string[] { nameof(BaseDataCollection.Data) };

private readonly Type _dataType;
private readonly bool _requestedTick;
private readonly bool _requestedQuoteBar;
Expand Down Expand Up @@ -162,7 +164,10 @@ protected void AddData<T>(IEnumerable<T> data)
foreach (var item in data)
{
var pandasData = prevSymbol != null && item.Symbol == prevSymbol ? prevPandasData : GetPandasData(item);
pandasData.Add(item);
var forcedExcludedMembers = _flatten && item is BaseDataCollection
? _forcedBaseDataCollectionExcludedMembers
: Enumerable.Empty<string>();
pandasData.Add(item, forcedExcludedMembers);
prevSymbol = item.Symbol;
prevPandasData = pandasData;
}
Expand Down
19 changes: 14 additions & 5 deletions Common/Python/PandasData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -158,12 +158,15 @@ public PandasData(object data, bool timeAsColumn = false)
/// Adds security data object to the end of the lists
/// </summary>
/// <param name="data"><see cref="IBaseData"/> object that contains security data</param>
public void Add(object data)
/// <param name="forcedExcludedMembers">
/// Optional list of member names that need to be ignored even if not marked as <see cref="PandasIgnoreAttribute"/>
/// </param>
public void Add(object data, IEnumerable<string> forcedExcludedMembers = null)
{
Add(data, false);
Add(data, false, forcedExcludedMembers);
}

private void Add(object data, bool overrideValues)
private void Add(object data, bool overrideValues, IEnumerable<string> forcedExcludedMembers = null)
{
if (data == null)
{
Expand All @@ -182,7 +185,7 @@ private void Add(object data, bool overrideValues)
}
}

AddMembersData(data, typeMembers, endTime, overrideValues);
AddMembersData(data, typeMembers, endTime, overrideValues, forcedExcludedMembers?.ToArray());

if (data is DynamicData dynamicData)
{
Expand Down Expand Up @@ -579,10 +582,16 @@ private static IEnumerable<DataTypeMember> GetDataTypeMembers(Type type, string[
/// Adds the member value to the corresponding series, making sure unwrapped values are properly added
/// by checking the children members and adding their values to their own series
/// </summary>
private void AddMembersData(object instance, IEnumerable<DataTypeMember> members, DateTime endTime, bool overrideValues)
private void AddMembersData(object instance, IEnumerable<DataTypeMember> members, DateTime endTime, bool overrideValues,
string[] forcedExcludedMembers = null)
{
foreach (var member in members)
{
if (forcedExcludedMembers != null && forcedExcludedMembers.Contains(member.Member.Name, StringComparer.InvariantCulture))
{
continue;
}

if (!member.ShouldBeUnwrapped)
{
AddMemberToSeries(instance, endTime, member, overrideValues);
Expand Down
127 changes: 101 additions & 26 deletions Tests/Algorithm/AlgorithmHistoryTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3297,6 +3297,38 @@ assert isinstance(constituent, Fundamental), f'Unflattened DF: expected a list o
}
}

/// <summary>
/// Requests universe history from Python for a universe whose data type is the C#-defined
/// <see cref="CustomUniverseData"/>, both flattened and not flattened, and checks that:
/// the two data frames agree with each other (see AssertCustomUniverseDataFrames), and
/// the flattened data frame does not expose a "data" column.
/// </summary>
[Test]
public void CSharpCustomUniverseHistoryDataFramesHaveExpectedFormat()
{
    var algorithm = GetAlgorithm(new DateTime(2015, 01, 15));
    var universe = algorithm.AddUniverse<CustomUniverseData>("CustomUniverse", Resolution.Daily, (x) => x.Select(y => y.Symbol));

    using (Py.GIL())
    {
        PythonInitializer.Initialize();
        algorithm.SetPandasConverter();

        // The helper module is named after this test so it is not loaded under the name of the
        // Python custom universe test. The script has no interpolation holes, so a plain verbatim
        // string is enough.
        using var testModule = PyModule.FromString(nameof(CSharpCustomUniverseHistoryDataFramesHaveExpectedFormat),
            @"
from AlgorithmImports import *
def get_universe_history(algorithm, universe, flatten):
    return algorithm.history(universe, 3, flatten=flatten)
");

        dynamic getUniverseHistory = testModule.GetAttr("get_universe_history");
        var df = getUniverseHistory(algorithm, universe, false);
        var flattenedDf = getUniverseHistory(algorithm, universe, true);

        Func<CustomUniverseData, decimal> getWeight = (data) => data.Weight;
        AssertCustomUniverseDataFrames(df, flattenedDf, getWeight);

        // Materialize the column names once: the selector disposes each PyObject it reads,
        // so the query must not be enumerated a second time.
        var columns = ((List<PyObject>)flattenedDf.columns.to_list().As<List<PyObject>>())
            .Select(column => column.InvokeMethod("__str__").GetAndDispose<string>())
            .ToList();
        CollectionAssert.DoesNotContain(columns, "data");
    }
}

[Test]
public void PythonCustomUniverseHistoryDataFramesHaveExpectedFormat()
{
Expand Down Expand Up @@ -3340,44 +3372,87 @@ def get_universe_history(algorithm, flatten):
var df = getUniverseHistory(algorithm, false);
var flattenedDf = getUniverseHistory(algorithm, true);

var expectedDates = new List<DateTime>
Func<PythonData, decimal> getWeight = (data) => Convert.ToDecimal(data.GetProperty("weight"));
AssertCustomUniverseDataFrames(df, flattenedDf, getWeight);
}
}

/// <summary>
/// Custom universe data type for the history tests. Each instance is built from one
/// comma-separated line of "TestData/portfolio_targets.csv".
/// </summary>
public class CustomUniverseData : BaseDataCollection
{
    /// <summary>
    /// Weight read from the third column of the source line
    /// </summary>
    public decimal Weight { get; private set; }

    /// <summary>
    /// Points the data feed at the local CSV file, read with the folding collection format
    /// </summary>
    /// <param name="config">Subscription configuration (not used)</param>
    /// <param name="date">Requested date (not used)</param>
    /// <param name="isLiveMode">Whether running in live mode (not used)</param>
    /// <returns>The source describing the local test file</returns>
    public override SubscriptionDataSource GetSource(SubscriptionDataConfig config, DateTime date, bool isLiveMode)
        => new SubscriptionDataSource(
            "TestData/portfolio_targets.csv",
            SubscriptionTransportMedium.LocalFile,
            FileFormat.FoldingCollection);

    /// <summary>
    /// Parses a "yyyy-MM-dd,ticker,weight" line into a <see cref="CustomUniverseData"/> instance
    /// </summary>
    /// <param name="config">Subscription configuration (not used)</param>
    /// <param name="line">The raw line to parse</param>
    /// <param name="date">Requested date (not used)</param>
    /// <param name="isLiveMode">Whether running in live mode (not used)</param>
    /// <returns>The parsed data point, or null when any part of the line fails to parse</returns>
    public override BaseData Reader(SubscriptionDataConfig config, string line, DateTime date, bool isLiveMode)
    {
        var fields = line.Split(',');

        try
        {
            var dataEndTime = DateTime.ParseExact(fields[0], "yyyy-MM-dd", CultureInfo.InvariantCulture);
            var equitySymbol = Symbol.Create(fields[1], SecurityType.Equity, Market.USA);
            var parsedWeight = Convert.ToDecimal(fields[2], CultureInfo.InvariantCulture);

            // The data point starts one day before the date found in the file
            var dataStartTime = dataEndTime - TimeSpan.FromDays(1);

            var result = new CustomUniverseData();
            result.Symbol = equitySymbol;
            result.Time = dataStartTime;
            result.EndTime = dataEndTime;
            result.Weight = parsedWeight;
            return result;
        }
        catch
        {
            // Best effort: any line that cannot be parsed is reported as "no data"
            return null;
        }
    }
}

private static void AssertCustomUniverseDataFrames<T>(dynamic df, dynamic flattenedDf, Func<T, decimal> getWeight)
where T : BaseData
{
var expectedDates = new List<DateTime>
{
new DateTime(2015, 01, 13),
new DateTime(2015, 01, 14),
new DateTime(2015, 01, 15),
};

var flattenedDfDates = ((List<DateTime>)flattenedDf.index.get_level_values(0).to_list().As<List<DateTime>>()).Distinct().ToList();
CollectionAssert.AreEqual(expectedDates, flattenedDfDates);
var flattenedDfDates = ((List<DateTime>)flattenedDf.index.get_level_values(0).to_list().As<List<DateTime>>()).Distinct().ToList();
CollectionAssert.AreEqual(expectedDates, flattenedDfDates);

var dfDates = ((List<DateTime>)df.index.get_level_values(1).to_list().As<List<DateTime>>()).Distinct().ToList();
CollectionAssert.AreEqual(expectedDates, dfDates);
var dfDates = ((List<DateTime>)df.index.get_level_values(1).to_list().As<List<DateTime>>()).Distinct().ToList();
CollectionAssert.AreEqual(expectedDates, dfDates);

df = df.droplevel(0); // drop symbol just to make access easier
foreach (var date in expectedDates)
{
using var pyDate = date.ToPython();
var constituents = (List<PythonData>)df.loc[pyDate].As<List<PythonData>>();
var flattendDfConstituents = flattenedDf.loc[pyDate];

CollectionAssert.IsNotEmpty(constituents);
Assert.AreEqual(flattendDfConstituents.shape[0].As<int>(), constituents.Count);
df = df.droplevel(0); // drop symbol just to make access easier
foreach (var date in expectedDates)
{
using var pyDate = date.ToPython();
var constituents = (List<T>)df.loc[pyDate].As<List<T>>();
var flattendDfConstituents = flattenedDf.loc[pyDate];

var constituentsSymbols = constituents.Select(x => x.Symbol).ToList();
var flattendDfConstituentsSymbols = ((List<Symbol>)flattendDfConstituents.index.to_list().As<List<Symbol>>()).ToList();
CollectionAssert.AreEqual(flattendDfConstituentsSymbols, constituentsSymbols);
CollectionAssert.IsNotEmpty(constituents);
Assert.AreEqual(flattendDfConstituents.shape[0].As<int>(), constituents.Count);

var constituentsWeights = constituents.Select(x => x.GetProperty("weight")).ToList();
var flattendDfConstituentsWeights = constituentsSymbols
.Select(symbol => flattendDfConstituents.loc[symbol.ToPython()]["weight"].As<decimal>())
.Cast<decimal>()
.ToList();
CollectionAssert.AreEqual(flattendDfConstituentsWeights, constituentsWeights);
}
var constituentsSymbols = constituents.Select(x => x.Symbol).ToList();
var flattendDfConstituentsSymbols = ((List<Symbol>)flattendDfConstituents.index.to_list().As<List<Symbol>>()).ToList();
CollectionAssert.AreEqual(flattendDfConstituentsSymbols, constituentsSymbols);

Log.Debug((string)df.to_string());
Log.Debug((string)flattenedDf.to_string());
var constituentsWeights = constituents.Select(x => getWeight(x)).ToList();
var flattendDfConstituentsWeights = constituentsSymbols
.Select(symbol => flattendDfConstituents.loc[symbol.ToPython()]["weight"].As<decimal>())
.Cast<decimal>()
.ToList();
CollectionAssert.AreEqual(flattendDfConstituentsWeights, constituentsWeights);
}

Log.Debug((string)df.to_string());
Log.Debug((string)flattenedDf.to_string());
}

private static void AssertDesNotThrowPythonException(Action action)
Expand Down

0 comments on commit 1cb0534

Please sign in to comment.