From 1cb0534d1688ed06eb668775785c479db412b63d Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Mon, 2 Dec 2024 11:42:23 -0400 Subject: [PATCH] Ignore data column for every flattened universe dataframe --- .../ETFConstituentUniverse.cs | 32 ++--- .../PandasConverter.DataFrameGenerator.cs | 7 +- Common/Python/PandasData.cs | 19 ++- Tests/Algorithm/AlgorithmHistoryTests.cs | 127 ++++++++++++++---- 4 files changed, 131 insertions(+), 54 deletions(-) diff --git a/Common/Data/UniverseSelection/ETFConstituentUniverse.cs b/Common/Data/UniverseSelection/ETFConstituentUniverse.cs index 85c7ba53620a..9a207f9cc5d9 100644 --- a/Common/Data/UniverseSelection/ETFConstituentUniverse.cs +++ b/Common/Data/UniverseSelection/ETFConstituentUniverse.cs @@ -37,17 +37,17 @@ public class ETFConstituentUniverse : BaseDataCollection /// Time of the previous ETF constituent data update /// public DateTime? LastUpdate { get; set; } - + /// /// The percentage of the ETF allocated to this constituent /// public decimal? Weight { get; set; } - + /// /// Number of shares held in the ETF /// public decimal? SharesHeld { get; set; } - + /// /// Market value of the current asset held in U.S. dollars /// @@ -67,23 +67,6 @@ public override DateTime EndTime set { Time = value - Period; } } - /// - /// Initializes a new instance of the class - /// - public ETFConstituentUniverse() - { - } - - /// - /// Initializes a new instance of the class - /// - /// The time of this data - /// The symbol for this data - public ETFConstituentUniverse(DateTime time, Symbol symbol) - : base(time, time, symbol, null, null) - { - } - /// /// Return the URL string source of the file. This will be converted to a stream /// @@ -138,12 +121,15 @@ public override BaseData Reader(SubscriptionDataConfig config, string line, Date ? (decimal?)null : Parse.Decimal(split[5], NumberStyles.Any); - return new ETFConstituentUniverse(date, symbol) + return new ETFConstituentUniverse { LastUpdate = lastUpdateDate, Weight = weighting, SharesHeld = sharesHeld, MarketValue = marketValue, + + Symbol = symbol, + Time = date }; } @@ -162,13 +148,15 @@ public override bool RequiresMapping() /// Clone of the instance public override BaseData Clone() { - return new ETFConstituentUniverse(Time, Symbol) + return new ETFConstituentUniverse { LastUpdate = LastUpdate, Weight = Weight, SharesHeld = SharesHeld, MarketValue = MarketValue, + Symbol = Symbol, + Time = Time, Data = Data }; } diff --git a/Common/Python/PandasConverter.DataFrameGenerator.cs b/Common/Python/PandasConverter.DataFrameGenerator.cs index 505a549fe9ae..c99efc3688b6 100644 --- a/Common/Python/PandasConverter.DataFrameGenerator.cs +++ b/Common/Python/PandasConverter.DataFrameGenerator.cs @@ -36,6 +36,8 @@ private class DataFrameGenerator private static readonly string[] MultiCanonicalSymbolsDataFrameNames = new[] { "canonical", "time" }; private static readonly string[] SingleBaseDataCollectionDataFrameNames = new[] { "time" }; + private static readonly string[] _forcedBaseDataCollectionExcludedMembers = new string[] { nameof(BaseDataCollection.Data) }; + private readonly Type _dataType; private readonly bool _requestedTick; private readonly bool _requestedQuoteBar; @@ -162,7 +164,10 @@ protected void AddData(IEnumerable data) foreach (var item in data) { var pandasData = prevSymbol != null && item.Symbol == prevSymbol ? prevPandasData : GetPandasData(item); - pandasData.Add(item); + var forcedExcludedMembers = _flatten && item is BaseDataCollection + ? _forcedBaseDataCollectionExcludedMembers + : Enumerable.Empty(); + pandasData.Add(item, forcedExcludedMembers); prevSymbol = item.Symbol; prevPandasData = pandasData; } diff --git a/Common/Python/PandasData.cs b/Common/Python/PandasData.cs index 9db4e22dd17e..020f4244f854 100644 --- a/Common/Python/PandasData.cs +++ b/Common/Python/PandasData.cs @@ -158,12 +158,15 @@ public PandasData(object data, bool timeAsColumn = false) /// Adds security data object to the end of the lists /// /// object that contains security data - public void Add(object data) + /// + /// Optional list of member names that need to be ignored even if not marked as + /// + public void Add(object data, IEnumerable forcedExcludedMembers = null) { - Add(data, false); + Add(data, false, forcedExcludedMembers); } - private void Add(object data, bool overrideValues) + private void Add(object data, bool overrideValues, IEnumerable forcedExcludedMembers = null) { if (data == null) { @@ -182,7 +185,7 @@ private void Add(object data, bool overrideValues) } } - AddMembersData(data, typeMembers, endTime, overrideValues); + AddMembersData(data, typeMembers, endTime, overrideValues, forcedExcludedMembers?.ToArray()); if (data is DynamicData dynamicData) { @@ -579,10 +582,16 @@ private static IEnumerable GetDataTypeMembers(Type type, string[ /// Adds the member value to the corresponding series, making sure unwrapped values a properly added /// by checking the children members and adding their values to their own series /// - private void AddMembersData(object instance, IEnumerable members, DateTime endTime, bool overrideValues) + private void AddMembersData(object instance, IEnumerable members, DateTime endTime, bool overrideValues, + string[] forcedExcludedMembers = null) { foreach (var member in members) { + if (forcedExcludedMembers != null && forcedExcludedMembers.Contains(member.Member.Name, StringComparer.InvariantCulture)) + { + continue; + } + if (!member.ShouldBeUnwrapped) { AddMemberToSeries(instance, endTime, member, overrideValues); diff --git a/Tests/Algorithm/AlgorithmHistoryTests.cs b/Tests/Algorithm/AlgorithmHistoryTests.cs index 7afd33fb7b68..02b6c03dbab3 100644 --- a/Tests/Algorithm/AlgorithmHistoryTests.cs +++ b/Tests/Algorithm/AlgorithmHistoryTests.cs @@ -3297,6 +3297,38 @@ assert isinstance(constituent, Fundamental), f'Unflattened DF: expected a list o } } + [Test] + public void CSharpCustomUniverseHistoryDataFramesHaveExpectedFormat() + { + var algorithm = GetAlgorithm(new DateTime(2015, 01, 15)); + var universe = algorithm.AddUniverse("CustomUniverse", Resolution.Daily, (x) => x.Select(y => y.Symbol)); + + using (Py.GIL()) + { + PythonInitializer.Initialize(); + algorithm.SetPandasConverter(); + + using var testModule = PyModule.FromString("PythonCustomUniverseHistoryDataFramesHaveExpectedFormat", + $@" +from AlgorithmImports import * + +def get_universe_history(algorithm, universe, flatten): + return algorithm.history(universe, 3, flatten=flatten) + "); + + dynamic getUniverseHistory = testModule.GetAttr("get_universe_history"); + var df = getUniverseHistory(algorithm, universe, false); + var flattenedDf = getUniverseHistory(algorithm, universe, true); + + Func getWeight = (data) => data.Weight; + AssertCustomUniverseDataFrames(df, flattenedDf, getWeight); + + var columns = ((List)flattenedDf.columns.to_list().As>()) + .Select(column => column.InvokeMethod("__str__").GetAndDispose()); + CollectionAssert.DoesNotContain(columns, "data"); + } + } + [Test] public void PythonCustomUniverseHistoryDataFramesHaveExpectedFormat() { @@ -3340,44 +3372,87 @@ def get_universe_history(algorithm, flatten): var df = getUniverseHistory(algorithm, false); var flattenedDf = getUniverseHistory(algorithm, true); - var expectedDates = new List + Func getWeight = (data) => Convert.ToDecimal(data.GetProperty("weight")); + AssertCustomUniverseDataFrames(df, flattenedDf, getWeight); + } + } + + public class CustomUniverseData : BaseDataCollection + { + public decimal Weight { get; private set; } + + public override SubscriptionDataSource GetSource(SubscriptionDataConfig config, DateTime date, bool isLiveMode) + { + return new SubscriptionDataSource("TestData/portfolio_targets.csv", + SubscriptionTransportMedium.LocalFile, + FileFormat.FoldingCollection); + } + + public override BaseData Reader(SubscriptionDataConfig config, string line, DateTime date, bool isLiveMode) + { + var csv = line.Split(','); + + try + { + var endTime = DateTime.ParseExact(csv[0], "yyyy-MM-dd", CultureInfo.InvariantCulture); + var symbol = Symbol.Create(csv[1], SecurityType.Equity, Market.USA); + var weight = Convert.ToDecimal(csv[2], CultureInfo.InvariantCulture); + + return new CustomUniverseData + { + Symbol = symbol, + Time = endTime - TimeSpan.FromDays(1), + EndTime = endTime, + Weight = weight + }; + } + catch + { + return null; + } + } + } + + private static void AssertCustomUniverseDataFrames(dynamic df, dynamic flattenedDf, Func getWeight) + where T : BaseData + { + var expectedDates = new List { new DateTime(2015, 01, 13), new DateTime(2015, 01, 14), new DateTime(2015, 01, 15), }; - var flattenedDfDates = ((List)flattenedDf.index.get_level_values(0).to_list().As>()).Distinct().ToList(); - CollectionAssert.AreEqual(expectedDates, flattenedDfDates); + var flattenedDfDates = ((List)flattenedDf.index.get_level_values(0).to_list().As>()).Distinct().ToList(); + CollectionAssert.AreEqual(expectedDates, flattenedDfDates); - var dfDates = ((List)df.index.get_level_values(1).to_list().As>()).Distinct().ToList(); - CollectionAssert.AreEqual(expectedDates, dfDates); + var dfDates = ((List)df.index.get_level_values(1).to_list().As>()).Distinct().ToList(); + CollectionAssert.AreEqual(expectedDates, dfDates); - df = df.droplevel(0); // drop symbol just to make access easier - foreach (var date in expectedDates) - { - using var pyDate = date.ToPython(); - var constituents = (List)df.loc[pyDate].As>(); - var flattendDfConstituents = flattenedDf.loc[pyDate]; - - CollectionAssert.IsNotEmpty(constituents); - Assert.AreEqual(flattendDfConstituents.shape[0].As(), constituents.Count); + df = df.droplevel(0); // drop symbol just to make access easier + foreach (var date in expectedDates) + { + using var pyDate = date.ToPython(); + var constituents = (List)df.loc[pyDate].As>(); + var flattendDfConstituents = flattenedDf.loc[pyDate]; - var constituentsSymbols = constituents.Select(x => x.Symbol).ToList(); - var flattendDfConstituentsSymbols = ((List)flattendDfConstituents.index.to_list().As>()).ToList(); - CollectionAssert.AreEqual(flattendDfConstituentsSymbols, constituentsSymbols); + CollectionAssert.IsNotEmpty(constituents); + Assert.AreEqual(flattendDfConstituents.shape[0].As(), constituents.Count); - var constituentsWeights = constituents.Select(x => x.GetProperty("weight")).ToList(); - var flattendDfConstituentsWeights = constituentsSymbols - .Select(symbol => flattendDfConstituents.loc[symbol.ToPython()]["weight"].As()) - .Cast() - .ToList(); - CollectionAssert.AreEqual(flattendDfConstituentsWeights, constituentsWeights); - } + var constituentsSymbols = constituents.Select(x => x.Symbol).ToList(); + var flattendDfConstituentsSymbols = ((List)flattendDfConstituents.index.to_list().As>()).ToList(); + CollectionAssert.AreEqual(flattendDfConstituentsSymbols, constituentsSymbols); - Log.Debug((string)df.to_string()); - Log.Debug((string)flattenedDf.to_string()); + var constituentsWeights = constituents.Select(x => getWeight(x)).ToList(); + var flattendDfConstituentsWeights = constituentsSymbols + .Select(symbol => flattendDfConstituents.loc[symbol.ToPython()]["weight"].As()) + .Cast() + .ToList(); + CollectionAssert.AreEqual(flattendDfConstituentsWeights, constituentsWeights); } + + Log.Debug((string)df.to_string()); + Log.Debug((string)flattenedDf.to_string()); } private static void AssertDesNotThrowPythonException(Action action)