From 31bfe45254b88603cfeaa95ec397cd3017bc6ef6 Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Thu, 12 Sep 2024 09:47:35 +0200 Subject: [PATCH 1/3] sort dataset variables and add TimeArray example --- docs/Project.toml | 2 ++ docs/src/UserGuide/faq.md | 48 +++++++++++++++++++++++++++++++++++++- src/DatasetAPI/Datasets.jl | 4 ++-- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/docs/Project.toml b/docs/Project.toml index 8bac56ca..63bbbb9c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -19,6 +19,7 @@ GeoMakie = "db073c08-6b98-4ee5-b6a4-5efafb3259c6" IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +MarketData = "945b72a4-3b13-509d-9b46-1525bb5c06de" MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9" OnlineStats = "a15396b6-48d5-5d58-9928-6d29437db91e" @@ -26,6 +27,7 @@ PlotUtils = "995b91a9-d308-5afd-9ec6-746e21dbc043" Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689" SkipNan = "aed68c70-c8b0-4309-8cd1-d392a74f991a" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +TimeSeries = "9e3dc215-6440-5c97-bce1-76c03772f85e" WGLMakie = "276b4fcb-3e11-5398-bf8b-a0c2d153d008" WeightedOnlineStats = "bbac0a1f-7c9d-5672-960b-c6ca726e5d5d" YAXArrayBase = "90b8fcef-0c2d-428d-9c56-5f86629e9d14" diff --git a/docs/src/UserGuide/faq.md b/docs/src/UserGuide/faq.md index d5ad6cdd..3807e825 100644 --- a/docs/src/UserGuide/faq.md +++ b/docs/src/UserGuide/faq.md @@ -269,7 +269,7 @@ We can also use more than one criteria for grouping the values. In the next exam fitcube = cubefittable(t, Mean, :values, by=(:classes, :time)) ```` -## How do I assing variable names to `YAXArrays` in a `Dataset` +## How do I assign variable names to `YAXArrays` in a `Dataset` ### One variable name @@ -288,3 +288,49 @@ nothing # hide ````@ansi howdoi ds = YAXArrays.Dataset(; (keylist .=> varlist)...) 
```` + +## How do I construct a `Dataset` from a TimeArray + +In this section we will use `MarketData.jl` and `TimeSeries.jl` to simulate some stocks. + +````@example howdoi +using YAXArrays, DimensionalData +using MarketData, TimeSeries + +stocks = Dict(:Stock1 => random_ohlcv(), :Stock2 => random_ohlcv(), :Stock3 => random_ohlcv()) +d_keys = keys(stocks) +```` + +currently there is not direct support to obtain `dims` from a `TimeArray`, but can build a function for it + +````@example howdoi +getTArrayAxes(ta::TimeArray) = (Dim{:time}(timestamp(ta)), Dim{:variable}(colnames(ta)), ); +nothing # hide +```` +then, we create the `YAXArrays` as + +````@example howdoi +yax_list = [YAXArray(getTArrayAxes(stocks[k]), values(stocks[k])) for k in d_keys]; +nothing # hide +```` + +and a `Dataset` with all `stocks` names + +````@ansi howdoi +ds = Dataset(; (d_keys .=> yax_list)...) +```` + +and it looks like there some small differences in the axes, they are being printed independently although they should be the same. Well, they are at least at the `==` level but not at `===`. We could use the axes from one `YAXArray` as reference and `rebuild` all the others + +````@example howdoi +yax_list = [rebuild(yax_list[1], values(data[k])) for k in d_keys]; +nothing # hide +```` + +and voilà + +````@ansi howdoi +ds = Dataset(; (d_keys .=> yax_list)...) +```` + +now they are printed together, showing that it is exactly the same axis structure for all variables.
\ No newline at end of file diff --git a/src/DatasetAPI/Datasets.jl b/src/DatasetAPI/Datasets.jl index 02488f08..4cbf06e8 100644 --- a/src/DatasetAPI/Datasets.jl +++ b/src/DatasetAPI/Datasets.jl @@ -125,7 +125,7 @@ function Base.show(io::IO, ds::Dataset) if !isempty(variables_with_shared_axes_only) printstyled(io, "Variables: ", color=:light_blue) print(io, "\n") - println(io, join(variables_with_shared_axes_only, ", ")) + println(io, join(sort(variables_with_shared_axes_only), ", ")) println(io) end @@ -146,7 +146,7 @@ function Base.show(io::IO, ds::Dataset) end printstyled(io, " Variables: ", color=:light_blue) padding = " " ^ 2 # Adjust this number to match the length of " Variables: " - variables_str = join(variables, ", ") + variables_str = join(sort(variables), ", ") padded_variables = padding * variables_str print(io, "\n") println(io, padded_variables) From d6cda20b0c707b805914a02e23f60860cc6e8986 Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Thu, 12 Sep 2024 10:08:33 +0200 Subject: [PATCH 2/3] is stocks --- docs/src/UserGuide/faq.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/UserGuide/faq.md b/docs/src/UserGuide/faq.md index 3807e825..fe144a59 100644 --- a/docs/src/UserGuide/faq.md +++ b/docs/src/UserGuide/faq.md @@ -323,7 +323,7 @@ ds = Dataset(; (d_keys .=> yax_list)...) and it looks like there some small differences in the axes, they are being printed independently although they should be the same. Well, they are at least at the `==` level but not at `===`. 
We could use the axes from one `YAXArray` as reference and `rebuild` all the others ````@example howdoi -yax_list = [rebuild(yax_list[1], values(data[k])) for k in d_keys]; +yax_list = [rebuild(yax_list[1], values(stocks[k])) for k in d_keys]; nothing # hide ```` From 353f5c9025b7473f27170311e04d22c27d099ac8 Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Thu, 12 Sep 2024 11:01:42 +0200 Subject: [PATCH 3/3] typo --- docs/src/UserGuide/faq.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/UserGuide/faq.md b/docs/src/UserGuide/faq.md index fe144a59..963b3031 100644 --- a/docs/src/UserGuide/faq.md +++ b/docs/src/UserGuide/faq.md @@ -301,7 +301,7 @@ stocks = Dict(:Stock1 => random_ohlcv(), :Stock2 => random_ohlcv(), :Stock3 => r d_keys = keys(stocks) ```` -currently there is not direct support to obtain `dims` from a `TimeArray`, but can build a function for it +currently there is no direct support to obtain `dims` from a `TimeArray`, but we can code a function for it ````@example howdoi getTArrayAxes(ta::TimeArray) = (Dim{:time}(timestamp(ta)), Dim{:variable}(colnames(ta)), ); @@ -320,7 +320,7 @@ and a `Dataset` with all `stocks` names ds = Dataset(; (d_keys .=> yax_list)...) ```` -and it looks like there some small differences in the axes, they are being printed independently although they should be the same. Well, they are at least at the `==` level but not at `===`. We could use the axes from one `YAXArray` as reference and `rebuild` all the others +and, it looks like there are some small differences in the axes, they are being printed independently although they should be the same. Well, they are at least at the `==` level but not at `===`. We could use the axes from one `YAXArray` as reference and `rebuild` all the others ````@example howdoi yax_list = [rebuild(yax_list[1], values(stocks[k])) for k in d_keys];