Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add preview to datasets as specified in the Kedro catalog under metadata #1374

Merged
merged 16 commits into from
Jun 8, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions demo-project/conf/base/catalog_01_raw.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@
companies:
type: pandas.CSVDataSet
filepath: ${base_location}/01_raw/companies.csv
layer: raw
metadata:
kedro-viz:
layer: raw
preview_args:
merelcht marked this conversation as resolved.
Show resolved Hide resolved
nrows: 5

reviews:
type: pandas.CSVDataSet
filepath: ${base_location}/01_raw/reviews.csv
metadata:
kedro-viz:
layer: raw
preview_args:
nrows: 4
nrows: 10


shuttles:
type: pandas.ExcelDataSet
filepath: ${base_location}/01_raw/shuttles.xlsx
metadata:
kedro-viz:
layer: raw
preview_args:
nrows: 15


20 changes: 18 additions & 2 deletions package/kedro_viz/models/flowchart.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,8 @@ class DataNode(GraphNode):
# the list of modular pipelines this data node belongs to
modular_pipelines: List[str] = field(init=False)

viz_metadata: Optional[Dict] = field(init=False)

# command to run the pipeline to this node
run_command: Optional[str] = field(init=False, default=None)

Expand All @@ -458,6 +460,12 @@ def __post_init__(self):
self.modular_pipelines = self._expand_namespaces(
self._get_namespace(self.full_name)
)
metadata = getattr(self.kedro_obj, "metadata", None)
if metadata:
try:
self.viz_metadata = metadata["kedro-viz"]
except (AttributeError, KeyError): # pragma: no cover
logger.debug("Kedro-viz metadata not found for %s", self.full_name)

# TODO: improve this scheme.
def is_plot_node(self):
Expand Down Expand Up @@ -488,7 +496,15 @@ def is_tracking_node(self):

def is_preview_node(self):
"""Checks if the current node has a preview"""
return hasattr(self.kedro_obj, "_preview")
try:
is_preview = bool(self.viz_metadata["preview_args"])
except (AttributeError, KeyError):
return False
return is_preview
Comment on lines +506 to +510
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is ok how it is, but I don't understand why we need the viz_metadata property. This isn't used for anything other than preview args, so why can't we just do this?

def is_preview_node(self):
    try:
        self.kedro_obj.metadata["kedro-viz"]["preview_args"]
    except (AttributeError, KeyError):  # pragma: no cover
        logger.debug("Kedro-viz preview args not found for %s", self.full_name)
        return False
    else:
        return True

def get_preview_args(self):
        """Gets the preview arguments for a dataset"""
        return self.kedro_obj.metadata["kedro-viz"]["preview_args"]

But it's not a big thing so if you think it's clearer how you have it then don't worry about it 🙂

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

btw I see this is probably the opposite of what @noklam was suggesting, so feel free to ignore and just do whatever you think is best 😁

Copy link
Contributor Author

@rashidakanchwala rashidakanchwala Jun 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think viz_metadata might be eventually used for more than preview. for e.g. looking at this (#662) ticket. if we were to also show table statistics we could probably have

metadata :
   kedro-viz: 
       preview_args : 
             nrows=4 
      profiler_args:
              show = true.. 

something like it


def get_preview_args(self):
"""Gets the preview arguments for a dataset"""
return self.viz_metadata["preview_args"]


@dataclass
Expand Down Expand Up @@ -595,7 +611,7 @@ def __post_init__(self, data_node: DataNode):
self.tracking_data = dataset.load()
elif data_node.is_preview_node():
try:
self.preview = dataset._preview() # type: ignore
self.preview = dataset._preview(**data_node.get_preview_args()) # type: ignore
except Exception as exc: # pylint: disable=broad-except # pragma: no cover
logger.warning(
"'%s' could not be previewed. Full exception: %s: %s",
Expand Down
13 changes: 13 additions & 0 deletions package/tests/test_models/test_flowchart.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# pylint: disable=too-many-public-methods
import base64
from functools import partial
from pathlib import Path
Expand Down Expand Up @@ -368,6 +369,18 @@ def test_data_node_metadata(self):
assert data_node_metadata.filepath == "/tmp/dataset.csv"
assert data_node_metadata.run_command == "kedro run --to-outputs=dataset"

def test_get_preview_args(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please can we also have a test for when preview_args does not exist to check assert not data_node.is_preview_node()?

metadata = {"kedro-viz": {"preview_args": {"nrows": 3}}}
dataset = CSVDataSet(filepath="test.csv", metadata=metadata)
data_node = GraphNode.create_data_node(
full_name="dataset",
tags=set(),
layer=None,
dataset=dataset,
)
assert data_node.is_preview_node() is True
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
assert data_node.is_preview_node() is True
assert data_node.is_preview_node()

assert data_node.get_preview_args() == {"nrows": 3}

def test_preview_data_node_metadata(self):
mock_preview_data = {
"columns": ["id", "company_rating", "company_location"],
Expand Down
2 changes: 1 addition & 1 deletion src/components/metadata-modal/metadata-modal.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ const MetadataModal = ({ metadata, onToggle, visible }) => {
</div>
{hasPreview && (
<div className="pipeline-metadata-modal__preview-text">
Previewing first 40 rows only
Previewing first {metadata.preview.data.length} rows
</div>
)}
</div>
Expand Down
4 changes: 1 addition & 3 deletions src/components/metadata/metadata.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import './styles/metadata.css';
* Shows node meta data
*/
const MetaData = ({
flags,
isPrettyNameOn,
metadata,
onToggleCode,
Expand Down Expand Up @@ -58,7 +57,7 @@ const MetaData = ({
const hasPlot = Boolean(metadata?.plot);
const hasImage = Boolean(metadata?.image);
const hasTrackingData = Boolean(metadata?.trackingData);
const hasPreviewData = Boolean(metadata?.preview) && flags.previewDataSet;
const hasPreviewData = Boolean(metadata?.preview);
const isMetricsTrackingDataset = nodeTypeIcon === 'metricsTracking';
const hasCode = Boolean(metadata?.code);
const isTranscoded = Boolean(metadata?.originalType);
Expand Down Expand Up @@ -326,7 +325,6 @@ const MetaData = ({
};

export const mapStateToProps = (state, ownProps) => ({
flags: state.flags,
isPrettyNameOn: state.prettyName,
metadata: getClickedNodeMetaData(state),
theme: state.theme,
Expand Down
6 changes: 0 additions & 6 deletions src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,6 @@ export const flags = {
default: false,
icon: '🔛',
},
previewDataSet: {
name: 'Preview datasets',
description: 'Enable dataset previews in the metadata panel',
default: true,
icon: '🗂',
},
};

export const settings = {
Expand Down