Support other data types for DataViewer (#5093)
For #4677 

Add code for other data types 
Add new functional tests for those data types

<!--
  If an item below does not apply to you, then go ahead and check it off as "done" and strikethrough the text, e.g.:
    - [x] ~Has unit tests & system/integration tests~
-->
- [x] Pull request represents a single change (i.e. not fixing disparate/unrelated things in a single PR)
- [x] Title summarizes what is changing
- [x] Has a [news entry](https://github.com/Microsoft/vscode-python/tree/master/news) file (remember to thank yourself!)
- [ ] Has sufficient logging.
- [ ] Has telemetry for enhancements.
- [x] Unit tests & system/integration tests are added/updated
- [ ] [Test plan](https://github.com/Microsoft/vscode-python/blob/master/.github/test_plan.md) is updated as appropriate
- [ ] [`package-lock.json`](https://github.com/Microsoft/vscode-python/blob/master/package-lock.json) has been regenerated by running `npm install` (if dependencies have changed)
- [ ] The wiki is updated with any design decisions/details.
rchiodo authored Apr 3, 2019
1 parent db9706a commit 56d44da
Showing 27 changed files with 1,080 additions and 851 deletions.
2 changes: 1 addition & 1 deletion news/1 Enhancements/4677.md
@@ -1 +1 @@
-Add preliminary support for viewing dataframes.
+Add support for viewing dataframes, lists, dicts, nparrays.
9 changes: 9 additions & 0 deletions package-lock.json

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions package.json
@@ -2397,6 +2397,7 @@
 "mocha-junit-reporter": "^1.17.0",
 "mocha-multi-reporters": "^1.1.7",
 "node-has-native-dependencies": "^1.0.2",
+"node-html-parser": "^1.1.13",
 "nyc": "^13.3.0",
 "raw-loader": "^0.5.1",
 "react": "^16.5.2",
5 changes: 3 additions & 2 deletions package.nls.json
@@ -226,7 +226,8 @@
 "DataScience.dataExplorerInvalidVariableFormat" : "'{0}' is not an active variable.",
 "DataScience.jupyterGetVariablesExecutionError" : "Failure during variable extraction:\r\n{0}",
 "DataScience.loadingMessage" : "loading ...",
-"DataScience.noRowsInDataExplorer" : "Fetching data ...",
+"DataScience.noRowsInDataViewer" : "Fetching data ...",
 "DataScience.pandasTooOldForViewingFormat" : "Python package 'pandas' is version {0}. Version 0.20 or greater is required for viewing data.",
-"DataScience.pandasRequiredForViewing" : "Python package 'pandas' is required for viewing data."
+"DataScience.pandasRequiredForViewing" : "Python package 'pandas' is required for viewing data.",
+"DataScience.valuesColumn": "values"
 }
54 changes: 34 additions & 20 deletions pythonFiles/datascience/getJupyterVariableDataFrameInfo.py
@@ -1,14 +1,35 @@
 # Query Jupyter server for the info about a dataframe
 import json as _VSCODE_json
+import pandas as _VSCODE_pd
 
 # In IJupyterVariables.getValue this '_VSCode_JupyterTestValue' will be replaced with the json stringified value of the target variable
 # Indexes off of _VSCODE_targetVariable need to index types that are part of IJupyterVariable
 _VSCODE_targetVariable = _VSCODE_json.loads('_VSCode_JupyterTestValue')
 _VSCODE_evalResult = eval(_VSCODE_targetVariable['name'])
 
 # First list out the columns of the data frame (assuming it is one for now)
-_VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
-_VSCODE_columnNames = list(_VSCODE_evalResult)
+_VSCODE_columnTypes = []
+_VSCODE_columnNames = []
+if _VSCODE_targetVariable['type'] == 'list':
+    _VSCODE_columnTypes = ['string'] # Might be able to be more specific here?
+    _VSCODE_columnNames = ['_VSCode_JupyterValuesColumn']
+elif _VSCODE_targetVariable['type'] == 'Series':
+    _VSCODE_evalResult = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
+    _VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
+    _VSCODE_columnNames = list(_VSCODE_evalResult)
+elif _VSCODE_targetVariable['type'] == 'dict':
+    _VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
+    _VSCODE_evalResult = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
+    _VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
+    _VSCODE_columnNames = list(_VSCODE_evalResult)
+elif _VSCODE_targetVariable['type'] == 'ndarray':
+    _VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
+    _VSCODE_evalResult = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
+    _VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
+    _VSCODE_columnNames = list(_VSCODE_evalResult)
+elif _VSCODE_targetVariable['type'] == 'DataFrame':
+    _VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
+    _VSCODE_columnNames = list(_VSCODE_evalResult)
 
 # Make sure we have an index column (see code in getJupyterVariableDataFrameRows.py)
 if 'index' not in _VSCODE_columnNames:
@@ -17,13 +38,13 @@
 
 # Then loop and generate our output json
 _VSCODE_columns = []
-for n in range(0, len(_VSCODE_columnNames)):
-    c = _VSCODE_columnNames[n]
-    t = _VSCODE_columnTypes[n]
+for _VSCODE_n in range(0, len(_VSCODE_columnNames)):
+    _VSCODE_column_name = _VSCODE_columnNames[_VSCODE_n]
+    _VSCODE_column_type = _VSCODE_columnTypes[_VSCODE_n]
     _VSCODE_colobj = {}
-    _VSCODE_colobj['key'] = c
-    _VSCODE_colobj['name'] = c
-    _VSCODE_colobj['type'] = str(t)
+    _VSCODE_colobj['key'] = _VSCODE_column_name
+    _VSCODE_colobj['name'] = _VSCODE_column_name
+    _VSCODE_colobj['type'] = str(_VSCODE_column_type)
     _VSCODE_columns.append(_VSCODE_colobj)
 
 del _VSCODE_columnNames
@@ -33,20 +54,13 @@
 _VSCODE_targetVariable['columns'] = _VSCODE_columns
 del _VSCODE_columns
 
-# Figure out shape if not already there
-if 'shape' not in _VSCODE_targetVariable:
-    _VSCODE_targetVariable['shape'] = str(_VSCODE_evalResult.shape)
-
-# Row count is actually embedded in shape. Should be the second number
-import re as _VSCODE_re
-_VSCODE_regex = r"\(\s*(\d+),\s*(\d+)\s*\)"
-_VSCODE_matches = _VSCODE_re.search(_VSCODE_regex, _VSCODE_targetVariable['shape'])
-if (_VSCODE_matches):
-    _VSCODE_targetVariable['rowCount'] = int(_VSCODE_matches[1])
-    del _VSCODE_matches
+# Figure out shape if not already there. Use the shape to compute the row count
+if (hasattr(_VSCODE_evalResult, "shape")):
+    _VSCODE_targetVariable['rowCount'] = _VSCODE_evalResult.shape[0]
+elif _VSCODE_targetVariable['type'] == 'list':
+    _VSCODE_targetVariable['rowCount'] = len(_VSCODE_evalResult)
 else:
     _VSCODE_targetVariable['rowCount'] = 0
-del _VSCODE_regex
 
 # Transform this back into a string
 print(_VSCODE_json.dumps(_VSCODE_targetVariable))
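
Note: as a rough standalone sketch of what this script does (not the extension's code; the function name, the 'values' column label, and the sample ndarray below are invented for illustration, and pandas/numpy are assumed to be installed), coercing a list/dict/ndarray/Series into a DataFrame so that columns, dtypes, and row count can be reported looks roughly like this:

```python
# Illustrative sketch only: coerce a non-DataFrame value to a DataFrame and
# report its columns, dtypes, and row count, similar to getJupyterVariableDataFrameInfo.py.
import json
import numpy as np
import pandas as pd

def describe_variable(value, type_name):
    if type_name == 'list':
        df = pd.DataFrame({'values': value})   # single, generically named column
    elif type_name in ('dict', 'ndarray'):
        df = pd.Series(value).to_frame()       # Series -> one-column frame
    elif type_name == 'Series':
        df = value.to_frame()
    else:                                      # assume it is already a DataFrame
        df = value
    columns = [{'key': str(c), 'name': str(c), 'type': str(t)}
               for c, t in zip(list(df), df.dtypes)]
    row_count = df.shape[0] if hasattr(df, 'shape') else len(value)
    return {'columns': columns, 'rowCount': row_count}

print(json.dumps(describe_variable(np.arange(5), 'ndarray')))
```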
20 changes: 19 additions & 1 deletion pythonFiles/datascience/getJupyterVariableDataFrameRows.py
@@ -1,5 +1,6 @@
 # Query Jupyter server for the rows of a data frame
 import json as _VSCODE_json
+import pandas as _VSCODE_pd
 import pandas.io.json as _VSCODE_pd_json
 
 # In IJupyterVariables.getValue this '_VSCode_JupyterTestValue' will be replaced with the json stringified value of the target variable
@@ -12,12 +13,29 @@
 _VSCODE_startRow = max(_VSCode_JupyterStartRow, 0)
 _VSCODE_endRow = min(_VSCode_JupyterEndRow, _VSCODE_targetVariable['rowCount'])
 
+# Assume we have a dataframe. If not, turn our eval result into a dataframe
+_VSCODE_df = _VSCODE_evalResult
+if (_VSCODE_targetVariable['type'] == 'list'):
+    _VSCODE_df = _VSCODE_pd.DataFrame({'_VSCode_JupyterValuesColumn':_VSCODE_evalResult})
+elif (_VSCODE_targetVariable['type'] == 'Series'):
+    _VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
+elif _VSCODE_targetVariable['type'] == 'dict':
+    _VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
+    _VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
+elif _VSCODE_targetVariable['type'] == 'ndarray':
+    _VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
+    _VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
+# If not a known type, then just let pandas handle it.
+elif not (hasattr(_VSCODE_df, 'iloc')):
+    _VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult)
+
 # Turn into JSON using pandas. We use pandas because it's about 3 orders of magnitude faster to turn into JSON
-_VSCODE_rows = df.iloc[_VSCODE_startRow:_VSCODE_endRow]
+_VSCODE_rows = _VSCODE_df.iloc[_VSCODE_startRow:_VSCODE_endRow]
 _VSCODE_result = _VSCODE_pd_json.to_json(None, _VSCODE_rows, orient='table', date_format='iso')
 print(_VSCODE_result)
 
 # Cleanup our variables
+del _VSCODE_df
 del _VSCODE_endRow
 del _VSCODE_startRow
 del _VSCODE_rows
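
Note: a minimal sketch of the row-fetching side (again not the extension's code; the sample dict, the 'values' series name, and the row window are invented, and the public DataFrame.to_json is used here rather than pandas.io.json): the coerced DataFrame is sliced with iloc and the chunk serialized with the 'table' orient.

```python
# Illustrative sketch only: fetch a window of rows and serialize it the way
# getJupyterVariableDataFrameRows.py does, using pandas' 'table' JSON layout.
import pandas as pd

value = {'a': 1, 'b': 2, 'c': 3}                  # pretend this is the user's dict variable
df = pd.Series(value, name='values').to_frame()   # coerce to a one-column DataFrame
start_row, end_row = 0, 2                         # the viewer requests rows in chunks
chunk = df.iloc[start_row:end_row]
print(chunk.to_json(orient='table', date_format='iso'))
```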
(Remaining changed files not shown.)
