Support other data types for DataViewer (#5093)
For #4677 

Add code for other data types 
Add new functional tests for those data types

<!--
  If an item below does not apply to you, then go ahead and check it off as "done" and strikethrough the text, e.g.:
    - [x] ~Has unit tests & system/integration tests~
-->
- [x] Pull request represents a single change (i.e. not fixing disparate/unrelated things in a single PR)
- [x] Title summarizes what is changing
- [x] Has a [news entry](https://github.com/Microsoft/vscode-python/tree/master/news) file (remember to thank yourself!)
- [ ] Has sufficient logging.
- [ ] Has telemetry for enhancements.
- [x] Unit tests & system/integration tests are added/updated
- [ ] [Test plan](https://github.com/Microsoft/vscode-python/blob/master/.github/test_plan.md) is updated as appropriate
- [ ] [`package-lock.json`](https://github.com/Microsoft/vscode-python/blob/master/package-lock.json) has been regenerated by running `npm install` (if dependencies have changed)
- [ ] The wiki is updated with any design decisions/details.
rchiodo authored Apr 3, 2019
1 parent db9706a commit 56d44da
Showing 27 changed files with 1,080 additions and 851 deletions.
2 changes: 1 addition & 1 deletion news/1 Enhancements/4677.md
@@ -1 +1 @@
-Add preliminary support for viewing dataframes.
+Add support for viewing dataframes, lists, dicts, nparrays.
9 changes: 9 additions & 0 deletions package-lock.json

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions package.json
@@ -2397,6 +2397,7 @@
 "mocha-junit-reporter": "^1.17.0",
 "mocha-multi-reporters": "^1.1.7",
 "node-has-native-dependencies": "^1.0.2",
+"node-html-parser": "^1.1.13",
 "nyc": "^13.3.0",
 "raw-loader": "^0.5.1",
 "react": "^16.5.2",
5 changes: 3 additions & 2 deletions package.nls.json
@@ -226,7 +226,8 @@
 "DataScience.dataExplorerInvalidVariableFormat" : "'{0}' is not an active variable.",
 "DataScience.jupyterGetVariablesExecutionError" : "Failure during variable extraction:\r\n{0}",
 "DataScience.loadingMessage" : "loading ...",
-"DataScience.noRowsInDataExplorer" : "Fetching data ...",
+"DataScience.noRowsInDataViewer" : "Fetching data ...",
 "DataScience.pandasTooOldForViewingFormat" : "Python package 'pandas' is version {0}. Version 0.20 or greater is required for viewing data.",
-"DataScience.pandasRequiredForViewing" : "Python package 'pandas' is required for viewing data."
+"DataScience.pandasRequiredForViewing" : "Python package 'pandas' is required for viewing data.",
+"DataScience.valuesColumn": "values"
 }
54 changes: 34 additions & 20 deletions pythonFiles/datascience/getJupyterVariableDataFrameInfo.py
@@ -1,14 +1,35 @@
 # Query Jupyter server for the info about a dataframe
 import json as _VSCODE_json
+import pandas as _VSCODE_pd
 
 # In IJupyterVariables.getValue this '_VSCode_JupyterTestValue' will be replaced with the json stringified value of the target variable
 # Indexes off of _VSCODE_targetVariable need to index types that are part of IJupyterVariable
 _VSCODE_targetVariable = _VSCODE_json.loads('_VSCode_JupyterTestValue')
 _VSCODE_evalResult = eval(_VSCODE_targetVariable['name'])
 
 # First list out the columns of the data frame (assuming it is one for now)
-_VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
-_VSCODE_columnNames = list(_VSCODE_evalResult)
+_VSCODE_columnTypes = []
+_VSCODE_columnNames = []
+if _VSCODE_targetVariable['type'] == 'list':
+    _VSCODE_columnTypes = ['string'] # Might be able to be more specific here?
+    _VSCODE_columnNames = ['_VSCode_JupyterValuesColumn']
+elif _VSCODE_targetVariable['type'] == 'Series':
+    _VSCODE_evalResult = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
+    _VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
+    _VSCODE_columnNames = list(_VSCODE_evalResult)
+elif _VSCODE_targetVariable['type'] == 'dict':
+    _VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
+    _VSCODE_evalResult = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
+    _VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
+    _VSCODE_columnNames = list(_VSCODE_evalResult)
+elif _VSCODE_targetVariable['type'] == 'ndarray':
+    _VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
+    _VSCODE_evalResult = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
+    _VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
+    _VSCODE_columnNames = list(_VSCODE_evalResult)
+elif _VSCODE_targetVariable['type'] == 'DataFrame':
+    _VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
+    _VSCODE_columnNames = list(_VSCODE_evalResult)
 
 # Make sure we have an index column (see code in getJupyterVariableDataFrameRows.py)
 if 'index' not in _VSCODE_columnNames:
@@ -17,13 +38,13 @@
 
 # Then loop and generate our output json
 _VSCODE_columns = []
-for n in range(0, len(_VSCODE_columnNames)):
-    c = _VSCODE_columnNames[n]
-    t = _VSCODE_columnTypes[n]
+for _VSCODE_n in range(0, len(_VSCODE_columnNames)):
+    _VSCODE_column_name = _VSCODE_columnNames[_VSCODE_n]
+    _VSCODE_column_type = _VSCODE_columnTypes[_VSCODE_n]
     _VSCODE_colobj = {}
-    _VSCODE_colobj['key'] = c
-    _VSCODE_colobj['name'] = c
-    _VSCODE_colobj['type'] = str(t)
+    _VSCODE_colobj['key'] = _VSCODE_column_name
+    _VSCODE_colobj['name'] = _VSCODE_column_name
+    _VSCODE_colobj['type'] = str(_VSCODE_column_type)
     _VSCODE_columns.append(_VSCODE_colobj)
 
 del _VSCODE_columnNames
@@ -33,20 +54,13 @@
 _VSCODE_targetVariable['columns'] = _VSCODE_columns
 del _VSCODE_columns
 
-# Figure out shape if not already there
-if 'shape' not in _VSCODE_targetVariable:
-    _VSCODE_targetVariable['shape'] = str(_VSCODE_evalResult.shape)
-
-# Row count is actually embedded in shape. Should be the second number
-import re as _VSCODE_re
-_VSCODE_regex = r"\(\s*(\d+),\s*(\d+)\s*\)"
-_VSCODE_matches = _VSCODE_re.search(_VSCODE_regex, _VSCODE_targetVariable['shape'])
-if (_VSCODE_matches):
-    _VSCODE_targetVariable['rowCount'] = int(_VSCODE_matches[1])
-    del _VSCODE_matches
+# Figure out shape if not already there. Use the shape to compute the row count
+if (hasattr(_VSCODE_evalResult, "shape")):
+    _VSCODE_targetVariable['rowCount'] = _VSCODE_evalResult.shape[0]
+elif _VSCODE_targetVariable['type'] == 'list':
+    _VSCODE_targetVariable['rowCount'] = len(_VSCODE_evalResult)
 else:
     _VSCODE_targetVariable['rowCount'] = 0
-del _VSCODE_regex
 
 # Transform this back into a string
 print(_VSCODE_json.dumps(_VSCODE_targetVariable))
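
Note: as a rough standalone sketch of what this script does (not the extension's code; the function name, the 'values' column label, and the sample ndarray below are invented for illustration, and pandas/numpy are assumed to be installed), coercing a list/dict/ndarray/Series into a DataFrame so that columns, dtypes, and row count can be reported looks roughly like this:

```python
# Illustrative sketch only: coerce a non-DataFrame value to a DataFrame and
# report its columns, dtypes, and row count, similar to getJupyterVariableDataFrameInfo.py.
import json
import numpy as np
import pandas as pd

def describe_variable(value, type_name):
    if type_name == 'list':
        df = pd.DataFrame({'values': value})   # single, generically named column
    elif type_name in ('dict', 'ndarray'):
        df = pd.Series(value).to_frame()       # Series -> one-column frame
    elif type_name == 'Series':
        df = value.to_frame()
    else:                                      # assume it is already a DataFrame
        df = value
    columns = [{'key': str(c), 'name': str(c), 'type': str(t)}
               for c, t in zip(list(df), df.dtypes)]
    row_count = df.shape[0] if hasattr(df, 'shape') else len(value)
    return {'columns': columns, 'rowCount': row_count}

print(json.dumps(describe_variable(np.arange(5), 'ndarray')))
```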
20 changes: 19 additions & 1 deletion pythonFiles/datascience/getJupyterVariableDataFrameRows.py
@@ -1,5 +1,6 @@
 # Query Jupyter server for the rows of a data frame
 import json as _VSCODE_json
+import pandas as _VSCODE_pd
 import pandas.io.json as _VSCODE_pd_json
 
 # In IJupyterVariables.getValue this '_VSCode_JupyterTestValue' will be replaced with the json stringified value of the target variable
@@ -12,12 +13,29 @@
 _VSCODE_startRow = max(_VSCode_JupyterStartRow, 0)
 _VSCODE_endRow = min(_VSCode_JupyterEndRow, _VSCODE_targetVariable['rowCount'])
 
+# Assume we have a dataframe. If not, turn our eval result into a dataframe
+_VSCODE_df = _VSCODE_evalResult
+if (_VSCODE_targetVariable['type'] == 'list'):
+    _VSCODE_df = _VSCODE_pd.DataFrame({'_VSCode_JupyterValuesColumn':_VSCODE_evalResult})
+elif (_VSCODE_targetVariable['type'] == 'Series'):
+    _VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
+elif _VSCODE_targetVariable['type'] == 'dict':
+    _VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
+    _VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
+elif _VSCODE_targetVariable['type'] == 'ndarray':
+    _VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
+    _VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
+# If not a known type, then just let pandas handle it.
+elif not (hasattr(_VSCODE_df, 'iloc')):
+    _VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult)
+
 # Turn into JSON using pandas. We use pandas because it's about 3 orders of magnitude faster to turn into JSON
-_VSCODE_rows = df.iloc[_VSCODE_startRow:_VSCODE_endRow]
+_VSCODE_rows = _VSCODE_df.iloc[_VSCODE_startRow:_VSCODE_endRow]
 _VSCODE_result = _VSCODE_pd_json.to_json(None, _VSCODE_rows, orient='table', date_format='iso')
 print(_VSCODE_result)
 
 # Cleanup our variables
+del _VSCODE_df
 del _VSCODE_endRow
 del _VSCODE_startRow
 del _VSCODE_rows
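
Note: a minimal sketch of the row-fetching side (again not the extension's code; the sample dict, the 'values' series name, and the row window are invented, and the public DataFrame.to_json is used here rather than pandas.io.json): the coerced DataFrame is sliced with iloc and the chunk serialized with the 'table' orient.

```python
# Illustrative sketch only: fetch a window of rows and serialize it the way
# getJupyterVariableDataFrameRows.py does, using pandas' 'table' JSON layout.
import pandas as pd

value = {'a': 1, 'b': 2, 'c': 3}                  # pretend this is the user's dict variable
df = pd.Series(value, name='values').to_frame()   # coerce to a one-column DataFrame
start_row, end_row = 0, 2                         # the viewer requests rows in chunks
chunk = df.iloc[start_row:end_row]
print(chunk.to_json(orient='table', date_format='iso'))
```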
(Remaining changed files not shown.)
