Skip to content

Commit

Permalink
Merge pull request #59 from aws-deepracer-community/dev
Browse files Browse the repository at this point in the history
Minor additional features
  • Loading branch information
larsll authored Jun 23, 2024
2 parents be9a3fa + ad01433 commit 03daf30
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 17 deletions.
26 changes: 20 additions & 6 deletions deepracer/logs/log_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ class AnalysisUtils:
"""
@staticmethod
def simulation_agg(df, firstgroup='iteration', secondgroup='episode',
add_tstamp=False, is_eval=False):
add_tstamp=False, is_eval=False, add_perf=False):
"""Groups all log data by episodes and other information and returns
a pandas dataframe with aggregated information
Expand Down Expand Up @@ -283,7 +283,8 @@ def simulation_agg(df, firstgroup='iteration', secondgroup='episode',
for multiple log files loaded stream would be preferred
add_tstamp - whether to add a timestamp, by default False
is_eval - is data for evaluation (training if False), default: False
add_perf - add performance related data
Returns:
Aggregated dataframe
"""
Expand All @@ -292,11 +293,13 @@ def simulation_agg(df, firstgroup='iteration', secondgroup='episode',
if df.nunique(axis=0)['worker'] > 1:
logging.warning('Multiple workers found, consider using'
'secondgroup="unique_episode"')

with pd.option_context("mode.copy_on_write", True):
df['delta_time'] = df['tstamp'].astype(float).diff()
df.loc[df['episode_status'] == 'prepare', 'delta_time'] = 0.0

df.loc[:,'delta_time'] = df['tstamp'].astype(float)-df['tstamp'].shift(1).astype(float)
df.loc[df['episode_status'] == 'prepare', 'delta_time'] = 0.0
df.loc[:,'delta_dist']=(((df['x'].shift(1)-df['x']) ** 2 + (df['y'].shift(1)-df['y']) ** 2) ** 0.5)
df.loc[df['episode_status'] == 'prepare', 'delta_dist'] = 0.0
df['delta_dist'] = (df['x'].diff() ** 2 + df['y'].diff() ** 2) ** 0.5
df.loc[df['episode_status'] == 'prepare', 'delta_dist'] = 0.0

grouped = df.groupby([firstgroup, secondgroup])

Expand Down Expand Up @@ -355,6 +358,17 @@ def simulation_agg(df, firstgroup='iteration', secondgroup='episode',

result['complete'] = np.where(result['progress'] == 100, 1, 0)

if add_perf:
by_perf_mean = grouped['delta_time'].agg('mean').reset_index() \
.rename(columns={'delta_time': 'step_time_mean'})
by_perf_max = grouped['delta_time'].agg('max').reset_index() \
.rename(columns={'delta_time': 'step_time_max'})
by_perf_std = grouped['delta_time'].agg('std').reset_index() \
.rename(columns={'delta_time': 'step_time_std'})
result = result.merge(by_perf_mean, on=[firstgroup, secondgroup]) \
.merge(by_perf_max, on=[firstgroup, secondgroup]) \
.merge(by_perf_std, on=[firstgroup, secondgroup])

return result

@staticmethod
Expand Down
7 changes: 5 additions & 2 deletions deepracer/logs/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ def plotProgress(
self,
method: ListStr = "mean",
rolling_average: int = 5,
rolling_average_method: str = "mean",
figsize: tuple = (12, 5),
rounds: list = None,
workers: list = None,
Expand All @@ -393,8 +394,10 @@ def plotProgress(
Arguments:
method - (str / list) Statistical value to be calculated. Examples are 'mean', 'median',
'min' & 'max'. Default: 'mean'.
rolling_average - (int) Plotted line will be averaged with last number of x iterations.
rolling_average - (int) Plotted line will be aggregated (e.g. averaged) over the last x iterations.
Default: 5.
rolling_average_method - (str) Plotted line will be aggregated with method.
Default: 'mean'.
figsize - (tuple) Matplotlib figsize definition.
series - (list) List of series to plot, contains tuples containing column in summary to
plot, the legend title and color of plot. Default:
Expand Down Expand Up @@ -442,7 +445,7 @@ def plotProgress(
ax.scatter(x, summary[s[0]], s=2, alpha=0.5, color=s[2])
ax.plot(
x,
summary[s[0]].rolling(rolling_average, min_periods=1).mean().values,
summary[s[0]].rolling(rolling_average, min_periods=1).agg(rolling_average_method).values,
label=s[1],
color=s[2],
)
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ wheel>=0.34.0
twine>=3.1.0
pytest>=5.3.0
versioneer>=0.18
opencv-python>=4.2.0
tensorflow-cpu>=2.1.0
python-resize-image>=1.1.19
# project dependencies
boto3>=1.12.0
python-dateutil<3.0.0,>=2.1
Expand Down
19 changes: 13 additions & 6 deletions tests/deepracer/logs/test_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ class Constants:
'dist', 'new_reward', 'speed', 'reward', 'time_if_complete',
'reward_if_complete', 'quintile', 'complete']

TRAIN_COLUMNS_UNIQUE_PERF = ['iteration', 'unique_episode', 'steps', 'start_at', 'progress', 'time',
'dist', 'new_reward', 'speed', 'reward', 'time_if_complete',
'reward_if_complete', 'quintile', 'complete', 'step_time_mean', 'step_time_max', 'step_time_std']

EVAL_COLUMNS = ['stream', 'episode', 'steps', 'start_at', 'progress', 'time', 'dist', 'speed',
'crashed', 'off_track', 'time_if_complete', 'complete']

Expand Down Expand Up @@ -77,16 +81,19 @@ def test_episode_analysis_drfc3_local(self):
drl.load_training_trace()
df = drl.dataframe()

simulation_agg = AnalysisUtils.simulation_agg(df, secondgroup='unique_episode')
simulation_agg = AnalysisUtils.simulation_agg(df, secondgroup='unique_episode', add_perf=True)
complete_ones = simulation_agg[simulation_agg['progress'] == 100]
fastest = complete_ones.nsmallest(5, 'time')
print(fastest)

assert LogFolderType.DRFC_MODEL_MULTIPLE_WORKERS == drl.fh.type # CONSOLE_MODEL_WITH_LOGS
assert (690, len(Constants.TRAIN_COLUMNS_UNIQUE)) == simulation_agg.shape
assert np.all(Constants.TRAIN_COLUMNS_UNIQUE == simulation_agg.columns)
assert 402 == fastest.iloc[0, 1]
assert 189.0 == fastest.iloc[0, 2]
assert 12.548 == pytest.approx(fastest.iloc[0, 5])
assert (690, len(Constants.TRAIN_COLUMNS_UNIQUE_PERF)) == simulation_agg.shape
assert np.all(Constants.TRAIN_COLUMNS_UNIQUE_PERF == simulation_agg.columns)
assert 402 == fastest['unique_episode'].iloc[0]
assert 189.0 == fastest['steps'].iloc[0]
assert 17.12718 == pytest.approx(fastest['dist'].iloc[0], rel=1e-3)
assert 0.06639 == pytest.approx(fastest['step_time_mean'].iloc[0], rel=1e-3)
assert 12.548 == pytest.approx(fastest['time'].iloc[0])

@pytest.mark.skipif(os.environ.get("TOX_S3_BUCKET", None) is None, reason="Requires AWS access")
def test_episode_analysis_drfc3_s3(self):
Expand Down
8 changes: 5 additions & 3 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
envlist = py3

[testenv]
deps =
pytest
commands =
pytest {posargs}
changedir = {toxinidir}/tests
passenv = TOX_S3_BUCKET,TOX_ENABLE_AWS
extras = test
extras = test
allowlist_externals = pytest

[pytest]
filterwarnings = ignore::DeprecationWarning:tensorflow.*

0 comments on commit 03daf30

Please sign in to comment.