traja-team · JustinShenk · Jan 17, 2021 · Jan 2, 2021 · Jan 5, 2021 · Jan 3, 2021
diff --git a/.codecov.yml b/.codecov.yml
diff --git a/.coveragerc b/.coveragerc
@@ -7,6 +7,4 @@ exclude_lines =
     if __name__ == .__main__.:
 omit =
     traja/tests/*
-    traja/contrib/*
-    traja/models/*
-    traja/rutils.py
+    traja/contrib/*
diff --git a/codecov.yml b/codecov.yml
@@ -0,0 +1,25 @@
+codecov:
+  require_ci_to_pass: yes
+
+coverage:
+  precision: 2
+  round: down
+  range: "70...100"
+
+parsers:
+  gcov:
+    branch_detection:
+      conditional: yes
+      loop: yes
+      method: yes
+      macro: yes
+
+comment:
+  layout: "reach,diff,flags,files,footer"
+  behavior: default
+  require_changes: no
+
+ignore:
+  - "test_*.py"
+  - "traja-gui.py*"
+
diff --git a/docs/neuralnets/train_lstm.py b/docs/neuralnets/train_lstm.py
@@ -4,7 +4,7 @@
 """
 import traja
 from traja.model import LSTM 
-from traja.datasets import dataset
+from traja.dataset import dataset
 
 df = traja.TrajaDataFrame({"x": [0, 1, 2, 3, 4], "y": [1, 3, 2, 4, 5]})
 

diff --git a/docs/source/predictions.rst b/docs/source/predictions.rst
@@ -29,13 +29,22 @@ via :class:`~traja.models.predictive_models.lstm.LSTM`.
     batch_size = 10 # How many sequences to train every step. Constrained by GPU memory.
     num_past = 10 # How many time steps from which to learn the time series
     num_future = 5 # How many time steps to predict
+    split_by_id = False # Whether to split data into training, test and validation sets based on
+                        # the animal's ID or not. If True, an animal's entire trajectory will only
+                        # be used for training, or only for testing and so on.
+                        # If your animals are territorial (like Jaguars) and you want to forecast
+                        # their trajectories, you want this to be false. If, however, you want to
+                        # classify the group membership of an animal, you want this to be true,
+                        # so that you can verify that previously unseen animals get assigned to
+                        # the correct class.
 
 
     data_loaders, scalers = dataset.MultiModalDataLoader(df,
                                                          batch_size=batch_size,
                                                          n_past=num_past,
                                                          n_future=num_future,
-                                                         num_workers=1)
+                                                         num_workers=1,
+                                                         split_by_id=split_by_id)
 
 .. note::
 
@@ -78,4 +87,78 @@ via :class:`~traja.models.predictive_models.lstm.LSTM`.
     # Train the model
     trainer.fit(data_loaders, model_save_path, epochs=10, training_mode='forecasting')
 
-.. image:: _static/rnn_prediction.png
+After training, you can determine the network's final performance with test data, if you want to pick
+the best model, or with validation data, if you want to determine the performance of your model.
+
+The data_loaders dictionary contains the 'sequential_test_loader' and 'sequential_validation_loader',
+which preserve the order of the original data. The dictionary also contains the 'test_loader' and
+'validation_loader' data loaders, where the order of the time series is randomised.
+
+.. code-block:: python
+
+    validation_loader = data_loaders['sequential_validation_loader']
+
+    trainer.validate(validation_loader)
+
+Finally, you can display your training results using the built-in plotting functions.
+
+.. code-block:: python
+
+    from traja.plotting import plot_prediction
+
+    batch_index = 0  # The batch you want to plot
+    plot_prediction(model, validation_loader, batch_index)
+
+.. image:: _static/rnn_prediction.png
+
+Parameter searching
+-------------------
+
+When optimising neural networks, you often want to change hyperparameters such as the number or
+width of layers. When training a forecaster, changing them means you have to reinitialise and
+retrain your model. However, the classifier and regressor can be reset on the fly, since they work
+directly on the latent space of your model. VAE models provide utility functions to make this easy.
+
+.. code-block:: python
+
+    from traja.models import MultiModelVAE
+    input_size = 2 # Number of input dimensions (normally x, y)
+    output_size = 2 # Same as input_size when predicting
+    num_layers = 2 # Number of LSTM layers. Deeper learns more complex patterns but overfits.
+    hidden_size = 32 # Width of layers. Wider learns bigger patterns but overfits. Try 32, 64, 128, 256, 512
+    dropout = 0.1 # Ignore some network connections. Improves generalisation.
+
+    # Classifier parameters
+    classifier_hidden_size = 32
+    num_classifier_layers = 4
+    num_classes = 42
+
+    # Regressor parameters
+    regressor_hidden_size = 18
+    num_regressor_layers = 1
+    num_regressor_parameters = 3
+
+    model = MultiModelVAE(input_size=input_size,
+                          hidden_size=hidden_size,
+                          num_layers=num_layers,
+                          output_size=output_size,
+                          dropout=dropout,
+                          batch_size=batch_size,
+                          num_future=num_future,
+                          classifier_hidden_size=classifier_hidden_size,
+                          num_classifier_layers=num_classifier_layers,
+                          num_classes=num_classes,
+                          regressor_hidden_size=regressor_hidden_size,
+                          num_regressor_layers=num_regressor_layers,
+                          num_regressor_parameters=num_regressor_parameters)
+
+    new_classifier_hidden_size = 64
+    new_num_classifier_layers = 2
+
+    model.reset_classifier(classifier_hidden_size=new_classifier_hidden_size,
+                           num_classifier_layers=new_num_classifier_layers)
+
+    new_regressor_hidden_size = 64
+    new_num_regressor_layers = 2
+    model.reset_regressor(regressor_hidden_size=new_regressor_hidden_size,
+                          num_regressor_layers=new_num_regressor_layers)
diff --git a/docs/source/reference.rst b/docs/source/reference.rst
@@ -52,7 +52,7 @@ The following methods are available via :mod:`traja.plotting`:
 
 .. automethod:: traja.plotting.polar_bar
 
-.. automethod:: traja.plotting.predict
+.. automethod:: traja.plotting.plot_prediction
 
 .. automethod:: traja.plotting.sans_serif
 

diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -4,7 +4,7 @@ matplotlib
 shapely
 psutil
 scipy
-sklearn
+scikit-learn
 fastdtw
 plotly
 networkx

diff --git a/traja-gui.py b/traja-gui.py
@@ -36,7 +36,7 @@ def __init__(self, filepath):
 
     @pyqtSlot()
     def read_in_chunks(self):
-        """ load datasets in parts and update the progess par """
+        """ load dataset in parts and update the progress bar """
         chunksize = 10 ** 3
         lines_number = sum(1 for line in open(self.filepath))
         self.progressMaximum.emit(lines_number // chunksize)

diff --git a/traja/__init__.py b/traja/__init__.py
@@ -1,12 +1,12 @@
+import logging
+
+from traja import dataset
+from traja import models
 from .accessor import TrajaAccessor
 from .frame import TrajaDataFrame, TrajaCollection
 from .parsers import read_file, from_df
 from .plotting import *
 from .trajectory import *
-from traja import models
-from traja import datasets
-
-import logging
 
 __author__ = "justinshenk"
 __version__ = "0.2.3"

diff --git a/traja/accessor.py b/traja/accessor.py
@@ -50,7 +50,7 @@ def bounds(self):
         return (xlim, ylim)
 
     def night(self, begin: str = "19:00", end: str = "7:00"):
-        """Get nighttime datasets between `begin` and `end`.
+        """Get nighttime dataset between `begin` and `end`.
 
         Args:
           begin (str):  (Default value = '19:00')
@@ -63,7 +63,7 @@ def night(self, begin: str = "19:00", end: str = "7:00"):
         return self.between(begin, end)
 
     def day(self, begin: str = "7:00", end: str = "19:00"):
-        """Get daytime datasets between `begin` and `end`.
+        """Get daytime dataset between `begin` and `end`.
 
         Args:
           begin (str):  (Default value = '7:00')
@@ -141,14 +141,14 @@ def rediscretize_points(self, R, **kwargs):
         return traja.trajectory.rediscretize_points(self, _obj, R=R, **kwargs)
 
     def trip_grid(
-        self,
-        bins: Union[int, tuple] = 10,
-        log: bool = False,
-        spatial_units=None,
-        normalize: bool = False,
-        hist_only: bool = False,
-        plot: bool = True,
-        **kwargs,
+            self,
+            bins: Union[int, tuple] = 10,
+            log: bool = False,
+            spatial_units=None,
+            normalize: bool = False,
+            hist_only: bool = False,
+            plot: bool = True,
+            **kwargs,
     ):
         """Returns a 2D histogram of trip.
 
@@ -325,9 +325,9 @@ def get_derivatives(self) -> pd.DataFrame:
         return derivs
 
     def speed_intervals(
-        self,
-        faster_than: Union[float, int] = None,
-        slower_than: Union[float, int] = None,
+            self,
+            faster_than: Union[float, int] = None,
+            slower_than: Union[float, int] = None,
     ):
         """Returns ``TrajaDataFrame`` with speed time intervals.
 

diff --git a/traja/contrib/rdp.py b/traja/contrib/rdp.py
@@ -105,10 +105,10 @@ def _rdp_iter(M, start_index, last_index, epsilon, dist=pldist):
 
 
 def rdp_iter(
-    M: Union[list, np.ndarray],
-    epsilon: float,
-    dist: Callable = pldist,
-    return_mask: bool = False,
+        M: Union[list, np.ndarray],
+        epsilon: float,
+        dist: Callable = pldist,
+        return_mask: bool = False,
 ):
     """
     Simplifies a given array of points.
@@ -135,11 +135,11 @@ def rdp_iter(
 
 
 def rdp(
-    M: Union[list, np.ndarray],
-    epsilon: float = 0,
-    dist: Callable = pldist,
-    algo: str = "iter",
-    return_mask: bool = False,
+        M: Union[list, np.ndarray],
+        epsilon: float = 0,
+        dist: Callable = pldist,
+        algo: str = "iter",
+        return_mask: bool = False,
 ):
     """
     Simplifies a given array of points using the Ramer-Douglas-Peucker

diff --git a/traja/data/__init__.py b/traja/data/__init__.py
diff --git a/traja/data/loader.py b/traja/data/loader.py
diff --git a/traja/dataset/__init__.py b/traja/dataset/__init__.py
@@ -0,0 +1,2 @@
+from . import example
+from .dataset import TimeSeriesDataset, MultiModalDataLoader
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,7 +4,7 @@ matplotlib @@
     shapely
     psutil
     scipy
-    sklearn
+    scikit-learn
     fastdtw
     plotly
     networkx
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from . import example
		from .dataset import TimeSeriesDataset, MultiModalDataLoader