Skip to content

Commit

Permalink
Merge pull request #39 from traja-team/datagenerator
Browse files Browse the repository at this point in the history
Data generator added
  • Loading branch information
JustinShenk authored Jan 17, 2021
2 parents f72853d + ef9c6ca commit 9e9a6e3
Show file tree
Hide file tree
Showing 44 changed files with 2,099 additions and 3,286 deletions.
3 changes: 0 additions & 3 deletions .codecov.yml

This file was deleted.

4 changes: 1 addition & 3 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ exclude_lines =
if __name__ == .__main__.:
omit =
traja/tests/*
traja/contrib/*
traja/models/*
traja/rutils.py
traja/contrib/*
25 changes: 25 additions & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
codecov:
require_ci_to_pass: yes

coverage:
precision: 2
round: down
range: "70...100"

parsers:
gcov:
branch_detection:
conditional: yes
loop: yes
method: yes
macro: yes

comment:
layout: "reach,diff,flags,files,footer"
behavior: default
require_changes: no

ignore:
- "test_*.py"
- "traja-gui.py*"

2 changes: 1 addition & 1 deletion docs/neuralnets/train_lstm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""
import traja
from traja.model import LSTM
from traja.datasets import dataset
from traja.dataset import dataset

df = traja.TrajaDataFrame({"x": [0, 1, 2, 3, 4], "y": [1, 3, 2, 4, 5]})

Expand Down
87 changes: 85 additions & 2 deletions docs/source/predictions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,22 @@ via :class:`~traja.models.predictive_models.lstm.LSTM`.
batch_size = 10 # How many sequences to train every step. Constrained by GPU memory.
num_past = 10 # How many time steps from which to learn the time series
num_future = 5 # How many time steps to predict
split_by_id = False # Whether to split data into training, test and validation sets based on
# the animal's ID or not. If True, an animal's entire trajectory will only
# be used for training, or only for testing and so on.
# If your animals are territorial (like Jaguars) and you want to forecast
# their trajectories, you want this to be false. If, however, you want to
# classify the group membership of an animal, you want this to be true,
# so that you can verify that previously unseen animals get assigned to
# the correct class.
data_loaders, scalers = dataset.MultiModalDataLoader(df,
batch_size=batch_size,
n_past=num_past,
n_future=num_future,
num_workers=1)
num_workers=1,
split_by_id=split_by_id)
.. note::

Expand Down Expand Up @@ -78,4 +87,78 @@ via :class:`~traja.models.predictive_models.lstm.LSTM`.
# Train the model
trainer.fit(data_loaders, model_save_path, epochs=10, training_mode='forecasting')
.. image:: _static/rnn_prediction.png
After training, you can determine the network's final performance with test data, if you want to pick
the best model, or with validation data, if you want to determine the performance of your model.

The data_loaders dictionary contains the 'sequential_test_loader' and 'sequential_validation_loader'
data loaders, which preserve the order of the original data. The dictionary also contains the
'test_loader' and 'validation_loader' data loaders, where the order of the time series is randomised.

.. code-block:: python
validation_loader = data_loaders['sequential_validation_loader']
trainer.validate(validation_loader)
Finally, you can display your training results using the built-in plotting libraries.

.. code-block:: python
from traja.plotting import plot_prediction
batch_index = 0 # The batch you want to plot
plot_prediction(model, validation_loader, batch_index)
.. image:: _static/rnn_prediction.png

Parameter searching
-------------------

When optimising neural networks, you often want to change the hyperparameters. When training a forecaster,
this means you have to reinitialise and retrain your model. However, when training a classifier or regressor,
you can reset the classifier or regressor on the fly, since it works directly on the latent space of your model.
VAE models provide utility functions to make this easy.

.. code-block:: python
from traja.models import MultiModelVAE
input_size = 2 # Number of input dimensions (normally x, y)
output_size = 2 # Same as input_size when predicting
num_layers = 2 # Number of LSTM layers. Deeper learns more complex patterns but overfits.
hidden_size = 32 # Width of layers. Wider learns bigger patterns but overfits. Try 32, 64, 128, 256, 512
dropout = 0.1 # Ignore some network connections. Improves generalisation.
# Classifier parameters
classifier_hidden_size = 32
num_classifier_layers = 4
num_classes = 42
# Regressor parameters
regressor_hidden_size = 18
num_regressor_layers = 1
num_regressor_parameters = 3
model = MultiModelVAE(input_size=input_size,
hidden_size=hidden_size,
num_layers=num_layers,
output_size=output_size,
dropout=dropout,
batch_size=batch_size,
num_future=num_future,
classifier_hidden_size=classifier_hidden_size,
num_classifier_layers=num_classifier_layers,
num_classes=num_classes,
regressor_hidden_size=regressor_hidden_size,
num_regressor_layers=num_regressor_layers,
num_regressor_parameters=num_regressor_parameters)
new_classifier_hidden_size = 64
new_num_classifier_layers = 2
model.reset_classifier(classifier_hidden_size=new_classifier_hidden_size,
num_classifier_layers=new_num_classifier_layers)
new_regressor_hidden_size = 64
new_num_regressor_layers = 2
model.reset_regressor(regressor_hidden_size=new_regressor_hidden_size,
num_regressor_layers=new_num_regressor_layers)
2 changes: 1 addition & 1 deletion docs/source/reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ The following methods are available via :mod:`traja.plotting`:

.. automethod:: traja.plotting.polar_bar

.. automethod:: traja.plotting.predict
.. automethod:: traja.plotting.plot_prediction

.. automethod:: traja.plotting.sans_serif

Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ matplotlib
shapely
psutil
scipy
sklearn
scikit-learn
fastdtw
plotly
networkx
Expand Down
2 changes: 1 addition & 1 deletion traja-gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(self, filepath):

@pyqtSlot()
def read_in_chunks(self):
""" load datasets in parts and update the progess par """
""" load dataset in parts and update the progess par """
chunksize = 10 ** 3
lines_number = sum(1 for line in open(self.filepath))
self.progressMaximum.emit(lines_number // chunksize)
Expand Down
8 changes: 4 additions & 4 deletions traja/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import logging

from traja import dataset
from traja import models
from .accessor import TrajaAccessor
from .frame import TrajaDataFrame, TrajaCollection
from .parsers import read_file, from_df
from .plotting import *
from .trajectory import *
from traja import models
from traja import datasets

import logging

__author__ = "justinshenk"
__version__ = "0.2.3"
Expand Down
26 changes: 13 additions & 13 deletions traja/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def bounds(self):
return (xlim, ylim)

def night(self, begin: str = "19:00", end: str = "7:00"):
"""Get nighttime datasets between `begin` and `end`.
"""Get nighttime dataset between `begin` and `end`.
Args:
begin (str): (Default value = '19:00')
Expand All @@ -63,7 +63,7 @@ def night(self, begin: str = "19:00", end: str = "7:00"):
return self.between(begin, end)

def day(self, begin: str = "7:00", end: str = "19:00"):
"""Get daytime datasets between `begin` and `end`.
"""Get daytime dataset between `begin` and `end`.
Args:
begin (str): (Default value = '7:00')
Expand Down Expand Up @@ -141,14 +141,14 @@ def rediscretize_points(self, R, **kwargs):
return traja.trajectory.rediscretize_points(self, _obj, R=R, **kwargs)

def trip_grid(
self,
bins: Union[int, tuple] = 10,
log: bool = False,
spatial_units=None,
normalize: bool = False,
hist_only: bool = False,
plot: bool = True,
**kwargs,
self,
bins: Union[int, tuple] = 10,
log: bool = False,
spatial_units=None,
normalize: bool = False,
hist_only: bool = False,
plot: bool = True,
**kwargs,
):
"""Returns a 2D histogram of trip.
Expand Down Expand Up @@ -325,9 +325,9 @@ def get_derivatives(self) -> pd.DataFrame:
return derivs

def speed_intervals(
self,
faster_than: Union[float, int] = None,
slower_than: Union[float, int] = None,
self,
faster_than: Union[float, int] = None,
slower_than: Union[float, int] = None,
):
"""Returns ``TrajaDataFrame`` with speed time intervals.
Expand Down
18 changes: 9 additions & 9 deletions traja/contrib/rdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,10 @@ def _rdp_iter(M, start_index, last_index, epsilon, dist=pldist):


def rdp_iter(
M: Union[list, np.ndarray],
epsilon: float,
dist: Callable = pldist,
return_mask: bool = False,
M: Union[list, np.ndarray],
epsilon: float,
dist: Callable = pldist,
return_mask: bool = False,
):
"""
Simplifies a given array of points.
Expand All @@ -135,11 +135,11 @@ def rdp_iter(


def rdp(
M: Union[list, np.ndarray],
epsilon: float = 0,
dist: Callable = pldist,
algo: str = "iter",
return_mask: bool = False,
M: Union[list, np.ndarray],
epsilon: float = 0,
dist: Callable = pldist,
algo: str = "iter",
return_mask: bool = False,
):
"""
Simplifies a given array of points using the Ramer-Douglas-Peucker
Expand Down
Empty file removed traja/data/__init__.py
Empty file.
22 changes: 0 additions & 22 deletions traja/data/loader.py

This file was deleted.

2 changes: 2 additions & 0 deletions traja/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from . import example
from .dataset import TimeSeriesDataset, MultiModalDataLoader
Loading

0 comments on commit 9e9a6e3

Please sign in to comment.