Data Loading

Contents

import kessler
from kessler.nn import LSTMPredictor
from kessler.data import kelvins_to_event_dataset
import pandas as pd

# Fix the random number generator seed so that repeated runs are reproducible
RANDOM_SEED = 1
kessler.seed(RANDOM_SEED)

Data Loading

Kessler accepts CDMs either in KVN format or as pandas dataframes. The following example shows how to load them from a pandas dataframe:

#First example: the data comes from the Kelvins competition.
#A dedicated converter takes care of translating the Kelvins format into the
#standard CDM format (the data can be downloaded at https://kelvins.esa.int/collision-avoidance-challenge/data/):
file_name='kelvins_data/train_data.csv'
#we use only 1000 events, and drop the two RCS estimate features
events = kelvins_to_event_dataset(
    file_name,
    drop_features=['c_rcs_estimate', 't_rcs_estimate'],
    num_events=1000,
)

#Alternatively, this is a generic loader for real CDM data held in a pandas dataframe
#(uncomment the following lines if needed; EventDataset is not among the imports at the
#top of this file, so its import has to be uncommented as well):
#from kessler import EventDataset
#file_name = 'path_to_csv/file.csv'
#df=pd.read_csv(file_name)
#events = EventDataset.from_pandas(df)

Descriptive Statistics

#Descriptive statistics of the event dataset:
#convert the events to a dataframe first, then summarise it
events_df = events.to_dataframe()
kessler_stats = events_df.describe()
print(kessler_stats)

LSTM Training

#Only features with numeric content are used for the training.
#nn_features is the list of feature names taken into account for the training;
#it can be edited to add or remove features.
#NOTE(review): common_features presumably returns the features shared by all events - confirm against the kessler API.
nn_features = events.common_features(only_numeric=True)
#Show the selected feature names
print(nn_features)

# Split data into a test set (5% of the total number of events, taken from the end of the dataset)
len_test_set=int(0.05*len(events))
# Index of the first test event. Slicing with this non-negative index instead of
# -len_test_set keeps the split correct when len_test_set is 0 (fewer than 20 events):
# events[-0:] would return the whole dataset as the test set and events[:-0] would
# leave nothing for training.
split_index=len(events)-len_test_set
print('Test data:', len_test_set)
events_test=events[split_index:]
print(events_test)

# The rest of the data will be used for training and validation
print('Training and validation data:', split_index)
events_train_and_val=events[:split_index]
print(events_train_and_val)

# Hyperparameters of the LSTM predictor, specialized to the nn_features we extracted above
lstm_config = {
    'lstm_size': 256,  # Number of hidden units per LSTM layer
    'lstm_depth': 2,  # Number of stacked LSTM layers
    'dropout': 0.2,  # Dropout probability
    'features': nn_features,  # The list of feature names to use in the LSTM
}

# Create the LSTM predictor from the configuration above
model = LSTMPredictor(**lstm_config)

# Train the model on the training-and-validation events
model.learn(
    events_train_and_val,
    # Number of epochs (one epoch is one full pass through the training dataset)
    epochs=10,
    # Learning rate; decrease it if training diverges
    lr=1e-3,
    # Minibatch size; decrease it if there are issues with memory use
    batch_size=16,
    # Can be 'cuda' if there is a GPU available
    device='cpu',
    # Proportion of the data to use as a validation set internally
    valid_proportion=0.15,
    # Number of multithreaded dataloader workers: 4 is good for performance, but if
    # there are any issues or errors, please try num_workers=1 as this solves issues
    # with PyTorch most of the time
    num_workers=4,
    # Number of events to use to compute NN normalization factors; have this number
    # as big as possible (and at least a few thousands)
    event_samples_for_stats=1000,
)

#Save the model to a file after training.
#NOTE(review): the file name mentions 20 epochs and lr 10-4, while the learn call in this
#file uses epochs=10 and lr=1e-3 - confirm which one is intended and align them.
model.save(file_name="LSTM_20epochs_lr10-4_batchsize16")

#Plot the NN loss and write the figure to the file plot_loss.pdf:
model.plot_loss(file_name='plot_loss.pdf')

#we show an example CDM from the set (the first CDM of the first training event).
#It is printed explicitly, like the other inspections in this file, because a bare
#expression only displays its value in an interactive session and is a no-op in a script.
print(events_train_and_val[0][0])

#Pick a single event from the test set, hold out its last CDM and try to predict it
event=events_test[3]
print(event)

#All the CDMs of the event except the final one
event_len = len(event)
event_beginning = event[:event_len-1]
print(event_beginning)

#Let the model predict the evolution of the event from the truncated event
event_evolution = model.predict_event(
    event_beginning,
    num_samples=100,
    max_length=14,
)

#Features shown in the comparison plot
features_to_plot = ['RELATIVE_SPEED', 'MISS_DISTANCE', 'OBJECT1_CT_T']

#The prediction is drawn in red; the axes are returned so the ground truth can share them
axs = event_evolution.plot_features(
    features_to_plot,
    return_axs=True,
    linewidth=0.1,
    color='red',
    alpha=0.33,
    label='Prediction',
)
#The ground truth values are drawn on the same axes, labelled 'Real':
event.plot_features(features_to_plot, axs=axs, label='Real', legend=True)

#Uncertainty prediction for all the covariance matrix elements of both OBJECT1 and OBJECT2
#(diagonal=False), with the prediction in red:
axs = event_evolution.plot_uncertainty(
    diagonal=False,
    return_axs=True,
    color='red',
    alpha=0.5,
    linewidth=0.5,
    label='Prediction',
    legend=True,
)
#The real values are drawn on the same axes:
event.plot_uncertainty(axs=axs, diagonal=False, label='Real')