From scratch (& bare bones) Experiment Management.
 

YAML & Addict

YAML

All of the configuration needed to run an experiment goes into a single YAML file, which we can read back like this ..

import yaml

with open(f'{EXPERIMENT_STORE}/lstm_base/experiment.yaml', 'rb') as f:
    exp = yaml.full_load(f)
exp
{'config': {'name': 'lstm_base',
  'path': None,
  'desc': 'Baseline settings for EHR_LSTM experiments',
  'checkpoint_path': None},
 'params': {'data_params': {'dataset_path': None,
   'labels': ['diabetes', 'stroke', 'alzheimers', 'coronaryheart'],
   'age_start': 0,
   'age_stop': 20,
   'age_in_months': False,
   'bs': 128,
   'num_workers': 0,
   'lazy_load_gpu': True},
  'optim_params': {'optim': 'Adagrad',
   'lr': 0.01,
   'lr_decay': 0,
   'weight_decay': 0.0},
  'model_params': {'model': 'LSTM',
   'αd': 0.5736,
   'lstm_layers': 4,
   'linear_layers': 4,
   'initrange': 0.3,
   'bn': False,
   'input_drp': 0.3,
   'lstm_drp': 0.3,
   'linear_drp': 0.3,
   'zero_bn': False}}}
exp['params']['model_params']
{'model': 'LSTM',
 'αd': 0.5736,
 'lstm_layers': 4,
 'linear_layers': 4,
 'initrange': 0.3,
 'bn': False,
 'input_drp': 0.3,
 'lstm_drp': 0.3,
 'linear_drp': 0.3,
 'zero_bn': False}
print(exp['params']['model_params']['lstm_layers'])
4
exp['params']['data_params']['labels']
['diabetes', 'stroke', 'alzheimers', 'coronaryheart']
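
Going the other way is just as easy: a plain dict can be written back out to a YAML file with yaml.dump. A minimal sketch, using an illustrative path and a truncated settings dict rather than the full structure above:

# Illustrative only: dump a (truncated) settings dict to YAML
sample_exp = {'config': {'name': 'lstm_base', 'desc': 'Baseline settings'},
              'params': {'optim_params': {'optim': 'Adagrad', 'lr': 0.01}}}
with open('/tmp/experiment.yaml', 'w') as f:
    yaml.dump(sample_exp, f, default_flow_style=False, allow_unicode=True)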

Addict

Once we import Addict, like this ..

from addict import Dict

This lets us access dict keys as attributes.

A discussion on StackOverflow details other options like AttrDict and Munch.

exp_addict = Dict(exp)
exp_addict
{'config': {'name': 'lstm_base',
  'path': None,
  'desc': 'Baseline settings for EHR_LSTM experiments',
  'checkpoint_path': None},
 'params': {'data_params': {'dataset_path': None,
   'labels': ['diabetes', 'stroke', 'alzheimers', 'coronaryheart'],
   'age_start': 0,
   'age_stop': 20,
   'age_in_months': False,
   'bs': 128,
   'num_workers': 0,
   'lazy_load_gpu': True},
  'optim_params': {'optim': 'Adagrad',
   'lr': 0.01,
   'lr_decay': 0,
   'weight_decay': 0.0},
  'model_params': {'model': 'LSTM',
   'αd': 0.5736,
   'lstm_layers': 4,
   'linear_layers': 4,
   'initrange': 0.3,
   'bn': False,
   'input_drp': 0.3,
   'lstm_drp': 0.3,
   'linear_drp': 0.3,
   'zero_bn': False}}}
exp_addict.params.model_params
{'model': 'LSTM',
 'αd': 0.5736,
 'lstm_layers': 4,
 'linear_layers': 4,
 'initrange': 0.3,
 'bn': False,
 'input_drp': 0.3,
 'lstm_drp': 0.3,
 'linear_drp': 0.3,
 'zero_bn': False}
exp_addict.params.model_params.values()
dict_values(['LSTM', 0.5736, 4, 4, 0.3, False, 0.3, 0.3, 0.3, False])
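
Addict also supports attribute-style assignment (missing intermediate keys are created on the fly), and the result can be converted back to a plain dict with to_dict(). A small sketch, reusing the exp_addict object from above:

exp_addict.params.model_params.lstm_layers = 2   # attribute-style assignment
exp_addict.params.notes.tag = 'baseline'         # intermediate keys are created automatically
plain = exp_addict.to_dict()                     # back to a regular nested dict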

Helpers

get_data[source]

get_data(params, for_training=True)

Convenience fn to get data (for training or testing) based on data_params in experiment.yaml

get_optimizer[source]

get_optimizer(model, params)

Convenience fn to get optimizer based on optim_params in experiment.yaml

get_model[source]

get_model(dataset_path, num_labels, params)

Convenience fn to get model based on model_params in experiment.yaml
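
As an illustration of what these helpers do, a get_optimizer-style dispatch over optim_params might look like the sketch below; this is an assumption for clarity, not lemonpie's actual implementation, which may support more optimizers and do more validation.

# Hypothetical sketch of dispatching optim_params onto a PyTorch optimizer
import torch

def get_optimizer_sketch(model, optim_params):
    if optim_params['optim'] == 'Adagrad':
        return torch.optim.Adagrad(model.parameters(),
                                   lr=optim_params['lr'],
                                   lr_decay=optim_params['lr_decay'],
                                   weight_decay=optim_params['weight_decay'])
    raise ValueError(f"Unsupported optimizer: {optim_params['optim']}")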

Experiment

A quick check that we are running on Python 3.8 or later (sys.version_info[1] is the minor version):

sys.version_info[1]>=8
True

class Experiment[source]

Experiment(name, config, params)

A simple (bare bones) Experiment class for experiment management

Experiment.create[source]

Experiment.create(exp_name, desc, dataset_path, labels, optim, model, exp_path='default_exp_store', checkpoint_path='default_model_store', age_start=0, age_stop=20, age_in_months=False, lazy_load_gpu=True, bs=128, num_workers=0, lr=0.01, lr_decay=0, weight_decay=0, αd=0.5736, linear_layers=4, initrange=0.3, bn=False, input_drp=0.3, linear_drp=0.3, lstm_layers=4, lstm_drp=0.3, zero_bn=False)

Create a new Experiment object

Only the first 6 arguments are required; if the rest are not passed in, default values are used (including for the stores, EXPERIMENT_STORE and MODEL_STORE).

Experiment.create_from_file[source]

Experiment.create_from_file(path, name)

Create a new Experiment object from the experiment.yaml file in the path/name directory

Experiment.__repr__[source]

Experiment.__repr__()

Print out Experiment details

Experiment.save[source]

Experiment.save()

Save the Experiment

Experiment.load[source]

Experiment.load(name, path=None)

Load an existing Experiment

Experiment.fit[source]

Experiment.fit(epochs, from_checkpoint=False, to_checkpoint=True, verbosity=0.75, plot=True, save=True)

Fit function that assembles everything needed and calls the lemonpie.learn.fit function

Experiment.predict[source]

Experiment.predict(plot=True, save=True)

Predict function that assembles everything needed and calls the lemonpie.learn.predict function

Tests

LSTM

Creating an LSTM experiment with default parameters looks like this ..

labels = ['diabetes', 'stroke', 'alzheimers', 'coronary_heart', 'breast_cancer', 'epilepsy']
lstm_base = Experiment.create('lstm_base', 'baseline for LSTMs', PATH_1K, labels, 'Adagrad', 'LSTM')

Testing __repr__()

lstm_base
{ 'name': 'lstm_base',
  'path': '/home/vinod/.lemonpie/experiments',
  'desc': 'baseline for LSTMs',
  'checkpoint_path': '/home/vinod/.lemonpie/models'}
{ 'data_params': { 'dataset_path': '/home/vinod/.lemonpie/datasets/synthea/1K',
                   'labels': [ 'diabetes', 'stroke', 'alzheimers',
                               'coronary_heart', 'breast_cancer', 'epilepsy'],
                   'age_start': 0,
                   'age_stop': 20,
                   'age_in_months': False,
                   'bs': 128,
                   'num_workers': 0,
                   'lazy_load_gpu': True},
  'optim_params': { 'optim': 'Adagrad',
                    'lr': 0.01,
                    'lr_decay': 0,
                    'weight_decay': 0},
  'model_params': { 'model': 'LSTM',
                    'αd': 0.5736,
                    'linear_layers': 4,
                    'initrange': 0.3,
                    'bn': False,
                    'input_drp': 0.3,
                    'linear_drp': 0.3,
                    'zero_bn': False,
                    'lstm_layers': 4,
                    'lstm_drp': 0.3}}
lstm_base.fit(3, verbosity=1)
epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    0 |         10.246 | [0.498 0.609 0.676 0.646]              1.202 | [0.666 0.736 0.984 0.839]
    1 |          1.155 | [0.677 0.858 0.919 0.751]              1.074 | [0.689 0.736 0.983 0.850]
    2 |          1.092 | [0.725 0.866 0.881 0.716]              1.062 | [0.681 0.705 0.985 0.840]
Checkpointed to "/home/vinod/.lemonpie/models/lstm_base/checkpoint.tar"
Saved experiment to /home/vinod/.lemonpie/experiments/lstm_base/lstm_base.experiment
No experiment settings file found, so creating it ..
Saved experiment settings to /home/vinod/.lemonpie/experiments/lstm_base/experiment.yaml

Loading that saved experiment into a variable with a different name ..

lstm_base_exp = Experiment.load('lstm_base')
Loaded experiment from /home/vinod/.lemonpie/experiments/lstm_base/lstm_base.experiment
%time lstm_base_exp.fit(2, from_checkpoint=True)
From "/home/vinod/.lemonpie/models/lstm_base/checkpoint.tar", loading model ...
loading optimizer and epoch_index ...
epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    3 |          1.003 | [0.752 0.891 0.964 0.745]              1.052 | [0.677 0.715 0.985 0.847]
Checkpointed to "/home/vinod/.lemonpie/models/lstm_base/checkpoint.tar"
Saved experiment to /home/vinod/.lemonpie/experiments/lstm_base/lstm_base.experiment
CPU times: user 6.16 s, sys: 148 ms, total: 6.31 s
Wall time: 6.33 s

Predicting ..

lstm_base_exp.predict()
From "/home/vinod/.lemonpie/models/lstm_base/checkpoint.tar", loading model ...
test loss = 1.3619893789291382
test aurocs = [0.72539  0.879464 0.909025 0.81737  0.924893 0.633047]

Prediction Summary ...
                auroc_score  optimal_threshold     auroc_95_ci
diabetes           0.725390           0.404663  (0.613, 0.826)
stroke             0.879464           0.269451  (0.778, 0.959)
alzheimers         0.909025           0.291508   (0.833, 0.97)
coronary_heart     0.817370           0.343887  (0.702, 0.903)
breast_cancer      0.924893           0.530320    (0.825, 1.0)
epilepsy           0.633047           0.449865  (0.318, 0.932)
Saved experiment to /home/vinod/.lemonpie/experiments/lstm_base/lstm_base.experiment

Testing - creating a new experiment from file

  • here we will use the settings file saved by the last experiment
  • in reality, this is for when you create multiple experiment settings files, e.g. after a hyperparameter search (a rough sketch of generating such files follows this list)
    • in that case, each experiment gets its own directory in the EXPERIMENT_STORE
    • and each directory has a dedicated settings file called experiment.yaml
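
A rough sketch of such a script, writing one experiment.yaml per configuration; the grid, names and truncated settings below are hypothetical (a real settings file would contain the full config/params structure shown earlier):

# Hypothetical: one experiment.yaml per hyperparameter combination,
# each in its own directory under the EXPERIMENT_STORE
import itertools, os, yaml

grid = {'lr': [0.01, 0.001], 'lstm_drp': [0.3, 0.5]}
for i, (lr, drp) in enumerate(itertools.product(grid['lr'], grid['lstm_drp'])):
    name = f'lstm_search_{i}'
    exp_dir = os.path.join(EXPERIMENT_STORE, name)
    os.makedirs(exp_dir, exist_ok=True)
    settings = {'config': {'name': name, 'desc': f'lr={lr}, lstm_drp={drp}'},
                'params': {'optim_params': {'optim': 'Adagrad', 'lr': lr},
                           'model_params': {'model': 'LSTM', 'lstm_drp': drp}}}
    with open(os.path.join(exp_dir, 'experiment.yaml'), 'w') as f:
        yaml.dump(settings, f)
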
lstm_exp_2 = Experiment.create_from_file(EXPERIMENT_STORE, 'lstm_base')
lstm_exp_2
{ 'name': 'lstm_base',
  'path': '/home/vinod/.lemonpie/experiments',
  'desc': 'baseline for LSTMs',
  'checkpoint_path': '/home/vinod/.lemonpie/models'}
{ 'data_params': { 'dataset_path': '/home/vinod/.lemonpie/datasets/synthea/1K',
                   'labels': [ 'diabetes', 'stroke', 'alzheimers',
                               'coronary_heart', 'breast_cancer', 'epilepsy'],
                   'age_start': 0,
                   'age_stop': 20,
                   'age_in_months': False,
                   'bs': 128,
                   'num_workers': 0,
                   'lazy_load_gpu': True},
  'optim_params': { 'optim': 'Adagrad',
                    'lr': 0.01,
                    'lr_decay': 0,
                    'weight_decay': 0},
  'model_params': { 'model': 'LSTM',
                    'αd': 0.5736,
                    'linear_layers': 4,
                    'initrange': 0.3,
                    'bn': False,
                    'input_drp': 0.3,
                    'linear_drp': 0.3,
                    'zero_bn': False,
                    'lstm_layers': 4,
                    'lstm_drp': 0.3}}

CNN

Creating a CNN experiment with default parameters

cnn_base = Experiment.create('cnn_base', 'baseline for CNNs', PATH_1K, labels, 'Adagrad', 'CNN')
cnn_base
{ 'name': 'cnn_base',
  'path': '/home/vinod/.lemonpie/experiments',
  'desc': 'baseline for CNNs',
  'checkpoint_path': '/home/vinod/.lemonpie/models'}
{ 'data_params': { 'dataset_path': '/home/vinod/.lemonpie/datasets/synthea/1K',
                   'labels': [ 'diabetes', 'stroke', 'alzheimers',
                               'coronary_heart', 'breast_cancer', 'epilepsy'],
                   'age_start': 0,
                   'age_stop': 20,
                   'age_in_months': False,
                   'bs': 128,
                   'num_workers': 0,
                   'lazy_load_gpu': True},
  'optim_params': { 'optim': 'Adagrad',
                    'lr': 0.01,
                    'lr_decay': 0,
                    'weight_decay': 0},
  'model_params': { 'model': 'CNN',
                    'αd': 0.5736,
                    'linear_layers': 4,
                    'initrange': 0.3,
                    'bn': False,
                    'input_drp': 0.3,
                    'linear_drp': 0.3,
                    'zero_bn': False,
                    'lstm_layers': None,
                    'lstm_drp': None}}
cnn_base.fit(3)
epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    0 |        274.866 | [0.588 0.511 0.378 0.524]              1.235 | [0.637 0.757 0.969 0.832]
    2 |          1.094 | [0.701 0.869 0.944 0.692]              1.173 | [0.631 0.687 0.936 0.834]
Checkpointed to "/home/vinod/.lemonpie/models/cnn_base/checkpoint.tar"
Saved experiment to /home/vinod/.lemonpie/experiments/cnn_base/cnn_base.experiment
No experiment settings file found, so creating it ..
Saved experiment settings to /home/vinod/.lemonpie/experiments/cnn_base/experiment.yaml
cnn_base_exp = Experiment.load('cnn_base')
Loaded experiment from /home/vinod/.lemonpie/experiments/cnn_base/cnn_base.experiment
%time cnn_base_exp.fit(2, from_checkpoint=True, plot=False)
From "/home/vinod/.lemonpie/models/cnn_base/checkpoint.tar", loading model ...
loading optimizer and epoch_index ...
epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    3 |          0.994 | [0.818 0.906 0.912 0.711]              1.195 | [0.656 0.735 0.982 0.834]
Checkpointed to "/home/vinod/.lemonpie/models/cnn_base/checkpoint.tar"
Saved experiment to /home/vinod/.lemonpie/experiments/cnn_base/cnn_base.experiment
CPU times: user 6.85 s, sys: 720 ms, total: 7.57 s
Wall time: 7.73 s
cnn_base_exp.predict()
From "/home/vinod/.lemonpie/models/cnn_base/checkpoint.tar", loading model ...
test loss = 1.0534400939941406
test aurocs = [0.764864 0.876623 0.908297 0.808036 0.776824 0.645923]

Prediction Summary ...
                auroc_score  optimal_threshold     auroc_95_ci
diabetes           0.764864           0.279021  (0.682, 0.846)
stroke             0.876623           0.767606  (0.756, 0.967)
alzheimers         0.908297           0.571818  (0.847, 0.961)
coronary_heart     0.808036           0.587451   (0.674, 0.92)
breast_cancer      0.776824           0.509909  (0.667, 0.876)
epilepsy           0.645923           0.600106  (0.422, 0.863)
Saved experiment to /home/vinod/.lemonpie/experiments/cnn_base/cnn_base.experiment

Testing creating a new experiment from file, as we did above for the LSTM ..

cnn_exp_2 = Experiment.create_from_file(EXPERIMENT_STORE, 'cnn_base')
cnn_exp_2
{ 'name': 'cnn_base',
  'path': '/home/vinod/.lemonpie/experiments',
  'desc': 'baseline for CNNs',
  'checkpoint_path': '/home/vinod/.lemonpie/models'}
{ 'data_params': { 'dataset_path': '/home/vinod/.lemonpie/datasets/synthea/1K',
                   'labels': [ 'diabetes', 'stroke', 'alzheimers',
                               'coronary_heart', 'breast_cancer', 'epilepsy'],
                   'age_start': 0,
                   'age_stop': 20,
                   'age_in_months': False,
                   'bs': 128,
                   'num_workers': 0,
                   'lazy_load_gpu': True},
  'optim_params': { 'optim': 'Adagrad',
                    'lr': 0.01,
                    'lr_decay': 0,
                    'weight_decay': 0},
  'model_params': { 'model': 'CNN',
                    'αd': 0.5736,
                    'linear_layers': 4,
                    'initrange': 0.3,
                    'bn': False,
                    'input_drp': 0.3,
                    'linear_drp': 0.3,
                    'zero_bn': False,
                    'lstm_layers': None,
                    'lstm_drp': None}}
cnn_exp_2.fit(3)
epoch |     train loss |     train aurocs                  valid loss |     valid aurocs    
----------------------------------------------------------------------------------------------------
    0 |        278.917 | [0.580 0.591 0.552 0.477]              2.339 | [0.672 0.730 0.957 0.823]
    2 |          1.163 | [0.691 0.837 0.913 0.656]              1.100 | [0.647 0.734 0.991 0.790]
Checkpointed to "/home/vinod/.lemonpie/models/cnn_base/checkpoint.tar"
Saved experiment to /home/vinod/.lemonpie/experiments/cnn_base/cnn_base.experiment