Source code for deep_bottleneck.utils

import numpy as np
from collections import namedtuple


def construct_full_dataset(training, test):
    """Concatenates training and test data splits to obtain the full dataset.

    The input arguments use the following naming convention:

    - X is the training data
    - y is the training class, with numbers from 0 to 1
    - Y is the training class, but coded as a 2-dim vector with one entry
      set to 1 at the column index corresponding to the class

    Args:
        training: Namedtuple with fields X, y and Y
        test: Namedtuple with fields X, y and Y

    Returns:
        A new Namedtuple with fields X, y and Y containing the concatenation
        of training and test data
    """
    Dataset = namedtuple('Dataset', ['X', 'Y', 'y', 'n_classes'])
    X = np.concatenate((training.X, test.X))
    y = np.concatenate((training.y, test.y))
    Y = np.concatenate((training.Y, test.Y))
    return Dataset(X, Y, y, training.n_classes)
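
# A minimal usage sketch (not part of the original module): the inputs below
# are hypothetical stand-ins for the project's real data loaders.
#   >>> Split = namedtuple('Split', ['X', 'Y', 'y', 'n_classes'])
#   >>> training = Split(np.zeros((80, 12)), np.eye(2)[np.zeros(80, int)], np.zeros(80, int), 2)
#   >>> test = Split(np.ones((20, 12)), np.eye(2)[np.ones(20, int)], np.ones(20, int), 2)
#   >>> construct_full_dataset(training, test).X.shape
#   (100, 12)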


def shuffle_in_unison_inplace(a, b):
    """Shuffles arrays a and b randomly in unison.

    Args:
        a: An array, for example containing data samples
        b: An array, for example containing labels

    Returns:
        Both arrays shuffled in the same way
    """
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]
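
# Example (a sketch, not from the original source): because both arrays are
# indexed with the same permutation, each sample stays aligned with its label.
# Note that, despite the name, fancy indexing returns shuffled copies rather
# than reordering the inputs in place.
#   >>> data = np.arange(10).reshape(5, 2)   # row i is [2*i, 2*i + 1]
#   >>> labels = np.arange(5)
#   >>> s_data, s_labels = shuffle_in_unison_inplace(data, labels)
#   >>> all(s_data[i, 0] == 2 * s_labels[i] for i in range(5))
#   True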


def data_shuffle(data_sets_org, percent_of_train, min_test_data=80, shuffle_data=False):
    """Divides the data into train and test splits and optionally shuffles it."""
    # TODO Function data_shuffle needs refactoring and a proper docstring
    perc = lambda i, t: np.rint((i * t) / 100).astype(np.int32)
    C = type('type_C', (object,), {})
    data_sets = C()
    stop_train_index = perc(percent_of_train, data_sets_org.data.shape[0])
    start_test_index = stop_train_index
    if percent_of_train > min_test_data:
        start_test_index = perc(min_test_data, data_sets_org.data.shape[0])
    data_sets.train = C()
    data_sets.test = C()
    if shuffle_data:
        shuffled_data, shuffled_labels = shuffle_in_unison_inplace(data_sets_org.data, data_sets_org.labels)
    else:
        shuffled_data, shuffled_labels = data_sets_org.data, data_sets_org.labels
    data_sets.train.data = shuffled_data[:stop_train_index, :]
    data_sets.train.labels = shuffled_labels[:stop_train_index, :]
    data_sets.test.data = shuffled_data[start_test_index:, :]
    data_sets.test.labels = shuffled_labels[start_test_index:, :]
    return data_sets
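
# Illustrative call (hypothetical input object; shapes chosen for the example):
# with percent_of_train=80 and the default min_test_data=80, the first 80% of
# rows form the train split and the remaining 20% the test split. The splits
# overlap only when percent_of_train exceeds min_test_data.
#   >>> org = type('type_C', (object,), {})()
#   >>> org.data = np.random.rand(100, 12)
#   >>> org.labels = np.eye(2)[np.random.randint(0, 2, 100)]
#   >>> splits = data_shuffle(org, 80)
#   >>> splits.train.data.shape, splits.test.data.shape
#   ((80, 12), (20, 12))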


def is_dense_like(layer):
    """Check whether a layer has the attribute 'kernel', which is true for dense-like layers.

    Args:
        layer: Keras layer to check for attribute 'kernel'

    Returns:
        True if layer has attribute 'kernel', False otherwise
    """
    return hasattr(layer, 'kernel')
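
# Quick check (a sketch; assumes TensorFlow's bundled Keras is available):
# Dense layers carry a 'kernel' weight once built, Dropout layers do not.
#   >>> from tensorflow import keras
#   >>> model = keras.Sequential([keras.layers.Dense(4), keras.layers.Dropout(0.5)])
#   >>> model.build(input_shape=(None, 8))
#   >>> [is_dense_like(layer) for layer in model.layers]
#   [True, False]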


def _get_current_min_max(activations):
    """Get both minimum and maximum of an array.

    Args:
        activations: numpy ndarray

    Returns:
        Minimum and maximum value of activations
    """
    return np.min(activations), np.max(activations)


def get_min_max(activations_summary, layer_number, neuron_number=None):
    """Get the minimum and maximum of the activations of a specific layer or a
    specific neuron over all epochs.

    Args:
        activations_summary: numpy ndarray
        layer_number: Index of the layer
        neuron_number: Index of the neuron. If None, the activations of the
            whole layer serve as the basis

    Returns:
        Minimum and maximum value of the activations over all epochs
    """
    epochs_in_activation_summary = [int(epoch) for epoch in activations_summary]
    epochs_in_activation_summary = np.asarray(sorted(epochs_in_activation_summary))

    total_max = float("-inf")
    total_min = float("inf")

    for epoch in epochs_in_activation_summary:
        activations = activations_summary[f'{epoch}/activations']
        layer_activations = np.asarray(activations[str(layer_number)])
        # Transpose so that the first axis indexes neurons.
        layer_activations = layer_activations.transpose()
        if neuron_number is not None:
            current_min, current_max = _get_current_min_max(layer_activations[neuron_number])
        else:
            current_min, current_max = _get_current_min_max(layer_activations)
        total_max = np.max([current_max, total_max])
        total_min = np.min([current_min, total_min])

    return total_min, total_max
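
# Sketch of the expected input layout (inferred from the access pattern above;
# the original source does not document it): activations_summary behaves like
# an HDF5 file whose top-level keys are epoch numbers and which maps paths of
# the form '<epoch>/activations/<layer>' to arrays of shape
# (n_samples, n_neurons). FakeSummary below is a hypothetical stand-in.
#   >>> class FakeSummary(dict):
#   ...     def __getitem__(self, key):
#   ...         node = self
#   ...         for part in key.split('/'):
#   ...             node = dict.__getitem__(node, part)
#   ...         return node
#   >>> summary = FakeSummary({
#   ...     '0': {'activations': {'0': np.array([[0.1, 0.9], [0.2, 0.8]])}},
#   ...     '5': {'activations': {'0': np.array([[0.0, 1.0], [0.3, 0.7]])}},
#   ... })
#   >>> tuple(float(v) for v in get_min_max(summary, layer_number=0))
#   (0.0, 1.0)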