Source code for deep_bottleneck.datasets.harmonics

import scipy.io as sio
from pathlib2 import Path
from collections import namedtuple

import numpy as np

from tensorflow.python.keras import utils as keras_utils
from deep_bottleneck import utils


def load(nb_dir=''):
    """Load the Information Bottleneck harmonics dataset.

    The cached archive ``nb_dir + 'datasets/IB_data_<ID>.npz'`` is read.
    If it does not exist yet, it is first created by calling
    ``import_IB_data_from_mat``.

    The output follows the following naming convention:
        - X is the data
        - y is the class label as stored in the archive (expected to be
          0 or 1, since ``n_classes`` is 2)
        - Y is the class, one-hot encoded via ``to_categorical`` into
          ``n_classes`` (2) columns and cast to float32
        - n_classes is the number of classes (2)

    Args:
        nb_dir: Prefix prepended verbatim to the relative ``datasets/``
            path. It is joined by plain string concatenation, so it must
            already end with a path separator if it names a directory.

    Returns:
        Two namedtuples, the first one containing the training data and
        the second one containing the test data. Both come with the
        fields X, Y, y and n_classes.
    """
    dataset_id = '2017_12_21_16_51_3_275766'
    n_classes = 2

    # Build the archive path once; string concatenation is kept on purpose so
    # that existing ``nb_dir`` prefixes resolve exactly as before.
    data_path = nb_dir + 'datasets/IB_data_' + dataset_id + '.npz'

    # Generate the cached archive from the .mat source on first use.
    if not Path(data_path).is_file():
        import_IB_data_from_mat(dataset_id, nb_dir)

    # np.load returns a lazy NpzFile that keeps the archive open; the context
    # manager closes it as soon as the arrays have been read into memory.
    with np.load(data_path) as data:
        X_train = data['X_train']
        y_train = data['y_train']
        X_test = data['X_test']
        y_test = data['y_test']

    Y_train = keras_utils.to_categorical(y_train, n_classes).astype('float32')
    Y_test = keras_utils.to_categorical(y_test, n_classes).astype('float32')

    Dataset = namedtuple('Dataset', ['X', 'Y', 'y', 'n_classes'])
    training = Dataset(X_train, Y_train, y_train, n_classes)
    test = Dataset(X_test, Y_test, y_test, n_classes)
    return training, test
def import_IB_data_from_mat(name_ID, nb_dir=''):
    """Convert the harmonics dataset used by Tishby into a ``.npz`` archive.

    Reads ``nb_dir + 'datasets/var_u.mat'``, splits it into a training and a
    test part with ``utils.data_shuffle`` and writes the result as a
    compressed ``.npz`` archive to
    ``nb_dir + 'datasets/IB_data_<name_ID>.npz'`` with the keys
    ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Args:
        name_ID: Identifier which is going to be part of the output filename.
        nb_dir: Prefix prepended verbatim (plain string concatenation) to
            the relative ``datasets/`` paths for both input and output.

    Returns:
        None
    """
    print('Loading Data...')
    d = sio.loadmat(nb_dir + 'datasets/var_u.mat')
    F = d['F']
    y = d['y']

    # utils.data_shuffle is handed a plain attribute container exposing
    # ``data`` and ``labels``; an empty ad-hoc class serves as that container.
    C = type('type_C', (object,), {})
    data_sets_original = C()
    data_sets_original.data = F
    # Pair each label with its complement (y, 1 - y) before shuffling.
    # NOTE(review): presumably yields an (n_samples, 2) array - confirm
    # against the shape of ``y`` in the .mat file.
    data_sets_original.labels = np.squeeze(
        np.concatenate((y[None, :], 1 - y[None, :]), axis=0).T)

    # NOTE(review): 80 is presumably the training percentage - confirm
    # against utils.data_shuffle.
    data_sets = utils.data_shuffle(data_sets_original, 80, shuffle_data=True)

    # Only the first label column is stored in the archive.
    X_train = data_sets.train.data
    y_train = data_sets.train.labels[:, 0]
    X_test = data_sets.test.data
    y_test = data_sets.test.labels[:, 0]

    # savez_compressed appends the '.npz' extension to the given file name.
    np.savez_compressed(nb_dir + 'datasets/IB_data_' + str(name_ID),
                        X_train=X_train, y_train=y_train,
                        X_test=X_test, y_test=y_test)