Source code for deep_bottleneck.datasets.mushroom

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from collections import namedtuple
from tensorflow import keras
from tensorflow.python.keras import utils as keras_utils


def load(path='datasets/mushroom.csv'):
    """Load the mushroom dataset.

    Mushrooms are to be classified as either edible or poisonous.

    The output follows the following naming convention:
        - X is the data (every column except ``class=e`` and ``class=p``)
        - y is the class, read from the ``class=e`` column (presumably
          1 for edible and 0 for poisonous -- confirm against the CSV)
        - Y is the class, but coded as a 2-dim vector with one entry set
          to 1 at the column index corresponding to the class
        - n_classes is the number of classes (2)

    Args:
        path: Location of the mushroom CSV file. The default is relative
            to the current working directory.

    Returns:
        Returns two namedtuples, the first one containing training and the
        second one containing test data respectively. Both come with
        fields X, Y, y and n_classes.
    """
    mushrooms = pd.read_csv(path)

    n_classes = 2

    # NOTE(review): unlike X, y is not converted with `.values`, so it is
    # passed on as the pandas column itself. Presumably y_train / y_test
    # therefore keep their original row labels after the shuffle -- confirm
    # that callers do not rely on positional indexing.
    y = mushrooms['class=e']
    # Both label columns are dropped so the target does not leak into X.
    X = mushrooms.drop(['class=e', 'class=p'], axis=1).values

    # Fixed seed keeps the 80/20 train/test split reproducible across runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=11)

    Y_train = keras_utils.to_categorical(y_train, n_classes).astype('float32')
    Y_test = keras_utils.to_categorical(y_test, n_classes).astype('float32')

    Dataset = namedtuple('Dataset', ['X', 'Y', 'y', 'n_classes'])
    training = Dataset(X_train, Y_train, y_train, n_classes)
    test = Dataset(X_test, Y_test, y_test, n_classes)

    return training, test