# -*- coding: utf-8 -*-
"""
The main purpose of this module is to load a dataset from disk and feed it to
the toolbox in one of the formats it can handle.

For details see

.. autosummary::
    :nosignatures:

    get_dataset

@author: rbodo
"""

import json
import os

from configparser import NoOptionError
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from snntoolbox.utils.utils import import_helpers


def get_dataset(config):
    """Get dataset, either from ``.npz`` files or ``keras.ImageDataGenerator``.

    Returns dictionaries with keys ``x_test`` and ``y_test`` if the data set
    was loaded in ``.npz`` format, or with a ``dataflow`` key if the data will
    be loaded from ``.jpg``, ``.png``, or ``.bmp`` files by a
    ``keras.ImageDataGenerator``.

    Parameters
    ----------

    config: configparser.ConfigParser
        Settings.

    Returns
    -------

    normset: dict
        Used to normalize the network parameters.

    testset: dict
        Used to test the network.

    """

    testset = {}
    normset = try_get_normset_from_scalefacs(config)
    dataset_path = config.get('paths', 'dataset_path')
    dataset_format = config.get('input', 'dataset_format')
    normalize_thresholds = config.getboolean('loihi', 'normalize_thresholds',
                                             fallback=False)
    is_testset_needed = config.getboolean('tools', 'evaluate_ann') or \
        config.getboolean('tools', 'simulate') or normalize_thresholds
    is_normset_needed = normalize_thresholds or (
        config.getboolean('tools', 'normalize') and normset == {})
    batch_size = config.getint('simulation', 'batch_size')

    # _______________________________ Keras __________________________________#
    try:
        keras_dataset = config.get('input', 'keras_dataset')
        if keras_dataset:
            from keras_rewiring.utilities.load_dataset import \
                load_and_preprocess_dataset
            num_to_test = config.getint('simulation', 'num_to_test')
            data = load_and_preprocess_dataset(keras_dataset)
            x_test, y_test = data['test']
            testset = {'x_test': x_test[:num_to_test],
                       'y_test': y_test[:num_to_test]}
            if is_normset_needed:
                normset['x_norm'] = x_test
            return normset, testset
    except (NoOptionError, ImportError) as e:
        print("Warning:", e)

    # ________________________________ npz ___________________________________#
    if dataset_format == 'npz':
        print("Loading data set from '.npz' files in {}.\n".format(
            dataset_path))
        if is_testset_needed:
            num_to_test = config.getint('simulation', 'num_to_test')
            x_test = load_npz(dataset_path, 'x_test.npz')[:num_to_test]
            y_test = load_npz(dataset_path, 'y_test.npz')[:num_to_test]
            testset = {'x_test': x_test, 'y_test': y_test}
        if is_normset_needed:
            x_norm = load_npz(dataset_path, 'x_norm.npz')
            normset['x_norm'] = x_norm

    # ________________________________ jpg ___________________________________#
    elif dataset_format in {'jpg', 'png'}:
        print("Loading data set from ImageDataGenerator, using images in "
              "{}.\n".format(dataset_path))

        # Transform str to dict.
        datagen_kwargs = eval(config.get('input', 'datagen_kwargs'))
        dataflow_kwargs = eval(config.get('input', 'dataflow_kwargs'))

        # Get class labels.
        class_idx_path = config.get('paths', 'class_idx_path')
        if class_idx_path != '':
            class_idx = json.load(open(os.path.abspath(class_idx_path)))
            dataflow_kwargs['classes'] = \
                [class_idx[str(idx)][0] for idx in range(len(class_idx))]

        # Get preprocessing function.
        if 'preprocessing_function' in datagen_kwargs:
            helpers = import_helpers(
                datagen_kwargs['preprocessing_function'], config)
            datagen_kwargs['preprocessing_function'] = \
                helpers.preprocessing_function

        dataflow_kwargs['directory'] = dataset_path
        if 'batch_size' not in dataflow_kwargs:
            dataflow_kwargs['batch_size'] = batch_size
        datagen = ImageDataGenerator(**datagen_kwargs)
        if (datagen.featurewise_center or
                datagen.featurewise_std_normalization or
                datagen.zca_whitening):
            # Compute quantities required for featurewise normalization
            # (std, mean, and principal components if ZCA whitening is
            # applied).
            rs = datagen_kwargs.get('rescale', None)
            x_orig = next(ImageDataGenerator(rescale=rs).flow_from_directory(
                **dataflow_kwargs))[0]
            datagen.fit(x_orig)

        if is_normset_needed:
            shuffle = dataflow_kwargs.get('shuffle')
            dataflow_kwargs['shuffle'] = True
            normset['dataflow'] = \
                datagen.flow_from_directory(**dataflow_kwargs)
            dataflow_kwargs['shuffle'] = shuffle
        if is_testset_needed:
            testset = {
                'dataflow': datagen.flow_from_directory(**dataflow_kwargs)}

    # _______________________________ aedat __________________________________#
    elif dataset_format == 'aedat':
        if is_normset_needed:
            print("Loading normalization dataset from '.npz' file in {}.\n"
                  "".format(dataset_path))
            x_norm = load_npz(dataset_path, 'x_norm.npz')
            normset['x_norm'] = x_norm
            # For Loihi threshold normalization we need to pass the
            # normalization data in the testset dict.
            testset = {'x_norm': x_norm}

    return normset, testset
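

# A minimal usage sketch (illustrative, not part of the toolbox): construct a
# ConfigParser by hand with just the options ``get_dataset`` reads, and load a
# dataset stored as ``x_test.npz`` / ``y_test.npz``. All paths and values
# below are hypothetical; in the toolbox the config comes from the user's
# settings file.
def _example_get_dataset_npz():
    import configparser
    config = configparser.ConfigParser()
    config.read_dict({
        'paths': {'dataset_path': '/tmp/dataset',          # holds .npz files
                  'log_dir_of_current_run': '/tmp/run0'},  # cached scale facs
        'input': {'dataset_format': 'npz'},
        'tools': {'evaluate_ann': 'yes', 'simulate': 'no', 'normalize': 'no'},
        'simulation': {'batch_size': '32', 'num_to_test': '100'},
        'normalization': {'percentile': '99'}})
    # 'keras_dataset' is deliberately omitted; the NoOptionError it raises is
    # caught inside get_dataset and only printed as a warning.
    normset, testset = get_dataset(config)
    print(testset['x_test'].shape, testset['y_test'].shape)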


def try_get_normset_from_scalefacs(config):
    """
    Instead of loading a normalization data set to calculate scale-factors,
    try to get the scale-factors stored on disk during a previous run.

    Parameters
    ----------

    config: configparser.ConfigParser
        Settings.

    Returns
    -------

    : dict
        A dictionary with the single key 'scale_facs'. The corresponding
        value is itself a dictionary containing the scale factors for each
        layer. Returns an empty dictionary if no scale factors were found.
    """

    newpath = os.path.join(config.get('paths', 'log_dir_of_current_run'),
                           'normalization')
    if not os.path.exists(newpath):
        os.makedirs(newpath)
    filepath = os.path.join(newpath, config.get('normalization',
                                                'percentile') + '.json')
    if os.path.isfile(filepath):
        print("Loading scale factors from disk instead of recalculating.")
        with open(filepath) as f:
            return {'scale_facs': json.load(f)}
    return {}
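

# The file this function looks for lives at
# ``<log_dir_of_current_run>/normalization/<percentile>.json`` and, per the
# docstring above, maps layer names to scale factors. A sketch for seeding it
# by hand (layer names and values are hypothetical) so that the next run
# skips recalculation:
def _example_store_scale_facs(log_dir, percentile):
    scale_facs = {'conv2d_1': 1.3, 'dense_1': 0.8}  # hypothetical values
    newpath = os.path.join(log_dir, 'normalization')
    os.makedirs(newpath, exist_ok=True)
    with open(os.path.join(newpath, str(percentile) + '.json'), 'w') as f:
        json.dump(scale_facs, f)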


def to_categorical(y, nb_classes):
    """Convert class vector to binary class matrix.

    If the input ``y`` has shape (``nb_samples``,) and contains integers from
    0 to ``nb_classes - 1``, the output array will be of dimension
    (``nb_samples``, ``nb_classes``).
    """

    y = np.asarray(y, dtype='int32')
    y_cat = np.zeros((len(y), nb_classes))
    for i in range(len(y)):
        y_cat[i, y[i]] = 1.
    return y_cat
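

# Quick illustration of ``to_categorical``: three labels drawn from four
# classes become one-hot rows.
#
# >>> to_categorical([0, 2, 3], 4)
# array([[1., 0., 0., 0.],
#        [0., 0., 1., 0.],
#        [0., 0., 0., 1.]])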


def load_npz(path, filename):
    """Load dataset from an ``.npz`` file.

    Parameters
    ----------

    path: string
        Location of dataset to load.

    filename: string
        Name of file.

    Returns
    -------

    : np.ndarray
        The dataset as a numpy array containing samples.
    """

    return np.load(os.path.join(path, filename))['arr_0']
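

# ``load_npz`` expects the array under numpy's default key ``arr_0``, which
# is what ``np.savez`` produces when the array is passed positionally. A
# minimal sketch for writing a compatible file (path and shape are
# hypothetical):
def _example_save_npz():
    os.makedirs('/tmp/dataset', exist_ok=True)
    x_test = np.random.rand(10, 28, 28, 1)       # hypothetical sample data
    np.savez('/tmp/dataset/x_test.npz', x_test)  # stored under key 'arr_0'
    assert load_npz('/tmp/dataset', 'x_test.npz').shape == x_test.shape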