Source code for aisquared.utils.utils

from typing import Union
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin, TransformerMixin
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error
import tensorflow as tf
import numpy as np
import beyondml


def _rmse(y_true, y_pred):
    """
    Compute the root mean squared error between two sets of values

    The ``squared=False`` keyword of ``mean_squared_error`` is deprecated in
    recent scikit-learn releases, so the root is taken here instead. Taking
    the root per output column and then averaging matches what
    ``squared=False`` computed with the default uniform averaging.

    Parameters
    ----------
    y_true : array or array-like
        The reference values
    y_pred : array or array-like
        The values to compare against the reference

    Returns
    -------
    rmse : float
        The root mean squared error, averaged over output columns
    """
    per_output_mse = mean_squared_error(
        y_true, y_pred, multioutput='raw_values')
    return np.sqrt(per_output_mse).mean()


def _residual_std(reference, predictions):
    """
    Compute the standard deviation of ``predictions - reference``

    Parameters
    ----------
    reference : array or array-like
        The values to compare against
    predictions : array or array-like
        The predicted values

    Returns
    -------
    std : float
        The standard deviation of the element-wise differences
    """
    reference = np.asarray(reference)
    predictions = np.asarray(predictions)
    # A column vector of shape (n, 1) minus a flat vector of shape (n,)
    # broadcasts to an (n, n) matrix, which is not the residual vector.
    # Align the shapes first whenever the element counts agree.
    if predictions.shape != reference.shape and predictions.size == reference.size:
        predictions = predictions.reshape(reference.shape)
    return np.std(predictions - reference)


def _mimic_class_labels(mimic_preds):
    """
    Convert mimic network outputs into class labels

    A single output column is thresholded at 0.5; otherwise the index of the
    largest value along axis 1 is used.
    """
    if mimic_preds.shape[1] == 1:
        return (mimic_preds >= 0.5).astype(int)
    return mimic_preds.argmax(axis=1)


def _print_mimic_comparison(reference, mimic_preds, problem_type):
    """
    Print how the mimic predictions compare against ``reference``

    For classification this prints the confusion matrix and classification
    report; otherwise it prints the RMSE and the standard deviation of the
    residuals.
    """
    if problem_type == 'classification':
        mimic_labels = _mimic_class_labels(mimic_preds)
        print(confusion_matrix(reference, mimic_labels))
        print(classification_report(reference, mimic_labels))
    else:
        print(_rmse(reference, mimic_preds))
        print(f'Standard deviation: {_residual_std(reference, mimic_preds)}')


def _print_report(true_data, orig_preds, mimic_preds, problem_type):
    """
    Print a performance report for the original model and its mimic

    Three sections are printed: the original model against the true data,
    the mimic against the original model's predictions, and the mimic
    against the true data.

    Parameters
    ----------
    true_data : array or array-like
        The ground-truth outputs
    orig_preds : array or array-like
        The predictions of the original model
    mimic_preds : array
        The raw outputs of the mimic network; for classification it is
        indexed as a 2-D array (``shape[1]`` is read)
    problem_type : str
        'classification' selects the confusion matrix / classification
        report; any other value is reported as regression
    """
    print('ORIGINAL PERFORMANCE:')
    if problem_type == 'classification':
        print(confusion_matrix(true_data, orig_preds))
        print(classification_report(true_data, orig_preds))
    else:
        print(_rmse(true_data, orig_preds))
    print('\n\n')

    print('MIMIC PERFORMANCE:')
    print('\n')
    print('Relative to First Model:')
    _print_mimic_comparison(orig_preds, mimic_preds, problem_type)
    print('\n')

    # This section compares the mimic with the ground truth, so the residuals
    # must be taken between the mimic predictions and the true data.
    print('Relative to Original:')
    _print_mimic_comparison(true_data, mimic_preds, problem_type)


def _get_cv_model(size, input_shape, num_outputs, output_activation):
    """
    Build a convolutional Keras model of a preset size

    The network is a stack of blocks, each made of two 3x3 'same'-padded
    ReLU convolutions followed by a max-pooling layer with stride 1. The
    number of filters starts at 8 and doubles with every block. The result
    is flattened and passed through ReLU dense layers and a final dense
    output layer.

    Parameters
    ----------
    size : str
        One of 'small', 'medium', or 'large'
    input_shape : int or tuple of int
        The input shape to the model
    num_outputs : int
        The number of units in the output layer
    output_activation : str or keras activation function
        The activation of the output layer

    Returns
    -------
    model : TensorFlow Keras model
        The untrained model

    Raises
    ------
    ValueError
        If size is not one of the supported presets
    """
    # size -> (number of conv blocks, number of dense layers, dense width)
    presets = {
        'small': (2, 1, 64),
        'medium': (4, 2, 128),
        'large': (6, 4, 256),
    }
    try:
        conv_blocks, dense_layers, dense_units = presets[size]
    except (KeyError, TypeError):
        raise ValueError(
            f'size must be one of "small", "medium", "large", got {size}') from None

    inputs = tf.keras.layers.Input(input_shape)
    features = inputs

    for block_index in range(conv_blocks):
        # 8 filters in the first block, doubling in each later block
        filters = 8 * (2 ** block_index)
        for _ in range(2):
            features = tf.keras.layers.Conv2D(
                filters, 3, padding='same', activation='relu')(features)
        features = tf.keras.layers.MaxPool2D(strides=1)(features)

    features = tf.keras.layers.Flatten()(features)
    for _ in range(dense_layers):
        features = tf.keras.layers.Dense(
            dense_units, activation='relu')(features)

    outputs = tf.keras.layers.Dense(
        num_outputs, activation=output_activation)(features)

    return tf.keras.models.Model(inputs, outputs)


def _get_embedding_model(size, vocab_size, input_shape, num_outputs, output_activation):
    """
    Build an embedding-based Keras model of a preset size

    The network embeds its input, flattens the embeddings, and passes them
    through a stack of ReLU dense layers followed by a dense output layer.

    Parameters
    ----------
    size : str
        One of 'small', 'medium', or 'large'
    vocab_size : int
        The size of the vocabulary, passed to the embedding layer
    input_shape : int or tuple of int
        The input shape to the model
    num_outputs : int
        The number of units in the output layer
    output_activation : str or keras activation function
        The activation of the output layer

    Returns
    -------
    model : TensorFlow Keras model
        The untrained model

    Raises
    ------
    ValueError
        If vocab_size is None or size is not one of the supported presets
    """
    # The vocabulary check comes before the size check
    if vocab_size is None:
        raise ValueError(
            'If NLP embedding model specified, must also specify vocab_size')

    # size -> (embedding dimension, number of dense layers, dense width)
    presets = {
        'small': (4, 4, 64),
        'medium': (8, 6, 128),
        'large': (16, 8, 256),
    }
    try:
        embedding_width, dense_layers, dense_units = presets[size]
    except (KeyError, TypeError):
        raise ValueError(
            f'size must be one of "small", "medium", "large", got {size}') from None

    inputs = tf.keras.layers.Input(input_shape)
    features = tf.keras.layers.Embedding(vocab_size, embedding_width)(inputs)
    features = tf.keras.layers.Flatten()(features)
    for _ in range(dense_layers):
        features = tf.keras.layers.Dense(
            dense_units, activation='relu')(features)

    outputs = tf.keras.layers.Dense(
        num_outputs, activation=output_activation)(features)

    return tf.keras.models.Model(inputs, outputs)


def _get_fc_model(size, input_shape, num_outputs, output_activation):
    """
    Build a fully-connected Keras model of a preset size

    The network is a stack of ReLU dense layers followed by a dense output
    layer.

    Parameters
    ----------
    size : str
        One of 'small', 'medium', or 'large'
    input_shape : int or tuple of int
        The input shape to the model
    num_outputs : int
        The number of units in the output layer
    output_activation : str or keras activation function
        The activation of the output layer

    Returns
    -------
    model : TensorFlow Keras model
        The untrained model

    Raises
    ------
    ValueError
        If size is not one of the supported presets
    """
    # size -> (number of dense layers, dense width)
    presets = {
        'small': (4, 64),
        'medium': (6, 128),
        'large': (8, 256),
    }
    try:
        dense_layers, dense_units = presets[size]
    except (KeyError, TypeError):
        raise ValueError(
            f'size must be one of "small", "medium", "large", got {size}') from None

    inputs = tf.keras.layers.Input(input_shape)

    # Start the chain at the input tensor so every layer is applied the same way
    features = inputs
    for _ in range(dense_layers):
        features = tf.keras.layers.Dense(
            dense_units, activation='relu')(features)

    outputs = tf.keras.layers.Dense(
        num_outputs, activation=output_activation)(features)

    return tf.keras.models.Model(inputs, outputs)


def mimic_model(
        trained_model: BaseEstimator,
        nnet: tf.keras.models.Model,
        training_data: np.ndarray,
        test_data: np.ndarray,
        test_labels: np.ndarray,
        problem_type: str,
        loss: str,
        metrics: Union[str, list],
        optimizer: str,
        mimic_proba: bool = False,
        retention: float = 0.9,
        batch_size: int = 32,
        epochs: int = 100,
        starting_sparsification: int = 0,
        max_sparsification: int = 99,
        sparsification_rate: int = 5
) -> tf.keras.models.Model:
    """
    Train a sparse neural network to mimic a scikit-learn model

    Parameters
    ----------
    trained_model : sklearn model
        The model that is already trained
    nnet : TensorFlow keras Model
        The neural network to train to mimic the trained model
    training_data : array or array-like
        The input data that was used to train the trained model
    test_data : array or array-like
        The input data to be used for testing
    test_labels : array or array-like
        The output data used in testing
    problem_type : str
        The type of problem, either 'classification' or 'regression'
    loss : str or keras loss function
        The loss to use
    metrics : str, function or list of str, function
        Metrics to measure
    optimizer : str or keras optimizer
        The optimizer to use
    mimic_proba : bool (default False)
        For classification, mimic the probability outputs. Only the training
        targets switch to ``predict_proba``; the test predictions used in the
        printed report remain the outputs of ``predict``
    retention : float (default 0.9)
        The retention of performance to allow further pruning
    batch_size : int (default 32)
        The batch size to use while training
    epochs : int (default 100)
        The number of epochs (if early stopping is not met beforehand)
    starting_sparsification : int (default 0)
        The starting model sparsification
    max_sparsification : int (default 99)
        The maximum sparsification to allow
    sparsification_rate : int (default 5)
        The sparsification rate when invoked

    Returns
    -------
    nnet : TensorFlow keras Model
        The trained model

    Raises
    ------
    ValueError
        If problem_type is not 'classification' or 'regression'
    TypeError
        If trained_model is not a scikit-learn estimator, or is an estimator
        that is not a transformer, classifier, or regressor
    """

    # Check problem type
    if problem_type not in ['classification', 'regression']:
        raise ValueError(
            'problem_type must be "classification" or "regression"')

    # Check that the model to mimic is a sklearn model
    if not isinstance(trained_model, BaseEstimator):
        raise TypeError(
            'Currently can only convert trained scikit-learn models')

    # Get the predictions
    if isinstance(trained_model, TransformerMixin):
        training_predictions = trained_model.transform(training_data)
        testing_predictions = trained_model.transform(test_data)
    elif isinstance(trained_model, (ClassifierMixin, RegressorMixin)):
        if isinstance(trained_model, ClassifierMixin) and mimic_proba:
            training_predictions = trained_model.predict_proba(training_data)
            testing_predictions = trained_model.predict(test_data)
        else:
            training_predictions = trained_model.predict(training_data)
            testing_predictions = trained_model.predict(test_data)
    else:
        raise TypeError(
            'trained_model is not a scikit-learn TransformerMixin, ClassifierMixin, or RegressorMixin')

    if problem_type == 'classification':
        cutoff = retention
    else:
        # The ``squared=False`` keyword of mean_squared_error is deprecated
        # in recent scikit-learn releases. Taking the root per output column
        # and averaging gives the same value on every version.
        original_rmse = np.sqrt(mean_squared_error(
            test_labels, testing_predictions, multioutput='raw_values')).mean()
        cutoff = original_rmse / retention

    # Add layer masks
    nnet = beyondml.tflow.utils.add_layer_masks(nnet)
    nnet = beyondml.tflow.utils.train_model(
        nnet,
        training_data,
        training_predictions,
        loss,
        metrics,
        optimizer,
        cutoff,
        batch_size,
        epochs,
        starting_sparsification,
        max_sparsification,
        sparsification_rate
    )
    nnet = beyondml.tflow.utils.remove_layer_masks(nnet)

    # Report how the mimic compares with the original model and the test labels
    nnet_preds = nnet.predict(test_data)
    _print_report(test_labels, testing_predictions, nnet_preds, problem_type)
    return nnet
def get_model(
        model_type: str,
        input_shape: Union[int, tuple],
        num_outputs: int,
        output_activation: str,
        size: str = 'small',
        vocab_size: Union[None, int] = None
):
    """
    Get a pre-configured model for different use cases

    Parameters
    ----------
    model_type : str
        Either 'cv', 'nlp_embedding', or 'fc', defining the model type
    input_shape : int or tuple of int
        The input shape to the model
    num_outputs : int
        The output shape of the model
    output_activation : str or keras activation function
        The activation of the final layer of the model
    size : str (default 'small')
        One of either 'small', 'medium', or 'large'
    vocab_size : int or None (default None)
        Size of the vocab, if model_type is 'nlp_embedding'

    Returns
    -------
    model : TensorFlow Keras model
        The model

    Raises
    ------
    ValueError
        If model_type is not one of 'cv', 'nlp_embedding', or 'fc'. The
        model builders also raise ValueError for an unsupported size, or
        when vocab_size is None for an 'nlp_embedding' model
    """
    if model_type == 'cv':
        return _get_cv_model(size, input_shape, num_outputs, output_activation)
    elif model_type == 'nlp_embedding':
        return _get_embedding_model(size, vocab_size, input_shape, num_outputs, output_activation)
    elif model_type == 'fc':
        return _get_fc_model(size, input_shape, num_outputs, output_activation)
    else:
        raise ValueError(
            f'model_type must be one of "cv", "nlp_embedding", or "fc", got {model_type}')