target_models

This module defines the target (victim) models and provides functions to save, load, train and evaluate them.

To set the global seed:

target_models.set_seed(1234)

To instantiate the models (KaggleModel needs the number of output classes):

cifar10Model = target_models.CifarModel()
kaggleModel = target_models.KaggleModel(output_size=100)

To train a model on tf.data.Dataset splits whose element shapes match the model's input, pass a name for the TensorBoard logs and a hyperparameter dictionary:

hyperpar = {"epochs": 1, "learningRate": 0.001, "batchSize": 100}
target_models.train_model(cifar10Model, "cifar10_model", trainData, testData, hyperpar)

To save and load the model:

target_models.save_model("filename", cifar10Model)
cifar10Model = target_models.load_model("filename")
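
The module can also be driven end to end from a configuration dictionary, as done in the __main__ block. A minimal sketch, assuming a config shaped like the lookups in get_model_name, get_target_model and train_target_model (all values below are placeholders):

import target_models
import datasets as ds

target_models.set_seed(1234)

config = {
    "seed": 1234,
    "verbose": True,
    "targetDataset": {"name": "purchase", "trainSize": 10000, "testSize": 1000},
    "targetModel": {
        "classes": 100,
        "hyperparameters": {"learningRate": 0.001, "batchSize": 100, "epochs": 200},
    },
}

dataset = ds.load_dataset(config["targetDataset"]["name"])
targetModel = target_models.get_target_model(config, dataset)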
  1"""
  2.. include:: ../docs/target_models.md
  3"""
  4
  5from os import environ, makedirs
  6
  7# Tensorflow C++ backend logging verbosity
  8environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # NOQA
  9
 10import datasets as ds
 11import utils
 12
 13from os.path import dirname, isdir, join
 14import datetime
 15
 16from typing import Dict
 17from tensorflow import keras
 18from tensorflow.python.framework import random_seed
 19from tensorflow.data import Dataset  # pyright: ignore
 20from tensorflow.keras.activations import tanh  # pyright: ignore
 21from tensorflow.keras.layers import Conv2D, Dense, InputLayer  # pyright: ignore
 22from tensorflow.keras.layers import MaxPool2D, Softmax, Flatten  # pyright: ignore
 23from tensorflow.keras import Sequential  # pyright: ignore
 24
 25global_seed: int = 1234
 26
 27
 28def set_seed(new_seed: int):
 29    """
 30    Set the global seed that will be used for all functions that include
 31    randomness.
 32    """
 33    global global_seed
 34    global_seed = new_seed
 35    random_seed.set_seed(global_seed)
 36
 37
 38class CifarModel(Sequential):
 39    """
 40    On CIFAR datasets, we train a standard convolutional neural
 41    network (CNN) with two convolution and max pooling layers
 42    plus a fully connected layer of size 128 and a Sof tMax layer.
 43    ‘We use Tanh as the activation function. We set the learning
 44    rate to 0.001, the learning rate decay to le — 07, and the
 45    maximum epochs of training to 100.
 46    """
 47
 48    def __init__(self) -> None:
 49        super().__init__()
 50        activation = tanh
 51        batchSize = 100
 52        self.add(InputLayer(input_shape=(32, 32, 3), batch_size=batchSize))
 53        self.add(Conv2D(32, 3, activation=activation))
 54        self.add(MaxPool2D(pool_size=(2, 2)))
 55        self.add(Conv2D(32, 3, activation=activation))
 56        self.add(MaxPool2D(pool_size=(2, 2)))
 57        self.add(Flatten())
 58        self.add(Dense(128, activation=activation))
 59        self.add(Dense(10, activation=activation))
 60        self.add(Softmax())
 61
 62
 63class KaggleModel(Sequential):
 64    """
 65    On the purchase dataset (see Section VI-A), we train a fully
 66    connected neural network with one hidden layer of size 128
 67    and a SoftMax layer. We use Tanh as the activation function.
 68    ‘We set the learning rate to 0.001, the learning rate decay to
 69    1e — 07, and the maximum epochs of training to 200.
 70    """
 71
 72    def __init__(self, output_size: int) -> None:
 73        super().__init__()
 74        activation = tanh
 75        batchSize = 100  # TODO: hardcoded
 76        self.add(InputLayer(input_shape=(600), batch_size=batchSize))
 77        self.add(Dense(128, activation=activation))
 78        self.add(Dense(output_size, activation=activation))
 79        self.add(Softmax())
 80
 81
 82def load_model(name: str, verbose=True) -> Sequential:
 83    """
 84    Load model from disk.
 85
 86    The file name will be constructed from the `name` argument.
 87    """
 88    if verbose:
 89        print(f"Loading model {name} from disk.")
 90    filePath: str = join(dirname(__file__), "../models/target", name)
 91    return keras.models.load_model(filePath)
 92
 93
 94def save_model(name: str, model: Sequential) -> None:
 95    """
 96    Save model to disk.
 97
 98    The file name will be constructed from the `name` argument.
 99    """
100    folderPath: str = join(dirname(__file__),"../models/target")
101    if not isdir(folderPath):
102        makedirs(folderPath, exist_ok=True)
103    filePath: str = join(folderPath, name)
104    model.save(filePath)
105
106
107def train_model(model: Sequential, modelName: str, trainData: Dataset,
108                testData: Dataset, hyperpar: Dict):
109    epochs: int = int(hyperpar["epochs"])
110    learningRate: float = float(hyperpar["learningRate"])
111    batchSize: int = int(hyperpar["batchSize"])
112    print(
113        f"Training model {modelName} for {epochs} epochs with learning rate {learningRate} and batch size {batchSize}.")
114
115    optimizer = keras.optimizers.Adam(learning_rate=learningRate, name="Adam")
116    loss = keras.losses.CategoricalCrossentropy()
117    metrics = [keras.metrics.CategoricalAccuracy()]
118
119    model.compile(optimizer, loss, metrics)
120    trainData = trainData.batch(batchSize, drop_remainder=True)
121    testData = testData.batch(batchSize, drop_remainder=True)
122    log_dir = "logs/target/" + modelName
123    makedirs(log_dir, exist_ok=True)
124    cb = keras.callbacks.TensorBoard(histogram_freq=1, log_dir=log_dir)
125    return model.fit(trainData, epochs=epochs, callbacks=[cb], validation_data=testData)
126
127
128def evaluate_model(model: Sequential, dataset: Dataset):
129    # TODO: batchSize is hardcoded
130    batchSize = 100
131    dataset = dataset.batch(batchSize, drop_remainder=True)
132    return model.evaluate(dataset)
133
134
135def get_model_name(config: Dict) -> str:
136    modelConfig = config["targetModel"]["hyperparameters"]
137    return \
138        f'{config["targetDataset"]["name"]}_' + \
139        f'classes_{config["targetModel"]["classes"]}_' + \
140        f'lr_{modelConfig["learningRate"]}_' + \
141        f'bs_{modelConfig["batchSize"]}_' + \
142        f'epochs_{modelConfig["epochs"]}_' + \
143        f'trainsize_{config["targetDataset"]["trainSize"]}'
144
145
146def get_target_model(config: Dict, targetDataset) -> Sequential:
147    """
148    Try to load target model. If it doesn't work, train it.
149    """
150
151    modelName = get_model_name(config)
152
153    try:
154        print(f"Loading target model from disk.")
155        model: KaggleModel = load_model(modelName, verbose=config["verbose"])
156
157    except BaseException:
158        print("Didn't work, retraining target model.")
159        model: KaggleModel = train_target_model(config, targetDataset)
160
161    print("Evaluating target model on training data:")
162    trainDataName = modelName + "_train_data"
163    trainData = ds.load_target(trainDataName)
164    trainAcc = evaluate_model(model,trainData)[1]
165
166    print("Evaluating target model on testing data:")
167    testDataName = modelName + "_test_data"
168    testData = ds.load_target(testDataName)
169    testAcc = evaluate_model(model,testData)[1]
170
171    hash = utils.hash(str(config))
172    with open(f"{hash}_targetModelAccuracy.csv",'w') as file:
173        file.write(f"Target Model Training Accuracy: {trainAcc}\n")
174        file.write(f"Target Model Testing Accuracy: {testAcc}\n")
175
176    return model
177
178
179def train_target_model(config: Dict, targetDataset) -> Sequential:
180
181    targetDataset = ds.shuffle(targetDataset)
182    dataConfig = config["targetDataset"]
183    modelConfig = config["targetModel"]["hyperparameters"]
184
185    modelName = get_model_name(config)
186    trainDataName = modelName + "_train_data"
187    testDataName = modelName + "_test_data"
188    restDataName = modelName + "_rest_data"
189
190    trainData = targetDataset.take(dataConfig["trainSize"])
191    testData = targetDataset.skip(dataConfig["trainSize"]).take(dataConfig["testSize"])
192    restData = targetDataset.skip(dataConfig["trainSize"]).skip(dataConfig["testSize"])
193
194    ds.save_target(trainData,trainDataName)
195    ds.save_target(testData,testDataName)
196    ds.save_target(restData,restDataName)
197
198    model = KaggleModel(config["targetModel"]["classes"])
199
200    train_model(model, modelName, trainData, testData, modelConfig)
201
202    print("Saving target model to disk.")
203    save_model(modelName, model)
204    evaluate_model(model, testData)
205    return model
206
207
208if __name__ == "__main__":
209    import argparse
210    import configuration as con
211    import datasets as ds
212
213    parser = argparse.ArgumentParser(description='Train the target model.')
214    parser.add_argument('--config', help='Relative path to config file.',)
215    config = con.from_cli_options(vars(parser.parse_args()))
216    set_seed(config["seed"])
217
218    targetDataset = ds.load_dataset(config["targetDataset"]["name"])
219    targetModel = get_target_model(config, targetDataset)
def set_seed(new_seed: int):

Set the global seed that will be used for all functions that include randomness.

class CifarModel(keras.engine.sequential.Sequential):

On CIFAR datasets, we train a standard convolutional neural network (CNN) with two convolution and max pooling layers plus a fully connected layer of size 128 and a SoftMax layer. We use Tanh as the activation function. We set the learning rate to 0.001, the learning rate decay to 1e-07, and the maximum epochs of training to 100.
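
A minimal instantiation sketch. The InputLayer pins the batch size to 100, so input pipelines must produce exactly full batches (train_model ensures this with drop_remainder=True):

model = CifarModel()
model.summary()  # two Conv2D/MaxPool2D pairs, then Dense(128), Dense(10), Softmax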

CifarModel()

class KaggleModel(keras.engine.sequential.Sequential):

On the purchase dataset (see Section VI-A), we train a fully connected neural network with one hidden layer of size 128 and a SoftMax layer. We use Tanh as the activation function. We set the learning rate to 0.001, the learning rate decay to 1e-07, and the maximum epochs of training to 200.
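
The constructor takes the number of output classes; the input width of 600 matches the purchase dataset's feature vectors. A sketch, assuming the 100-class variant:

model = KaggleModel(output_size=100)  # 600 features in, 100 classes out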

KaggleModel(output_size: int)

def load_model(name: str, verbose=True) -> keras.engine.sequential.Sequential:

Load model from disk.

The file name will be constructed from the name argument.

def save_model(name: str, model: keras.engine.sequential.Sequential) -> None:

Save model to disk.

The file name will be constructed from the name argument.
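
Both functions resolve paths relative to the module file, under models/target. A round-trip sketch with a placeholder name:

save_model("purchase_demo", model)
model = load_model("purchase_demo", verbose=False)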

def train_model( model: keras.engine.sequential.Sequential, modelName: str, trainData: tensorflow.python.data.ops.dataset_ops.DatasetV2, testData: tensorflow.python.data.ops.dataset_ops.DatasetV2, hyperpar: Dict):
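
Compiles the model with the Adam optimizer, categorical cross-entropy loss and categorical accuracy, batches both datasets with drop_remainder=True, trains with a TensorBoard callback logging to logs/target/<modelName>, and returns the Keras History object. A usage sketch with placeholder values:

hyperpar = {"epochs": 100, "learningRate": 0.001, "batchSize": 100}
history = train_model(model, "purchase_demo", trainData, testData, hyperpar)
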
def evaluate_model( model: keras.engine.sequential.Sequential, dataset: tensorflow.python.data.ops.dataset_ops.DatasetV2):
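
Batches the dataset with the hardcoded batch size of 100 and returns the [loss, accuracy] list produced by model.evaluate for models compiled by train_model:

loss, accuracy = evaluate_model(model, testData)
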
def get_model_name(config: Dict) -> str:
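
Encodes the dataset name, class count, hyperparameters and training-set size into a unique model name. For the placeholder config sketched in the module introduction:

get_model_name(config)
# 'purchase_classes_100_lr_0.001_bs_100_epochs_200_trainsize_10000'
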
def get_target_model(config: Dict, targetDataset) -> keras.engine.sequential.Sequential:

Try to load the target model from disk. If that fails, train it.

def train_target_model(config: Dict, targetDataset) -> keras.engine.sequential.Sequential:
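
Shuffles the dataset, partitions it into train, test and rest splits with take/skip, saves all three splits via datasets.save_target, then trains and saves a KaggleModel. The take/skip split pattern on a toy dataset (sizes are placeholders):

import tensorflow as tf

data = tf.data.Dataset.range(10)
train = data.take(6)         # elements 0-5
test = data.skip(6).take(2)  # elements 6-7
rest = data.skip(6).skip(2)  # elements 8-9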