shadow_models
from typing import Dict, List, Tuple

import utils
import datasets as ds
import target_models as tm
import shadow_data as sd

from tensorflow.python.framework import random_seed
from tensorflow.keras import Sequential  # pyright: ignore
from tensorflow.data import Dataset
from tensorflow import keras

import numpy as np
from os.path import dirname, join
from os import makedirs

global_seed: int = 1234


def set_seed(new_seed: int):
    """
    Set the global seed that will be used for all functions that include
    randomness.
    """
    global global_seed
    global_seed = new_seed
    random_seed.set_seed(global_seed)


def get_shadow_model_name(config: Dict, i: int):
    numModels: int = config["shadowModels"]["number"]
    method: str = config["shadowDataset"]["method"] + "_"
    split: float = config["shadowModels"]["split"]
    return "shadow_" + method + tm.get_model_name(config) + f"_split_{split}_{i+1}_of_{numModels}"


def load_shadow_models_and_datasets(config: Dict) -> Tuple[List[tm.Sequential], List[Tuple[ds.Dataset, ds.Dataset]]]:
    verbose = config["verbose"]
    numModels: int = config["shadowModels"]["number"]
    datasets = []
    models = []

    print("Loading shadow models from disk.")
    for i in range(numModels):
        modelName = get_shadow_model_name(config, i)
        model: tm.KaggleModel = load_model(modelName, verbose=verbose)

        trainDataName = modelName + "_train_data"
        testDataName = modelName + "_test_data"
        trainData: ds.Dataset = ds.load_shadow(trainDataName, verbose=verbose)
        testData: ds.Dataset = ds.load_shadow(testDataName, verbose=verbose)

        datasets.append((trainData, testData))
        models.append(model)

    return models, datasets


def load_model(name: str, verbose=True) -> Sequential:
    """
    Load model from disk.

    The file name will be constructed from the `name` argument.
    """
    if verbose:
        print(f"Loading model {name} from disk.")
    filePath: str = join(dirname(__file__), "../models/shadow", name)
    return keras.models.load_model(filePath)


def save_model(name: str, model: Sequential) -> None:
    """
    Save model to disk.

    The file name will be constructed from the `name` argument.
    """
    folderPath: str = join(dirname(__file__), "../models/shadow")
    makedirs(folderPath, exist_ok=True)
    filePath: str = join(folderPath, name)
    model.save(filePath)


def train_shadow_models(config: Dict, shadowDatasets: List[ds.Dataset]
                        ) -> Tuple[List[tm.Sequential], List[Tuple[ds.Dataset, ds.Dataset]]]:

    numModels: int = config["shadowModels"]["number"]
    split: float = config["shadowModels"]["split"]
    dataSize = shadowDatasets[0].cardinality().numpy()
    assert dataSize > 0, "Loaded shadow dataset that seems empty."
    # Dataset.take/skip expect integer counts, so round the split point to a whole record
    trainSize = int(np.ceil(split * dataSize))
    testSize = dataSize - trainSize
    datasets = []
    models = []

    for i in range(numModels):
        print(f"Training shadow model {i+1}.")

        modelName = get_shadow_model_name(config, i)
        trainDataName = modelName + "_train_data"
        testDataName = modelName + "_test_data"

        dataset = shadowDatasets[i]
        trainData = dataset.take(trainSize)
        testData = dataset.skip(trainSize).take(testSize)

        # Shadow models have the same architecture as the target model
        model = tm.KaggleModel(config["targetModel"]["classes"])
        modelConfig = config["targetModel"]["hyperparameters"]

        tm.train_model(model, modelName, trainData, testData, modelConfig)

        print(f"Saving shadow model {i+1} and its data to disk.")
        save_model(modelName, model)
        ds.save_shadow(trainData, trainDataName)
        ds.save_shadow(testData, testDataName)

        print(f"Evaluating shadow model {i+1}.")
        tm.evaluate_model(model, testData)

        datasets.append((trainData, testData))
        models.append(model)

    return models, datasets


def get_shadow_models_and_datasets(config: Dict, shadowDatasets: List[ds.Dataset]
                                   ) -> Tuple[List[tm.Sequential], List[Tuple[ds.Dataset, ds.Dataset]]]:
    """
    Tries to load shadow models and their datasets from disk; otherwise trains them from scratch.

    Returns two lists:
        models: the trained shadow models
        datasets: tuples of the training and test data for the corresponding shadow models

    E.g. models[0] was trained on datasets[0][0] and tested on datasets[0][1].
    """
    try:
        print("Trying to load shadow models and data from disk.")
        models, datasets = load_shadow_models_and_datasets(config)
    except BaseException:
        print("Didn't work, training shadow models.")
        models, datasets = train_shadow_models(config, shadowDatasets)

    return models, datasets


def evaluate_model(model: Sequential, dataset: Dataset):
    # TODO: batchSize is hardcoded
    batchSize = 10
    dataset = dataset.batch(batchSize, drop_remainder=False)
    return model.evaluate(dataset)


def evaluate_models(models: List[tm.Sequential], datasets: List[Tuple[ds.Dataset, ds.Dataset]], config: Dict):
    assert len(models) == len(datasets)
    test_accuracies = []
    train_accuracies = []
    for i in range(len(models)):
        trainData, testData = datasets[i]
        test_accuracy = evaluate_model(models[i], testData)[1]
        train_accuracy = evaluate_model(models[i], trainData)[1]
        test_accuracies.append(test_accuracy)
        train_accuracies.append(train_accuracy)

    # Hash the config so result files from different runs don't overwrite each other
    configHash = utils.hash(str(config))

    with open(f"{configHash}_shadowModelTrainAccuracy.csv", 'w') as file:
        file.write(f"Shadow Model Training Accuracies (Overall:{np.average(train_accuracies)})\n")
        for train_acc in train_accuracies:
            file.write(f"{train_acc}\n")
    with open(f"{configHash}_shadowModelTestAccuracy.csv", 'w') as file:
        file.write(f"Shadow Model Testing Accuracies (Overall:{np.average(test_accuracies)})\n")
        for test_acc in test_accuracies:
            file.write(f"{test_acc}\n")


if __name__ == "__main__":
    import argparse
    import configuration as con

    parser = argparse.ArgumentParser(description='Save one shadow dataset per model and train the models.')
    parser.add_argument('--config', help='Relative path to config file.')
    config = con.from_cli_options(vars(parser.parse_args()))

    set_seed(config["seed"])

    shadowData = sd.load_shadow_data(config)
    shadowDatasets = sd.split_shadow_data(config, shadowData)
    shadowModels, shadowDatasets = get_shadow_models_and_datasets(config, shadowDatasets)
    evaluate_models(shadowModels, shadowDatasets, config)
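All functions above read their settings from a single `config` dictionary. The sketch below collects the keys this module actually accesses; the concrete values, and the hyperparameter key names passed through to `tm.train_model`, are illustrative assumptions, not values from this project:

config = {
    "seed": 1234,
    "verbose": True,
    "shadowDataset": {"method": "noisy"},         # assumed method name
    "shadowModels": {"number": 8, "split": 0.5},  # 8 shadow models, 50/50 train/test split each
    "targetModel": {
        "classes": 100,                           # passed to tm.KaggleModel(...)
        "hyperparameters": {"epochs": 10, "batchSize": 100},  # assumed keys, consumed by tm.train_model
    },
}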
def set_seed(new_seed: int):
Set the global seed that will be used for all functions that include randomness.
def get_shadow_model_name(config: Dict, i: int):
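For illustration, with the sketch config above (method "noisy", split 0.5, 8 models) the first shadow model would be named as follows; the middle part comes from `tm.get_model_name(config)` and depends on the target model configuration:

get_shadow_model_name(config, 0)
# -> 'shadow_noisy_<target model name>_split_0.5_1_of_8'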
def load_shadow_models_and_datasets(config: Dict) -> Tuple[List[tm.Sequential], List[Tuple[ds.Dataset, ds.Dataset]]]:
def load_model(name: str, verbose=True) -> Sequential:
Load model from disk.
The file name will be constructed from the `name` argument.
def save_model(name: str, model: Sequential) -> None:
Save model to disk.
The file name will be constructed from the `name` argument.
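A minimal save/load round trip; both functions resolve the path relative to this file, under ../models/shadow, and "demo_model" is a hypothetical name:

save_model("demo_model", model)   # writes ../models/shadow/demo_model
model = load_model("demo_model")  # prints 'Loading model demo_model from disk.'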
def train_shadow_models(config: Dict, shadowDatasets: List[ds.Dataset]) -> Tuple[List[tm.Sequential], List[Tuple[ds.Dataset, ds.Dataset]]]:
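The split point is rounded up to a whole record before slicing with `take`/`skip`. For example, with split = 0.5 and a shadow dataset of 2501 records:

trainSize = int(np.ceil(0.5 * 2501))               # 1251 records for training
testSize = 2501 - trainSize                        # 1250 records for testing
trainData = dataset.take(trainSize)                # first 1251 records
testData = dataset.skip(trainSize).take(testSize)  # remaining 1250 records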
def get_shadow_models_and_datasets(config: Dict, shadowDatasets: List[ds.Dataset]) -> Tuple[List[tm.Sequential], List[Tuple[ds.Dataset, ds.Dataset]]]:
Tries to load shadow models and their datasets from disk; otherwise trains them from scratch.

Returns two lists:
    models: the trained shadow models
    datasets: tuples of the training and test data for the corresponding shadow models

E.g. models[0] was trained on datasets[0][0] and tested on datasets[0][1].
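Typical use mirrors the `__main__` block at the top of the module:

shadowData = sd.load_shadow_data(config)
shadowDatasets = sd.split_shadow_data(config, shadowData)
models, datasets = get_shadow_models_and_datasets(config, shadowDatasets)
trainData, testData = datasets[0]  # the data that models[0] was trained and tested on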
def evaluate_model(model: Sequential, dataset: Dataset):
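Keras' `model.evaluate` returns the loss followed by the compiled metric values, so for a model compiled with a single accuracy metric the result can be unpacked as follows (this is why `evaluate_models` below indexes with [1]):

loss, accuracy = evaluate_model(model, testData)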
def evaluate_models(models: List[tm.Sequential], datasets: List[Tuple[ds.Dataset, ds.Dataset]], config: Dict):
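Calling it writes the per-model accuracies to two CSV files named after a hash of the config:

evaluate_models(models, datasets, config)
# writes <hash>_shadowModelTrainAccuracy.csv and <hash>_shadowModelTestAccuracy.csv,
# each containing an overall-average header line followed by one accuracy per shadow model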