Loading src/suggestions.py +76 −37 Original line number Diff line number Diff line Loading @@ -17,16 +17,16 @@ def suggest_params_general(trial: Trial) -> None: # Parameters of learning trial.suggest_int("epochs", 6, 72) trial.suggest_int("batch_size", 4, 80, step=4) trial.suggest_int("batch_size", 4, 64, step=4) # Parameters of loss function trial.suggest_float("margin", 1.5, 2.5) # Parameters of optimizer trial.suggest_float("lr", 1e-5, 0.01) trial.suggest_float("weight_decay", 1e-10, 1e-5, log=True) trial.suggest_float("beta1", 0.65, 0.95) trial.suggest_float("beta2", 0.945, 0.999) trial.suggest_float("weight_decay", 1e-5, 1e-3) trial.suggest_float("beta1", 0.85, 0.95) trial.suggest_float("beta2", 0.85, 0.999) # Parameters of the network itself trial.suggest_int("c_layers", 3, 5) Loading Loading @@ -95,7 +95,7 @@ def suggest_dropout(trial: Trial, n: int, is_in_fc: bool) -> None: @return: None """ chances = [False, False, False] if is_in_fc else [True, True, False] chances = [False, False, False] if is_in_fc else [True, False, False] name_prefix = "l_layers_{}_layer_{}".format(trial.params["l_layers"], n) if is_in_fc \ else "c_layers_{}_layer_{}".format(trial.params["c_layers"], n) Loading @@ -122,32 +122,59 @@ def suggest_c_layers(trial: Trial) -> None: # Config for count of convolutional layers if trial.params["c_layers"] == 3: kernel_sizes = [(7, 13), (3, 9), (3, 5)] strides = [(1, 3), (1, 2), (1, 1)] paddings = [(1, 6), (1, 4), (1, 2)] outputs = [[16, 32, 64, 96, 128], [64, 128, 192, 224, 256], [128, 256, 320, 384, 448, 512]] # outputs = [(16, 128, 16), (64, 256, 64), (128, 640, 64)] kernel_sizes = [(3, 7), (5, 9), (5, 13)] strides = [(1, 2), (1, 3), (1, 3)] paddings = [(1, 3), (1, 4), (1, 6)] outputs = [[16, 32, 48, 64, 96, 128], [64, 128, 192, 224, 256, 320, 384], [96, 128, 192, 256, 384, 512, 640]] # ============================================================================= # kernel_sizes = [(7, 13), (3, 9), (3, 5)] # strides = [(1, 3), (1, 2), (1, 1)] # paddings = [(1, 6), (1, 4), (1, 2)] # outputs = [[16, 32, 64, 96, 128], [64, 128, 192, 224, 256], # [128, 256, 320, 384, 448, 512]] # # outputs = [(16, 128, 16), (64, 256, 64), (128, 640, 64)] # ============================================================================= elif trial.params["c_layers"] == 4: kernel_sizes = [(7, 13), (3, 9), (3, 5), (3, 3)] strides = [(1, 3), (1, 2), (1, 1), (1, 1)] paddings = [(1, 6), (1, 4), (1, 2), (1, 1)] outputs = [[32, 64, 96, 128, 160, 192], kernel_sizes = [(3, 5), (3, 7), (5, 11), (5, 13)] strides = [(1, 2), (1, 2), (1, 3), (1, 3)] paddings = [(1, 2), (1, 3), (1, 5), (1, 6)] outputs = [[16, 32, 48, 64, 96, 128], [64, 96, 128, 160, 192, 224, 256], [96, 128, 160, 192, 256, 320, 384], [128, 160, 192, 256, 320, 384, 448], [192, 256, 320, 448, 512]] # outputs = [(32, 192, 32), (96, 384, 32), (128, 448, 32), [128, 192, 256, 384, 512, 640]] # ============================================================================= # kernel_sizes = [(7, 13), (3, 9), (3, 5), (3, 3)] # strides = [(1, 3), (1, 2), (1, 1), (1, 1)] # paddings = [(1, 6), (1, 4), (1, 2), (1, 1)] # outputs = [[32, 64, 96, 128, 160, 192], # [96, 128, 160, 192, 256, 320, 384], # [128, 160, 192, 256, 320, 384, 448], # [192, 256, 320, 448, 512]] # # outputs = [(32, 192, 32), (96, 384, 32), (128, 448, 32), # ============================================================================= # (192, 512, 64)] else: kernel_sizes = [(7, 13), (3, 9), (3, 5), (3, 3), (3, 3)] strides = [(1, 3), (1, 2), (1, 1), (1, 1), (1, 1)] paddings = [(1, 6), (1, 4), (1, 2), (1, 1), (1, 1)] outputs = [[32, 64, 96, 128, 160, 192], [96, 128, 160, 192, 256, 320], [128, 160, 192, 256, 320, 384], [192, 256, 320, 384, 448], [192, 256, 320, 384, 448, 512]] # outputs = [(32, 192, 16), (96, 320, 32), (128, 384, 32), kernel_sizes = [(3, 3), (3, 5), (3, 7), (5, 11), (5, 13)] strides = [(1, 2), (1, 2), (1, 2), (1, 3), (1, 3)] paddings = [(1, 1), (1, 2), (1, 3), (1, 5), (1, 6)] outputs = [[16, 32, 48, 64, 80, 96], [64, 96, 128, 160, 192, 224], [96, 128, 160, 192, 256, 320], [96, 192, 256, 320, 384, 448], [128, 192, 256, 384, 512, 640]] # ============================================================================= # kernel_sizes = [(7, 13), (3, 9), (3, 5), (3, 3), (3, 3)] # strides = [(1, 3), (1, 2), (1, 1), (1, 1), (1, 1)] # paddings = [(1, 6), (1, 4), (1, 2), (1, 1), (1, 1)] # outputs = [[32, 64, 96, 128, 160, 192], # [96, 128, 160, 192, 256, 320], [128, 160, 192, 256, 320, 384], # [192, 256, 320, 384, 448], [192, 256, 320, 384, 448, 512]] # # outputs = [(32, 192, 16), (96, 320, 32), (128, 384, 32), # ============================================================================= # (192, 448, 32), (192, 512, 64)] # Suggest each convolutional layer Loading @@ -172,22 +199,34 @@ def suggest_l_layers(trial: Trial) -> None: # Config for count of fully-connected layers if trial.params["l_layers"] == 3: outputs = [(256, 4096, 256), (128, 1536, 32), (2, 32, 2)] outputs = [[256, 512, 768, 1024, 1536, 2048, 3072, 4096], [128, 256, 384, 512, 768, 1024, 1536], [2, 4, 8, 16, 32, 64, 96, 128, 160, 192, 256]] # ============================================================================= # outputs = [(256, 4096, 256), (128, 1536, 64), (2, 128, 2)] # # ============================================================================= elif trial.params["l_layers"] == 4: outputs = [(256, 6144, 256), (256, 2048, 64), (96, 768, 32), (2, 32, 2)] outputs = [[256, 512, 1024, 2048, 3072, 4096, 5120, 6144], [128, 256, 512, 768, 1024, 1536, 2048, 3072], [96, 128, 256, 384, 512, 768, 1024, 1536], [2, 4, 8, 16, 32, 48, 64, 96, 128, 160, 192, 256]] # ============================================================================= # outputs = [(256, 6144, 256), (256, 2048, 64), (96, 768, 32), # (2, 32, 2)] # # ============================================================================= else: outputs = [(256, 8192, 256), (256, 3072, 128), (128, 896, 32), (96, 384, 16), (2, 32, 2)] outputs = [[256, 512, 1024, 2048, 4096, 5120, 6144, 7168, 8192], [128, 256, 512, 1024, 1536, 2048, 3072, 4096], [96, 160, 256, 512, 768, 1024, 1536, 2048], [64, 128, 256, 384, 512, 768, 1024, 1536], [2, 4, 8, 16, 32, 48, 64, 96, 128, 160, 192, 256]] # Suggest each fully-connected layer for i in range(0, trial.params["l_layers"]): trial.suggest_int( "l_layers_{}_layer_{}_output".format(trial.params["l_layers"], i + 1), outputs[i][0], outputs[i][1], step=outputs[i][2]) trial.suggest_categorical( "l_layers_{}_layer_{}_output".format(trial.params["l_layers"], i + 1), outputs[i]) if i != trial.params["l_layers"] - 1: suggest_dropout(trial, n=i + 1, is_in_fc=True) Loading
src/suggestions.py +76 −37 Original line number Diff line number Diff line Loading @@ -17,16 +17,16 @@ def suggest_params_general(trial: Trial) -> None: # Parameters of learning trial.suggest_int("epochs", 6, 72) trial.suggest_int("batch_size", 4, 80, step=4) trial.suggest_int("batch_size", 4, 64, step=4) # Parameters of loss function trial.suggest_float("margin", 1.5, 2.5) # Parameters of optimizer trial.suggest_float("lr", 1e-5, 0.01) trial.suggest_float("weight_decay", 1e-10, 1e-5, log=True) trial.suggest_float("beta1", 0.65, 0.95) trial.suggest_float("beta2", 0.945, 0.999) trial.suggest_float("weight_decay", 1e-5, 1e-3) trial.suggest_float("beta1", 0.85, 0.95) trial.suggest_float("beta2", 0.85, 0.999) # Parameters of the network itself trial.suggest_int("c_layers", 3, 5) Loading Loading @@ -95,7 +95,7 @@ def suggest_dropout(trial: Trial, n: int, is_in_fc: bool) -> None: @return: None """ chances = [False, False, False] if is_in_fc else [True, True, False] chances = [False, False, False] if is_in_fc else [True, False, False] name_prefix = "l_layers_{}_layer_{}".format(trial.params["l_layers"], n) if is_in_fc \ else "c_layers_{}_layer_{}".format(trial.params["c_layers"], n) Loading @@ -122,32 +122,59 @@ def suggest_c_layers(trial: Trial) -> None: # Config for count of convolutional layers if trial.params["c_layers"] == 3: kernel_sizes = [(7, 13), (3, 9), (3, 5)] strides = [(1, 3), (1, 2), (1, 1)] paddings = [(1, 6), (1, 4), (1, 2)] outputs = [[16, 32, 64, 96, 128], [64, 128, 192, 224, 256], [128, 256, 320, 384, 448, 512]] # outputs = [(16, 128, 16), (64, 256, 64), (128, 640, 64)] kernel_sizes = [(3, 7), (5, 9), (5, 13)] strides = [(1, 2), (1, 3), (1, 3)] paddings = [(1, 3), (1, 4), (1, 6)] outputs = [[16, 32, 48, 64, 96, 128], [64, 128, 192, 224, 256, 320, 384], [96, 128, 192, 256, 384, 512, 640]] # ============================================================================= # kernel_sizes = [(7, 13), (3, 9), (3, 5)] # strides = [(1, 3), (1, 2), (1, 1)] # paddings = [(1, 6), (1, 4), (1, 2)] # outputs = [[16, 32, 64, 96, 128], [64, 128, 192, 224, 256], # [128, 256, 320, 384, 448, 512]] # # outputs = [(16, 128, 16), (64, 256, 64), (128, 640, 64)] # ============================================================================= elif trial.params["c_layers"] == 4: kernel_sizes = [(7, 13), (3, 9), (3, 5), (3, 3)] strides = [(1, 3), (1, 2), (1, 1), (1, 1)] paddings = [(1, 6), (1, 4), (1, 2), (1, 1)] outputs = [[32, 64, 96, 128, 160, 192], kernel_sizes = [(3, 5), (3, 7), (5, 11), (5, 13)] strides = [(1, 2), (1, 2), (1, 3), (1, 3)] paddings = [(1, 2), (1, 3), (1, 5), (1, 6)] outputs = [[16, 32, 48, 64, 96, 128], [64, 96, 128, 160, 192, 224, 256], [96, 128, 160, 192, 256, 320, 384], [128, 160, 192, 256, 320, 384, 448], [192, 256, 320, 448, 512]] # outputs = [(32, 192, 32), (96, 384, 32), (128, 448, 32), [128, 192, 256, 384, 512, 640]] # ============================================================================= # kernel_sizes = [(7, 13), (3, 9), (3, 5), (3, 3)] # strides = [(1, 3), (1, 2), (1, 1), (1, 1)] # paddings = [(1, 6), (1, 4), (1, 2), (1, 1)] # outputs = [[32, 64, 96, 128, 160, 192], # [96, 128, 160, 192, 256, 320, 384], # [128, 160, 192, 256, 320, 384, 448], # [192, 256, 320, 448, 512]] # # outputs = [(32, 192, 32), (96, 384, 32), (128, 448, 32), # ============================================================================= # (192, 512, 64)] else: kernel_sizes = [(7, 13), (3, 9), (3, 5), (3, 3), (3, 3)] strides = [(1, 3), (1, 2), (1, 1), (1, 1), (1, 1)] paddings = [(1, 6), (1, 4), (1, 2), (1, 1), (1, 1)] outputs = [[32, 64, 96, 128, 160, 192], [96, 128, 160, 192, 256, 320], [128, 160, 192, 256, 320, 384], [192, 256, 320, 384, 448], [192, 256, 320, 384, 448, 512]] # outputs = [(32, 192, 16), (96, 320, 32), (128, 384, 32), kernel_sizes = [(3, 3), (3, 5), (3, 7), (5, 11), (5, 13)] strides = [(1, 2), (1, 2), (1, 2), (1, 3), (1, 3)] paddings = [(1, 1), (1, 2), (1, 3), (1, 5), (1, 6)] outputs = [[16, 32, 48, 64, 80, 96], [64, 96, 128, 160, 192, 224], [96, 128, 160, 192, 256, 320], [96, 192, 256, 320, 384, 448], [128, 192, 256, 384, 512, 640]] # ============================================================================= # kernel_sizes = [(7, 13), (3, 9), (3, 5), (3, 3), (3, 3)] # strides = [(1, 3), (1, 2), (1, 1), (1, 1), (1, 1)] # paddings = [(1, 6), (1, 4), (1, 2), (1, 1), (1, 1)] # outputs = [[32, 64, 96, 128, 160, 192], # [96, 128, 160, 192, 256, 320], [128, 160, 192, 256, 320, 384], # [192, 256, 320, 384, 448], [192, 256, 320, 384, 448, 512]] # # outputs = [(32, 192, 16), (96, 320, 32), (128, 384, 32), # ============================================================================= # (192, 448, 32), (192, 512, 64)] # Suggest each convolutional layer Loading @@ -172,22 +199,34 @@ def suggest_l_layers(trial: Trial) -> None: # Config for count of fully-connected layers if trial.params["l_layers"] == 3: outputs = [(256, 4096, 256), (128, 1536, 32), (2, 32, 2)] outputs = [[256, 512, 768, 1024, 1536, 2048, 3072, 4096], [128, 256, 384, 512, 768, 1024, 1536], [2, 4, 8, 16, 32, 64, 96, 128, 160, 192, 256]] # ============================================================================= # outputs = [(256, 4096, 256), (128, 1536, 64), (2, 128, 2)] # # ============================================================================= elif trial.params["l_layers"] == 4: outputs = [(256, 6144, 256), (256, 2048, 64), (96, 768, 32), (2, 32, 2)] outputs = [[256, 512, 1024, 2048, 3072, 4096, 5120, 6144], [128, 256, 512, 768, 1024, 1536, 2048, 3072], [96, 128, 256, 384, 512, 768, 1024, 1536], [2, 4, 8, 16, 32, 48, 64, 96, 128, 160, 192, 256]] # ============================================================================= # outputs = [(256, 6144, 256), (256, 2048, 64), (96, 768, 32), # (2, 32, 2)] # # ============================================================================= else: outputs = [(256, 8192, 256), (256, 3072, 128), (128, 896, 32), (96, 384, 16), (2, 32, 2)] outputs = [[256, 512, 1024, 2048, 4096, 5120, 6144, 7168, 8192], [128, 256, 512, 1024, 1536, 2048, 3072, 4096], [96, 160, 256, 512, 768, 1024, 1536, 2048], [64, 128, 256, 384, 512, 768, 1024, 1536], [2, 4, 8, 16, 32, 48, 64, 96, 128, 160, 192, 256]] # Suggest each fully-connected layer for i in range(0, trial.params["l_layers"]): trial.suggest_int( "l_layers_{}_layer_{}_output".format(trial.params["l_layers"], i + 1), outputs[i][0], outputs[i][1], step=outputs[i][2]) trial.suggest_categorical( "l_layers_{}_layer_{}_output".format(trial.params["l_layers"], i + 1), outputs[i]) if i != trial.params["l_layers"] - 1: suggest_dropout(trial, n=i + 1, is_in_fc=True)