Commit fb9ee074 authored by Jan Sedmidubsky's avatar Jan Sedmidubsky
Browse files

testing classification environment

parent d535c557
Loading
Loading
Loading
Loading

classifier.py

0 → 100644
+218 −0
Original line number Diff line number Diff line
from PIL import Image
from matplotlib.backends.backend_agg import FigureCanvasAgg
import io

from torchvision.transforms import ToTensor, Lambda, Compose
import torchvision.transforms as transforms

from matplotlib.colors import Normalize
import matplotlib.pyplot as plt

import json
import numpy as np
import os
import pandas as pd
import torch

from models import MLP, binary_resnet18, binary_resnet50
from utils import load_kNN_classifier_sklearn

import feature_extractor as feat_ext

from sklearn.preprocessing import StandardScaler

from torch.utils.data import Dataset, DataLoader, TensorDataset, SequentialSampler, Subset
from torchvision import transforms

import pickle

# constants
# Directory holding properties.json and the per-task definition-*.txt files.
meta_dir = "y:/datasets/dyslex/experiment-final/meta"
# Directory holding the pre-trained classifier checkpoints and scaler pickles.
out_models_dir = "y:/trials/xsedmid/dyslex/experiment-final/v2.1/models/"

fill_null_values_by_zero = True  # replace NaNs with 0.0 when loading subject CSVs
summarize_characteristics_by_mean = True  # collapse per-row characteristics to their column mean
normalize_values = False  # NOTE(review): not referenced in this file — presumably a future/leftover toggle
generate_fixation_image_for_each_trialid = False  # False -> one combined fixation image (key -1) per subject




# Placeholder for the eventual classification entry point.
def classify(task_def, model_def, feature_file_path_fixations, feature_file_path_saccades, feature_file_path_metrics):
    """Classify a subject for the given task and model definitions.

    NOTE(review): this is a stub — the real pipeline is not implemented yet,
    so it always reports a fixed accuracy of 0.5 (chance level).
    """
    return 0.5

# main
def main():
    """Ad-hoc driver for testing the classification environment.

    Loads experiment metadata, builds one hard-coded subject's
    characteristics profile and fixation image, then runs three
    pre-trained classifiers (sklearn kNN, PyTorch MLP, ResNet18/50 CNNs)
    and prints their raw outputs and predictions.

    NOTE(review): paths, subject files and the T1 task are hard-coded —
    this is exploratory scaffolding, not a production entry point.
    """
    
    # Read task-invariant properties from a json file
    with open(os.path.join(meta_dir, 'properties.json')) as property_file:
        properties = json.loads(property_file.read())

    aoi_id_col_name = properties['aoi_id_col_name']
    subject_id_col_name = properties['subject_id_col_name']
    degrees_visual_angle_pixels = properties['degrees_visual_angle_pixels']

    # Fixation image parameters
    fixation_image_characteristics_names = [
        properties['fixation_image_fix_x_col_name'],
        properties['fixation_image_fix_y_col_name'],
        properties['fixation_image_disp_x_col_name'],
        properties['fixation_image_disp_y_col_name'],
        properties['fixation_image_duration_ms_col_name']
        ]
    # NOTE(review): eval() on config strings executes arbitrary code — fine for
    # a trusted local properties.json, but ast.literal_eval would be safer.
    fixation_image_visual_params = eval(properties['fixation_image_visual_params'])

    #task_definitions = [('T1', 'Syllables'), ('T2', 'Prosaccades'), ('T3', 'Antisaccades'), ('T4', 'Meaningful_Text'), ('T5', 'Pseudo_Text'), ('T6', 'Visual_Diff_1'), ('T6', 'Visual_Diff_2')]
    tasks_def = eval(properties['tasks'])

    models_def = eval(properties['models'])
    
    # Per-task feature definitions (characteristics / AOI name lists).
    task_feature_def_dict = feat_ext.load_task_feature_definition_dict(tasks_def, meta_dir)

    # 0
    # feature_file_path_metrics = "y:/datasets/dyslex/experiment-final/v2.1/original/Subject_1257_T1_Syllables_metrics.csv"
    # feature_file_path_fixations = "y:/datasets/dyslex/experiment-final/v2.1/original/Subject_1257_T1_Syllables_fixations.csv"
    feature_file_path_metrics = "y:/datasets/dyslex/experiment-final/v3.0/original/Subject_1999_T1_Syllables_metrics.csv"
    feature_file_path_fixations = "y:/datasets/dyslex/experiment-final/v3.0/original/Subject_1999_T1_Syllables_fixations.csv"

    # 1
    # feature_file_path_metrics = "y:/datasets/dyslex/experiment-final/v2.1/original/Subject_1038_T1_Syllables_metrics.csv"
    # feature_file_path_fixations = "y:/datasets/dyslex/experiment-final/v2.1/original/Subject_1038_T1_Syllables_fixations.csv"
    # feature_file_path_metrics = "y:/datasets/dyslex/experiment-final/v3.0/original/Subject_1879_T1_Syllables_metrics.csv"
    # feature_file_path_fixations = "y:/datasets/dyslex/experiment-final/v3.0/original/Subject_1879_T1_Syllables_fixations.csv"

    task_type_id = 'T1'
    
    # Build the one-row feature vector (characteristics profile) for the subject.
    subject_id, subject_data_dict = feat_ext.load_and_transform_subject_characteristics(task_type_id, task_feature_def_dict, fill_null_values_by_zero, subject_id_col_name, aoi_id_col_name, summarize_characteristics_by_mean, feature_file_path_metrics)
    df = feat_ext.create_subject_characteristics_profile(subject_id, subject_data_dict)

    #df.to_csv('c:/temp/out.csv', sep=';', index=False)

    # Drop the id column; everything that remains is a numeric feature.
    X = df.drop('subject_id', axis=1).astype(np.float64)
    print(f'Feature count: {len(X.columns)}')
    print(X)

    # --- sklearn kNN classifier ---
    classifier_knn = load_kNN_classifier_sklearn(
        'T1_Syllables_3NN',
        out_models_dir
    )
    X_preds = classifier_knn.predict(X.values)
    print(f'kNN: {X_preds}')

    print(X.values.shape)
    n_features = len(X.columns)

    # MLP
    # The architecture must match the saved state dict:
    # hidden = n_features // 2, 2 conv/fc layer counts, dropout 0.25 — TODO confirm against models.MLP.
    #classifier_mlp = torch.load(os.path.join(out_models_dir, 'T1_Syllables_MLP_lc2_lf2_d0.25-e20_lr0.001.pt'))
    classifier_mlp = MLP(n_features, n_features // 2, 2, 2, 0.25)
    classifier_mlp.load_state_dict(torch.load(os.path.join(out_models_dir, 'T1_Syllables_MLP_lc2_lf2_d0.25-e20_lr0.001.pt')))
    classifier_mlp.eval()

    # Load scaler from file
    # NOTE(review): the MLP was trained on scaled features, hence the paired scaler pickle.
    scaler = pickle.load(open(os.path.join(out_models_dir, 'T1_Syllables_MLP_lc2_lf2_d0.25-e20_lr0.001.scaler.pkl'), 'rb'))

    # Classify with MLP
    with torch.no_grad():
        feature = torch.from_numpy(scaler.transform(X.values))
        # Convert feature to float64
        feature = feature.float()
        #feature = X.values
        outputs = classifier_mlp(feature)
        print(f'MLP: {outputs}')
        outputs = outputs.softmax(dim=1)
        probs, preds = torch.max(outputs, 1)
        print(f'MLP: {probs} {preds}')

    # with torch.no_grad():
    #     for inputs in test_loader:

    #         #print(len(inputs))

    #         outputs = classifier_mlp(inputs)
    #         #outputs = classifier_mlp(X.values)
    #         print(f'MLP: {outputs}')




    # --- CNN path: render the fixation image and classify it ---

    # Per-task plot bounds and max fixation duration for the colormap.
    x_min, x_max, y_min, y_max, d_max = fixation_image_visual_params[task_type_id]
    fixation_duration_color_norm = Normalize(0, d_max)

    # Load the fixation image
    subject_id, figs_dict, df_fixations_all = feat_ext.generate_subject_fixation_images(generate_fixation_image_for_each_trialid, fixation_image_characteristics_names, fill_null_values_by_zero, subject_id_col_name, degrees_visual_angle_pixels, fixation_duration_color_norm, x_min, x_max, y_min, y_max, feature_file_path_fixations)
    print(f'subject_id: {subject_id}')
    # -1 is the sentinel key for the single combined (all-trials) image.
    fig = figs_dict[-1]
    print(f'Fixation image: {fig}')
    # fixima_path = 'c:/temp/out-fixima.png'
    # fig.savefig(fixima_path, bbox_inches='tight', facecolor='white', transparent=False, pad_inches=0)
    # plt.close(fig)

    #fig = Image.open(fixima_path).convert('RGB')
    
    # Create a FigureCanvasAgg instance and render the figure to it
    # NOTE(review): plt.savefig saves the *current* figure — presumably the one
    # just created by generate_subject_fixation_images; verify this holds.
    img_buf = io.BytesIO()
    plt.savefig(img_buf, format='png')    # Create a PIL (Pillow) Image object from the byte array
    plt.close(fig)
    fig = Image.open(img_buf)
    
    # Standard ImageNet preprocessing expected by the ResNet backbones.
    fig = fig.convert('RGB')
    transform=transforms.Compose(
        [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]
    )
    fig = transform(fig)
    fig = fig.unsqueeze(0)  # add batch dimension: (1, 3, 224, 224)
    #print(f'fig: {fig}')


    # ResNet18 classifier
    classifier_cnn = binary_resnet18()
    classifier_cnn.load_state_dict(torch.load(os.path.join(out_models_dir, 'T1_Syllables_ResNet18-e20_lr0.001.pt')))
    classifier_cnn.eval()

    outputs = classifier_cnn(fig)
    print(f'CNN: {outputs}')
    outputs = outputs.softmax(dim=1)
    probs, preds = torch.max(outputs, 1)
    print(f'CNN: {probs} {preds}')
    

    # ResNet50 classifier (same input tensor)
    classifier_cnn = binary_resnet50()
    classifier_cnn.load_state_dict(torch.load(os.path.join(out_models_dir, 'T1_Syllables_ResNet50-e20_lr0.001.pt')))
    classifier_cnn.eval()

    outputs = classifier_cnn(fig)
    print(f'CNN: {outputs}')
    outputs = outputs.softmax(dim=1)
    probs, preds = torch.max(outputs, 1)
    print(f'CNN: {probs} {preds}')


    
    # # get task definitions
    # tasks = get_task_definitions()

    # # get model definitions
    # models = get_model_definitions()

    # # read trained models from files
    # trained_models = []

    # # classify
    # classify(tasks[0]['id'], models[0]['file_name'])

    # # print results

if __name__ == "__main__":
    main()

feature_extractor.py

0 → 100644
+198 −0
Original line number Diff line number Diff line
import json
import numpy as np
import os
import pandas as pd

import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.patches import Ellipse
import matplotlib.pyplot as plt

def load_task_feature_definition_dict(tasks_def, meta_dir):
    """Load the per-task feature definition files from *meta_dir*.

    For each task type id in *tasks_def*, reads up to four optional text
    files (``definition-<id>-characteristics-global.txt``,
    ``definition-<id>-characteristics.txt``, ``definition-<id>-AOIs.txt``,
    ``definition-<id>-AOIs-characteristics.txt``). Missing files yield
    ``None`` entries.

    Parameters
    ----------
    tasks_def : list of dicts, each with a 'type_id' key.
    meta_dir : directory holding the definition files.

    Returns
    -------
    dict mapping task type id to a dict with keys 'characteristics_global',
    'characteristics', 'characteristics_expected_line_count', 'aoi_names'
    and 'aoi_characteristics'.
    """
    task_feature_def_dict = {}
    for task_type_id in [task['type_id'] for task in tasks_def]:

        # Definition files for the task
        definition_characteristics_global_file = os.path.join(meta_dir, 'definition-' + task_type_id + '-characteristics-global.txt')
        definition_characteristics_file = os.path.join(meta_dir, 'definition-' + task_type_id + '-characteristics.txt')
        definition_AOIs_file = os.path.join(meta_dir, 'definition-' + task_type_id + '-AOIs.txt')
        definition_AOIs_characteristics_file = os.path.join(meta_dir, 'definition-' + task_type_id + '-AOIs-characteristics.txt')

        # Read the names of characteristics-global if the file exists.
        # ndmin=1 keeps a single-entry file from collapsing to a 0-d array,
        # which would make list(...) raise TypeError.
        if not os.path.exists(definition_characteristics_global_file):
            characteristics_global = None
        else:
            characteristics_global = list(np.loadtxt(definition_characteristics_global_file, dtype=str, comments='#', ndmin=1))
            print(f'* characteristics-global: {len(characteristics_global)}')

        # Read the names of characteristics if the file exists
        if not os.path.exists(definition_characteristics_file):
            characteristics = None
            characteristics_expected_line_count = None
        else:
            characteristics = list(np.loadtxt(definition_characteristics_file, dtype=str, comments='#', ndmin=1))
            # The first entry is the number of expected data lines; the rest are names.
            characteristics_expected_line_count = int(characteristics.pop(0))
            print(f'* characteristics: {len(characteristics)}, expected line count: {characteristics_expected_line_count}')

        # Read the names of AOIs and AOI characteristics if both files exist
        if (not os.path.exists(definition_AOIs_file)) or (not os.path.exists(definition_AOIs_characteristics_file)):
            aoi_names = None
            aoi_characteristics = None
        else:
            aoi_names = list(np.loadtxt(definition_AOIs_file, dtype=str, comments='#', ndmin=1))
            aoi_characteristics = list(np.loadtxt(definition_AOIs_characteristics_file, dtype=str, comments='#', ndmin=1))
            print(f'* AOI characteristics: {len(aoi_characteristics)}, AOIs: {len(aoi_names)}')
        
        task_feature_def_dict[task_type_id] = {
            'characteristics_global': characteristics_global,
            'characteristics': characteristics,
            'characteristics_expected_line_count': characteristics_expected_line_count,
            'aoi_names': aoi_names,
            'aoi_characteristics': aoi_characteristics
        }
    
    return task_feature_def_dict

def load_and_transform_subject_characteristics(task_type_id, task_feature_def_dict, fill_null_values_by_zero, subject_id_col_name, aoi_id_col_name, summarize_characteristics_by_mean, subject_characteristics_file_path):
    """Load one subject's metrics CSV and split it into characteristic groups.

    Returns (subject_id, characteristics_dict) where the dict holds three
    optional data frames: 'characteristics_global' (first row of the global
    columns), 'characteristics' (optionally summarized to a single mean row)
    and 'AOIs_characteristics' (per-AOI rows sorted by AOI id).
    """
    raw = pd.read_csv(subject_characteristics_file_path, skiprows=0, sep=',')

    # Optionally treat missing measurements as zeros.
    if fill_null_values_by_zero:
        raw = raw.fillna(0.0)

    # The subject id is taken from the first data row.
    subject_id = str(raw[subject_id_col_name].iloc[0])

    task_def = task_feature_def_dict[task_type_id]
    characteristics_dict = {
        'characteristics_global': None,
        'characteristics': None,
        'AOIs_characteristics': None,
    }

    # Global characteristics: a single row of task-wide values.
    global_names = task_def['characteristics_global']
    if global_names is not None:
        characteristics_dict['characteristics_global'] = raw.copy()[global_names].head(1)

    # Per-line characteristics, optionally collapsed to their mean.
    names = task_def['characteristics']
    characteristics_expected_line_count = task_def['characteristics_expected_line_count']
    if names is not None:
        frame = raw.copy()
        if frame.shape[0] != characteristics_expected_line_count:
            # Warn (don't fail) on an unexpected number of data lines.
            print(f'The number of lines in the file {subject_characteristics_file_path}:{frame.shape[0]} is not the same as the number of expected lines: {characteristics_expected_line_count}!')

        frame = frame[names]
        if summarize_characteristics_by_mean:
            frame = frame.mean().to_frame(name='mean').T

        characteristics_dict['characteristics'] = frame

    # AOI characteristics: one row per area of interest, in AOI-name order.
    aoi_names = task_def['aoi_names']
    aoi_chars = task_def['aoi_characteristics']
    if (aoi_names is not None) and (aoi_chars is not None):
        frame = raw.copy()
        frame = frame[frame[aoi_id_col_name].isin(aoi_names)]
        # Every task AOI must be present exactly once.
        if frame.shape[0] != len(aoi_names):
            raise Exception(f'The number of AOIs in the file {subject_characteristics_file_path} is not the same as the number of AOIs in the task {task_type_id}.')
        frame = frame.sort_values(by=aoi_id_col_name).reset_index(drop=True)
        characteristics_dict['AOIs_characteristics'] = frame[aoi_chars]

    # Report any remaining missing values.
    for part in characteristics_dict.values():
        if part is None:
            continue
        missing_values = part.isnull().values.any()
        if missing_values:
            print(f'  - {subject_characteristics_file_path}, missing values: {missing_values}')

    return subject_id, characteristics_dict

def create_subject_characteristics_profile(subject_id, characteristics_dict):
    """Flatten the per-group characteristic frames into one single-row profile.

    The profile starts with a 'subject_id' column, followed by every value
    of the (optional) global, summarized and AOI characteristic frames.
    """
    pieces = [pd.DataFrame({'subject_id': [subject_id]})]

    for key in ('characteristics_global', 'characteristics', 'AOIs_characteristics'):
        part = characteristics_dict[key]
        if part is None:
            continue
        # unstack -> (column, row-label) series; sorting on the row label keeps
        # a stable per-row column order, then transpose back to a single row.
        pieces.append(part.unstack().to_frame().sort_index(level=1).T)

    return pd.concat(pieces, axis=1)

def generate_fixation_image(df_fixations, degrees_visual_angle_pixels, fixation_duration_color_norm, x_min, x_max, y_min, y_max):
    """Render the fixations as semi-transparent ellipses on a blank canvas.

    Each row of *df_fixations* supplies (x, y, dispersion_x, dispersion_y,
    duration); the dispersion (in degrees of visual angle) sets the ellipse
    size and the duration sets its color on the 'hot' colormap. Returns the
    matplotlib figure (axes hidden, equal aspect, y-axis inverted to match
    screen coordinates).
    """
    fig, ax = plt.subplots()
    hot_cmap = plt.colormaps['hot']

    for fix_x, fix_y, disp_x, disp_y, duration in df_fixations.values:
        patch = Ellipse(xy=(fix_x, fix_y),
                        width=disp_x * degrees_visual_angle_pixels,
                        height=disp_y * degrees_visual_angle_pixels)
        patch.set_clip_box(ax.bbox)
        patch.set_alpha(0.5)
        patch.set_facecolor(hot_cmap(fixation_duration_color_norm(duration)))
        ax.add_patch(patch)

    plt.box(False)
    plt.axis('equal')
    # y-axis runs downwards, as in screen coordinates.
    ax.set_ylim(y_max, y_min)
    ax.set_xlim(x_min, x_max)
    plt.axis("off")

    return fig

def save_fixation_image(fig, out_data_fixation_images_dir, task_id, subject_id, trial_id):
    """Save *fig* as <out_dir>/<task_id>/<subject_id>_<trial_id>.png and close it.

    The task subdirectory is created on demand; the figure is always closed
    after saving to release matplotlib resources.
    """
    fig_dir = os.path.join(out_data_fixation_images_dir, task_id)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(fig_dir, exist_ok=True)
    fig.savefig(os.path.join(fig_dir, subject_id + '_' + str(trial_id) + '.png'), bbox_inches='tight', facecolor='white', transparent=False, pad_inches=0)
    plt.close(fig)

def generate_subject_fixation_images(generate_fixation_image_for_each_trialid, fixation_image_characteristics_names, fill_null_values_by_zero, subject_id_col_name, degrees_visual_angle_pixels, fixation_duration_color_norm, x_min, x_max, y_min, y_max, subject_fixations_file_path):
    """Build the fixation image(s) for one subject's fixation CSV.

    Returns (subject_id, figs_dict, df_fixations_all): figs_dict maps a trial
    id — or the sentinel -1 for the single combined image — to a matplotlib
    figure, and df_fixations_all collects the corresponding fixation frames.
    """
    raw = pd.read_csv(subject_fixations_file_path, skiprows=0, sep=',')

    # Optionally treat missing measurements as zeros.
    if fill_null_values_by_zero:
        raw = raw.fillna(0.0)

    # The subject id is taken from the first data row.
    subject_id = str(raw[subject_id_col_name].iloc[0])

    figs_dict = {}
    df_fixations_all = []

    if generate_fixation_image_for_each_trialid:
        # One image per distinct value in the 'trialid' column.
        for trial_id in raw['trialid'].unique():
            fixations = raw.loc[raw['trialid'] == trial_id, fixation_image_characteristics_names]
            df_fixations_all.append(fixations)
            figs_dict[trial_id] = generate_fixation_image(fixations, degrees_visual_angle_pixels, fixation_duration_color_norm, x_min, x_max, y_min, y_max)
    else:
        # Single combined image over all trials, keyed by the sentinel -1.
        fixations = raw[fixation_image_characteristics_names]
        df_fixations_all.append(fixations)
        figs_dict[-1] = generate_fixation_image(fixations, degrees_visual_angle_pixels, fixation_duration_color_norm, x_min, x_max, y_min, y_max)

    return subject_id, figs_dict, df_fixations_all
+4851 −0

File added.

Preview size limit exceeded, changes collapsed.

+382 −4254

File changed.

Preview size limit exceeded, changes collapsed.

+2 −1
Original line number Diff line number Diff line
@@ -2532,8 +2532,9 @@
    "    print(f'************************* {task_id}_{task_desc} *************************')\n",
    "    \n",
    "    exp_name = 'AOI_lines-allTrialsFixIma-hyper_params'\n",
    "    approach_name_prefix = 'MLP_lc1_lf2_'\n",
    "    \n",
    "    ############## MLP\n",
    "    approach_name_prefix = 'MLP_lc1_lf2_'\n",
    "    training_param_lr_values = [0.01, 0.005, 0.001, 0.0005, 0.0001]\n",
    "    training_param_lr_values = [str(lr) for lr in training_param_lr_values]\n",
    "    training_param_epoch_count_values = [10, 20, 30, 50]\n",
Loading