Commit fb9ee074 authored by Jan Sedmidubsky's avatar Jan Sedmidubsky
Browse files

testing classification environment

parent d535c557
Loading
Loading
Loading
Loading

classifier.py

0 → 100644
+218 −0
Original line number Diff line number Diff line
from PIL import Image
from matplotlib.backends.backend_agg import FigureCanvasAgg
import io

from torchvision.transforms import ToTensor, Lambda, Compose
import torchvision.transforms as transforms

from matplotlib.colors import Normalize
import matplotlib.pyplot as plt

import json
import numpy as np
import os
import pandas as pd
import torch

from models import MLP, binary_resnet18, binary_resnet50
from utils import load_kNN_classifier_sklearn

import feature_extractor as feat_ext

from sklearn.preprocessing import StandardScaler

from torch.utils.data import Dataset, DataLoader, TensorDataset, SequentialSampler, Subset
from torchvision import transforms

import pickle

# constants
# Directory holding properties.json and the per-task definition-*.txt files.
meta_dir = "y:/datasets/dyslex/experiment-final/meta"
# Directory holding the pre-trained classifier checkpoints and scaler pickles.
out_models_dir = "y:/trials/xsedmid/dyslex/experiment-final/v2.1/models/"

fill_null_values_by_zero = True  # replace NaNs with 0.0 when loading subject CSVs
summarize_characteristics_by_mean = True  # collapse per-row characteristics to their column mean
normalize_values = False  # NOTE(review): not referenced in this file — presumably a future/leftover toggle
generate_fixation_image_for_each_trialid = False  # False -> one combined fixation image (key -1) per subject




# Placeholder for the eventual classification entry point.
def classify(task_def, model_def, feature_file_path_fixations, feature_file_path_saccades, feature_file_path_metrics):
    """Classify a subject for the given task and model definitions.

    NOTE(review): this is a stub — the real pipeline is not implemented yet,
    so it always reports a fixed accuracy of 0.5 (chance level).
    """
    return 0.5

# main
def main():
    """Ad-hoc driver for testing the classification environment.

    Loads experiment metadata, builds one hard-coded subject's
    characteristics profile and fixation image, then runs three
    pre-trained classifiers (sklearn kNN, PyTorch MLP, ResNet18/50 CNNs)
    and prints their raw outputs and predictions.

    NOTE(review): paths, subject files and the T1 task are hard-coded —
    this is exploratory scaffolding, not a production entry point.
    """
    
    # Read task-invariant properties from a json file
    with open(os.path.join(meta_dir, 'properties.json')) as property_file:
        properties = json.loads(property_file.read())

    aoi_id_col_name = properties['aoi_id_col_name']
    subject_id_col_name = properties['subject_id_col_name']
    degrees_visual_angle_pixels = properties['degrees_visual_angle_pixels']

    # Fixation image parameters
    fixation_image_characteristics_names = [
        properties['fixation_image_fix_x_col_name'],
        properties['fixation_image_fix_y_col_name'],
        properties['fixation_image_disp_x_col_name'],
        properties['fixation_image_disp_y_col_name'],
        properties['fixation_image_duration_ms_col_name']
        ]
    # NOTE(review): eval() on config strings executes arbitrary code — fine for
    # a trusted local properties.json, but ast.literal_eval would be safer.
    fixation_image_visual_params = eval(properties['fixation_image_visual_params'])

    #task_definitions = [('T1', 'Syllables'), ('T2', 'Prosaccades'), ('T3', 'Antisaccades'), ('T4', 'Meaningful_Text'), ('T5', 'Pseudo_Text'), ('T6', 'Visual_Diff_1'), ('T6', 'Visual_Diff_2')]
    tasks_def = eval(properties['tasks'])

    models_def = eval(properties['models'])
    
    # Per-task feature definitions (characteristics / AOI name lists).
    task_feature_def_dict = feat_ext.load_task_feature_definition_dict(tasks_def, meta_dir)

    # 0
    # feature_file_path_metrics = "y:/datasets/dyslex/experiment-final/v2.1/original/Subject_1257_T1_Syllables_metrics.csv"
    # feature_file_path_fixations = "y:/datasets/dyslex/experiment-final/v2.1/original/Subject_1257_T1_Syllables_fixations.csv"
    feature_file_path_metrics = "y:/datasets/dyslex/experiment-final/v3.0/original/Subject_1999_T1_Syllables_metrics.csv"
    feature_file_path_fixations = "y:/datasets/dyslex/experiment-final/v3.0/original/Subject_1999_T1_Syllables_fixations.csv"

    # 1
    # feature_file_path_metrics = "y:/datasets/dyslex/experiment-final/v2.1/original/Subject_1038_T1_Syllables_metrics.csv"
    # feature_file_path_fixations = "y:/datasets/dyslex/experiment-final/v2.1/original/Subject_1038_T1_Syllables_fixations.csv"
    # feature_file_path_metrics = "y:/datasets/dyslex/experiment-final/v3.0/original/Subject_1879_T1_Syllables_metrics.csv"
    # feature_file_path_fixations = "y:/datasets/dyslex/experiment-final/v3.0/original/Subject_1879_T1_Syllables_fixations.csv"

    task_type_id = 'T1'
    
    # Build the one-row feature vector (characteristics profile) for the subject.
    subject_id, subject_data_dict = feat_ext.load_and_transform_subject_characteristics(task_type_id, task_feature_def_dict, fill_null_values_by_zero, subject_id_col_name, aoi_id_col_name, summarize_characteristics_by_mean, feature_file_path_metrics)
    df = feat_ext.create_subject_characteristics_profile(subject_id, subject_data_dict)

    #df.to_csv('c:/temp/out.csv', sep=';', index=False)

    # Drop the id column; everything that remains is a numeric feature.
    X = df.drop('subject_id', axis=1).astype(np.float64)
    print(f'Feature count: {len(X.columns)}')
    print(X)

    # --- sklearn kNN classifier ---
    classifier_knn = load_kNN_classifier_sklearn(
        'T1_Syllables_3NN',
        out_models_dir
    )
    X_preds = classifier_knn.predict(X.values)
    print(f'kNN: {X_preds}')

    print(X.values.shape)
    n_features = len(X.columns)

    # MLP
    # The architecture must match the saved state dict:
    # hidden = n_features // 2, 2 conv/fc layer counts, dropout 0.25 — TODO confirm against models.MLP.
    #classifier_mlp = torch.load(os.path.join(out_models_dir, 'T1_Syllables_MLP_lc2_lf2_d0.25-e20_lr0.001.pt'))
    classifier_mlp = MLP(n_features, n_features // 2, 2, 2, 0.25)
    classifier_mlp.load_state_dict(torch.load(os.path.join(out_models_dir, 'T1_Syllables_MLP_lc2_lf2_d0.25-e20_lr0.001.pt')))
    classifier_mlp.eval()

    # Load scaler from file
    # NOTE(review): the MLP was trained on scaled features, hence the paired scaler pickle.
    scaler = pickle.load(open(os.path.join(out_models_dir, 'T1_Syllables_MLP_lc2_lf2_d0.25-e20_lr0.001.scaler.pkl'), 'rb'))

    # Classify with MLP
    with torch.no_grad():
        feature = torch.from_numpy(scaler.transform(X.values))
        # Convert feature to float64
        feature = feature.float()
        #feature = X.values
        outputs = classifier_mlp(feature)
        print(f'MLP: {outputs}')
        outputs = outputs.softmax(dim=1)
        probs, preds = torch.max(outputs, 1)
        print(f'MLP: {probs} {preds}')

    # with torch.no_grad():
    #     for inputs in test_loader:

    #         #print(len(inputs))

    #         outputs = classifier_mlp(inputs)
    #         #outputs = classifier_mlp(X.values)
    #         print(f'MLP: {outputs}')




    # --- CNN path: render the fixation image and classify it ---

    # Per-task plot bounds and max fixation duration for the colormap.
    x_min, x_max, y_min, y_max, d_max = fixation_image_visual_params[task_type_id]
    fixation_duration_color_norm = Normalize(0, d_max)

    # Load the fixation image
    subject_id, figs_dict, df_fixations_all = feat_ext.generate_subject_fixation_images(generate_fixation_image_for_each_trialid, fixation_image_characteristics_names, fill_null_values_by_zero, subject_id_col_name, degrees_visual_angle_pixels, fixation_duration_color_norm, x_min, x_max, y_min, y_max, feature_file_path_fixations)
    print(f'subject_id: {subject_id}')
    # -1 is the sentinel key for the single combined (all-trials) image.
    fig = figs_dict[-1]
    print(f'Fixation image: {fig}')
    # fixima_path = 'c:/temp/out-fixima.png'
    # fig.savefig(fixima_path, bbox_inches='tight', facecolor='white', transparent=False, pad_inches=0)
    # plt.close(fig)

    #fig = Image.open(fixima_path).convert('RGB')
    
    # Create a FigureCanvasAgg instance and render the figure to it
    # NOTE(review): plt.savefig saves the *current* figure — presumably the one
    # just created by generate_subject_fixation_images; verify this holds.
    img_buf = io.BytesIO()
    plt.savefig(img_buf, format='png')    # Create a PIL (Pillow) Image object from the byte array
    plt.close(fig)
    fig = Image.open(img_buf)
    
    # Standard ImageNet preprocessing expected by the ResNet backbones.
    fig = fig.convert('RGB')
    transform=transforms.Compose(
        [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]
    )
    fig = transform(fig)
    fig = fig.unsqueeze(0)  # add batch dimension: (1, 3, 224, 224)
    #print(f'fig: {fig}')


    # ResNet18 classifier
    classifier_cnn = binary_resnet18()
    classifier_cnn.load_state_dict(torch.load(os.path.join(out_models_dir, 'T1_Syllables_ResNet18-e20_lr0.001.pt')))
    classifier_cnn.eval()

    outputs = classifier_cnn(fig)
    print(f'CNN: {outputs}')
    outputs = outputs.softmax(dim=1)
    probs, preds = torch.max(outputs, 1)
    print(f'CNN: {probs} {preds}')
    

    # ResNet50 classifier (same input tensor)
    classifier_cnn = binary_resnet50()
    classifier_cnn.load_state_dict(torch.load(os.path.join(out_models_dir, 'T1_Syllables_ResNet50-e20_lr0.001.pt')))
    classifier_cnn.eval()

    outputs = classifier_cnn(fig)
    print(f'CNN: {outputs}')
    outputs = outputs.softmax(dim=1)
    probs, preds = torch.max(outputs, 1)
    print(f'CNN: {probs} {preds}')


    
    # # get task definitions
    # tasks = get_task_definitions()

    # # get model definitions
    # models = get_model_definitions()

    # # read trained models from files
    # trained_models = []

    # # classify
    # classify(tasks[0]['id'], models[0]['file_name'])

    # # print results

if __name__ == "__main__":
    main()

feature_extractor.py

0 → 100644
+198 −0
Original line number Diff line number Diff line
import json
import numpy as np
import os
import pandas as pd

import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.patches import Ellipse
import matplotlib.pyplot as plt

def load_task_feature_definition_dict(tasks_def, meta_dir):
    """Load the per-task feature definition files from *meta_dir*.

    For each task type id in *tasks_def*, reads up to four optional text
    files (``definition-<id>-characteristics-global.txt``,
    ``definition-<id>-characteristics.txt``, ``definition-<id>-AOIs.txt``,
    ``definition-<id>-AOIs-characteristics.txt``). Missing files yield
    ``None`` entries.

    Parameters
    ----------
    tasks_def : list of dicts, each with a 'type_id' key.
    meta_dir : directory holding the definition files.

    Returns
    -------
    dict mapping task type id to a dict with keys 'characteristics_global',
    'characteristics', 'characteristics_expected_line_count', 'aoi_names'
    and 'aoi_characteristics'.
    """
    task_feature_def_dict = {}
    for task_type_id in [task['type_id'] for task in tasks_def]:

        # Definition files for the task
        definition_characteristics_global_file = os.path.join(meta_dir, 'definition-' + task_type_id + '-characteristics-global.txt')
        definition_characteristics_file = os.path.join(meta_dir, 'definition-' + task_type_id + '-characteristics.txt')
        definition_AOIs_file = os.path.join(meta_dir, 'definition-' + task_type_id + '-AOIs.txt')
        definition_AOIs_characteristics_file = os.path.join(meta_dir, 'definition-' + task_type_id + '-AOIs-characteristics.txt')

        # Read the names of characteristics-global if the file exists.
        # ndmin=1 keeps a single-entry file from collapsing to a 0-d array,
        # which would make list(...) raise TypeError.
        if not os.path.exists(definition_characteristics_global_file):
            characteristics_global = None
        else:
            characteristics_global = list(np.loadtxt(definition_characteristics_global_file, dtype=str, comments='#', ndmin=1))
            print(f'* characteristics-global: {len(characteristics_global)}')

        # Read the names of characteristics if the file exists
        if not os.path.exists(definition_characteristics_file):
            characteristics = None
            characteristics_expected_line_count = None
        else:
            characteristics = list(np.loadtxt(definition_characteristics_file, dtype=str, comments='#', ndmin=1))
            # The first entry is the number of expected data lines; the rest are names.
            characteristics_expected_line_count = int(characteristics.pop(0))
            print(f'* characteristics: {len(characteristics)}, expected line count: {characteristics_expected_line_count}')

        # Read the names of AOIs and AOI characteristics if both files exist
        if (not os.path.exists(definition_AOIs_file)) or (not os.path.exists(definition_AOIs_characteristics_file)):
            aoi_names = None
            aoi_characteristics = None
        else:
            aoi_names = list(np.loadtxt(definition_AOIs_file, dtype=str, comments='#', ndmin=1))
            aoi_characteristics = list(np.loadtxt(definition_AOIs_characteristics_file, dtype=str, comments='#', ndmin=1))
            print(f'* AOI characteristics: {len(aoi_characteristics)}, AOIs: {len(aoi_names)}')
        
        task_feature_def_dict[task_type_id] = {
            'characteristics_global': characteristics_global,
            'characteristics': characteristics,
            'characteristics_expected_line_count': characteristics_expected_line_count,
            'aoi_names': aoi_names,
            'aoi_characteristics': aoi_characteristics
        }
    
    return task_feature_def_dict

def load_and_transform_subject_characteristics(task_type_id, task_feature_def_dict, fill_null_values_by_zero, subject_id_col_name, aoi_id_col_name, summarize_characteristics_by_mean, subject_characteristics_file_path):
    """Load one subject's metrics CSV and split it into characteristic groups.

    Returns (subject_id, characteristics_dict) where the dict holds three
    optional data frames: 'characteristics_global' (first row of the global
    columns), 'characteristics' (optionally summarized to a single mean row)
    and 'AOIs_characteristics' (per-AOI rows sorted by AOI id).
    """
    raw = pd.read_csv(subject_characteristics_file_path, skiprows=0, sep=',')

    # Optionally treat missing measurements as zeros.
    if fill_null_values_by_zero:
        raw = raw.fillna(0.0)

    # The subject id is taken from the first data row.
    subject_id = str(raw[subject_id_col_name].iloc[0])

    task_def = task_feature_def_dict[task_type_id]
    characteristics_dict = {
        'characteristics_global': None,
        'characteristics': None,
        'AOIs_characteristics': None,
    }

    # Global characteristics: a single row of task-wide values.
    global_names = task_def['characteristics_global']
    if global_names is not None:
        characteristics_dict['characteristics_global'] = raw.copy()[global_names].head(1)

    # Per-line characteristics, optionally collapsed to their mean.
    names = task_def['characteristics']
    characteristics_expected_line_count = task_def['characteristics_expected_line_count']
    if names is not None:
        frame = raw.copy()
        if frame.shape[0] != characteristics_expected_line_count:
            # Warn (don't fail) on an unexpected number of data lines.
            print(f'The number of lines in the file {subject_characteristics_file_path}:{frame.shape[0]} is not the same as the number of expected lines: {characteristics_expected_line_count}!')

        frame = frame[names]
        if summarize_characteristics_by_mean:
            frame = frame.mean().to_frame(name='mean').T

        characteristics_dict['characteristics'] = frame

    # AOI characteristics: one row per area of interest, in AOI-name order.
    aoi_names = task_def['aoi_names']
    aoi_chars = task_def['aoi_characteristics']
    if (aoi_names is not None) and (aoi_chars is not None):
        frame = raw.copy()
        frame = frame[frame[aoi_id_col_name].isin(aoi_names)]
        # Every task AOI must be present exactly once.
        if frame.shape[0] != len(aoi_names):
            raise Exception(f'The number of AOIs in the file {subject_characteristics_file_path} is not the same as the number of AOIs in the task {task_type_id}.')
        frame = frame.sort_values(by=aoi_id_col_name).reset_index(drop=True)
        characteristics_dict['AOIs_characteristics'] = frame[aoi_chars]

    # Report any remaining missing values.
    for part in characteristics_dict.values():
        if part is None:
            continue
        missing_values = part.isnull().values.any()
        if missing_values:
            print(f'  - {subject_characteristics_file_path}, missing values: {missing_values}')

    return subject_id, characteristics_dict

def create_subject_characteristics_profile(subject_id, characteristics_dict):
    """Flatten the per-group characteristic frames into one single-row profile.

    The profile starts with a 'subject_id' column, followed by every value
    of the (optional) global, summarized and AOI characteristic frames.
    """
    pieces = [pd.DataFrame({'subject_id': [subject_id]})]

    for key in ('characteristics_global', 'characteristics', 'AOIs_characteristics'):
        part = characteristics_dict[key]
        if part is None:
            continue
        # unstack -> (column, row-label) series; sorting on the row label keeps
        # a stable per-row column order, then transpose back to a single row.
        pieces.append(part.unstack().to_frame().sort_index(level=1).T)

    return pd.concat(pieces, axis=1)

def generate_fixation_image(df_fixations, degrees_visual_angle_pixels, fixation_duration_color_norm, x_min, x_max, y_min, y_max):
    """Render the fixations as semi-transparent ellipses on a blank canvas.

    Each row of *df_fixations* supplies (x, y, dispersion_x, dispersion_y,
    duration); the dispersion (in degrees of visual angle) sets the ellipse
    size and the duration sets its color on the 'hot' colormap. Returns the
    matplotlib figure (axes hidden, equal aspect, y-axis inverted to match
    screen coordinates).
    """
    fig, ax = plt.subplots()
    hot_cmap = plt.colormaps['hot']

    for fix_x, fix_y, disp_x, disp_y, duration in df_fixations.values:
        patch = Ellipse(xy=(fix_x, fix_y),
                        width=disp_x * degrees_visual_angle_pixels,
                        height=disp_y * degrees_visual_angle_pixels)
        patch.set_clip_box(ax.bbox)
        patch.set_alpha(0.5)
        patch.set_facecolor(hot_cmap(fixation_duration_color_norm(duration)))
        ax.add_patch(patch)

    plt.box(False)
    plt.axis('equal')
    # y-axis runs downwards, as in screen coordinates.
    ax.set_ylim(y_max, y_min)
    ax.set_xlim(x_min, x_max)
    plt.axis("off")

    return fig

def save_fixation_image(fig, out_data_fixation_images_dir, task_id, subject_id, trial_id):
    """Save *fig* as <out_dir>/<task_id>/<subject_id>_<trial_id>.png and close it.

    The task subdirectory is created on demand; the figure is always closed
    after saving to release matplotlib resources.
    """
    fig_dir = os.path.join(out_data_fixation_images_dir, task_id)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(fig_dir, exist_ok=True)
    fig.savefig(os.path.join(fig_dir, subject_id + '_' + str(trial_id) + '.png'), bbox_inches='tight', facecolor='white', transparent=False, pad_inches=0)
    plt.close(fig)

def generate_subject_fixation_images(generate_fixation_image_for_each_trialid, fixation_image_characteristics_names, fill_null_values_by_zero, subject_id_col_name, degrees_visual_angle_pixels, fixation_duration_color_norm, x_min, x_max, y_min, y_max, subject_fixations_file_path):
    """Build the fixation image(s) for one subject's fixation CSV.

    Returns (subject_id, figs_dict, df_fixations_all): figs_dict maps a trial
    id — or the sentinel -1 for the single combined image — to a matplotlib
    figure, and df_fixations_all collects the corresponding fixation frames.
    """
    raw = pd.read_csv(subject_fixations_file_path, skiprows=0, sep=',')

    # Optionally treat missing measurements as zeros.
    if fill_null_values_by_zero:
        raw = raw.fillna(0.0)

    # The subject id is taken from the first data row.
    subject_id = str(raw[subject_id_col_name].iloc[0])

    figs_dict = {}
    df_fixations_all = []

    if generate_fixation_image_for_each_trialid:
        # One image per distinct value in the 'trialid' column.
        for trial_id in raw['trialid'].unique():
            fixations = raw.loc[raw['trialid'] == trial_id, fixation_image_characteristics_names]
            df_fixations_all.append(fixations)
            figs_dict[trial_id] = generate_fixation_image(fixations, degrees_visual_angle_pixels, fixation_duration_color_norm, x_min, x_max, y_min, y_max)
    else:
        # Single combined image over all trials, keyed by the sentinel -1.
        fixations = raw[fixation_image_characteristics_names]
        df_fixations_all.append(fixations)
        figs_dict[-1] = generate_fixation_image(fixations, degrees_visual_angle_pixels, fixation_duration_color_norm, x_min, x_max, y_min, y_max)

    return subject_id, figs_dict, df_fixations_all
+4851 −0

File added.

Preview size limit exceeded, changes collapsed.

+382 −4254

File changed.

Preview size limit exceeded, changes collapsed.

+2 −1
Original line number Diff line number Diff line
@@ -2532,8 +2532,9 @@
    "    print(f'************************* {task_id}_{task_desc} *************************')\n",
    "    \n",
    "    exp_name = 'AOI_lines-allTrialsFixIma-hyper_params'\n",
    "    approach_name_prefix = 'MLP_lc1_lf2_'\n",
    "    \n",
    "    ############## MLP\n",
    "    approach_name_prefix = 'MLP_lc1_lf2_'\n",
    "    training_param_lr_values = [0.01, 0.005, 0.001, 0.0005, 0.0001]\n",
    "    training_param_lr_values = [str(lr) for lr in training_param_lr_values]\n",
    "    training_param_epoch_count_values = [10, 20, 30, 50]\n",
Loading