Skip to content
Snippets Groups Projects
Commit 0cea7fe6 authored by Filip Lux's avatar Filip Lux
Browse files

clean repository

parent c7ace223
No related branches found
No related tags found
No related merge requests found
../EmbedTrack/ctc_raw_data/challenge
\ No newline at end of file
../EmbedTrack/ctc_raw_data/train
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -8,13 +8,15 @@ import numpy as np
import pandas as pd
from tqdm import tqdm
def compute_divergence(path, plot=False):
import tifffile as tiff
from skimage import io
import os
'''
offset_path = path / 'wavefunc_offset.csv'
seg_path = path / 'wavefunc_seg.csv'
vertex_prob_path = path / 'vertex_prob.csv'
'''
def compute_divergence(path,
plot=False, # TO REMOVE
divergence_thr=50,
):
offset_path = path / 'tra_offsets.csv'
seg_path = path / 'seg_offsets.csv'
......@@ -25,6 +27,12 @@ def compute_divergence(path, plot=False):
assert seg_path.exists()
assert vertex_prob_path.exists(), vertex_prob_path
divergence_path = path / "edge_prob_divergence.csv"
if divergence_path.exists():
print(divergence_path, 'already exists')
return
df_off = pd.read_csv(offset_path)
df_seg = pd.read_csv(seg_path)
......@@ -35,58 +43,60 @@ def compute_divergence(path, plot=False):
times = np.unique(df_off.time)
times[::-1].sort()
for time_offset in tqdm(times):
for time_curr in tqdm(times):
# correction of the offset time
time = time_offset - 1
time_prev = time_curr - 1
# get slices of time 'time'
seg_prev = df_seg[df_seg.time == time - 1]
off_curr = df_off[df_off.time == time_offset]
seg_curr = df_seg[df_seg.time == time]
seg_prev = df_seg[df_seg.time == time_prev]
off_curr = df_off[df_off.time == time_curr]
# validation of the input file
# TODO: remove
seg_curr = df_seg[df_seg.time == time_curr]
assert len(off_curr) == len(seg_curr)
if plot:
plot_offsets(seg, off)
samples += compute_kl_multivariate(seg_prev,
off_curr,
time_curr,
divergence_thr=divergence_thr)
samples += compute_kl_multivariate(seg_prev, off_curr, time)
edge_prob_df = pd.DataFrame(samples, columns=['time_curr', 'time_prev', 'label_curr', 'label_prev', 'divergence'])
edge_prob_df = pd.DataFrame(samples, columns=['time_curr',
'time_prev',
'label_curr',
'label_prev',
'divergence'])
edge_prob_df['cost'] = np.log(edge_prob_df.divergence)
# store edge and vertex probs as .csv files
divergence_path = path / "edge_prob_divergence.csv"
# store edge and vertex probs as .csv files
print(f'storing {divergence_path}')
edge_prob_df.to_csv(
divergence_path,
index=False,
)
def compute_divergence_v2(path, plot=False):
def compute_distance(path, distance_thr=100):
offset_path = path / 'tra_offsets.csv'
seg_path = path / 'seg_offsets.csv'
vertex_prob_path = path / 'vertex_prob.csv'
print('offset_path', offset_path)
print('seg_path', seg_path)
assert offset_path.exists(), offset_path
assert seg_path.exists()
assert vertex_prob_path.exists(), vertex_prob_path
assert seg_path.exists(), seg_path
divergence_path = path / "edge_prob_divergence.csv"
distance_path = path / "edge_prob_distance.csv"
if divergence_path.exists():
print(divergence_path, 'already exists')
if distance_path.exists():
print(distance_path, 'already exists')
return
df_off = pd.read_csv(offset_path)
df_seg = pd.read_csv(seg_path)
df_vprob = pd.read_csv(vertex_prob_path)
samples = []
......@@ -101,28 +111,279 @@ def compute_divergence_v2(path, plot=False):
# get slices of time 'time'
seg_prev = df_seg[df_seg.time == time_prev]
off_curr = df_off[df_off.time == time_curr]
seg_curr = df_seg[df_seg.time == time_curr]
# validation of the input file
# TODO: remove
seg_curr = df_seg[df_seg.time == time_curr]
assert len(off_curr) == len(seg_curr)
if plot:
plot_offsets(seg, off)
samples += compute_distance_samples(seg_prev,
off_curr,
time_curr,
distance_thr=distance_thr)
samples += compute_kl_multivariate(seg_prev, off_curr, time_curr)
edge_prob_df = pd.DataFrame(samples, columns=['time_curr',
'time_prev',
'label_curr',
'label_prev',
'distance'])
# store edge and vertex probs as .csv files
print(f'storing {distance_path}')
edge_prob_df.to_csv(
distance_path,
index=False,
)
def compute_distance2(path, distance_thr=100):
    '''
    Stores candidate edges between consecutive frames together with
    the distance of the offset centroids and the distance of the mask centroids.

    Reads 'tra_offsets.csv', 'seg_offsets.csv' and the labelled images
    'maskTTTT.tif' from `path` and writes 'edge_prob_distance2.csv' there.
    Nothing is computed when the output file already exists.

    params
        path : path-like object supporting `/` and `.exists()`
            directory with the inputs, the output is stored there as well
        distance_thr : number
            threshold forwarded to compute_distance2_samples
    '''
    offset_path = path / 'tra_offsets.csv'
    seg_path = path / 'seg_offsets.csv'

    assert offset_path.exists(), offset_path
    assert seg_path.exists(), seg_path

    distance_path = path / "edge_prob_distance2.csv"
    if distance_path.exists():
        print(distance_path, 'already exists')
        return

    df_off = pd.read_csv(offset_path)
    df_seg = pd.read_csv(seg_path)

    samples = []

    # frames are processed from the last one to the first one
    times = np.unique(df_off.time)[::-1]

    for time_curr in tqdm(times):

        # offsets of frame t point to detections of frame t - 1
        time_prev = time_curr - 1

        # get slices of the two frames
        seg_prev = df_seg[df_seg.time == time_prev]
        off_curr = df_off[df_off.time == time_curr]

        # read labeled images
        # NOTE(review): `dtype` is forwarded to the imread plugin - confirm it is accepted
        li_curr = io.imread(path / f'mask{time_curr:04d}.tif', dtype=np.uint16)
        li_prev = io.imread(path / f'mask{time_prev:04d}.tif', dtype=np.uint16)

        samples += compute_distance2_samples(seg_prev,
                                             off_curr,
                                             time_curr,
                                             distance_thr=distance_thr,
                                             li_curr=li_curr,
                                             li_prev=li_prev)

    edge_prob_df = pd.DataFrame(samples, columns=['time_curr',
                                                  'time_prev',
                                                  'label_curr',
                                                  'label_prev',
                                                  'distance',
                                                  'distance_markers'])

    # store the candidate edges as a .csv file
    print(f'storing {distance_path}')
    edge_prob_df.to_csv(
        distance_path,
        index=False,
    )
def analyze_csv(data_path, csv_name, gt_path):
    '''
    Tags the candidate edges of a csv file according to the ground truth.

    csv contains the following columns: label_prev, label_curr, time_prev, time_curr
    the function adds the column:
        tag : {MOVE, DIVISION, NONVALID}
    (the vertex probabilities v_prev_prob, v_curr_prob are not added yet)

    outputs
        '<csv_name without extension>_analysis.csv' in data_path
            outer join of the candidate edges and the ground truth edges
        'false_negatives.csv' in data_path
            ground truth edges with at least one detection missing

    params
        data_path : string
            path to the directory with results: csv_name, seg_offsets.csv,
            vertex_prob.csv and the labelled images maskTTTT.tif
        csv_name : string
            name of the csv file with candidate edges
        gt_path : string
            path to the ground truth directory, it contains
            TRA/man_track.txt and TRA/man_trackTTTT.tif
    '''

    assert os.path.isdir(gt_path), gt_path
    assert os.path.isdir(data_path), data_path

    # all inputs have to exist, even though only csv_name is read here
    csv_path = os.path.join(data_path, csv_name)
    seg_path = os.path.join(data_path, 'seg_offsets.csv')
    prob_path = os.path.join(data_path, 'vertex_prob.csv')
    assert os.path.isfile(csv_path), csv_path
    assert os.path.isfile(seg_path), seg_path
    assert os.path.isfile(prob_path), prob_path

    # get resources
    df = pd.read_csv(csv_path)
    tra_path = os.path.join(gt_path, 'TRA')
    mothers = read_mothers(os.path.join(tra_path, 'man_track.txt'))
    print('mothers', mothers)

    # false negatives - at least one detection is missing
    # columns = (time_curr, time_prev, label_gt_curr, label_gt_prev)
    false_negatives = []

    # gt_edges - all gt edges, where both detections exist in res
    # columns = (time_curr, time_prev, label_curr, label_prev, tag)
    gt_edges = []

    print('np.unique(df.time_curr)', np.unique(df.time_curr))

    # for every time_curr
    for time_curr in tqdm(np.unique(df.time_curr)):

        time_prev = time_curr - 1
        verbose = (time_curr % 100) == 0

        # read gt
        gt_curr = tiff.imread(os.path.join(tra_path, f'man_track{time_curr:04d}.tif'))
        gt_prev = tiff.imread(os.path.join(tra_path, f'man_track{time_prev:04d}.tif'))

        # read res
        res_curr = tiff.imread(os.path.join(data_path, f'mask{time_curr:04d}.tif'))
        res_prev = tiff.imread(os.path.join(data_path, f'mask{time_prev:04d}.tif'))

        labels_gt_curr = np.unique(gt_curr)
        # a set gives a constant-time membership test
        labels_gt_prev = set(np.unique(gt_prev).tolist())

        if verbose:
            print(time_curr, 'np.unique(gt_curr)', labels_gt_curr, gt_curr.dtype)

        # iterate over all labels in gt_curr
        for label_gt_curr in labels_gt_curr:

            # background
            if label_gt_curr == 0:
                continue

            if int(label_gt_curr) in labels_gt_prev:
                # the same track continues
                label_gt_prev = label_gt_curr
                tag = 'MOVE'
            else:
                label_gt_prev = mothers.get(int(label_gt_curr), 0)
                tag = 'DIVISION'
                if label_gt_prev == 0:
                    # the track appears without a mother: there is no gt edge,
                    # matching against the background would only report
                    # a bogus false negative
                    continue

            mask_gt_curr = gt_curr == label_gt_curr
            mask_gt_prev = gt_prev == label_gt_prev

            label_curr = get_result_label(res_curr, mask_gt_curr)
            label_prev = get_result_label(res_prev, mask_gt_prev)

            if verbose:
                print(time_curr, 'label_gt_curr', label_gt_curr, label_curr, label_prev)

            if (label_curr == 0) or (label_prev == 0):
                # missing detections
                false_negatives.append((time_curr, time_prev, label_gt_curr, label_gt_prev))
            else:
                gt_edges.append((time_curr, time_prev, label_curr, label_prev, tag))

    # merge gt_edges with df and save
    edge_index = ['time_curr', 'time_prev', 'label_curr', 'label_prev']
    df_gt_edges = pd.DataFrame(gt_edges, columns=edge_index + ['tag'])
    df_gt_edges.set_index(edge_index, inplace=True)
    df.set_index(edge_index, inplace=True)

    result = df.join(df_gt_edges, how='outer')
    # candidate edges without a gt counterpart have a missing tag after the join
    result['tag'] = result['tag'].fillna('NONVALID')

    analysis_path = os.path.splitext(csv_path)[0] + '_analysis.csv'
    print(f'storing {analysis_path}')
    result.to_csv(
        analysis_path,
    )

    # save false_negatives
    df_fn = pd.DataFrame(false_negatives,
                         columns=('time_curr', 'time_prev', 'label_gt_curr', 'label_gt_prev'))
    fn_path = os.path.join(data_path, 'false_negatives.csv')
    print(f'storing {fn_path}')
    df_fn.to_csv(
        fn_path,
    )
def get_result_label(res, gt_mask):
    '''
    reads the label that covers at least 50 % of the mask

    params
        res : ndarray
            labelled result image, 0 is the background
        gt_mask : ndarray
            mask of one ground truth object (non-zero inside the object),
            assumed to have the shape of res
    returns
        the smallest non-zero label covering at least half of the mask pixels,
        0 if there is no such label or the mask is empty
    '''
    mask = np.asarray(gt_mask, dtype=bool)
    size = mask.sum()
    if size == 0:
        return 0

    # only the pixels under the mask are inspected
    labels, counts = np.unique(res[mask], return_counts=True)

    for label, count in zip(labels, counts):
        # `2 * count >= size` is exact, `size // 2` would accept less than 50 % for odd sizes
        if (label != 0) and (2 * count >= size):
            return label

    return 0
def read_mothers(man_track_path):
    '''
    reads the mother of every track from a man_track.txt file

    every non-empty line holds four whitespace separated integers,
    the first one is the track index, the last one is the index of its mother

    params
        man_track_path : string
            path to the man_track.txt file
    returns
        mothers = {daughter_idx : mother_idx}
    raises
        AssertionError if the file does not exist
        ValueError if a line does not hold exactly four integers
    '''
    assert os.path.isfile(man_track_path), man_track_path

    mothers = {}
    with open(man_track_path, 'r') as f:
        for line in f:
            # split() without an argument tolerates repeated blanks and tabs
            fields = line.split()
            if not fields:
                # skip empty lines, e.g. at the end of the file
                continue
            daughter_idx, _, _, mother_idx = fields
            mothers[int(daughter_idx)] = int(mother_idx)

    return mothers
def kl_divergence(mu1, mu2, sigma1, sigma2):
    '''
    Kullback-Leibler divergence KL(N(mu1, sigma1^2) || N(mu2, sigma2^2))
    of two univariate normal distributions

    params
        mu1, mu2 : float or ndarray
            means of the distributions
        sigma1, sigma2 : float or ndarray
            standard deviations of the distributions, expected to be positive
    returns
        log(sigma2 / sigma1) + (sigma1^2 + (mu1 - mu2)^2) / (2 * sigma2^2) - 1/2
    '''
    # the closed form uses log, not log1p: the divergence of identical distributions is 0
    log_ratio = np.log(sigma2 / sigma1)
    return log_ratio + (sigma1 ** 2 + (mu1 - mu2) ** 2) / (2 * sigma2 ** 2) - .5
......@@ -145,7 +406,7 @@ def print_kl(seg, off):
return pd.DataFrame(samples, columns=['label from', 'label to', 'divergence'])
def compute_kl_multivariate(seg, off, time, divergence_thr=50, distance_thr=100):
def compute_kl_multivariate(seg, off, time, divergence_thr=50):
samples = []
for i, line in enumerate(seg.values):
......@@ -164,16 +425,126 @@ def compute_kl_multivariate(seg, off, time, divergence_thr=50, distance_thr=100)
S1 = np.array([[c00, c01], [c10, c11]])
m1 = np.array([mx, my])
if euklidian_distance(m0, m1) > distance_thr:
continue
div = kl_mvn(m0, S0, m1, S1)
if div < divergence_thr:
samples.append([time, time-1, int(label_off_curr), int(label_seg_prev), div])
return samples
def compute_distance_samples(seg, off, time, distance_thr=100):
    '''
    pairs detections of the previous frame with offsets of the current frame
    and keeps the pairs whose centroids are closer than distance_thr

    params
        seg : DataFrame
            detections of frame time - 1; column 1 is read as the label,
            columns 2 and 3 as the centroid (mx, my)
        off : DataFrame
            offsets of frame time, same column layout as seg
        time : int
            index of the current frame
        distance_thr : number
            only pairs with a distance below this value are kept
    returns
        list of [time, time - 1, label_curr, label_prev, distance]
    '''
    # centroids of the current frame are the same for every previous detection
    curr = [(line[1], np.array([line[2], line[3]])) for line in off.values]

    samples = []
    for line in seg.values:
        # time prev
        label_seg_prev = line[1]
        m0 = np.array([line[2], line[3]])

        for label_off_curr, m1 in curr:
            dist = euklidian_distance(m0, m1)
            if dist < distance_thr:
                samples.append([time, time - 1, int(label_off_curr), int(label_seg_prev), dist])

    return samples
def compute_distance2_samples(seg, off, time, distance_thr=100, li_curr=None, li_prev=None):
    '''
    pairs detections of the previous frame with offsets of the current frame,
    keeps the pairs whose centroids are closer than distance_thr and
    adds the distance between the centroids of their masks

    params
        seg : DataFrame
            detections of frame time - 1; column 1 is read as the label,
            columns 2 and 3 as the centroid (mx, my)
        off : DataFrame
            offsets of frame time, same column layout as seg
        time : int
            index of the current frame
        distance_thr : number
            only pairs with a centroid distance below this value are kept
        li_curr, li_prev : ndarray
            labelled images of the current and the previous frame, required
    returns
        list of [time, time - 1, label_curr, label_prev, distance, distance of mask centroids]
    '''
    assert li_curr is not None
    assert li_prev is not None

    def mask_centroid(labelled, label):
        # mean coordinate of the pixels of `label` along every image axis
        return np.array([coo.mean() for coo in (labelled == label).nonzero()])

    # time curr: {label : (mask centroid, offset centroid)}
    off_coo_dict = {}
    for line in off.values:
        label_off_curr = line[1]
        m1 = np.array([line[2], line[3]])
        off_coo_dict[label_off_curr] = mask_centroid(li_curr, label_off_curr), m1

    samples = []
    for line in seg.values:
        # time prev
        label_seg_prev = line[1]
        m0 = np.array([line[2], line[3]])
        coo_prev = mask_centroid(li_prev, label_seg_prev)

        for label_off_curr, (coo_curr, m1) in off_coo_dict.items():
            dist = euklidian_distance(m0, m1)
            if dist < distance_thr:
                # the mask distance is needed for accepted pairs only
                dist_masks = euklidian_distance(coo_prev, coo_curr)
                samples.append([time, time - 1, int(label_off_curr), int(label_seg_prev), dist, dist_masks])

    return samples
def compute_distance2_samples_fast(seg, off, time, distance_thr=100, li_curr=None, li_prev=None):
    '''
    pairs the labels of two consecutive labelled images and keeps the pairs
    whose mask centroids are closer than distance_thr

    params
        seg, off :
            not used, kept for the same signature as compute_distance2_samples
        time : int
            index of the current frame
        distance_thr : number
            only pairs with a mask centroid distance below this value are kept
        li_curr, li_prev : ndarray
            labelled images of the current and the previous frame, required,
            label 0 is skipped as the background
    returns
        list of [time, time - 1, label_curr, label_prev, 0, distance of mask centroids]
        the fifth value is a placeholder for the distance of the offset centroids
    '''
    assert li_curr is not None
    assert li_prev is not None

    def mask_centroids(labelled):
        # {label : mean coordinate of its pixels along every image axis}
        return {label: np.array([coo.mean() for coo in (labelled == label).nonzero()])
                for label in np.unique(labelled) if label != 0}

    dict_coo_prev = mask_centroids(li_prev)
    dict_coo_curr = mask_centroids(li_curr)

    samples = []
    for l_curr, coo_curr in dict_coo_curr.items():
        for l_prev, coo_prev in dict_coo_prev.items():
            dist_masks = euklidian_distance(coo_prev, coo_curr)
            if dist_masks < distance_thr:
                samples.append([time, time - 1, int(l_curr), int(l_prev), 0, dist_masks])

    return samples
......
......@@ -2,8 +2,8 @@ from .sys_tools import run_procedure
# TODO: pass the settings to EmbedTrack through a config file
def run_embedtrack(data_path, seq, model_path, batch_size=1):
    '''
    runs EmbedTrack/embedtrack.py as an external python3 process

    params
        data_path, seq, model_path, batch_size :
            values of the command line options --data_path, --sequence,
            --model_path and --batch_size; the command is split on whitespace
            by run_procedure, so the values must not contain blanks
    returns
        the value returned by run_procedure
    '''
    prompt = f'python3 EmbedTrack/embedtrack.py --sequence {seq} --data_path {data_path} --model_path {model_path} --batch_size {batch_size}'
    return run_procedure(prompt)
This diff is collapsed.
......@@ -3,8 +3,9 @@ import sys
def run_procedure(args):
    '''
    runs a command as a child process and reports a failure

    params
        args : string
            the command with its arguments separated by whitespace;
            no quoting is interpreted, an argument must not contain blanks
    returns
        exit status of the process, 0 means success
    '''
    print(f'running procedure: {args}')

    # split() without an argument does not produce empty arguments on repeated blanks
    status_output = subprocess.call(args.split())

    if status_output != 0:
        print(f'ERROR: procedure {args} failed with an output {status_output}')
    return status_output
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment