diff --git a/.gitmodules b/.gitmodules index 23ba923063dc9a340ee290adca0f843a94be8bbf..1b3fd8c271188442707689c510cbd5c1d7f775dc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "tracking/libct"] - path = tracking/libct - url = https://github.com/vislearn/libct.git -[submodule "embedtrack"] - path = embedtrack - url = https://git.scc.kit.edu/kit-loe-ge/embedtrack.git +[submodule "EmbedTrack"] + path = EmbedTrack + url = https://github.com/xluxf/EmbedTrack diff --git a/DATA b/DATA new file mode 120000 index 0000000000000000000000000000000000000000..24bbc67ab4d028426d8d0dc858c2352b21387afa --- /dev/null +++ b/DATA @@ -0,0 +1 @@ +embedtrack_me/ctc_raw_data \ No newline at end of file diff --git a/ctc_metrics/DETMeasure b/ctc_metrics/DETMeasure new file mode 100755 index 0000000000000000000000000000000000000000..61c5373274dfb44ae7e9a659e64285443b51256f Binary files /dev/null and b/ctc_metrics/DETMeasure differ diff --git a/ctc_metrics/SEGMeasure b/ctc_metrics/SEGMeasure new file mode 100755 index 0000000000000000000000000000000000000000..bd6413e2e6eca8ef56cc6673f1e8d5ea0f22c07f Binary files /dev/null and b/ctc_metrics/SEGMeasure differ diff --git a/ctc_metrics/TRAMeasure b/ctc_metrics/TRAMeasure new file mode 100755 index 0000000000000000000000000000000000000000..92204218d129c74d33286724f234351a0db36e13 Binary files /dev/null and b/ctc_metrics/TRAMeasure differ diff --git a/embedtrack b/embedtrack deleted file mode 160000 index 7ba7e3db40a4592b50f9cb070bf772e5c769ef86..0000000000000000000000000000000000000000 --- a/embedtrack +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7ba7e3db40a4592b50f9cb070bf772e5c769ef86 diff --git a/tracking/__init__ b/tracking/__init__ new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tracking/divergence_tools.py b/tracking/divergence_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..9ee23a083db02b3c1325b3a74289f678a1644415 --- /dev/null +++ b/tracking/divergence_tools.py @@ -0,0 +1,206 @@ +""" +Author: Filip Lux (2023), Masaryk University +Licensed under MIT License +""" + +from pathlib import Path +import numpy as np +import pandas as pd +from tqdm import tqdm + +def compute_divergence(path, plot=False): + + ''' + offset_path = path / 'wavefunc_offset.csv' + seg_path = path / 'wavefunc_seg.csv' + vertex_prob_path = path / 'vertex_prob.csv' + ''' + + offset_path = path / 'tra_offsets.csv' + seg_path = path / 'seg_offsets.csv' + vertex_prob_path = path / 'vertex_prob.csv' + + + assert offset_path.exists(), offset_path + assert seg_path.exists() + assert vertex_prob_path.exists(), vertex_prob_path + + + df_off = pd.read_csv(offset_path) + df_seg = pd.read_csv(seg_path) + df_vprob = pd.read_csv(vertex_prob_path) + + samples = [] + + times = np.unique(df_off.time) + times[::-1].sort() + + for time_offset in tqdm(times): + + # correction of the offset time + time = time_offset - 1 + + # get slices of time 'time' + seg_prev = df_seg[df_seg.time == time - 1] + off_curr = df_off[df_off.time == time_offset] + + seg_curr = df_seg[df_seg.time == time] + + assert len(off_curr) == len(seg_curr) + + if plot: + plot_offsets(seg, off) + + samples += compute_kl_multivariate(seg_prev, off_curr, time) + + + edge_prob_df = pd.DataFrame(samples, columns=['time_curr', 'time_prev', 'label_curr', 'label_prev', 'divergence']) + edge_prob_df['cost'] = np.log(edge_prob_df.divergence) + + # store edge and vertex probs as .csv files + divergence_path = path / "edge_prob_divergence.csv" + + print(f'storing {divergence_path}') + edge_prob_df.to_csv( + divergence_path, + index=False, + ) + +def compute_divergence_v2(path, plot=False): + + offset_path = path / 'tra_offsets.csv' + seg_path = path / 'seg_offsets.csv' + vertex_prob_path = path / 'vertex_prob.csv' + + + assert offset_path.exists(), offset_path + assert seg_path.exists() + assert vertex_prob_path.exists(), vertex_prob_path + + divergence_path = path / "edge_prob_divergence.csv" + + if divergence_path.exists(): + print(divergence_path, 'already exists') + return + + + df_off = pd.read_csv(offset_path) + df_seg = pd.read_csv(seg_path) + df_vprob = pd.read_csv(vertex_prob_path) + + samples = [] + + times = np.unique(df_off.time) + times[::-1].sort() + + for time_curr in tqdm(times): + + # correction of the offset time + time_prev = time_curr - 1 + + # get slices of time 'time' + seg_prev = df_seg[df_seg.time == time_prev] + off_curr = df_off[df_off.time == time_curr] + + seg_curr = df_seg[df_seg.time == time_curr] + + assert len(off_curr) == len(seg_curr) + + if plot: + plot_offsets(seg, off) + + samples += compute_kl_multivariate(seg_prev, off_curr, time_curr) + + + edge_prob_df = pd.DataFrame(samples, columns=['time_curr', 'time_prev', 'label_curr', 'label_prev', 'divergence']) + edge_prob_df['cost'] = np.log(edge_prob_df.divergence) + + # store edge and vertex probs as .csv files + print(f'storing {divergence_path}') + edge_prob_df.to_csv( + divergence_path, + index=False, + ) + + +def kl_divergence(mu1, mu2, sigma1, sigma2): + return np.log1p(sigma2/sigma1) + (sigma1**2 + (mu1 - mu2) ** 2) / (2 * sigma2 ** 2) - .5 + + +def print_kl(seg, off): + + samples = [] + for i, line in enumerate(seg.values): + + _, label1, mx1, my1, sx1, sy1 = line + + + for j, line in enumerate(off.values): + _, label2, mx2, my2, sx2, sy2 = line + + kl = kl_divergence(mx1, mx2, sx1, sx2) + kl_divergence(my1, my2, sy1, sy2) + + samples.append([label1, label2, kl]) + + return pd.DataFrame(samples, columns=['label from', 'label to', 'divergence']) + + +def compute_kl_multivariate(seg, off, time, divergence_thr=50, distance_thr=100): + + samples = [] + for i, line in enumerate(seg.values): + + # time prev + _, label_seg_prev, mx, my, sx, sy, c00, c01, c10, c11 = line + + S0 = np.array([[c00, c01], [c10, c11]]) + m0 = np.array([mx, my]) + + for j, line in enumerate(off.values): + + # time curr + _, label_off_curr, mx, my, sx, sy, c00, c01, c10, c11 = line + + S1 = np.array([[c00, c01], [c10, c11]]) + m1 = np.array([mx, my]) + + if euklidian_distance(m0, m1) > distance_thr: + continue + + kl_divergence = kl_mvn(m0, S0, m1, S1) + + if kl_divergence < divergence_thr: + # columns=['time_curr', 'time_prev', 'label_curr', 'label_prev', 'divergence'] + samples.append([time, time-1, int(label_off_curr), int(label_seg_prev), kl_divergence]) + + #return pd.DataFrame(samples, columns=['time_curr', 'time_prev', 'label_curr', 'label_prev', 'divergence']) + return samples + + +def euklidian_distance(m0, m1): + return np.linalg.norm(m0 - m1) + + +def kl_mvn(m0, S0, m1, S1): + """ + Kullback-Liebler divergence from Gaussian pm,pv to Gaussian qm,qv. + Also computes KL divergence from a single Gaussian pm,pv to a set + of Gaussians qm,qv. + + + From wikipedia + KL( (m0, S0) || (m1, S1)) + = .5 * ( tr(S1^{-1} S0) + log |S1|/|S0| + + (m1 - m0)^T S1^{-1} (m1 - m0) - N ) + """ + # store inv diag covariance of S1 and diff between means + N = m0.shape[0] + iS1 = np.linalg.inv(S1) + diff = m1 - m0 + + # kl is made of three terms + tr_term = np.trace(iS1 @ S0) + det_term = np.log(np.linalg.det(S1)/np.linalg.det(S0)) #np.sum(np.log(S1)) - np.sum(np.log(S0)) + quad_term = diff.T @ np.linalg.inv(S1) @ diff #np.sum( (diff*diff) * iS1, axis=1) + #print(tr_term,det_term,quad_term) + return .5 * (tr_term + det_term + quad_term - N) \ No newline at end of file diff --git a/tracking/global_tracker.py b/tracking/global_tracker.py index a56ee58e33c3290fc867acbd1e93b61978aed23f..292fec192b0aaa3ac8974a0cee696a818eeb8b96 100644 --- a/tracking/global_tracker.py +++ b/tracking/global_tracker.py @@ -1,219 +1,680 @@ -from pathlib import Path +""" +Author: Filip Lux (2023), Masaryk University +Licensed under MIT License +""" -class FlowGraph(): - - def __init__(self, path, sequence): - +import os +import math - # unique indexes of source and termination vertices - self.source_idx = 0 # source - self.sink_idx = 1 # target +import pandas as pd +import numpy as np +from tqdm import tqdm - # first cell index - self.vertex_index = 2 - self.edge_index = 0 - - self.vertices = {} # vertices[f_idx][label] = v_idx - - - # define res_path - res_path = Path(path, f'{sequence}_DATA') - assert res_path.exists() - - # get datset of frames - self.dataset = Dataset(res_path, name_pattern='mask') +from pathlib import Path +from .sys_tools import run_procedure +from .libct import run_libct, libct2ctc +from .graph import FlowGraph +from .divergence_tools import compute_divergence_v2 - ''' - ######### - OUTPUTS - ######### - - each vertex and edge has its index - v_idx, e_idx - using this index you can get vertices that it represents - ''' - self.edges = {} # edges[e_idx] = (v_idx, v_idx) - - 'vertex_map maps vetex id to location on a particular frame' - self.vertex_map = {} # maps v_idx to tuple (frame, x, y) - 'veiws to sets of edges' - self.edge_from_me = {self.source_idx: [], self.sink_idx: []} # edge_from[v_idx] = [e_idx, e_idx, ...] - self.edge_to_me = {self.source_idx: [], self.sink_idx: []} # edge_to[v_idx] = [e_idx, e_idx, ...] +from itertools import repeat, chain +from matplotlib import pyplot as plt +class GlobalTracker(): + + ''' + one instance per a sequence + operates in a one directory XX_DATA, where outputs of embedtrack are stored + ''' + + def __init__(self, + data_path, + res_path, + lbd=.5, + mean_dist=10, + max_dist=20, + vertex_thr=0.95, + app_cost=10000, + edge_min_cost=-1): ''' - 'pd DataFrames to store events' - EDGE_COLUMNS = ['edge_index', 'f0', 'f1', 'x0', 'y0', 'x1', 'y1'] - df_edges = pd.DataFrame(columns=MOVE_COLUMNS) - df_vertices = pd.DataFrame(columns=MOVE_COLUMNS) + Parameters: + ----------- + data_path : str + path to the raw data + schema of the directory: + + <data_path>/01 + /01_GT + + # added directories + /01_RES # symlink to <res_path>/01_RES + /01_DATA/mask0000.tif # output of the segmentation procedure + /... + /tra_offsets.csv + /... + ... + res_path : str + path to the stored results + + <res_path>/01_RES/tracking.txt # self.run_tracking() + /libct.sol + /... + /mask0000.tif # self.libctsol2ctc() + /... + /res_track.txt # self.evaluate_ctc() + /... ''' - - # edge_df_rows = [] - - self.vertex_prob = {0: 1., 1: 1.} - self.edge_prob = {} - - 'list of coos from the frames' - self.coos = {} - - def _read_frame(self, frame_idx): - frame = self.dataset.get_frame(frame_idx) - - res = {} - for reg in measure.regionprops(frame): - res[reg.label] = reg.centroid + sequence = os.path.basename(res_path)[:2] + assert sequence == os.path.basename(data_path)[:2] - return res - - - def read_coos(self, frame_idx, label): - - if frame_idx not in self.coos.keys(): - self.coos[frame_idx] = self._read_frame(frame_idx) + self.res_path = res_path + self.data_path = data_path + assert os.path.isdir(data_path), data_path + assert os.path.isdir(res_path), res_path + + self.graph = None + self.sequence = sequence + self.lbd=lbd + self.mean_dist = mean_dist + self.max_dist = max_dist + self.vertex_thr = vertex_thr + self.app_cost = app_cost + self.edge_min_cost = edge_min_cost + + def run_tracking(self, + idx_min=0, + idx_max=10000, + limit_dist=100, + n_neighbours=2): + + # compute divergence + div_path = os.path.join(self.data_path, 'edge_prob_divergence.csv') + if not os.path.isfile(div_path): + print('computing divergence') + compute_divergence_v2(self.data_path) - coos = self.coos[frame_idx] - - assert label in coos.keys() + assert os.path.isfile(div_path) + + # create graph + print('creating graph') + self.graph = get_graph_embedtrack( + self.data_path, + min_frame=idx_min, + max_frame=idx_max, + limit_dist=limit_dist, + n_neighbours=n_neighbours, + mean_dist=self.mean_dist, + max_dist=self.max_dist) + + # create and save tracking file + print('creating tracking file') + tracking = graph2tracking(self.graph, + lbd=self.lbd, + vertex_thr=self.vertex_thr, + max_cost=self.app_cost, + mean_dist=self.mean_dist, + max_dist=self.max_dist, + edge_min_cost=self.edge_min_cost) + save_tracking(tracking, self.res_path) + + # compute solution + print('computiong solution') + run_libct(self.res_path) - return coos[label] - - def get_dictionary_from_masks(self): - - dataset = Dataset(res_path, name_pattern='mask') + def save_results_ctc(self): + # translate libct.sol to ctc result format + lm = libct2ctc(data_path=self.data_path, + res_path=self.res_path) + return lm + + def solution_stats(self, plot=False): + ''' + outputs 'solution_stats.txt' file, describing + a contribution of individual items on a solution + ''' + tracking_file_path = Path(self.res_path, 'tracking.txt') + sol_file_path = Path(self.res_path, 'tracking.sol') + assert os.path.isfile(tracking_file_path), tracking_file_path + assert os.path.isfile(sol_file_path), sol_file_path - def add_vertex(self, frame_idx, label, prob): + sol_stats(sol_file_path, tracking_file_path, plot=plot) - frame_idx, label = int(frame_idx), int(label) - # get new index - v_idx = self.vertex_index - self.vertex_index += 1 + def evaluate_ctc(self): + ''' + computes CTC metrics to evaluate the results + ''' - # create dict, if necessary - if frame_idx not in self.vertices.keys(): - self.vertices[frame_idx] = {} - - # check - assert label not in self.vertices[frame_idx].keys() + # TODO: test if the data_path containt GT directory - self.vertices[frame_idx][label] = v_idx + dirname = os.path.dirname(self.data_path) + directory = os.path.basename(self.res_path) - #if prob < . + gt_path = os.path.join(dirname, directory.replace('RES', 'GT')) + if not os.path.isdir(gt_path): + print(f'ground truth masks are not available {gt_path}') + return - self.vertex_prob[v_idx] = prob + temp_res_path = os.path.join(os.getcwd(), dirname, directory) + src_path = os.path.join(os.getcwd(), self.res_path) - # frame, label, x, y - # TODO: add coordinates - # - contain dataset - # - load each frame only ones - x, y = self.read_coos(frame_idx, label) #TODO: read from dictonary + print(src_path) - self.vertex_map[v_idx] = (frame_idx, label, x, y) + # try to remove symlink + run_procedure(f'rm {temp_res_path}') - # prepare edge containers - self.edge_from_me[v_idx] = [] - self.edge_to_me[v_idx] = [] + assert not Path(temp_res_path).exists(), f'the target folder already exists {temp_res_path}' + assert Path(src_path).exists() - return v_idx + # create symlink from results to original data folder + run_procedure(f'ln -s {src_path} {temp_res_path}') - def add_edge(self, frame1, label1, frame2, label2, prob): + # run evaluation procedures + measures = ['TRAMeasure', 'DETMeasure', 'SEGMeasure'] + #measures = ['TRAMeasure', 'SEGMeasure'] + for m in measures: + command = f'./ctc_metrics/{m} {dirname} {self.sequence} 4' + print(f'running {command}') + run_procedure(command) + + # remove symlink + run_procedure(f'rm {temp_res_path}') + + +def compute_edge_cost(log_kl_divergence, distance, avg_dist=10): + + # A) cost depends only on a distance of objects + cost = distance - avg_dist + + + # B) combine time 5 + #cost = cost + np.log1p(np.maximum((dist - avg_dist)**3, 0)) + + #cost = cost - (((dist - avg_dist) / 9) **3) + + return - cost + + +def get_graph_embedtrack( + res_path, + min_frame=0, + max_frame=2000, + virtual_edge_prob=50, # equal to divergence_thr=50 in divergence_tools.py + limit_dist=50, + n_neighbours=3, + mean_dist=10, + max_dist=20, + edge_min_cost=-1 + ): + ''' + creates instance of candidate graph + + Parameters + ---------- + res_path: str + path to the dataset + min_frame: int + first frame to analyze + max_frame: int + last frame to analyze + + Returns + ------- + + graph: tuple + structured data about the graph + + ''' + + # create consistent graph structure + graph = FlowGraph(res_path) + + #pd_edge_path = os.path.join(res_path, 'edge_prob.csv') + pd_edge_path = os.path.join(res_path, 'edge_prob_divergence.csv') + pd_vertex_path = os.path.join(res_path, 'vertex_prob.csv') + assert os.path.isfile(pd_edge_path), pd_edge_path + assert os.path.isfile(pd_vertex_path), pd_vertex_path + + df_edges = pd.read_csv(pd_edge_path, index_col=False) + df_vertices = pd.read_csv(pd_vertex_path, index_col=False) + + # compute cost + df_edges['cost'] = np.log(df_edges.divergence) - frame1, label1, frame2, label2 = int(frame1), int(label1), int(frame2), int(label2) + # list of real frame indexes + frame_indexes = df_vertices['time'].unique() + + # TODO: sort in reverse order + frame_indexes.sort() + min_frame = int(np.maximum(frame_indexes.min(), min_frame)) + max_frame = int(np.minimum(frame_indexes.max(), max_frame)) + + ########## + # ADD ALL VERTICES + print('GET GRAPH: adding vertices') + for i, row in df_vertices.iterrows(): + + prob = row['prob'] + label = row['label'] + frame_idx = row['time'] - v1_idx = self.get_vertex_index(frame1, label1) - v2_idx = self.get_vertex_index(frame2, label2) + if not (min_frame <= frame_idx <= max_frame): + continue - e_idx = self.edge_index - self.edge_index += 1 + assert label > 0 - self.__add_edge(v1_idx, v2_idx, e_idx, prob) + graph.add_vertex(frame_idx, label, prob) - return e_idx + ########## + # ADD SOURCE AND SINK EDGES + if int(frame_idx) == min_frame: + graph.add_source_edge(frame_idx, label) + + elif int(frame_idx) == max_frame: + graph.add_sink_edge(frame_idx, label) + + ########### + # ADD ALL MIDDLE EDGES + print('GET GRAPH: adding edges') + for i, row in df_edges.iterrows(): + + #prob = row['prob'] + cost = row['cost'] + label_curr = row['label_curr'] + frame_curr = row['time_curr'] + label_prev = row['label_prev'] + frame_prev = row['time_prev'] + + if (frame_prev < min_frame) or (frame_curr > max_frame): + continue + + # no appearance in the sequence + graph.add_edge(frame_curr, label_curr, frame_prev, label_prev, cost) + + + ########### + # ADD VIRTUAL edges + # for every edge, that has no connection to t+1: + # - add adges to three closest edges (up to 'limit_dist') + # - the edge prob is 'virtual_edge_prob' (=.25) + # TODO: replace with creating full graph only based on a distance + # virtual edges then only supports predicted - def __add_edge(self, v1_idx, v2_idx, e_idx, prob): + print(f'GET GRAPH: entangling to {n_neighbours} neighbours') + if n_neighbours > 0: - self.edges[e_idx] = (v1_idx, v2_idx) - self.edge_prob[e_idx] = prob + # to_me, from_me = [], [] # REMOVE + for v_idx in graph.vertex_map.keys(): + + if len(graph.edge_to_me[v_idx]) < n_neighbours: + from_me_ = [graph.edges[e_idx][1] for e_idx in graph.edge_from_me[v_idx]] + + frame_idx, label, x2, y2 = graph.vertex_map[v_idx] + if frame_idx == min_frame: + continue + + # to_me.append(v_idx) REMOVE + neighbors = graph.knn(frame_idx-1, x2, y2, + n=n_neighbours, + limit_dist=limit_dist) # returns list of v_idxs + + # add virtual edges + for n_v_idx in neighbors: + + # skip already included neighbours + if n_v_idx in from_me_: + continue + + frame_prev, label_prev, x1, y1 = graph.vertex_map[n_v_idx] + + graph.add_edge(frame_idx, + label, + frame_prev, + label_prev, + virtual_edge_prob) + + ''' REMOVE + cost = get_distance_cost(x1, y1, x2, y2, + mean_dist=mean_dist, + max_dist=max_dist, + min_cost=edge_min_cost) + graph.add_edge(frame_idx, label, frame_prev, label_prev, cost) + ''' + + ''' OUTPUT ''' + indexes = graph.vertex_index, graph.edge_index + probs = graph.edge_prob, graph.vertex_prob + move_edges = graph.edge_to_me, graph.edge_from_me + + return indexes, probs, graph.edges, move_edges, graph.vertex_map + + +def get_distance_cost(dist, + mean_dist=10, + max_dist=20, + min_cost=-1): + #dist = np.linalg.norm(np.array([x2 - x1, y2 - y1])) + #return -max((dist - max_dist) / (max_dist - mean_dist), min_cost) + # TODO: include mean_dist + return - (dist * min_cost / max_dist - min_cost) + #return ((dist/max_dist)**2 - 1) * (-min_cost) + + +# GRAPH TO TRACKING +# KL +def vertex_cost(prob, + vertex_thr=0.95, + min_cost=5): + ''' + probs of real vertices is higher than .95 + ''' + # TODO: extravagant hand-made metrics + # replace + + return - min_cost / (1 - vertex_thr) * ( prob - vertex_thr) + + +# DEPRECIATED +def edge_cost(prob, x1, y1, x2, y2, lbd=.5, mean_dist=10, max_dist=20, min_cost=-1): + ''' + edge cost is log(divergence) + ''' + + dist = np.linalg.norm(np.array([x2 - x1, y2 - y1])) + dst_cost = - get_distance_cost(dist, mean_dist, max_dist, min_cost) + dvg_cost = - prob - self.edge_from_me[v1_idx].append(e_idx) - self.edge_to_me[v2_idx].append(e_idx) + return lbd * dst_cost + (1 - lbd) * dvg_cost + + +def graph2tracking(graph, + max_cost=10000, + lbd=.5, + vertex_thr=0.95, + mean_dist=10, + max_dist=20, + edge_min_cost=-1): + + # decompose graph + indexes, probs, edges, move_edges, vertex_map = graph + + vertex_index, edge_index = indexes + edge_prob, vertex_prob = probs + edge_to, edge_from = move_edges + + # variables + tracking = [] + + last_frame_index = np.max([f_idx for f_idx, _, _, _ in vertex_map.values()]) + first_frame_index = np.min([f_idx for f_idx, _, _, _ in vertex_map.values()]) + + frame_shift = first_frame_index - def add_source_edge(self, frame2, label2): + # vertex hypothesis + # iterate over vertex_map + + for v_idx in tqdm(vertex_map.keys()): - frame2, label2 = int(frame2), int(label2) + frame_idx, label, x, y = vertex_map[v_idx] + prob = vertex_prob[v_idx] - v2_idx = self.get_vertex_index(frame2, label2) + # compute cost + cost = vertex_cost(prob, vertex_thr=vertex_thr) - e_idx = self.edge_index - self.edge_index += 1 + #unique_id = f'V_{time}_{label}' + unique_id = f'1{frame_idx:05d}{label:05d}' - self.__add_edge(self.source_idx, v2_idx, e_idx, 1.) + assert len(unique_id) == 11, unique_id - return e_idx - - def add_sink_edge(self, frame1, label1): - frame1, label1 = int(frame1), int(label1) + tracking.append(f'H {frame_idx-frame_shift} {unique_id} {cost} {x} {y}') - v1_idx = self.get_vertex_index(frame1, label1) + # no appearance and diappearance -> cost 10000 + app_cost, disapp_cost = max_cost, max_cost + if (frame_idx == first_frame_index): + app_cost = 0. + if (frame_idx == last_frame_index): + disapp_cost = 0. + + # add appearance and disappearance + unique_id_app = f'4{frame_idx:05d}{label:05d}' + unique_id_dis = f'5{frame_idx:05d}{label:05d}' + tracking.append(f'APP {unique_id_app} {unique_id} {app_cost}') + tracking.append(f'DISAPP {unique_id_dis} {unique_id} {disapp_cost}') + + edges_ = {} + for e_idx in edges.keys(): - e_idx = self.edge_index - self.edge_index += 1 + v_idx_curr, v_idx_prev = edges[e_idx] - self.__add_edge(v1_idx, self.sink_idx, e_idx, 1.) + # appearance + if v_idx_prev in [0, 1]: + continue + + if v_idx_curr == 0: + continue - return e_idx + frame_idx_prev, label_prev, x1, y1 = vertex_map[v_idx_prev] + frame_idx_curr, label_curr, x2, y2 = vertex_map[v_idx_curr] + + dist = np.linalg.norm(np.array([x2 - x1, y2 - y1])) + + # embed cost - bonus term, only negative + embed_cost = min(edge_prob[e_idx], 0) + embed_cost = edge_prob[e_idx] + + # dist cost - standard term, zero for ~.95 percentile + dist_cost = get_distance_cost(dist, + mean_dist=mean_dist, + max_dist=max_dist, + min_cost=edge_min_cost) + + # final cost, weighted by lambda + cost = lbd * dist_cost + (1-lbd) * embed_cost + + # create the edge descriptor + seg_id_right = f'1{frame_idx_curr:05d}{label_curr:05d}' + seg_id_left = f'1{frame_idx_prev:05d}{label_prev:05d}' + unique_id = f'2{seg_id_left}{seg_id_right}' + assert len(unique_id) == 23, unique_id + + # update tracking + tracking.append(f'MOVE {unique_id} {seg_id_left} {seg_id_right} {cost}') + + # propagate info to compute division events later on + edges_[seg_id_left] = edges_.get(seg_id_left, []) + edges_[seg_id_left].append((seg_id_right, cost, dist_cost)) + + # division + div_idx = 0 + for id_left in tqdm(edges_.keys(), 'divisions'): + + right_ids = edges_[id_left] + + for id_right1, cost1, dist_cost1 in right_ids: + for id_right2, cost2, dist_cost2 in right_ids: + if id_right1 >= id_right2: + continue + + # define unique id + unique_id = f'3{id_left}{id_right1}{id_right2}' + assert len(unique_id) == 34, unique_id + + # experimental cost + #cost = - max(cost1, cost2) + + # experimental cost + # penalize if costs are different + #diff_penalty = (dist_cost1 - dist_cost2)**2 + #cost = (cost1 + cost2) // 2 + diff_penalty / 10 + + cost = (cost1 + cost2) // 2 + + if cost > 1: + continue + + tracking.append(f'DIV {unique_id} {id_left} {id_right1} {id_right2} {cost}') + + div_idx += 1 + + print(f'Added {div_idx} division candidates.') + return tracking + + +def save_tracking(tracking, data_path): - def get_vertex_index(self, frame, label): + tracking_path = os.path.join(data_path, 'tracking.txt') + with open(tracking_path, 'w', encoding='utf8') as f: + line_end = repeat("\n") + lines = chain.from_iterable(zip(tracking, line_end)) + f.writelines(lines) - # appearance - if label == 0: - return self.source_idx - assert frame in self.vertices.keys(), f'{frame} {label}' - assert label in self.vertices[frame].keys(), f'{frame} {label}' - return self.vertices[frame][label] +def sol_stats(sol_path, + tra_path, + plot=False): + ''' + Displays candidate graph + + Parameters + ---------- + + sol_path : str + path to 'tracking.sol' file, in libct format + tra_path : str + path to 'tracking.txt' file, in libct format + plot : bool + plot the histograms + + ''' + + assert os.path.isfile(tra_path), tra_path + assert os.path.isfile(sol_path), sol_path + dirname = os.path.dirname(tra_path) - def knn(self, frame_idx, x, y, n=3, limit_dist=100): + # COLORS + + # get vertex map from 'tracking.txt' + vertex_map = {} # vertex_id : (x, y, frame_idx) + + tra = {'H': {}, + 'APP': {}, + 'DISAPP': {}, + 'MOVE': {}, + 'DIV': {}} + + all_tra = {'H': [], + 'APP': [], + 'DISAPP': [], + 'MOVE': [], + 'DIV': []} - # get v_idxs in the frame - # for each compute distance (limit by x, y square - # sort by distance - # take first n + with open(tra_path, 'r') as f: - all_dets = self.vertices[frame_idx].values() + - cands = {} - for v_idx in all_dets: - _, _, x0, y0 = self.vertex_map[v_idx] - - if (abs(x0 - x) < limit_dist) and (abs(y0 - y) < limit_dist): - cands[v_idx] = abs(x0 - x) + abs(y0 - y) + # show vertices + for line in tqdm(f.readlines()): + tokens = line.split(' ') + key = tokens[0] + if key == 'H': + unique_id, cost = tokens[2], tokens[3] + elif key == 'APP': + unique_id, cost = tokens[1], tokens[3] + elif key == 'DISAPP': + unique_id, cost = tokens[1], tokens[3] + elif key == 'MOVE': + unique_id, cost = tokens[1], tokens[4] + elif key == 'DIV': + unique_id, cost = tokens[1], tokens[5] + else: + assert False, key - res = sorted(cands.items(), key=lambda x : x[1])[:3] - return [key for key, _ in res] + tra[key][unique_id] = cost + all_tra[key].append(cost) + + + sol = {'H': [], + 'APP': [], + 'DISAPP': [], + 'MOVE': [], + 'DIV': []} + with open(sol_path, 'r') as f: + + # show vertices + for line in tqdm(f.readlines()): + + key, unique_id = line.rstrip().split(' ') + cost = tra[key][unique_id] + + sol[key].append(cost) + + dir_name = os.path.dirname(sol_path) + res_path = os.path.join(dir_name, 'sol_stats.txt') + with open(res_path, 'w') as f: + + lines = [] + + # show vertices + for key in sol.keys(): + + # tra + vals = np.array(all_tra[key], dtype=float) + line = f'TRA {str(key)} {vals.sum()} {len(vals)} {vals.min()} {vals.mean()} {vals.max()}\n' + + lines.append(line) + + # sol + vals = np.array(sol[key], dtype=float) + if len(vals) > 0: + line = f'SOL {str(key)} {vals.sum()} {len(vals)} {vals.min()} {vals.mean()} {vals.max()}\n' -''' -subset = 'train' -sequence = '01' -dataset = 'BF-C2DL-HSC' -model = "adam_norm_onecycle_allcrops_15" -model = "model2" - -path = Path('embedtrack_results', dataset, model, subset) -assert path.exists(), path - + lines.append(line) + f.writelines(lines) + + if plot: + plt.figure() + + for key in sol.keys(): + + vals = np.array(sol[key], dtype=float) + plt.hist(vals, + histtype='step', + density=True, + bins=100) + plt.legend(sol.keys()) + plt.title('tracking.sol') + plt.xlim((-5, 5)) + plt.ylim((0, 5)) + plt.savefig(os.path.join(dirname, 'sol.png')) + plt.show() + + + plt.figure() + + for key in sol.keys(): + + vals = np.array(all_tra[key], dtype=float) + plt.hist(vals, + histtype='step', + density=True, + bins=100) + plt.legend(sol.keys()) + plt.title('tracking.txt') + plt.xlim((-5, 5)) + plt.ylim((0, 5)) + plt.savefig(os.path.join(dirname, 'tra.png')) + plt.show() -res_path = path / f'{sequence}_DATA' -assert res_path.exists(), res_path -''' + + \ No newline at end of file diff --git a/tracking/graph.py b/tracking/graph.py new file mode 100644 index 0000000000000000000000000000000000000000..8ffc9fc604e070286bffadcf55ee18dd7c924188 --- /dev/null +++ b/tracking/graph.py @@ -0,0 +1,216 @@ +""" +Author: Filip Lux (2023), Masaryk University +Licensed under MIT License +""" + +from skimage import measure +import numpy as np + + +from .my_utils.image import Dataset + +class FlowGraph(): + + def __init__(self, res_path): + + + # unique indexes of source and termination vertices + self.source_idx = 0 # source + self.sink_idx = 1 # target + + # first cell index + self.vertex_index = 2 + self.edge_index = 0 + + self.vertices = {} # vertices[f_idx][label] = v_idx + + + # define res_path + #res_path = Path(path, f'{sequence}_DATA') + assert res_path.exists(), res_path + + # get datset of frames + self.dataset = Dataset(res_path, name_pattern='mask') + + + ''' + ######### + OUTPUTS + ######### + + each vertex and edge has its index + v_idx, e_idx + using this index you can get vertices that it represents + ''' + self.edges = {} # edges[e_idx] = (v_idx, v_idx) + + 'vertex_map maps vetex id to location on a particular frame' + self.vertex_map = {} # maps v_idx to tuple (frame, x, y) + + 'veiws to sets of edges' + self.edge_from_me = {self.source_idx: [], self.sink_idx: []} # edge_from[v_idx] = [e_idx, e_idx, ...] + self.edge_to_me = {self.source_idx: [], self.sink_idx: []} # edge_to[v_idx] = [e_idx, e_idx, ...] + + ''' + 'pd DataFrames to store events' + EDGE_COLUMNS = ['edge_index', 'f0', 'f1', 'x0', 'y0', 'x1', 'y1'] + df_edges = pd.DataFrame(columns=MOVE_COLUMNS) + df_vertices = pd.DataFrame(columns=MOVE_COLUMNS) + ''' + + # edge_df_rows = [] + + self.vertex_prob = {0: 1., 1: 1.} + self.edge_prob = {} + + 'list of coos from the frames' + self.coos = {} + + def _read_frame(self, frame_idx): + + frame = self.dataset.get_frame(frame_idx) + + res = {} + for reg in measure.regionprops(frame): + res[reg.label] = reg.centroid + + return res + + def read_coos(self, frame_idx, label): + + if frame_idx not in self.coos.keys(): + self.coos[frame_idx] = self._read_frame(frame_idx) + + coos = self.coos[frame_idx] + + assert label in coos.keys() + + return coos[label] + + def get_dictionary_from_masks(self): + dataset = Dataset(self.res_path, name_pattern='mask') + + def add_vertex(self, frame_idx, label, prob): + frame_idx, label = int(frame_idx), int(label) + + # get new index + v_idx = self.vertex_index + self.vertex_index += 1 + + # create dict, if necessary + if frame_idx not in self.vertices.keys(): + self.vertices[frame_idx] = {} + + # check + assert label not in self.vertices[frame_idx].keys() + + self.vertices[frame_idx][label] = v_idx + + #if prob < . + + self.vertex_prob[v_idx] = prob + + # frame, label, x, y + # TODO: add coordinates + # - contain dataset + # - load each frame only ones + x, y = self.read_coos(frame_idx, label) #TODO: read from dictonary + + self.vertex_map[v_idx] = (frame_idx, label, x, y) + + # prepare edge containers + self.edge_from_me[v_idx] = [] + self.edge_to_me[v_idx] = [] + + return v_idx + + def add_edge(self, frame1, label1, frame2, label2, prob): + + frame1, label1, frame2, label2 = int(frame1), int(label1), int(frame2), int(label2) + + v1_idx = self.get_vertex_index(frame1, label1) + v2_idx = self.get_vertex_index(frame2, label2) + + e_idx = self.edge_index + self.edge_index += 1 + + self.__add_edge(v1_idx, v2_idx, e_idx, prob) + + return e_idx + + def __add_edge(self, v1_idx, v2_idx, e_idx, prob): + + self.edges[e_idx] = (v1_idx, v2_idx) + self.edge_prob[e_idx] = prob + + self.edge_from_me[v1_idx].append(e_idx) + self.edge_to_me[v2_idx].append(e_idx) + + def add_source_edge(self, frame2, label2): + + frame2, label2 = int(frame2), int(label2) + + v2_idx = self.get_vertex_index(frame2, label2) + + e_idx = self.edge_index + self.edge_index += 1 + + self.__add_edge(self.source_idx, v2_idx, e_idx, 1.) + + return e_idx + + def add_sink_edge(self, frame1, label1): + frame1, label1 = int(frame1), int(label1) + + v1_idx = self.get_vertex_index(frame1, label1) + + e_idx = self.edge_index + self.edge_index += 1 + + self.__add_edge(v1_idx, self.sink_idx, e_idx, 1.) + + return e_idx + + def get_vertex_index(self, frame, label): + + # appearance + if label == 0: + return self.source_idx + + assert frame in self.vertices.keys(), f'{frame} {label}' + assert label in self.vertices[frame].keys(), f'{frame} {label}' + + return self.vertices[frame][label] + + def get_distance(self, frame1, label1, frame2, label2): + f_idx1 = self.get_vertex_index(frame1, label1) + f_idx2 = self.get_vertex_index(frame2, label2) + + _, _, x1, y1 = self.vertex_map[f_idx1] + _, _, x2, y2 = self.vertex_map[f_idx2] + + return np.linalg.norm(np.array([x2 - x1, y2 - y1])) + + def knn(self, frame_idx, x1, y1, n=3, limit_dist=100): + + # get v_idxs in the frame + # for each compute distance (limit by x, y square + # sort by distance + # take first n + + all_dets = self.vertices[frame_idx].values() + + cands = {} + for v_idx in all_dets: + _, _, x2, y2 = self.vertex_map[v_idx] + + # speed optimization + if (abs(x1 - x2) >= limit_dist) or (abs(y1 - y2) >= limit_dist): + continue + + dist = np.linalg.norm(np.array([x2 - x1, y2 - y1])) + if dist < limit_dist: + cands[v_idx] = dist + + res = sorted(cands.items(), key=lambda x : x[1])[:n] + return [key for key, _ in res] diff --git a/tracking/libct b/tracking/libct deleted file mode 160000 index 62417c01bd5d476bd1e38d56035b30f1423262ab..0000000000000000000000000000000000000000 --- a/tracking/libct +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 62417c01bd5d476bd1e38d56035b30f1423262ab diff --git a/tracking/libct.py b/tracking/libct.py new file mode 100644 index 0000000000000000000000000000000000000000..b4a7efb27336c01cf46d97cf9cf673856fe409b7 --- /dev/null +++ b/tracking/libct.py @@ -0,0 +1,320 @@ +""" +Author: Filip Lux (2023), Masaryk University +Licensed under MIT License +""" + +import os +import shutil +from pathlib import Path +from tqdm import tqdm +from skimage import io +import numpy as np + +from .graph import FlowGraph +from .sys_tools import run_procedure + +def run_libct(res_path, + idx_min=0): + + # copy tracking.txt to libct + source_file = Path(res_path, 'tracking.txt') + source_solution = Path(res_path, 'tracking.sol') + assert os.path.isfile(source_file), source_file + + local_input = 'tracking.txt' + local_output = 'tracking.sol' + + # copy to local + shutil.copyfile(source_file, local_input) + + #show_libct(local_input, z_shift=idx_min, description=f'{sequence}_test', time=blender_time) + + # run solution + #!tracking/libct/setup-dev-env + res = run_procedure(f'ct {local_input}') + if res != 0: + return + + + + # copy to results + shutil.copyfile(local_output, source_solution) + + assert os.path.isfile(source_solution) + + print('source file', source_file) + print('source solution', source_solution) + + #show_libct_solution(source_solution, z_shift=idx_min, time=blender_time+1) + + +def parse_vertex_id(vertex_id): + + frame_idx, label = vertex_id[-10:-5], vertex_id[-5:] + return int(frame_idx), int(label) + + +def libct2ctc(data_path, res_path): + ''' + Save labeled images in a format 'maskXXX.tif to the <res_path> + + Parameters: + ----------- + data_path : str + path where raw segmentations are stored + res_path : str + directory containing libct.sol and tracking.txt + also output directory + + + Returns: + -------- + + None + + ''' + + # remove all the files from the res_path directory + for file in os.listdir(res_path): + if 'tracking' not in file: + os.remove(os.path.join(res_path, file)) + + # test if libct.sol exists + sol_path = os.path.join(res_path, 'tracking.sol') + assert os.path.isfile(sol_path), sol_path + + # read sol file + edges_from, start_vertices = parse_sol_file(sol_path) + + tracker = DFSTracker(edges_from, start_vertices) + label_map = tracker.dfs() + + # save ctc res_track.txt file + tracker.save_ctc_tracking(res_path) + + + translate_results(data_path, res_path, label_map) + + return label_map + + +def get_frame_idx(img_name): + res = '' + for d in img_name: + if d.isdigit(): + res = res + d + return int(res) + + +def translate_results(data_path, res_path, label_map): + + print('storing res images') + + # iterate over images in res_path + img_names = [n for n in os.listdir(data_path) if '.tif' in n] + for img_name in tqdm(img_names): + + f_idx = get_frame_idx(img_name) + + # read original labeled image + image_path = os.path.join(data_path, img_name) + assert os.path.isfile(image_path) + + img = io.imread(image_path) + res = np.zeros_like(img, dtype=np.uint16) + + mapping = label_map.get(f_idx, {}) + + for old_label in mapping.keys(): + new_label = mapping[old_label] + + res[img == old_label] = new_label + + + # save result + store_path = os.path.join(res_path, img_name) + io.imsave(store_path, + res, + check_contrast=False, + plugin='tifffile', + compression ='zlib') + + + +class DFSTracker(): + + def __init__(self, + edges_from, + start_vertices): + + self.edges_from = edges_from + self.start_vertices = start_vertices + + + # DFS + self.label_map = {} + # label_map = { f_idx : { old_label : new_label } } + self.start_frames = {} + # start_frames = { label : ( start_frame_idx, mother ) } + self.ctc_tracking = [] # label start_frame, end_frame, mother_idx + + def dfs(self): + new_label = 1 + + # start all tracks from start vertices + for v_id in self.start_vertices: + + f_idx, old_label = parse_vertex_id(v_id) + mother_idx = 0 + + # start new track + self.start_frames[new_label] = (f_idx, mother_idx) + self.update_label_map(f_idx, old_label, new_label) + + # recursive call + new_label = self.dfs_rec(v_id, new_label, f_idx) + + return self.label_map + + def dfs_rec(self, v_id, label, f_idx): + + # move to the next frames + edges = self.edges_from[v_id] + + # stop condition + if (len(edges) == 1) and (edges[0] == 0): + + # add info to the tracking file + self.close_track(label, f_idx) + + # increase the label value + return label + 1 + + # next iteration + if len(edges) == 1: + + v_id_next = edges[0] + frame_next, old_label = parse_vertex_id(v_id_next) + assert frame_next == (f_idx + 1) + + # continue the track + self.update_label_map(frame_next, old_label, label) + return self.dfs_rec(v_id_next, label, frame_next) + + elif len(edges) == 2: + + # close current track + self.close_track(label, f_idx) + + # new label index + new_label = label + 1 + mother_idx = label + + for v_id_next in edges: + + frame_next, old_label = parse_vertex_id(v_id_next) + assert frame_next == (f_idx + 1) + + # start new track + self.start_frames[new_label] = (frame_next, mother_idx) + + # continue new track + self.update_label_map(frame_next, old_label, new_label) + new_label = self.dfs_rec(v_id_next, new_label, frame_next) + + return new_label + else: + assert False, f'improper successors {edges}' + + + + def update_label_map(self, f_idx, old_label, new_label): + + self.label_map[f_idx] = self.label_map.get(f_idx, {}) + self.label_map[f_idx][old_label] = new_label + + + + def close_track(self, label, f_idx): + + start_frame, mother_idx = self.start_frames[label] + tracklet = (str(label), str(start_frame), str(f_idx), str(mother_idx)) + self.ctc_tracking.append(tracklet) + + + def save_ctc_tracking(self, res_path): + + path = os.path.join(res_path, 'res_track.txt') + + lines = [' '.join(list(tr)) + '\n' for tr in self.ctc_tracking] + + + print(f'saving {path}') + with open(path, 'w') as f: + f.writelines(lines) + + + + + + + +def parse_sol_file(sol_path): + + assert os.path.isfile(sol_path), sol_path + + # process tracking result + # create dictionary path_to + edge_from = {} + start_vertices = [] + + # every vertex is indexed as libct ID + # f'{frame_idx:05d}{label:05d}' + + + # iterate over + + with open(sol_path, 'r') as f: + + for line in f.readlines(): + + tokens = line.rstrip().split(' ') + + if tokens[0] == 'H': + v_idx = tokens[1][-10:] + edge_from[v_idx] = [] + + if tokens[0] == 'DISAPP': + v_idx = tokens[1][-10:] + + # diappearing track is marked by 0 + edge_from[v_idx].append(0) + + if tokens[0] == 'APP': + v_idx = tokens[1][-10:] + + # diappearing track is marked by 0 + start_vertices.append(v_idx) + + if tokens[0] == 'MOVE': + v_idx_src = tokens[1][-21:-11] + v_idx_dst = tokens[1][-10:] + + assert len(v_idx_src) == len(v_idx_dst) == 10 + + edge_from[v_idx_src].append(v_idx_dst) + + + if tokens[0] == 'DIV': + v_idx_src = tokens[1][-32:-22] + v_idx_ds1 = tokens[1][-21:-11] + v_idx_ds2 = tokens[1][-10:] + + assert len(v_idx_src) == len(v_idx_ds1) == len(v_idx_ds2) == 10 + + edge_from[v_idx_src].append(v_idx_ds1) + edge_from[v_idx_src].append(v_idx_ds2) + + + return edge_from, start_vertices +