clean repository

0cea7fe6 · Filip Lux · c7ace223 · c7ace223 · c7ace223 · 0cea7fe6
Commit 0cea7fe6 authored 1 year ago by Filip Lux
--- a/DATA/challenge
+++ b/DATA/challenge
-../EmbedTrack/ctc_raw_data/challenge
\ No newline at end of file
--- a/DATA/train
+++ b/DATA/train
-../EmbedTrack/ctc_raw_data/train
\ No newline at end of file
--- a/global_tracking.ipynb
+++ b/global_tracking.ipynb
--- a/tracking/blender/blender_tools.py
+++ b/tracking/blender/blender_tools.py
--- a/tracking/blender/buckets_with_graphics_pb2.py
+++ b/tracking/blender/buckets_with_graphics_pb2.py
--- a/tracking/blender/buckets_with_graphics_pb2_grpc.py
+++ b/tracking/blender/buckets_with_graphics_pb2_grpc.py
--- a/tracking/display_graph.ipynb
+++ b/tracking/display_graph.ipynb
--- a/tracking/divergence_tools.py
+++ b/tracking/divergence_tools.py
@@ -8,13 +8,15 @@ import numpy as np
 import pandas as pd
 from tqdm import tqdm

-def compute_divergence(path, plot=False):
+import tifffile as tiff
+
+from skimage import io
+import os
    
-    '''
-    offset_path = path / 'wavefunc_offset.csv'
-    seg_path = path / 'wavefunc_seg.csv'
-    vertex_prob_path =  path / 'vertex_prob.csv'
-    '''
+def compute_divergence(path,
+                       plot=False,          # TO REMOVE
+                       divergence_thr=50,
+                      ):
    
    offset_path = path / 'tra_offsets.csv'
    seg_path = path / 'seg_offsets.csv'
@@ -25,6 +27,12 @@ def compute_divergence(path, plot=False):
    assert seg_path.exists()
    assert vertex_prob_path.exists(), vertex_prob_path
    
+    divergence_path = path / "edge_prob_divergence.csv"
+    
+    if divergence_path.exists():
+        print(divergence_path, 'already exists')
+        return 
+    

    df_off = pd.read_csv(offset_path)
    df_seg = pd.read_csv(seg_path)
@@ -35,58 +43,60 @@ def compute_divergence(path, plot=False):
    times = np.unique(df_off.time)
    times[::-1].sort()

-    for time_offset in tqdm(times):
+    for time_curr in tqdm(times):

        # correction of the offset time
-        time = time_offset - 1
+        time_prev = time_curr - 1

        # get slices of time 'time'
-        seg_prev = df_seg[df_seg.time == time - 1]
-        off_curr = df_off[df_off.time == time_offset]
-
-        seg_curr = df_seg[df_seg.time == time]
-
+        seg_prev = df_seg[df_seg.time == time_prev]
+        off_curr = df_off[df_off.time == time_curr]
+        
+        # validation of the input file
+        # TODO: remove
+        seg_curr = df_seg[df_seg.time == time_curr]
        assert len(off_curr) == len(seg_curr)

-        if plot:
-            plot_offsets(seg, off)
+        samples += compute_kl_multivariate(seg_prev,
+                                           off_curr,
+                                           time_curr,
+                                           divergence_thr=divergence_thr)

-        samples += compute_kl_multivariate(seg_prev, off_curr, time)
-
-
-    edge_prob_df = pd.DataFrame(samples, columns=['time_curr', 'time_prev', 'label_curr', 'label_prev', 'divergence'])
+    edge_prob_df = pd.DataFrame(samples, columns=['time_curr',
+                                                  'time_prev',
+                                                  'label_curr',
+                                                  'label_prev',
+                                                  'divergence'])
    edge_prob_df['cost'] = np.log(edge_prob_df.divergence)

-    # store edge and vertex probs as .csv files
-    divergence_path = path / "edge_prob_divergence.csv"
-    
+    # store the edge divergences (with their log cost) as a .csv file
    print(f'storing {divergence_path}')
    edge_prob_df.to_csv(
        divergence_path,
        index=False,
    )
    
-def compute_divergence_v2(path, plot=False):
+    
+def compute_distance(path, distance_thr=100):
    
    offset_path = path / 'tra_offsets.csv'
    seg_path = path / 'seg_offsets.csv'
-    vertex_prob_path =  path / 'vertex_prob.csv'
-
+    
+    print('offset_path', offset_path)
+    print('seg_path', seg_path)

    assert offset_path.exists(), offset_path
-    assert seg_path.exists()
-    assert vertex_prob_path.exists(), vertex_prob_path
+    assert seg_path.exists(), seg_path
    
-    divergence_path = path / "edge_prob_divergence.csv"
+    distance_path = path / "edge_prob_distance.csv"
    
-    if divergence_path.exists():
-        print(divergence_path, 'already exists')
+    if distance_path.exists():
+        print(distance_path, 'already exists')
        return 
    

    df_off = pd.read_csv(offset_path)
    df_seg = pd.read_csv(seg_path)
-    df_vprob = pd.read_csv(vertex_prob_path)

    samples = []

@@ -101,28 +111,279 @@ def compute_divergence_v2(path, plot=False):
        # get slices of time 'time'
        seg_prev = df_seg[df_seg.time == time_prev]
        off_curr = df_off[df_off.time == time_curr]
-
-        seg_curr = df_seg[df_seg.time == time_curr]
        
+        # validation of the input file
+        # TODO: remove
+        seg_curr = df_seg[df_seg.time == time_curr]
        assert len(off_curr) == len(seg_curr)

-        if plot:
-            plot_offsets(seg, off)
+        samples += compute_distance_samples(seg_prev,
+                                           off_curr,
+                                           time_curr,
+                                           distance_thr=distance_thr)

-        samples += compute_kl_multivariate(seg_prev, off_curr, time_curr)
+    edge_prob_df = pd.DataFrame(samples, columns=['time_curr',
+                                                  'time_prev',
+                                                  'label_curr',
+                                                  'label_prev',
+                                                  'distance'])

+    # store the edge distances as a .csv file
+    print(f'storing {distance_path}')
+    edge_prob_df.to_csv(
+        distance_path,
+        index=False,
+    )
+    
+    
+def compute_distance2(path, distance_thr=100):
+    
+    offset_path = path / 'tra_offsets.csv'
+    seg_path = path / 'seg_offsets.csv'

-    edge_prob_df = pd.DataFrame(samples, columns=['time_curr', 'time_prev', 'label_curr', 'label_prev', 'divergence'])
-    edge_prob_df['cost'] = np.log(edge_prob_df.divergence)
+    assert offset_path.exists(), offset_path
+    assert seg_path.exists(), seg_path
+    
+    distance_path = path / "edge_prob_distance2.csv"
+    
+    if distance_path.exists():
+        print(distance_path, 'already exists')
+        return 
+    
+
+    df_off = pd.read_csv(offset_path)
+    df_seg = pd.read_csv(seg_path)
+
+    samples = []
+
+    times = np.unique(df_off.time)
+    times[::-1].sort()
+
+    for time_curr in tqdm(times):
+
+        # correction of the offset time
+        time_prev = time_curr - 1
+
+        # get slices of time 'time'
+        seg_prev = df_seg[df_seg.time == time_prev]
+        off_curr = df_off[df_off.time == time_curr]
+        
+        # read labeled images
+        li_curr = io.imread(path / f'mask{time_curr:04d}.tif', dtype=np.uint16)
+        li_prev = io.imread(path / f'mask{time_prev:04d}.tif', dtype=np.uint16)
+
+        samples += compute_distance2_samples(seg_prev,
+                                           off_curr,
+                                           time_curr,
+                                           distance_thr=distance_thr,
+                                           li_curr=li_curr,
+                                           li_prev=li_prev)
+
+    edge_prob_df = pd.DataFrame(samples, columns=['time_curr',
+                                                  'time_prev',
+                                                  'label_curr',
+                                                  'label_prev',
+                                                  'distance',
+                                                  'distance_markers'])

    # store edge and vertex probs as .csv files   
-    print(f'storing {divergence_path}')
+    print(f'storing {distance_path}')
    edge_prob_df.to_csv(
-        divergence_path,
+        distance_path,
        index=False,
    )
    
    
+def analyze_csv(data_path, csv_name, gt_path):
+    
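+    '''
+    csv contains the following columns: label_prev, label_curr, time_prev, time_curr
+    the function adds the column:
+        tag :  {MOVE, DIVISION} for edges matching a ground-truth edge (NONVALID intended otherwise)
+        (v_prev_prob / v_curr_prob are not added yet; vertex_prob.csv is only loaded)
+        
+    params
+    data_path : string
+        path to the directory with results: csv_name, seg_offsets.csv, vertex_prob.csv, mask*.tif
+    csv_name : string
+        name of the csv file to analyze, located in data_path
+    gt_path : string
+        path to the ground truth; TRA/man_track.txt and TRA/man_track*.tif are read from it
+    '''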
+    
+    
+
+    assert os.path.isdir(gt_path), gt_path
+    assert os.path.isdir(data_path), data_path
+
+    
+    # get additional information about the vertices
+    csv_path = os.path.join(data_path, csv_name)
+    seg_path = os.path.join(data_path, 'seg_offsets.csv')
+    prob_path = os.path.join(data_path, 'vertex_prob.csv')
+    
+    assert os.path.isfile(csv_path), csv_path
+    assert os.path.isfile(seg_path), seg_path
+    assert os.path.isfile(prob_path), prob_path
+    
+    # get resources
+    seg_df = pd.read_csv(seg_path, index_col=['time', 'label'])
+    prob_df = pd.read_csv(prob_path, index_col=['time', 'label'])
+    df = pd.read_csv(csv_path)
+    mothers = read_mothers(os.path.join(gt_path, 'TRA', 'man_track.txt'))
+    print('mothers', mothers)
+    
+    # false negatives - at least one detection is missing
+    # columns = (time_curr, time_prev, label_gt_curr, label_gt_prev)
+    false_negatives = []
+    
+    # gt_edges - all gt edges, where both detections exist in res
+    # columns = (time_curr, time_prev, label_gt_curr, label_gt_prev, tag)
+    gt_edges = []
+    
+    print('np.unique(df.time_curr)', np.unique(df.time_curr))
+    
+    # for every time_curr
+    for time_curr in tqdm(np.unique(df.time_curr)):
+        
+        view = df[df.time_curr == time_curr]
+        time_prev = time_curr - 1
+        
+        # read gt
+        gt_curr_path = os.path.join(gt_path, 'TRA', f'man_track{time_curr:04d}.tif')
+        gt_prev_path = os.path.join(gt_path, 'TRA', f'man_track{time_prev:04d}.tif')
+        
+        
+        
+        gt_curr = tiff.imread(gt_curr_path)
+        gt_prev = tiff.imread(gt_prev_path)
+        
+        # read res
+        res_curr_path = os.path.join(data_path, f'mask{time_curr:04d}.tif')
+        res_prev_path = os.path.join(data_path, f'mask{time_prev:04d}.tif')
+        
+        res_curr = tiff.imread(res_curr_path)
+        res_prev = tiff.imread(res_prev_path)
+        
+        labels_gt_prev = np.unique(gt_prev)
+        
+        '''
+        
+        # mapping = { gt_label : res_label }
+        mapping_curr = {label_gt : get_result_label(res_curr, gt_curr == label_gt )
+                        for label_gt in np.unique(gt_curr) if label_gt != 0}
+        mapping_prev = {label_gt : get_result_label(res_prev, gt_prev == label_gt )
+                        for label_gt in np.unique(gt_prev) if label_gt != 0}
+        
+        
+        
+        '''
+        if (time_curr % 100) == 0:
+            print(time_curr, 'np.unique(gt_curr)', np.unique(gt_curr), gt_curr.dtype)
+        
+        # iterate over all labels in gt_curr
+        for label_gt_curr in np.unique(gt_curr):
+            
+            if label_gt_curr == 0:
+                continue
+                
+            # 1. MOVE
+            if label_gt_curr in labels_gt_prev:
+                label_gt_prev = label_gt_curr
+                tag = 'MOVE'
+            else:
+                label_gt_prev = mothers[label_gt_curr]
+                tag = 'DIVISION'
+                
+            mask_gt_curr = gt_curr == label_gt_curr
+            mask_gt_prev = gt_prev == label_gt_prev
+
+            label_curr = get_result_label(res_curr, mask_gt_curr) 
+            label_prev = get_result_label(res_prev, mask_gt_prev) 
+            
+            
+            if (time_curr % 100) == 0:
+                print(time_curr, 'label_gt_curr', label_gt_curr, label_curr, label_prev)
+            
+            # missing detections
+            if (label_curr == 0) or (label_prev == 0):
+                
+                # report in false negatives
+                # TODO: debug
+                # reports weird values
+                false_negatives.append((time_curr, time_prev, label_gt_curr, label_gt_prev))
+                
+            else:
+                
+                # gt_edges
+                gt_edges.append((time_curr, time_prev, label_curr, label_prev, tag))
+            
+            
+    # merge gt_edges with df and save
+    # TODO: add tags
+    df_gt_edges = pd.DataFrame(gt_edges, columns = ('time_curr', 'time_prev', 'label_curr', 'label_prev', 'tag'))
+    
+    df_gt_edges.set_index(['time_curr', 'time_prev', 'label_curr', 'label_prev'], inplace=True)
+    df.set_index(['time_curr', 'time_prev', 'label_curr', 'label_prev'], inplace=True)
+    
+    result = df.join(df_gt_edges, how='outer')
+    result['tag'] = result['tag'].replace('NaN','NONVALID')
+    
+    analysis_path = csv_path[:-4] + '_analysis.csv'
+    print(f'storing {analysis_path}')
+    result.to_csv(
+        analysis_path,
+    )
+    
+    # save false_negatives
+    df_fn = pd.DataFrame(false_negatives, columns = ('time_curr', 'time_prev', 'label_gt_curr', 'label_gt_prev'))
+    
+    fn_path = os.path.join(data_path, 'false_negatives.csv')
+    print(f'storing {fn_path}')
+    df_fn.to_csv(
+        fn_path,
+    )
+            
+
+            
+            
+def get_result_label(res, gt_mask):
+    '''
+    returns the nonzero label of res that covers at least half (size // 2 pixels) of gt_mask, or 0 if there is none
+    '''
+    size = gt_mask.sum()
+    labels, counts = np.unique(res * gt_mask, return_counts=True)
+    
+    #print(counts, labels, size)
+
+    
+    for count, label in zip(counts, labels):
+        if (label != 0) and (count >= (size//2)):
+            return label
+        
+    return 0
+            
+        
+        
+        
+def read_mothers(man_track_path):
+    '''
+    returns mothers = {daughter_idx : mother_idx}, read from the first and the fourth column of man_track.txt
+    '''
+    
+    os.path.isfile(man_track_path), man_track_path
+    
+    mothers = {}
+    
+    with open(man_track_path, 'r') as f:
+        
+        for line in f.readlines():
+            daughter_idx, _, _, mother_idx = line.strip().split(' ')
+            mothers[int(daughter_idx)] = int(mother_idx)
+            
+    return mothers
+    
+    
 def kl_divergence(mu1, mu2, sigma1, sigma2):
    return np.log1p(sigma2/sigma1) + (sigma1**2 + (mu1 - mu2) ** 2) / (2 * sigma2 ** 2) - .5

@@ -145,7 +406,7 @@ def print_kl(seg, off):
    return pd.DataFrame(samples, columns=['label from', 'label to', 'divergence'])


-def compute_kl_multivariate(seg, off, time, divergence_thr=50, distance_thr=100):
+def compute_kl_multivariate(seg, off, time, divergence_thr=50):
    
    samples = []
    for i, line in enumerate(seg.values):
@@ -164,16 +425,126 @@ def compute_kl_multivariate(seg, off, time, divergence_thr=50, distance_thr=100)
            S1 = np.array([[c00, c01], [c10, c11]])
            m1 = np.array([mx, my])
            
-            if euklidian_distance(m0, m1) > distance_thr:
-                continue
+            div = kl_mvn(m0, S0, m1, S1)
+            
+            if div < divergence_thr:
+                samples.append([time, time-1, int(label_off_curr), int(label_seg_prev), div])
+            
+    return samples
+
+
+def compute_distance_samples(seg, off, time, distance_thr=100):
+    
+    samples = []
+    for i, line in enumerate(seg.values):
+        
+        # time prev
+        _, label_seg_prev, mx, my, sx, sy, c00, c01, c10, c11 = line
+        
+        S0 = np.array([[c00, c01], [c10, c11]])
+        m0 = np.array([mx, my])
+        
+        for j, line in enumerate(off.values):
+            
+            # time curr
+            _, label_off_curr, mx, my, sx, sy, c00, c01, c10, c11 = line
+            
+            S1 = np.array([[c00, c01], [c10, c11]])
+            m1 = np.array([mx, my])
+            
+            dist = euklidian_distance(m0, m1)
+            
+            if dist < distance_thr:
+                samples.append([time, time-1, int(label_off_curr), int(label_seg_prev), dist])
+            
+    return samples
+
+
+def compute_distance2_samples(seg, off, time, distance_thr=100, li_curr=None, li_prev=None):
+    
+    assert li_curr is not None
+    assert li_prev is not None
+    
+    off_coo_dict = {}
+    for j, line in enumerate(off.values):
+
+        # time curr
+        _, label_off_curr, mx, my, sx, sy, c00, c01, c10, c11 = line
+        mask_curr = (li_curr == label_off_curr)
+        coo_curr = np.array([coo.mean() for coo in mask_curr.nonzero()])
+        
+        S1 = np.array([[c00, c01], [c10, c11]])
+        m1 = np.array([mx, my])
+        
+        off_coo_dict[label_off_curr] = coo_curr, S1, m1
+    
+    samples = []
+    for i, line in enumerate(seg.values):
+        
+        # time prev
+        _, label_seg_prev, mx, my, sx, sy, c00, c01, c10, c11 = line
+        
+        mask_prev = (li_prev == label_seg_prev)
+        coo_prev = np.array([coo.mean() for coo in mask_prev.nonzero()])
+
+        
+        S0 = np.array([[c00, c01], [c10, c11]])
+        m0 = np.array([mx, my])
+        
+        
+        for label_off_curr in off_coo_dict.keys():
+            coo_curr, S1, m1 = off_coo_dict[label_off_curr]
+
+            dist = euklidian_distance(m0, m1)
+            dist_masks = euklidian_distance(coo_prev, coo_curr)
+            
+            if dist < distance_thr:
+                samples.append([time, time-1, int(label_off_curr), int(label_seg_prev), dist, dist_masks])
+            
+    return samples
+
+
+
+def compute_distance2_samples_fast(seg, off, time, distance_thr=100, li_curr=None, li_prev=None):
+    
+    assert li_curr is not None
+    assert li_prev is not None
+    
+    
+    labels_curr = np.unique(li_curr)
+    labels_prev = np.unique(li_prev)
+    
+    samples = []
+    
+    
+    dict_coo_prev = {}
+    for l_prev in labels_prev:
+        if l_prev == 0:
+            continue
+
+        mask_prev = (li_prev == l_prev)
+        coo_prev = np.array([coo.mean() for coo in mask_prev.nonzero()])
+        
+        dict_coo_prev[l_prev] = coo_prev
+        
+        
+
+    for l_curr in labels_curr:
+        
+        if l_curr == 0:
+            continue
+            
+        mask_curr = (li_curr == l_curr)
+        coo_curr = np.array([coo.mean() for coo in mask_curr.nonzero()])
+        
+        for l_prev in dict_coo_prev.keys():
+            coo_prev = dict_coo_prev[l_prev]
            
-            kl_divergence = kl_mvn(m0, S0, m1, S1)
+            dist_masks = euklidian_distance(coo_prev, coo_curr)
            
-            if kl_divergence < divergence_thr:
-                # columns=['time_curr', 'time_prev', 'label_curr', 'label_prev', 'divergence']
-                samples.append([time, time-1, int(label_off_curr), int(label_seg_prev), kl_divergence])
+            if dist_masks < distance_thr:
+                samples.append([time, time-1, int(l_curr), int(l_prev), 0, dist_masks])
            
-    #return pd.DataFrame(samples, columns=['time_curr', 'time_prev', 'label_curr', 'label_prev', 'divergence'])
    return samples



--- a/tracking/embedtrack.py
+++ b/tracking/embedtrack.py
@@ -2,8 +2,8 @@ from .sys_tools import run_procedure


 # TODO: communicate with an embedtrack  a config file
-def run_embedtrack(data_path, seq, model_path):
+def run_embedtrack(data_path, seq, model_path, batch_size=1):

-    prompt = f'python3 EmbedTrack/embedtrack.py --sequence {seq} --data_path {data_path} --model_path {model_path}'
+    prompt = f'python3 EmbedTrack/embedtrack.py --sequence {seq} --data_path {data_path} --model_path {model_path} --batch_size {batch_size}'
    run_procedure(prompt)

--- a/tracking/global_tracker.py
+++ b/tracking/global_tracker.py
--- a/tracking/sys_tools.py
+++ b/tracking/sys_tools.py
@@ -3,8 +3,9 @@ import sys

 def run_procedure(args):

-    print(f'running procedure: ${args}')
+    
    status_output = subprocess.call(args.split(' '))
-    res = 'OK:' if status_output == 0 else 'ERROR:'
+    if status_output != 0:
+        print(f'ERROR: procedure ${args} failed with an output {status_output}')

    return status_output