Commit 9aa6e981 authored by Ivan Vanat's avatar Ivan Vanat
Browse files

grapher: change in data structure, fixed coverage computation

parent b471cb51
Loading
Loading
Loading
Loading
+24 −10
Original line number Diff line number Diff line
@@ -3,6 +3,8 @@ import sys
from nested.utils import intervals

data = {}
lengths = {}
module_coverage = {}


def main():
@@ -13,17 +15,16 @@ def main():
    for gff in sys.argv[1:]:
        with open(gff, 'r') as gff_file:
            for line in gff_file:
                if not line or line[0] == '#':
                if not line.rstrip() or line[0] == '#':
                    if "sequence-region" in line:
                        split_line = line.split(' ')
                        sequence_id = split_line[0][2:]
                        sequence_id = split_line[1]
                        sequence_length = intervals.length([int(split_line[-2]), int(split_line[-1])])

                        if sequence_id not in data.keys():
                            data[sequence_id] = {}

                        data[sequence_id]["length"] = sequence_length

                        lengths[sequence_id] = sequence_length
                    continue

                split_line = line.split('\t')
@@ -49,27 +50,40 @@ def main():
                else:
                    data[sequence_id][source].append(element_dict)

    compute_all_coverage()

    for sequence_id in module_coverage.keys():
        print(sequence_id)
        for module, length in module_coverage[sequence_id].items():
            print(module)
            print(length / lengths[sequence_id])


def compute_all_coverage():
    for sequence_id in data.keys():
        compute_coverage(data[sequence_id])
        module_coverage[sequence_id] = compute_coverage(data[sequence_id])


def compute_coverage(elements):
    coverage = {}
    for source in elements.keys():
        covered_length = 0
        for element in elements[source]:
            actual_location = element["location"]

            if source == "nested-nester":
                for feature in element["features"]:
                    if "TSR LEFT" in feature["ID"] or "TSR RIGHT" in feature["ID"]:
                        actual_location = extend(actual_location, feature["location"])

            children_length = get_children_length(elements, actual_location)
            element["actual_length"] = intervals.length(actual_location) - children_length

            covered_length += element["actual_length"]

            covered_length += intervals.length(actual_location) - children_length
        coverage[source] = covered_length

        elements[source]["covered_length"] = covered_length
    return coverage


def extend(element_location, tsr_location):
+1 −2
Original line number Diff line number Diff line
@@ -24,6 +24,5 @@ class TrfOutputGenerator(BaseOutputGenerator):
                          + ".\t.\t.\t"
                          + "ID=tandem_repeat_" + str(i)
                          + ";monomer_count=" + str(repeat.copies)
                          + ";monomer=" + repeat.monomer
                          + '\n')
                          + ";monomer=" + repeat.monomer)
                i += 1