Loading nested/nester-grapher/grapher.py +24 −10 Original line number Diff line number Diff line Loading @@ -3,6 +3,8 @@ import sys from nested.utils import intervals data = {} lengths = {} module_coverage = {} def main(): Loading @@ -13,17 +15,16 @@ def main(): for gff in sys.argv[1:]: with open(gff, 'r') as gff_file: for line in gff_file: if not line or line[0] == '#': if not line.rstrip() or line[0] == '#': if "sequence-region" in line: split_line = line.split(' ') sequence_id = split_line[0][2:] sequence_id = split_line[1] sequence_length = intervals.length([int(split_line[-2]), int(split_line[-1])]) if sequence_id not in data.keys(): data[sequence_id] = {} data[sequence_id]["length"] = sequence_length lengths[sequence_id] = sequence_length continue split_line = line.split('\t') Loading @@ -49,27 +50,40 @@ def main(): else: data[sequence_id][source].append(element_dict) compute_all_coverage() for sequence_id in module_coverage.keys(): print(sequence_id) for module, length in module_coverage[sequence_id].items(): print(module) print(length / lengths[sequence_id]) def compute_all_coverage(): for sequence_id in data.keys(): compute_coverage(data[sequence_id]) module_coverage[sequence_id] = compute_coverage(data[sequence_id]) def compute_coverage(elements): coverage = {} for source in elements.keys(): covered_length = 0 for element in elements[source]: actual_location = element["location"] if source == "nested-nester": for feature in element["features"]: if "TSR LEFT" in feature["ID"] or "TSR RIGHT" in feature["ID"]: actual_location = extend(actual_location, feature["location"]) children_length = get_children_length(elements, actual_location) element["actual_length"] = intervals.length(actual_location) - children_length covered_length += element["actual_length"] covered_length += intervals.length(actual_location) - children_length coverage[source] = covered_length elements[source]["covered_length"] = covered_length return coverage def extend(element_location, tsr_location): Loading nested/output/output_generators/trf_output_generator.py +1 −2 Original line number Diff line number Diff line Loading @@ -24,6 +24,5 @@ class TrfOutputGenerator(BaseOutputGenerator): + ".\t.\t.\t" + "ID=tandem_repeat_" + str(i) + ";monomer_count=" + str(repeat.copies) + ";monomer=" + repeat.monomer + '\n') + ";monomer=" + repeat.monomer) i += 1 Loading
nested/nester-grapher/grapher.py +24 −10 Original line number Diff line number Diff line Loading @@ -3,6 +3,8 @@ import sys from nested.utils import intervals data = {} lengths = {} module_coverage = {} def main(): Loading @@ -13,17 +15,16 @@ def main(): for gff in sys.argv[1:]: with open(gff, 'r') as gff_file: for line in gff_file: if not line or line[0] == '#': if not line.rstrip() or line[0] == '#': if "sequence-region" in line: split_line = line.split(' ') sequence_id = split_line[0][2:] sequence_id = split_line[1] sequence_length = intervals.length([int(split_line[-2]), int(split_line[-1])]) if sequence_id not in data.keys(): data[sequence_id] = {} data[sequence_id]["length"] = sequence_length lengths[sequence_id] = sequence_length continue split_line = line.split('\t') Loading @@ -49,27 +50,40 @@ def main(): else: data[sequence_id][source].append(element_dict) compute_all_coverage() for sequence_id in module_coverage.keys(): print(sequence_id) for module, length in module_coverage[sequence_id].items(): print(module) print(length / lengths[sequence_id]) def compute_all_coverage(): for sequence_id in data.keys(): compute_coverage(data[sequence_id]) module_coverage[sequence_id] = compute_coverage(data[sequence_id]) def compute_coverage(elements): coverage = {} for source in elements.keys(): covered_length = 0 for element in elements[source]: actual_location = element["location"] if source == "nested-nester": for feature in element["features"]: if "TSR LEFT" in feature["ID"] or "TSR RIGHT" in feature["ID"]: actual_location = extend(actual_location, feature["location"]) children_length = get_children_length(elements, actual_location) element["actual_length"] = intervals.length(actual_location) - children_length covered_length += element["actual_length"] covered_length += intervals.length(actual_location) - children_length coverage[source] = covered_length elements[source]["covered_length"] = covered_length return coverage def extend(element_location, tsr_location): Loading
nested/output/output_generators/trf_output_generator.py +1 −2 Original line number Diff line number Diff line Loading @@ -24,6 +24,5 @@ class TrfOutputGenerator(BaseOutputGenerator): + ".\t.\t.\t" + "ID=tandem_repeat_" + str(i) + ";monomer_count=" + str(repeat.copies) + ";monomer=" + repeat.monomer + '\n') + ";monomer=" + repeat.monomer) i += 1