Commit 3fedaecf authored by Ivan Vanat
Browse files

merging trfs with overlap

parent 18588449
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -67,6 +67,23 @@ class TandemRepeatFinder(ToolInterface):
        result = []
        candidates.sort(key=lambda repeat: repeat.location[0])

        for candidate in candidates:
            if not result:
                result.append(candidate)
            else:
                last = result[-1]
                if intervals.intersect(last.location, candidate.location):
                    right = max(last.location[1], candidate.location[1])
                    last.location[1] = right
                else:
                    result.append(candidate)

        return result

    def _filter_candidates_assign_overlap_strategy(self, candidates: List[TandemRepeat]) -> List[TandemRepeat]:
        result = []
        candidates.sort(key=lambda repeat: repeat.location[0])

        for candidate in candidates:
            if not result:
                result.append(candidate)
+6 −6
Original line number Diff line number Diff line
@@ -13,8 +13,7 @@ class TrfOutputGenerator(OutputGeneratorInterface):
    def generate_output(self, output_object: TrfOutputObject) -> NoReturn:
        self.create_data_directory(output_object.sequence_id)

        with open(f"{self.directory}/{output_object.sequence_id}/{output_object.sequence_id}_trf.gff", "w+")\
                as gff:
        with open(f"{self.directory}/{output_object.sequence_id}/{output_object.sequence_id}_trf.gff", "w+") as gff:
            i = 0
            gff.write("##gff-version 3\n")
            for repeat in output_object.elements:
@@ -23,9 +22,10 @@ class TrfOutputGenerator(OutputGeneratorInterface):
                          + str(repeat.location[0]) + "\t"
                          + str(repeat.location[1]) + "\t"
                          + ".\t.\t.\t"
                          + "ID=tandem_repeat_" + str(i)
                          + ";monomer_count=" + str(repeat.copies)
                          + ";monomer=" + repeat.monomer
                          + ";color=" + config["igv_colors"]["trf"][repeat.type]
                          # + "ID=tandem_repeat_" + str(i)
                          # + ";monomer_count=" + str(repeat.copies)
                          # + ";monomer=" + repeat.monomer
                          # + ";color=" + config["igv_colors"]["trf"][repeat.type]
                          + "."
                          + "\n")
                i += 1