Commit b45a6edc authored by Matej Lexa's avatar Matej Lexa
Browse files

A script to import RepeatMasker output

parent 3a12d251
Loading
Loading
Loading
Loading
+43 −0
Original line number Diff line number Diff line
#!/usr/bin/perl -w

#
# Enrich a GFF3 annotation derived from RepeatMasker output.
#
# After the rmsk2bed | bed2gff3 conversion the repeat class is only present in
# the RepeatMasker *.out file. This script reads the GFF3 file and the *.out
# file in lockstep (record N of one file is paired with record N of the other),
# takes the repeat class from column 11 of the *.out record and replaces the
# "Name=..." attribute of the GFF3 record with "annot=<repeat class>".
#
# Output: the modified GFF3 records on STDOUT. The first (header) line of the
# GFF3 file is not copied to the output. A GFF3 record whose paired *.out
# record has no recognisable column 11 is skipped.
#
# Usage: perl enrich_rmsk_gff3_annotation.pl RM_output/Athaliana_167_TAIR10.fa.out.gff3 RM_output/Athaliana_167_TAIR10.fa.out
#
# Either file name may be "-" to read that input from STDIN.
#
# Exit status: 0 on success, non-zero (via die) when arguments are missing or
# an input file cannot be opened.
#

use strict;
use warnings;

# Open an input file for reading and return its handle.
#   $path  - file name, or "-" for STDIN
#   $label - short tag used in the error message ("GFF" or "OUT")
# Dies with the file name and the system error if the file cannot be opened.
sub open_input {
    my ($path, $label) = @_;

    # Keep "-" working as STDIN, as it did with the former two-argument open.
    return \*STDIN if $path eq '-';

    open(my $fh, '<', $path)
        or die "Can't open $label '$path': $!. Exiting\n";
    return $fh;
}

die "Usage: perl $0 <annotation.gff3> <repeatmasker.out>\n" if @ARGV < 2;

my ($gff_path, $out_path) = @ARGV;

my $gff_fh = open_input($gff_path, 'GFF');
my $out_fh = open_input($out_path, 'OUT');

# Skip the headers: one line in the GFF3 file, three lines in the *.out file.
scalar <$gff_fh>;
scalar <$out_fh> for 1 .. 3;

# Walk both files in parallel, one record from each per iteration.
while (my $line_gff = <$gff_fh>) {
    # chomp (not chop) so a final line without a newline keeps its last char.
    chomp $line_gff;

    my $line_out = <$out_fh>;
    if (!defined $line_out) {
        # The *.out file ran out first: there is nothing left to pair with.
        warn "OUT has fewer records than GFF; stopping at GFF line $.\n";
        last;
    }
    chomp $line_out;

    # Extract the repeat class from *.out (column 11): optional left padding,
    # ten space-separated fields, then the captured field followed by a space.
    # Capturing in list context leaves $repclass undefined when the line does
    # not match, instead of relying on whatever $1 currently holds.
    my ($repclass) = $line_out =~ m/^ *(?:[^ ]+ +){10}([^ ]+) /;
    next unless defined $repclass;

    # Replace the Name attribute with the repeat class and emit the record.
    # A record without "Name=" is printed unchanged.
    $line_gff =~ s/Name=[^ \t]+/annot=$repclass/;
    print "$line_gff\n";
}

close($out_fh);
close($gff_fh);

# Report success to the calling shell.
exit 0;