#!/usr/bin/perl -w
#
# A perl script to enrich the GFF3 annotation. After rmsk2bed | bed2gff3 conversion
# the repeat class remains in the *.out file. This script reads the GFF3 file and
# the *.out file in parallel, one record at a time, and replaces the
# "Name=<value>" attribute of every GFF3 record with "annot=<repeat class>",
# where the repeat class is column 11 of the matching *.out row.
# The enriched records are printed to STDOUT.
#
# How the inputs are consumed (see the code below):
#   - the first line of the GFF3 file is treated as a header and is not printed;
#   - the first three lines of the *.out file are treated as headers;
#   - after the headers, line N of the GFF3 file is paired with line N of the
#     *.out file. GFF3 records whose *.out row has fewer than 11 columns are
#     skipped (not printed).
#
# Exit status: 0 on success, non-zero (via die) on usage or open errors.
#
# Usage: perl enrich_rmsk_gff3_annotation.pl RM_output/Athaliana_167_TAIR10.fa.out.gff3 RM_output/Athaliana_167_TAIR10.fa.out
#

use strict;
use warnings;

# Return the repeat class (whitespace-separated column 11) of one *.out row,
# or undef (empty list in list context) when the row has fewer than 11 columns.
sub repeat_class_of {
    my ($out_line) = @_;

    # split ' ' ignores leading whitespace and collapses runs of blanks, so
    # rows are handled whether or not they begin with padding.
    my @fields = split ' ', $out_line;

    return unless @fields >= 11;
    return $fields[10];
}

die "Usage: perl $0 <file.out.gff3> <file.out>\n" unless @ARGV >= 2;
my ($gff_path, $out_path) = @ARGV;

# Three-argument open with lexical handles: the file name is never interpreted
# as a mode or a pipe, and the OS error is reported.
open(my $gff_fh, '<', $gff_path) or die "Can't open GFF '$gff_path': $!. Exiting\n";
open(my $out_fh, '<', $out_path) or die "Can't open OUT '$out_path': $!. Exiting\n";

# Read headers: one line in the GFF3 file, three lines in the *.out file.
my $gff_header = <$gff_fh>;
for (1 .. 3) {
    my $out_header = <$out_fh>;
}

# Read the lines from the two files
while (my $gff_line = <$gff_fh>) {
    # chomp (not chop) so that a final line without a newline keeps its last
    # character.
    chomp $gff_line;

    my $out_line = <$out_fh>;
    if (!defined $out_line) {
        # The two files are expected to be line-parallel; without a partner row
        # there is no repeat class to add, so stop here.
        warn "OUT '$out_path' has fewer records than GFF '$gff_path'; stopping at GFF line $.\n";
        last;
    }
    chomp $out_line;

    # extract the repeat class from *.out (column 11)
    my $repclass = repeat_class_of($out_line);
    next unless defined $repclass;

    # add repeat class to the GFF file and print it out
    # NOTE(review): the pattern consumes everything up to the next blank/tab,
    # so attributes following Name= in the same column are replaced as well --
    # confirm this is intended for the bed2gff3 output.
    $gff_line =~ s/Name=[^ \t]+/annot=$repclass/;
    print "$gff_line\n";
}

close($out_fh);
close($gff_fh);

# Successful completion must report status 0 to the calling shell/pipeline.
exit 0;