Loading Makefile 0 → 100644 +14 −0 Original line number Original line Diff line number Diff line alignments = set1.fasta set2.fasta set3.fasta consensi = set1.fasta_cons.fa set2.fasta_cons.fa set3.fasta_cons.fa %.fasta: %.fa clustalw -OUTPUT=FASTA -INFILE=$< rm *.dnd %.fasta_cons.fa: %.fasta cons -sequence $< -outseq $<_cons.fa all: $(alignments) $(consensi) clean: rm *.fasta *_cons.fa Snakefile 0 → 100644 +16 −0 Original line number Original line Diff line number Diff line rule align: input: "{name}.fa" output: "{name}.fasta" run: shell("clustalw -OUTPUT=FASTA -INFILE={input}") shell("rm {wildcards.name}.dnd") rule consensi: input: "{name}.fasta" output: "{name}.fasta_cons.fa" shell: "em_cons -sequence {input} -outseq {output}" main.nf 0 → 100644 +35 −0 Original line number Original line Diff line number Diff line #!/usr/bin/env nextflow input = Channel.fromPath('*.fa') process ALIGN { publishDir "/home/lexa/pipelines" input: file x from input output: file '*.fasta' into alignment script: """ clustalw -OUTPUT=FASTA -INFILE=${x} rm *.dnd """ } process GET_CONSENSUS { publishDir "/home/lexa/pipelines" input: file a from alignment output: file '*_cons.fa' """ em_cons -sequence ${a} -outseq ${a}_cons.fa """ } make_seqlogo.R 0 → 100755 +25 −0 Original line number Original line Diff line number Diff line #!/usr/bin/env Rscript args = commandArgs(trailingOnly=TRUE) filename <- args[1] filebase <- strsplit(filename,split="\\.")[[1]][1] library(seqLogo) library(Biostrings) dna <- readDNAStringSet(filename) cm <-consensusMatrix(dna)[1:4,] sumcm <- apply(cm,2,sum) cm[1,] <- cm[1,]/sumcm cm[2,] <- cm[2,]/sumcm cm[3,] <- cm[3,]/sumcm cm[4,] <- cm[4,]/sumcm if(length(cm[1,]) > 16) { cm <- cm[,1:16] } pwmcm <- makePWM(cm) png(file=paste0(filebase,".png"), width=480, height=240) seqLogo(pwmcm) dev.off() set1.fa 0 → 100644 +10 −0 Original line number Original line Diff line number Diff line >Acom_chr1 
CTGGATAAGGGTTTTGTGCGACCGAGTGTTTCAACTTGGGAAGCACCGGTCCTATTTGTCAAGAAGAAGGATGGATCGCTTCGCTTGCGCGTGGATTATTGTGAACTCAATAAAGTCACGATCAAGAATAAGTATCCTTTACCGAGGATTGATGATTTATTTGATCAGCTCCAGGGATCTAGGGTGTATTCTAAGATCGACTTGCAGTCGGGATACCACCAGTTGAAGATCAAGCTTGAGGATGTGTCGAAAACGGCTTTTAGAACACAGTATGGACATTATGAGTTTACAGTTATGCCGTTTGGGCTTACTAACGCCCCGGCCGCTTTTATGGATTTGATGAGCAGTGTTTTCAAGCCTTATCTAGACAGGTTCGTCGTAGTATTCATCGATGACATTTTGGTCTATTCCTGAAGTGATGCGGATCACGAGGAACACTTGAGACTTGTACTTCAAGTACTTTGGAAAAAGGAGCTCTATGCCAAATTGAAAAAGTATGAGCTTTGGCTTAGAGAGGTGGCGTTTTTGGGCCATTTGATTTTAGGATCAGGTATAGCTGTGGATCCTAGGAAGATTGAAGCTATCAAGGACTAGCCGAGGCCGACGAGTGTCACCGAGATTCGGAGTTTTCTTGGCTTAGCCGGCTACTACCGAGGGTTTGTTGAGGGATTTGCAA >Acom_chr2 GCGTGACATTTTCTTTGTTCTTTTGAGCGGGTTCATTTTTAACTGGTATTCTCTCAATCGAATGAAGACAGTTTTTAAATCTTGGAAGAGATTTTCTTTGTTCTTTGTTTTCACAACCAAATCATTGACATAGCACTCTACTGTCTTGTGGAGTAGATCATCAAATATTCTAGTCATTACTCGCTGATATATAGCTTCGACATTCTTGAGGCCGAAAGGCATGATTTTATAACAATATATTCCAATGGGAGTTCTGAATGCAGTATGCTTCTCGTCCTCCGGTGCCATCTTGATTTTATTGTATCCTGATGACCCCATAAACGCGAATTTCTCACAACTCGATGTGTTGTCAATCATTAGTTCCGTTATTGATAATGGAAAGTCATCTTTTGGGCAAGCTTTATTTAAATCTCGAAAATCGACGCATATACAAATTTAGCTATTCTTTTTCTTGACCGGGACGATGCTAGCGATCCAATTAGCATATTTCTCCTCCCTTATAAAGCCTGCTTTAATGAGCTTTTTGGTTTTC >Acom_chr3 AAGGCACAGTTACAAGATCTATTGGATAAAGGCTTCATTCGACCTAGTATATCGCCTTAGGGAGCACCAGTTTTGTTTGTTAAGAAAAATGACGGATCTTTTCGATCGTGTGTGGACTATCGAGAGATTAACAAGGTTACAATCAAGAATAAGTATTTGTTGCCAAAGATTGATGATTTATTCGATCAACTCCAAGGGTCAAAAGTTTATTCCAAGATTGATCTACAATCAGGTTACCATTAGCTCAAGATCAAGCCTGAGGATGTTTCTAAAATCATCTTTAGAACACGGTATGGTCATTATGAGTTCATAGTTATGCTATTTGGGCTTACTAATGCCCCGGCAGCTTTTATAGATTTAATGAACCGTGTTTTCAAGTCTTACTTGGACAGGTTTGTGGTAGTATTTATCGACGATATCCTGATCAATTCTCAAAGTGACGAAGAGCATGAAAACCATTTGAGATTGGTGTTGCAAGTATTTCCAGAAAAGAAACTGTTTGCTAAGTTAAAGAAATGCAAATTTTGGCTTCGAGAGGTAGCATTTCTAGGACATGTAATTTCAGAGGCTGGGATAGCCGTGGATCCAAAAAAAATTGAGGCAATCAAAGAATGGCCTAGGCCGACTAGTGTTGCTGCGATTCAGAGTTTTCTTGGGCTAGCAGGATACTATCGGAGATTTGTTGAGAGATTTGCTA >Acom_CRM 
AAAAAACAAGTACCCATTGCCAAGGATTGACGATTTATTCGATCAGTTGCAGGGTCAAAGGTGTATTCAAAGATTGATCTCCAATCGGGGTATCATGAACTAAAGATTAGGCCGAAGGATGTGCACAAAACGGCGTACCGTACGCGGTATGGCCATTATGAGTTCACGGTAATGCCGTTTGGGCTTACTAATGCCCCTGCAGCATTTATGTACTTGATGAATCGAGTCTTTCGGCCATTATTGGATCGATGCGTCGTGGTGTTCATTGATGATGTGTTGGTCTACTCGAAAAGTAAGGAGGAGCACGAGGAGCATTTGAGAGCCGTGCTACAGATTCTCCGACAAAAGAAGCTCTATGCTAAGCTGAAGAAATGTGGCTTTTGGCTTACGGAGGTCACATTCTTGGGCCACGTGATATCGGGCGATGGGGTCTCGGTAGATCCTAAGAAGGTGGAGGCGATCAAGGACTTGCCTCGCCCAACAAATGTAGCGGAGGTCCGCAGTTTTCTTGGACTCGCGGGCTACTACCGGCGGTTTGTGGAGGGGTTCGCTA >Acom_Galadriel GTAGCCTGCAATGAAACGGCGGTAGTAGTTGACGAGCCCAAGGAAAGACCGCAACTCGGACACCGTCGTTGGGGTCTCCCATTCGCAGATGGCTCGCACCTTCCGCTGGTCCATGCGAATCAGGCCCTGGCCTATCCAGTGGCCAAGGAAGTGCACCTCTTCCTTTGCGAATATACACTTCTCCTTCTTGACCAACAACTGGTTCTCCCGCAGCACTTGGAAGACGATTCGCAAGTGCTCGATGTGCTCCTACAAGGTGTTACTGTATACCATGATGTCGTCGATGTACACCACCACAAAGCGATCGAGATAGGGATGAAACAGCTTGTTCATGAGTGTGCAGAAGGTTGCCGGCGCATTTATCAAGCTGAACGGCATGACAAGGAACTCATAGGCCCCGTACCTCGTCACGCACGTGGTCTTTTCCTCATCTCCTTCCGCAATCCACACTTGG Loading
# Makefile (new file, mode 100644)
#
# Aligns each FASTA input with ClustalW and derives a consensus sequence
# from every alignment:
#
#   setN.fa  --clustalw-->  setN.fasta  --cons-->  setN.fasta_cons.fa
#
# Usage:
#   make              build every alignment and consensus file listed below
#   make -j3          same, building the independent sets in parallel
#   make clean        remove the files listed in $(alignments) / $(consensi)

# Delete a half-written target when its recipe fails, so a truncated
# alignment or consensus file can never look "up to date".
.DELETE_ON_ERROR:

# Tool names; override on the command line if the binaries are installed
# under different names, e.g. `make CONS=em_cons`.
CLUSTALW ?= clustalw
CONS ?= cons

alignments := set1.fasta set2.fasta set3.fasta
# One consensus file per alignment: <alignment>_cons.fa
consensi := $(addsuffix _cons.fa,$(alignments))

# `all` and `clean` are commands, not files.
.PHONY: all clean

all: $(alignments) $(consensi)

# Multiple alignment of %.fa, written as FASTA to %.fasta.
# Only this stem's .dnd by-product is removed (the original `rm *.dnd`
# could delete another job's file under `make -j`); $(RM) is `rm -f`, so a
# missing .dnd does not fail the rule.
%.fasta: %.fa
	$(CLUSTALW) -OUTPUT=FASTA -INFILE=$<
	$(RM) $*.dnd

# Consensus sequence of an alignment; $@ is <alignment>_cons.fa.
%.fasta_cons.fa: %.fasta
	$(CONS) -sequence $< -outseq $@

# Remove only the files this Makefile is configured to build, so unrelated
# *.fasta files in the directory are left alone.
clean:
	$(RM) $(alignments) $(consensi)
Snakefile 0 → 100644 +16 −0 Original line number Original line Diff line number Diff line rule align: input: "{name}.fa" output: "{name}.fasta" run: shell("clustalw -OUTPUT=FASTA -INFILE={input}") shell("rm {wildcards.name}.dnd") rule consensi: input: "{name}.fasta" output: "{name}.fasta_cons.fa" shell: "em_cons -sequence {input} -outseq {output}"
#!/usr/bin/env nextflow
// main.nf (new file, mode 100644)
//
// Two-step pipeline written with DSL1 channel syntax (`from` / `into`):
//   ALIGN          *.fa     -> *.fasta      (clustalw)
//   GET_CONSENSUS  *.fasta  -> *_cons.fa    (em_cons)
//
// NOTE(review): DSL1 syntax may not be accepted by recent Nextflow
// releases -- confirm the Nextflow version this is run with.

// Input glob and publish directory. The defaults reproduce the previously
// hard-coded values; override with `--input '<glob>'` / `--outdir <dir>`.
params.input  = '*.fa'
params.outdir = '/home/lexa/pipelines'

sequences_ch = Channel.fromPath(params.input)

// Align one FASTA file with ClustalW and emit the resulting *.fasta file.
process ALIGN {
    publishDir params.outdir

    input:
    file x from sequences_ch

    output:
    file '*.fasta' into alignment

    script:
    // `rm -f`: a missing .dnd by-product must not fail the task.
    """
    clustalw -OUTPUT=FASTA -INFILE=${x}
    rm -f *.dnd
    """
}

// Compute the consensus of one alignment; output is <alignment>_cons.fa.
process GET_CONSENSUS {
    publishDir params.outdir

    input:
    file a from alignment

    output:
    file '*_cons.fa'

    script:
    """
    em_cons -sequence ${a} -outseq ${a}_cons.fa
    """
}
#!/usr/bin/env Rscript
# make_seqlogo.R (new file, mode 100755)
#
# Draw a sequence logo for the first positions of a DNA FASTA file.
#
# Usage:  make_seqlogo.R <fasta_file>
# Output: a 480x240 PNG named after the input with everything from the first
#         "." of the file name removed (set1.fasta -> set1.png).

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1) {
  stop("usage: make_seqlogo.R <fasta_file>", call. = FALSE)
}
filename <- args[1]

# Strip from the first "." of the *file name* only. Splitting the whole path
# at its first "." (as before) turned "./set1.fa" into "" and wrote ".png".
filebase <- sub("\\.[^/]*$", "", filename)

library(seqLogo)
library(Biostrings)

# Maximum number of leading positions shown in the logo.
max_positions <- 16

dna <- readDNAStringSet(filename)

# Per-position counts of A, C, G and T; other symbols are ignored.
# drop = FALSE keeps a matrix even for a single-column input.
cm <- consensusMatrix(dna)[c("A", "C", "G", "T"), , drop = FALSE]

# Convert counts to per-column frequencies.
col_totals <- colSums(cm)
cm <- sweep(cm, 2, col_totals, "/")
# Columns without any A/C/G/T would be 0/0 = NaN; give them a uniform
# distribution so they appear as zero-information positions.
cm[, col_totals == 0] <- 0.25

if (ncol(cm) > max_positions) {
  cm <- cm[, seq_len(max_positions), drop = FALSE]
}

pwmcm <- makePWM(cm)
png(file = paste0(filebase, ".png"), width = 480, height = 240)
seqLogo(pwmcm)
dev.off()
set1.fa 0 → 100644 +10 −0 Original line number Original line Diff line number Diff line >Acom_chr1 CTGGATAAGGGTTTTGTGCGACCGAGTGTTTCAACTTGGGAAGCACCGGTCCTATTTGTCAAGAAGAAGGATGGATCGCTTCGCTTGCGCGTGGATTATTGTGAACTCAATAAAGTCACGATCAAGAATAAGTATCCTTTACCGAGGATTGATGATTTATTTGATCAGCTCCAGGGATCTAGGGTGTATTCTAAGATCGACTTGCAGTCGGGATACCACCAGTTGAAGATCAAGCTTGAGGATGTGTCGAAAACGGCTTTTAGAACACAGTATGGACATTATGAGTTTACAGTTATGCCGTTTGGGCTTACTAACGCCCCGGCCGCTTTTATGGATTTGATGAGCAGTGTTTTCAAGCCTTATCTAGACAGGTTCGTCGTAGTATTCATCGATGACATTTTGGTCTATTCCTGAAGTGATGCGGATCACGAGGAACACTTGAGACTTGTACTTCAAGTACTTTGGAAAAAGGAGCTCTATGCCAAATTGAAAAAGTATGAGCTTTGGCTTAGAGAGGTGGCGTTTTTGGGCCATTTGATTTTAGGATCAGGTATAGCTGTGGATCCTAGGAAGATTGAAGCTATCAAGGACTAGCCGAGGCCGACGAGTGTCACCGAGATTCGGAGTTTTCTTGGCTTAGCCGGCTACTACCGAGGGTTTGTTGAGGGATTTGCAA >Acom_chr2 GCGTGACATTTTCTTTGTTCTTTTGAGCGGGTTCATTTTTAACTGGTATTCTCTCAATCGAATGAAGACAGTTTTTAAATCTTGGAAGAGATTTTCTTTGTTCTTTGTTTTCACAACCAAATCATTGACATAGCACTCTACTGTCTTGTGGAGTAGATCATCAAATATTCTAGTCATTACTCGCTGATATATAGCTTCGACATTCTTGAGGCCGAAAGGCATGATTTTATAACAATATATTCCAATGGGAGTTCTGAATGCAGTATGCTTCTCGTCCTCCGGTGCCATCTTGATTTTATTGTATCCTGATGACCCCATAAACGCGAATTTCTCACAACTCGATGTGTTGTCAATCATTAGTTCCGTTATTGATAATGGAAAGTCATCTTTTGGGCAAGCTTTATTTAAATCTCGAAAATCGACGCATATACAAATTTAGCTATTCTTTTTCTTGACCGGGACGATGCTAGCGATCCAATTAGCATATTTCTCCTCCCTTATAAAGCCTGCTTTAATGAGCTTTTTGGTTTTC >Acom_chr3 
AAGGCACAGTTACAAGATCTATTGGATAAAGGCTTCATTCGACCTAGTATATCGCCTTAGGGAGCACCAGTTTTGTTTGTTAAGAAAAATGACGGATCTTTTCGATCGTGTGTGGACTATCGAGAGATTAACAAGGTTACAATCAAGAATAAGTATTTGTTGCCAAAGATTGATGATTTATTCGATCAACTCCAAGGGTCAAAAGTTTATTCCAAGATTGATCTACAATCAGGTTACCATTAGCTCAAGATCAAGCCTGAGGATGTTTCTAAAATCATCTTTAGAACACGGTATGGTCATTATGAGTTCATAGTTATGCTATTTGGGCTTACTAATGCCCCGGCAGCTTTTATAGATTTAATGAACCGTGTTTTCAAGTCTTACTTGGACAGGTTTGTGGTAGTATTTATCGACGATATCCTGATCAATTCTCAAAGTGACGAAGAGCATGAAAACCATTTGAGATTGGTGTTGCAAGTATTTCCAGAAAAGAAACTGTTTGCTAAGTTAAAGAAATGCAAATTTTGGCTTCGAGAGGTAGCATTTCTAGGACATGTAATTTCAGAGGCTGGGATAGCCGTGGATCCAAAAAAAATTGAGGCAATCAAAGAATGGCCTAGGCCGACTAGTGTTGCTGCGATTCAGAGTTTTCTTGGGCTAGCAGGATACTATCGGAGATTTGTTGAGAGATTTGCTA >Acom_CRM AAAAAACAAGTACCCATTGCCAAGGATTGACGATTTATTCGATCAGTTGCAGGGTCAAAGGTGTATTCAAAGATTGATCTCCAATCGGGGTATCATGAACTAAAGATTAGGCCGAAGGATGTGCACAAAACGGCGTACCGTACGCGGTATGGCCATTATGAGTTCACGGTAATGCCGTTTGGGCTTACTAATGCCCCTGCAGCATTTATGTACTTGATGAATCGAGTCTTTCGGCCATTATTGGATCGATGCGTCGTGGTGTTCATTGATGATGTGTTGGTCTACTCGAAAAGTAAGGAGGAGCACGAGGAGCATTTGAGAGCCGTGCTACAGATTCTCCGACAAAAGAAGCTCTATGCTAAGCTGAAGAAATGTGGCTTTTGGCTTACGGAGGTCACATTCTTGGGCCACGTGATATCGGGCGATGGGGTCTCGGTAGATCCTAAGAAGGTGGAGGCGATCAAGGACTTGCCTCGCCCAACAAATGTAGCGGAGGTCCGCAGTTTTCTTGGACTCGCGGGCTACTACCGGCGGTTTGTGGAGGGGTTCGCTA >Acom_Galadriel GTAGCCTGCAATGAAACGGCGGTAGTAGTTGACGAGCCCAAGGAAAGACCGCAACTCGGACACCGTCGTTGGGGTCTCCCATTCGCAGATGGCTCGCACCTTCCGCTGGTCCATGCGAATCAGGCCCTGGCCTATCCAGTGGCCAAGGAAGTGCACCTCTTCCTTTGCGAATATACACTTCTCCTTCTTGACCAACAACTGGTTCTCCCGCAGCACTTGGAAGACGATTCGCAAGTGCTCGATGTGCTCCTACAAGGTGTTACTGTATACCATGATGTCGTCGATGTACACCACCACAAAGCGATCGAGATAGGGATGAAACAGCTTGTTCATGAGTGTGCAGAAGGTTGCCGGCGCATTTATCAAGCTGAACGGCATGACAAGGAACTCATAGGCCCCGTACCTCGTCACGCACGTGGTCTTTTCCTCATCTCCTTCCGCAATCCACACTTGG