Commit 2e3f2bc0 authored by Matej Lexa's avatar Matej Lexa
Browse files

New Rscript with comments and better logo length control

parent 33575ec5
Loading
Loading
Loading
Loading
+22 −6
Original line number Original line Diff line number Diff line
#!/usr/bin/env Rscript
#!/usr/bin/env Rscript

library(seqLogo)
library(Biostrings)

# Get command line arguments
args = commandArgs(trailingOnly=TRUE)
args = commandArgs(trailingOnly=TRUE)


# Set variables based on command arguments
filename <- args[1]
filename <- args[1]
logolen = 0
if(args[2] > 0) {
  logolen = args[2]
}
filebase <- strsplit(filename,split="\\.")[[1]][1]
filebase <- strsplit(filename,split="\\.")[[1]][1]


library(seqLogo)
# Read in input sequences from input filename (=args[1])
library(Biostrings)

dna <- readDNAStringSet(filename)
dna <- readDNAStringSet(filename)
numseq = length(dna)

# Align input sequences
cm <-consensusMatrix(dna)[1:4,]
cm <-consensusMatrix(dna)[1:4,]

# Count number of aligned bases (without indels) at each position
sumcm <- apply(cm,2,sum)
sumcm <- apply(cm,2,sum)
cm[1,] <- cm[1,]/sumcm
cm[1,] <- cm[1,]/sumcm
cm[2,] <- cm[2,]/sumcm
cm[2,] <- cm[2,]/sumcm
cm[3,] <- cm[3,]/sumcm
cm[3,] <- cm[3,]/sumcm
cm[4,] <- cm[4,]/sumcm
cm[4,] <- cm[4,]/sumcm
if(length(cm[1,]) > 16) {

  cm <- cm[,1:16]
# If consensus matrix too wide, shorten to logolen (=args[2])
if(length(cm[1,]) > logolen) {
  cm <- cm[,(which(sumcm>numseq-1)[1]+1):(which(sumcm>numseq-1)[1]+logolen)]
}
}
pwmcm <- makePWM(cm)


# Create sequence logo and save it as PNG file
pwmcm <- makePWM(cm)
png(file=paste0(filebase,".png"), width=480, height=240)
png(file=paste0(filebase,".png"), width=480, height=240)
seqLogo(pwmcm)
seqLogo(pwmcm)
dev.off()
dev.off()