Commit 2e3f2bc0 authored by Matej Lexa's avatar Matej Lexa
Browse files

New Rscript with comments and better logo length control

parent 33575ec5
Loading
Loading
Loading
Loading
+22 −6
Original line number Diff line number Diff line
#!/usr/bin/env Rscript

library(seqLogo)
library(Biostrings)

# Get command line arguments
args = commandArgs(trailingOnly=TRUE)

# Set variables based on command arguments
filename <- args[1]
logolen = 0
if(args[2] > 0) {
  logolen = args[2]
}
filebase <- strsplit(filename,split="\\.")[[1]][1]

library(seqLogo)
library(Biostrings)

# Read in input sequences from input filename (=args[1])
dna <- readDNAStringSet(filename)
numseq = length(dna)

# Align input sequences
cm <-consensusMatrix(dna)[1:4,]

# Count number of aligned bases (without indels) at each position
sumcm <- apply(cm,2,sum)
cm[1,] <- cm[1,]/sumcm
cm[2,] <- cm[2,]/sumcm
cm[3,] <- cm[3,]/sumcm
cm[4,] <- cm[4,]/sumcm
if(length(cm[1,]) > 16) {
  cm <- cm[,1:16]

# If consensus matrix too wide, shorten to logolen (=args[2])
if(length(cm[1,]) > logolen) {
  cm <- cm[,(which(sumcm>numseq-1)[1]+1):(which(sumcm>numseq-1)[1]+logolen)]
}
pwmcm <- makePWM(cm)

# Create sequence logo and save it as PNG file
pwmcm <- makePWM(cm)
png(file=paste0(filebase,".png"), width=480, height=240)
seqLogo(pwmcm)
dev.off()